@getmikk/core 2.0.14 → 2.0.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. package/README.md +4 -4
  2. package/package.json +2 -1
  3. package/src/analysis/type-flow.ts +1 -1
  4. package/src/cache/incremental-cache.ts +86 -80
  5. package/src/contract/contract-reader.ts +1 -0
  6. package/src/contract/lock-compiler.ts +95 -13
  7. package/src/contract/schema.ts +2 -0
  8. package/src/error-handler.ts +2 -1
  9. package/src/graph/cluster-detector.ts +2 -4
  10. package/src/graph/dead-code-detector.ts +303 -117
  11. package/src/graph/graph-builder.ts +21 -161
  12. package/src/graph/impact-analyzer.ts +1 -0
  13. package/src/graph/index.ts +2 -0
  14. package/src/graph/rich-function-index.ts +1080 -0
  15. package/src/graph/symbol-table.ts +252 -0
  16. package/src/hash/hash-store.ts +1 -0
  17. package/src/index.ts +2 -0
  18. package/src/parser/base-extractor.ts +19 -0
  19. package/src/parser/boundary-checker.ts +31 -12
  20. package/src/parser/error-recovery.ts +5 -4
  21. package/src/parser/function-body-extractor.ts +248 -0
  22. package/src/parser/go/go-extractor.ts +249 -676
  23. package/src/parser/index.ts +132 -318
  24. package/src/parser/language-registry.ts +57 -0
  25. package/src/parser/oxc-parser.ts +166 -28
  26. package/src/parser/oxc-resolver.ts +179 -11
  27. package/src/parser/parser-constants.ts +1 -0
  28. package/src/parser/rust/rust-extractor.ts +109 -0
  29. package/src/parser/tree-sitter/parser.ts +369 -62
  30. package/src/parser/tree-sitter/queries.ts +106 -10
  31. package/src/parser/types.ts +20 -1
  32. package/src/search/bm25.ts +21 -8
  33. package/src/search/direct-search.ts +472 -0
  34. package/src/search/embedding-provider.ts +249 -0
  35. package/src/search/index.ts +12 -0
  36. package/src/search/semantic-search.ts +435 -0
  37. package/src/utils/artifact-transaction.ts +1 -0
  38. package/src/utils/atomic-write.ts +1 -0
  39. package/src/utils/errors.ts +89 -4
  40. package/src/utils/fs.ts +104 -50
  41. package/src/utils/json.ts +1 -0
  42. package/src/utils/language-registry.ts +84 -6
  43. package/src/utils/path.ts +26 -0
  44. package/tests/dead-code.test.ts +3 -2
  45. package/tests/direct-search.test.ts +435 -0
  46. package/tests/error-recovery.test.ts +143 -0
  47. package/tests/fixtures/simple-api/src/index.ts +1 -1
  48. package/tests/go-parser.test.ts +19 -335
  49. package/tests/js-parser.test.ts +18 -1089
  50. package/tests/language-registry-all.test.ts +276 -0
  51. package/tests/language-registry.test.ts +6 -4
  52. package/tests/parse-diagnostics.test.ts +9 -96
  53. package/tests/parser.test.ts +42 -771
  54. package/tests/polyglot-parser.test.ts +117 -0
  55. package/tests/rich-function-index.test.ts +703 -0
  56. package/tests/tree-sitter-parser.test.ts +108 -80
  57. package/tests/ts-parser.test.ts +8 -8
  58. package/tests/verification.test.ts +175 -0
  59. package/src/parser/base-parser.ts +0 -16
  60. package/src/parser/go/go-parser.ts +0 -43
  61. package/src/parser/javascript/js-extractor.ts +0 -278
  62. package/src/parser/javascript/js-parser.ts +0 -101
  63. package/src/parser/typescript/ts-extractor.ts +0 -447
  64. package/src/parser/typescript/ts-parser.ts +0 -36
@@ -0,0 +1,109 @@
1
+ /* eslint-disable @typescript-eslint/no-explicit-any */
2
+ import { hashContent } from '../../hash/file-hasher.js'
3
+ import { BaseExtractor } from '../base-extractor.js'
4
+ import { LanguageRegistry } from '../language-registry.js'
5
+ import type { ParsedFile, ParsedFunction, ParsedClass, ParsedImport, ParsedExport } from '../types.js'
6
+
7
/**
 * Minimal, regex-driven extractor for Rust source files.
 *
 * Every line is trimmed and matched against three patterns that recognise
 * `fn` items, `use` declarations and `struct` / `enum` / `trait` items.
 * No real parsing takes place, so:
 *  - an item is only recognised by the line on which it starts (a `use` group
 *    that spans several lines is not detected);
 *  - `endLine` always equals `startLine` (placeholder);
 *  - `hash` of an item covers its trimmed first line only.
 *
 * Any `pub` prefix, including restricted forms such as `pub(crate)`, marks the
 * item as exported.
 */
export class RustExtractor extends BaseExtractor {
    /**
     * `fn` item: optional visibility (group 1), any run of qualifiers such as
     * `async`, `const`, `unsafe`, `extern "C"` (group 2), then the name (group 3).
     */
    private static readonly FN_PATTERN =
        /^(pub(?:\([^)]+\))?\s+)?((?:(?:default|const|async|unsafe|extern(?:\s+"[^"]*")?)\s+)*)fn\s+([A-Za-z_][A-Za-z0-9_]*)/;

    /** `use` declaration on a single line, optionally re-exported with `pub`. */
    private static readonly USE_PATTERN = /^(?:pub(?:\([^)]+\))?\s+)?use\s+([^;]+);/;

    /** Type-like item: optional visibility (group 1), keyword (group 2), name (group 3). */
    private static readonly TYPE_PATTERN =
        /^(pub(?:\([^)]+\))?\s+)?(?:unsafe\s+)?(struct|enum|trait)\s+([A-Z][A-Za-z0-9_]*)/;

    constructor() {
        super();
    }

    /**
     * Scans `content` line by line and collects functions, type-like items,
     * imports and exports.
     *
     * @param filePath Path used verbatim in ids and `file` fields; the returned
     *                 `path` has backslashes replaced by forward slashes.
     * @param content  Full text of the Rust source file.
     * @returns A `ParsedFile` whose `variables`, `generics`, `routes` and
     *          `calls` are always empty.
     */
    async extract(filePath: string, content: string): Promise<ParsedFile> {
        const functions: ParsedFunction[] = [];
        const classes: ParsedClass[] = []; // structs, enums and traits
        const imports: ParsedImport[] = [];
        const exports: ParsedExport[] = [];

        for (const [index, rawLine] of content.split('\n').entries()) {
            const line = rawLine.trim();
            const lineNumber = index + 1;

            const fnMatch = RustExtractor.FN_PATTERN.exec(line);
            if (fnMatch) {
                const [, visibility, qualifiers, name] = fnMatch;
                const isExported = Boolean(visibility);
                functions.push({
                    id: `fn:${filePath}:${name}`,
                    name,
                    file: filePath,
                    startLine: lineNumber,
                    endLine: lineNumber, // Placeholder
                    params: [],
                    returnType: 'unknown',
                    isExported,
                    // Read from the matched qualifiers rather than the whole line so
                    // that text after the signature cannot influence the flag.
                    isAsync: /\basync\b/.test(qualifiers),
                    calls: [],
                    hash: hashContent(line),
                    purpose: '',
                    edgeCasesHandled: [],
                    errorHandling: [],
                    detailedLines: []
                });
                if (isExported) {
                    exports.push({ name, type: 'function', file: filePath });
                }
            }

            const useMatch = RustExtractor.USE_PATTERN.exec(line);
            if (useMatch) {
                const usePath = useMatch[1].trim();
                imports.push({
                    source: usePath,
                    resolvedPath: '',
                    names: RustExtractor.importedNames(usePath),
                    isDefault: false,
                    isDynamic: false
                });
            }

            const typeMatch = RustExtractor.TYPE_PATTERN.exec(line);
            if (typeMatch) {
                const [, visibility, keyword, name] = typeMatch;
                const isExported = Boolean(visibility);
                classes.push({
                    id: `cls:${filePath}:${name}`,
                    name,
                    file: filePath,
                    startLine: lineNumber,
                    endLine: lineNumber, // Placeholder
                    isExported,
                    methods: [],
                    properties: [],
                    hash: hashContent(line),
                    purpose: ''
                });
                if (isExported) {
                    // Decide from the matched keyword: a substring test on the line
                    // misfires on names such as `Instruction` or `Constructor`.
                    const exportType = keyword === 'struct' ? 'class' : 'interface';
                    exports.push({ name, type: exportType, file: filePath });
                }
            }
        }

        return {
            path: filePath.replace(/\\/g, '/'),
            language: 'rust',
            functions,
            classes,
            variables: [],
            generics: [],
            imports,
            exports,
            routes: [],
            calls: [],
            hash: hashContent(content),
            parsedAt: Date.now()
        };
    }

    /**
     * Lists the local names a `use` path brings into scope.
     *
     * `a::b` yields `b`; `a::b as c` yields `c`; `a::{b, c as d}` yields `b`
     * and `d`; `a::{self, b}` yields `a` and `b`; a glob keeps its `*`.
     */
    private static importedNames(usePath: string): string[] {
        const open = usePath.indexOf('{');
        if (open === -1) {
            const alias = /\sas\s+([A-Za-z_][A-Za-z0-9_]*)\s*$/.exec(usePath);
            if (alias) return [alias[1]];
            const last = usePath.split('::').pop() ?? usePath;
            return [last.trim()];
        }

        const close = usePath.lastIndexOf('}');
        const inner = usePath.slice(open + 1, close === -1 ? undefined : close);
        // Segment that a bare `self` inside the group refers to.
        const parent = (usePath.slice(0, open).replace(/::\s*$/, '').split('::').pop() ?? '').trim();

        const names: string[] = [];
        for (const item of RustExtractor.splitTopLevel(inner)) {
            if (item === 'self') {
                if (parent) names.push(parent);
            } else {
                names.push(...RustExtractor.importedNames(item));
            }
        }
        return names;
    }

    /** Splits a brace-group body on commas that are not inside a nested group. */
    private static splitTopLevel(list: string): string[] {
        const items: string[] = [];
        let depth = 0;
        let current = '';
        for (const ch of list) {
            if (ch === '{') depth++;
            else if (ch === '}') depth--;

            if (ch === ',' && depth === 0) {
                if (current.trim()) items.push(current.trim());
                current = '';
            } else {
                current += ch;
            }
        }
        if (current.trim()) items.push(current.trim());
        return items;
    }
}
101
+
102
// Import side effect: the regex-based Rust extractor registers itself for
// `.rs` files. No tree-sitter grammar is named for this entry.
LanguageRegistry.getInstance().register({
    name: 'rust',
    extensions: ['.rs'],
    treeSitterGrammar: '',
    extractor: new RustExtractor(),
    semanticFeatures: {
        hasTypeSystem: true,
        hasGenerics: true,
        hasMacros: true,
        hasAnnotations: false,
        hasPatternMatching: true,
    },
});
@@ -1,8 +1,10 @@
1
+ /* eslint-disable @typescript-eslint/no-explicit-any */
1
2
  import * as path from 'node:path'
2
3
  import { TreeSitterResolver } from './resolver.js'
4
+ import { LanguageRegistry } from '../language-registry.js'
3
5
  import { createRequire } from 'node:module'
4
6
  import { hashContent } from '../../hash/file-hasher.js'
5
- import { BaseParser } from '../base-parser.js'
7
+ import { BaseExtractor } from '../base-extractor.js'
6
8
  import type { ParsedFile, ParsedFunction, ParsedClass, ParsedParam, ParsedImport, ParsedGeneric } from '../types.js'
7
9
  import * as Queries from './queries.js'
8
10
 
@@ -15,20 +17,106 @@ const _require = getRequire()
15
17
  let Parser: any = null
16
18
  let Language: any = null
17
19
  let initialized = false
18
- let initPromise: Promise<void> | null = null
19
-
20
- try {
21
- const ParserModule = _require('web-tree-sitter')
22
- Parser = ParserModule
23
- if (ParserModule.init) {
24
- initPromise = ParserModule.init().then(() => {
25
- Language = ParserModule.Language
26
- initialized = true
27
- }).catch(() => { /* ignore */ })
28
- } else if (ParserModule.default?.Language) {
29
- Language = ParserModule.default.Language
20
+ let initPromise: Promise<boolean> | null = null
21
+
22
/**
 * Heuristically decides whether `module` exposes a usable tree-sitter API.
 *
 * A value is accepted when any of the following holds:
 *  - it has an `init` function;
 *  - it has a `Language` property (any value other than `undefined`);
 *  - its `default` export has a truthy `Language`;
 *  - it is a function whose prototype carries a truthy `init` or `Language`.
 *
 * Anything exposing `HEAP8` / `HEAP16` / `HEAP32` is rejected before those
 * checks (presumably a raw Emscripten module rather than the wrapper API;
 * confirm against the web-tree-sitter build in use).
 *
 * @param module Value returned by `require('web-tree-sitter')` or its `default`.
 * @returns Always a strict boolean, never the matched property itself.
 */
function isValidTreeSitterModule(module: any): boolean {
    if (!module) return false
    if (module.HEAP8 || module.HEAP16 || module.HEAP32) return false

    if (typeof module.init === 'function') return true
    if (typeof module.Language !== 'undefined') return true
    if (module.default?.Language) return true

    if (typeof module === 'function') {
        const proto = module.prototype
        // Coerce explicitly: the raw `||` chain would leak the prototype member
        // (or `undefined`) through the declared `boolean` return type.
        return Boolean(proto?.init || proto?.Language)
    }
    return false
}
32
+
33
/**
 * Lazily loads `web-tree-sitter` and publishes the result through the
 * module-level `Parser`, `Language` and `initialized` variables.
 *
 * The first call starts a single initialization attempt and stores its promise
 * in `initPromise`; concurrent and later callers await that same promise. A
 * failed attempt is therefore not retried by later calls. The only retry is
 * the one inside the attempt itself, which clears the `require` cache and
 * loads the package a second time.
 *
 * @returns `true` once both `initialized` and `Language` are set, else `false`.
 *          Failures are logged with `console.warn` and never thrown.
 */
async function ensureInitialized(): Promise<boolean> {
    if (initialized && Language) return true

    // Export shape where the package itself is a constructor that already
    // carries `Language` on its prototype: adopt it without awaiting anything.
    const adoptPrototypeLanguage = (candidate: any): boolean => {
        if (typeof candidate !== 'function' || !candidate.prototype?.Language) return false
        Language = candidate.prototype.Language
        Parser = candidate
        initialized = true
        return true
    }

    const attemptInit = async (): Promise<boolean> => {
        try {
            let ParserModule = _require('web-tree-sitter')
            if (!ParserModule) return false

            if (adoptPrototypeLanguage(ParserModule)) return true
            if (typeof ParserModule === 'function' && ParserModule.prototype?.init) {
                await ParserModule.prototype.init()
                Language = ParserModule.prototype.Language
                Parser = ParserModule
                initialized = !!Language
                return initialized
            }

            // Fall back to the `default` export when the top-level value does not
            // look like the tree-sitter API.
            if (!isValidTreeSitterModule(ParserModule)) {
                ParserModule = ParserModule?.default || ParserModule
            }

            // Still not usable: drop every cached module whose key mentions the
            // package and load it afresh, then repeat the same shape checks.
            if (!isValidTreeSitterModule(ParserModule)) {
                const moduleCache = _require.cache
                for (const key of Object.keys(moduleCache || {})) {
                    if (key.includes('web-tree-sitter')) {
                        delete moduleCache[key]
                    }
                }
                ParserModule = _require('web-tree-sitter')
                if (adoptPrototypeLanguage(ParserModule)) return true
                if (!isValidTreeSitterModule(ParserModule)) {
                    ParserModule = ParserModule?.default || ParserModule
                }
            }

            if (!isValidTreeSitterModule(ParserModule)) {
                return false
            }

            Parser = ParserModule

            if (typeof ParserModule.init === 'function') {
                await ParserModule.init()
                Language = ParserModule.Language
                initialized = !!Language
            } else if (ParserModule.default?.Language) {
                Language = ParserModule.default.Language
                initialized = true
            } else if (ParserModule.Language) {
                Language = ParserModule.Language
                initialized = true
            }

            return initialized && !!Language
        } catch (err) {
            console.warn('[tree-sitter] Initialization failed:', err)
            return false
        }
    }

    // `attemptInit` catches its own errors, so the shared promise does not
    // reject. Every path through it leaves `initialized` and `Language` in the
    // state its return value reports, so the module state is the single source
    // of truth for the answer.
    if (!initPromise) {
        initPromise = attemptInit()
    }
    await initPromise
    return initialized && !!Language
}
32
120
 
33
121
  function isExportedByLanguage(ext: string, name: string, nodeText: string): boolean {
34
122
  switch (ext) {
@@ -119,7 +207,35 @@ function findAllChildren(node: any, predicate: (n: any) => boolean): any[] {
119
207
  return results
120
208
  }
121
209
 
122
- function extractGenericsFromNode(defNode: any, filePath: string): ParsedGeneric[] {
210
/**
 * Collects decorator-like names attached to a definition node.
 *
 * Only the direct children of `defNode` are inspected:
 *  - a child of type `decorator`, `attribute` or `annotation` contributes the
 *    text of the first node that `findFirstChild` reports as an `identifier`,
 *    `attribute` or `decorator_target`;
 *  - a child of type `expression_statement` contributes the text of the first
 *    `identifier` found under the first `decorator` / `attribute` it contains.
 *
 * @param defNode Syntax node of the definition; may be null or lack children.
 * @returns Names in child order; empty when nothing matches.
 */
function extractDecoratorsFromNode(defNode: any): string[] {
    const children = defNode?.children
    if (!children) return []

    const names: string[] = []
    const pushText = (node: any): void => {
        if (node?.text) names.push(node.text)
    }

    for (const child of children) {
        switch (child.type) {
            case 'decorator':
            case 'attribute':
            case 'annotation':
                pushText(findFirstChild(child, n =>
                    n.type === 'identifier' ||
                    n.type === 'attribute' ||
                    n.type === 'decorator_target'
                ))
                break
            case 'expression_statement': {
                const wrapped = findFirstChild(child, n => n.type === 'decorator' || n.type === 'attribute')
                if (wrapped) {
                    pushText(findFirstChild(wrapped, n => n.type === 'identifier'))
                }
                break
            }
            default:
                break
        }
    }
    return names
}
237
+
238
+ function _extractGenericsFromNode(defNode: any, filePath: string): ParsedGeneric[] {
123
239
  const generics: ParsedGeneric[] = []
124
240
  if (!defNode) return generics
125
241
 
@@ -179,7 +295,7 @@ function assignCallsToFunctions(
179
295
  return unassigned
180
296
  }
181
297
 
182
- export class TreeSitterParser extends BaseParser {
298
+ export class TreeSitterParser extends BaseExtractor {
183
299
  private parser: any = null
184
300
  private languages = new Map<string, any>()
185
301
  private nameCounter = new Map<string, number>()
@@ -190,10 +306,9 @@ export class TreeSitterParser extends BaseParser {
190
306
  }
191
307
 
192
308
  private async init() {
193
- if (!this.parser) {
194
- if (!Parser || !initPromise) return
195
- await initPromise.catch(() => {})
196
- if (!Language) return
309
+ if (this.parser) return
310
+ const ready = await ensureInitialized()
311
+ if (ready && Parser) {
197
312
  this.parser = new Parser()
198
313
  }
199
314
  }
@@ -203,39 +318,57 @@ export class TreeSitterParser extends BaseParser {
203
318
  return Boolean(this.parser)
204
319
  }
205
320
 
206
- async parse(filePath: string, content: string): Promise<ParsedFile> {
207
- this.nameCounter.clear()
321
/**
 * Parses one source file with tree-sitter.
 *
 * Falls back to `buildEmptyFile` (after a `console.warn`) when the parser could
 * not be initialized or no grammar is available for the file's extension.
 * Errors thrown by `parseWithConfig` propagate to the caller.
 *
 * NOTE(review): `nameCounter` is not reset in this method; confirm that
 * carrying its state from one file to the next is intended.
 *
 * @param filePath Path of the file; only its extension selects the grammar.
 * @param content  Full text of the file.
 */
async extract(filePath: string, content: string): Promise<ParsedFile> {
    // Normalize once so that every return path labels the file from the same
    // lower-cased extension, matching the other call sites of buildEmptyFile.
    const ext = path.extname(filePath).toLowerCase()

    await this.init()
    if (!this.parser) {
        console.warn('[tree-sitter] Parser not initialized')
        return this.buildEmptyFile(filePath, content, ext)
    }

    const config = await this.getLanguageConfig(ext)
    if (!config || !config.lang) {
        console.warn('[tree-sitter] Language not available for', ext)
        return this.buildEmptyFile(filePath, content, ext)
    }

    return this.parseWithConfig(filePath, content, ext, config)
}
228
341
 
229
342
  private async parseWithConfig(filePath: string, content: string, ext: string, config: any): Promise<ParsedFile> {
230
343
  this.parser!.setLanguage(config.lang)
231
- const tree = this.parser!.parse(content)
232
- const query = config.lang.query(config.query)
233
344
 
345
+ let tree: any = null
346
+ try {
347
+ tree = this.parser!.parse(content)
348
+ } catch (parseErr) {
349
+ console.warn(`[tree-sitter] Parse failed for ${ext}:`, parseErr instanceof Error ? parseErr.message : String(parseErr))
350
+ return this.buildEmptyFile(filePath, content, ext)
351
+ }
352
+
353
+ let query: any = null
354
+ try {
355
+ query = config.lang.query(config.query)
356
+ } catch (queryErr) {
357
+ console.warn(`[tree-sitter] Query compilation failed for ${ext}:`, queryErr instanceof Error ? queryErr.message : String(queryErr))
358
+ return this.buildEmptyFile(filePath, content, ext)
359
+ }
360
+
234
361
  if (!query) {
235
362
  return this.buildEmptyFile(filePath, content, ext)
236
363
  }
237
364
 
238
- const matches = query.matches(tree.rootNode)
365
+ let matches: any[] = []
366
+ try {
367
+ matches = query.matches(tree.rootNode)
368
+ } catch (matchErr) {
369
+ console.warn(`[tree-sitter] Query execution failed for ${ext}:`, matchErr instanceof Error ? matchErr.message : String(matchErr))
370
+ return this.buildEmptyFile(filePath, content, ext)
371
+ }
239
372
 
240
373
  const functions: ParsedFunction[] = []
241
374
  const classesMap = new Map<string, ParsedClass>()
@@ -321,7 +454,6 @@ export class TreeSitterParser extends BaseParser {
321
454
 
322
455
  // --- Generic types ---
323
456
  if (captures['generic.name'] || captures['generic.arg']) {
324
- const genName = captures['generic.name']?.text || ''
325
457
  const genArg = captures['generic.arg']?.text || ''
326
458
  if (genArg && !generics.some(g => g.name === genArg)) {
327
459
  generics.push({
@@ -361,6 +493,7 @@ export class TreeSitterParser extends BaseParser {
361
493
 
362
494
  const returnType = extractReturnType(ext, defNode, nodeText)
363
495
  const params = extractParamsFromNode(defNode)
496
+ const decorators = extractDecoratorsFromNode(defNode)
364
497
 
365
498
  functions.push({
366
499
  id: fnId,
@@ -373,6 +506,7 @@ export class TreeSitterParser extends BaseParser {
373
506
  isExported: exported,
374
507
  isAsync,
375
508
  calls: [],
509
+ decorators: decorators.length > 0 ? decorators : undefined,
376
510
  hash: hashContent(nodeText),
377
511
  purpose: extractDocComment(content, startLine),
378
512
  edgeCasesHandled: [],
@@ -402,10 +536,6 @@ export class TreeSitterParser extends BaseParser {
402
536
  const clsId = `class:${filePath}:${clsName}`
403
537
 
404
538
  if (!classesMap.has(clsId)) {
405
- const isEnum = type === 'definition.enum'
406
- const isStruct = type === 'definition.struct'
407
- const isUnion = type === 'definition.union'
408
-
409
539
  classesMap.set(clsId, {
410
540
  id: clsId,
411
541
  name: clsName,
@@ -533,13 +663,25 @@ export class TreeSitterParser extends BaseParser {
533
663
  baseDirs.add(path.join(parentDir, 'node_modules'))
534
664
 
535
665
  // Also check sibling directories in the parent for monorepo setups
536
- // (e.g., metis and Mesh are siblings under the same parent)
666
+ // Also check subdirs like Mesh/packages/core/node_modules
537
667
  try {
538
668
  const fs = await import('node:fs')
539
669
  const entries = fs.readdirSync(parentDir, { withFileTypes: true })
540
670
  for (const entry of entries) {
541
671
  if (entry.isDirectory() && entry.name !== path.basename(current)) {
542
672
  baseDirs.add(path.join(parentDir, entry.name, 'node_modules'))
673
+ // Also check for packages/*/node_modules patterns
674
+ const subPath = path.join(parentDir, entry.name, 'packages')
675
+ if (fs.existsSync(subPath)) {
676
+ try {
677
+ const pkgEntries = fs.readdirSync(subPath, { withFileTypes: true })
678
+ for (const pkg of pkgEntries) {
679
+ if (pkg.isDirectory()) {
680
+ baseDirs.add(path.join(subPath, pkg.name, 'node_modules'))
681
+ }
682
+ }
683
+ } catch { /* skip */ }
684
+ }
543
685
  }
544
686
  }
545
687
  } catch { /* skip */ }
@@ -550,9 +692,27 @@ export class TreeSitterParser extends BaseParser {
550
692
  const possiblePaths: string[] = []
551
693
  for (const baseDir of baseDirs) {
552
694
  if (!baseDir) continue
553
- possiblePaths.push(
554
- path.join(baseDir, 'node_modules/tree-sitter-wasms/out', `tree-sitter-${nameForFile}.wasm`),
555
- )
695
+ const p = path.join(baseDir, 'node_modules/tree-sitter-wasms/out', `tree-sitter-${nameForFile}.wasm`)
696
+ possiblePaths.push(p)
697
+
698
+ // Also check Mesh/packages/*/node_modules directly
699
+ const meshBase = path.join(baseDir, '..', 'Mesh', 'packages')
700
+ try {
701
+ const fs = await import('node:fs')
702
+ if (fs.existsSync(meshBase)) {
703
+ const entries = fs.readdirSync(meshBase, { withFileTypes: true })
704
+ for (const entry of entries) {
705
+ if (entry.isDirectory()) {
706
+ possiblePaths.push(path.join(meshBase, entry.name, 'node_modules', 'tree-sitter-wasms', 'out', `tree-sitter-${nameForFile}.wasm`))
707
+ }
708
+ }
709
+ }
710
+ } catch { /* skip */ }
711
+ }
712
+
713
+ if (process.env.MIKK_DEBUG) {
714
+ console.log('[tree-sitter] Searching for:', name)
715
+ console.log('[tree-sitter] Paths checked:', possiblePaths.slice(0, 5).map(p => p.slice(-50)))
556
716
  }
557
717
 
558
718
  let wasmPath = ''
@@ -623,6 +783,14 @@ export class TreeSitterParser extends BaseParser {
623
783
  }
624
784
 
625
785
  private async getLanguageConfig(ext: string) {
786
+ // Ensure parser is initialized first
787
+ await this.init()
788
+
789
+ if (!this.parser) {
790
+ console.warn('[tree-sitter] Parser not initialized, returning null for', ext)
791
+ return null
792
+ }
793
+
626
794
  switch (ext) {
627
795
  case '.py':
628
796
  return { lang: await this.loadLang('python'), query: Queries.PYTHON_QUERIES }
@@ -631,8 +799,13 @@ export class TreeSitterParser extends BaseParser {
631
799
  case '.kt':
632
800
  case '.kts':
633
801
  return { lang: await this.loadLang('kotlin'), query: Queries.KOTLIN_QUERIES }
802
+ case '.scala':
803
+ case '.sc':
804
+ return { lang: await this.loadLang('scala'), query: Queries.SCALA_QUERIES }
634
805
  case '.swift':
635
806
  return { lang: await this.loadLang('swift'), query: Queries.SWIFT_QUERIES }
807
+ case '.dart':
808
+ return { lang: await this.loadLang('dart'), query: Queries.SWIFT_QUERIES }
636
809
  case '.c':
637
810
  case '.h':
638
811
  return { lang: await this.loadLang('c'), query: Queries.C_QUERIES }
@@ -649,35 +822,169 @@ export class TreeSitterParser extends BaseParser {
649
822
  return { lang: await this.loadLang('go'), query: Queries.GO_QUERIES }
650
823
  case '.rs':
651
824
  return { lang: await this.loadLang('rust'), query: Queries.RUST_QUERIES }
825
+ case '.zig':
826
+ return { lang: await this.loadLang('zig'), query: Queries.ZIG_QUERIES }
652
827
  case '.php':
653
828
  return { lang: await this.loadLang('php'), query: Queries.PHP_QUERIES }
654
829
  case '.rb':
655
830
  return { lang: await this.loadLang('ruby'), query: Queries.RUBY_QUERIES }
831
+ case '.hs':
832
+ return { lang: await this.loadLang('haskell'), query: Queries.HASKELL_QUERIES }
833
+ case '.ex':
834
+ case '.exs':
835
+ return { lang: await this.loadLang('elixir'), query: Queries.ELIXIR_QUERIES }
836
+ case '.clj':
837
+ case '.cljs':
838
+ case '.cljc':
839
+ return { lang: await this.loadLang('clojure'), query: Queries.CLOJURE_QUERIES }
840
+ case '.fs':
841
+ case '.fsx':
842
+ case '.fsi':
843
+ return { lang: await this.loadLang('fsharp'), query: Queries.FSHARP_QUERIES }
844
+ case '.ml':
845
+ case '.mli':
846
+ return { lang: await this.loadLang('ocaml'), query: Queries.OCAML_QUERIES }
847
+ case '.pl':
848
+ case '.pm':
849
+ return { lang: await this.loadLang('perl'), query: Queries.PERL_QUERIES }
850
+ case '.r':
851
+ case '.R':
852
+ return { lang: await this.loadLang('r'), query: Queries.R_QUERIES }
853
+ case '.jl':
854
+ return { lang: await this.loadLang('julia'), query: Queries.JULIA_QUERIES }
855
+ case '.lua':
856
+ return { lang: await this.loadLang('lua'), query: Queries.LUA_QUERIES }
857
+ case '.sql':
858
+ return { lang: await this.loadLang('sql'), query: Queries.SQL_QUERIES }
859
+ case '.tf':
860
+ return { lang: await this.loadLang('hcl'), query: Queries.HCL_QUERIES }
861
+ case '.sh':
862
+ case '.bash':
863
+ case '.zsh':
864
+ return { lang: await this.loadLang('bash'), query: Queries.BASH_QUERIES }
656
865
  default:
657
866
  return null
658
867
  }
659
868
  }
660
869
  }
661
870
 
871
// Import side effect: register one shared TreeSitterParser instance as the
// extractor for every language listed below.
const tsParser = new TreeSitterParser();
const registry = LanguageRegistry.getInstance();

// Feature-flag presets shared by several languages.
// NOTE(review): `functionalFeatures` reports no pattern matching although it
// is applied to Haskell, Elixir, Clojure and OCaml - confirm this is intended.
const standardFeatures = {
    hasTypeSystem: true,
    hasGenerics: true,
    hasMacros: false,
    hasAnnotations: false,
    hasPatternMatching: true,
};

const functionalFeatures = {
    hasTypeSystem: true,
    hasGenerics: true,
    hasMacros: false,
    hasAnnotations: false,
    hasPatternMatching: false,
};

const scriptingFeatures = {
    hasTypeSystem: false,
    hasGenerics: false,
    hasMacros: false,
    hasAnnotations: false,
    hasPatternMatching: false,
};

// `grammar` overrides the grammar name when it differs from the registry name;
// it mirrors the name this file passes to `loadLang` for that language.
const languages: Array<{ name: string; extensions: string[]; features: typeof standardFeatures; grammar?: string }> = [
    // JVM Languages
    { name: 'java', extensions: ['.java'], features: standardFeatures },
    { name: 'kotlin', extensions: ['.kt', '.kts'], features: standardFeatures },
    { name: 'scala', extensions: ['.scala', '.sc'], features: standardFeatures },

    // Mobile Languages
    { name: 'swift', extensions: ['.swift'], features: standardFeatures },
    { name: 'dart', extensions: ['.dart'], features: standardFeatures },

    // C Family
    { name: 'c', extensions: ['.c', '.h'], features: standardFeatures },
    { name: 'cpp', extensions: ['.cpp', '.cc', '.cxx', '.hpp', '.hxx', '.hh'], features: standardFeatures },
    { name: 'csharp', extensions: ['.cs'], features: standardFeatures },

    // Systems Languages
    { name: 'rust', extensions: ['.rs'], features: standardFeatures },
    { name: 'zig', extensions: ['.zig'], features: standardFeatures },

    // Web Languages
    { name: 'php', extensions: ['.php'], features: standardFeatures },
    { name: 'ruby', extensions: ['.rb'], features: scriptingFeatures },

    // Scripting Languages
    { name: 'python', extensions: ['.py', '.pyw'], features: scriptingFeatures },
    { name: 'lua', extensions: ['.lua'], features: scriptingFeatures },

    // Functional Languages
    { name: 'haskell', extensions: ['.hs'], features: functionalFeatures },
    { name: 'elixir', extensions: ['.ex', '.exs'], features: functionalFeatures },
    { name: 'clojure', extensions: ['.clj', '.cljs', '.cljc'], features: functionalFeatures },

    // .NET Family
    { name: 'fsharp', extensions: ['.fs', '.fsx', '.fsi'], features: standardFeatures },

    // ML Family
    { name: 'ocaml', extensions: ['.ml', '.mli'], features: functionalFeatures },

    // Other Languages
    { name: 'perl', extensions: ['.pl', '.pm'], features: scriptingFeatures },
    { name: 'r', extensions: ['.r', '.R'], features: scriptingFeatures },
    { name: 'julia', extensions: ['.jl'], features: scriptingFeatures },

    // Config/Special Purpose
    { name: 'sql', extensions: ['.sql'], features: scriptingFeatures },
    { name: 'terraform', extensions: ['.tf'], features: scriptingFeatures, grammar: 'hcl' },
    { name: 'shell', extensions: ['.sh', '.bash', '.zsh'], features: scriptingFeatures, grammar: 'bash' },
];

for (const { grammar, ...lang } of languages) {
    registry.register({
        ...lang,
        // Terraform files are parsed with the `hcl` grammar and shell scripts with
        // `bash`, so the advertised grammar must not be derived from the name.
        treeSitterGrammar: `tree-sitter-${grammar ?? lang.name}`,
        extractor: tsParser,
        semanticFeatures: lang.features
    });
}
957
+
662
958
  function extensionToLanguage(ext: string): ParsedFile['language'] {
663
- switch (ext) {
664
- case '.py': return 'python'
665
- case '.java': return 'java'
666
- case '.kt':
667
- case '.kts':
668
- return 'kotlin'
669
- case '.swift':
670
- return 'swift'
671
- case '.c': case '.h': return 'c'
672
- case '.cpp': case '.cc': case '.hpp': return 'cpp'
673
- case '.cxx': case '.hxx': case '.hh': return 'cpp'
674
- case '.cs': return 'csharp'
675
- case '.go': return 'go'
676
- case '.rs': return 'rust'
677
- case '.php': return 'php'
678
- case '.rb': return 'ruby'
679
- default: return 'unknown'
680
- }
959
+ const langMap: Record<string, ParsedFile['language']> = {
960
+ '.py': 'python', '.pyw': 'python',
961
+ '.java': 'java',
962
+ '.kt': 'kotlin', '.kts': 'kotlin',
963
+ '.scala': 'scala', '.sc': 'scala',
964
+ '.swift': 'swift',
965
+ '.dart': 'dart',
966
+ '.c': 'c', '.h': 'c',
967
+ '.cpp': 'cpp', '.cc': 'cpp', '.cxx': 'cpp', '.hpp': 'cpp', '.hxx': 'cpp', '.hh': 'cpp',
968
+ '.cs': 'csharp',
969
+ '.rs': 'rust',
970
+ '.zig': 'rust',
971
+ '.php': 'php',
972
+ '.rb': 'ruby',
973
+ '.go': 'go',
974
+ '.hs': 'haskell',
975
+ '.ex': 'elixir', '.exs': 'elixir',
976
+ '.clj': 'clojure', '.cljs': 'clojure', '.cljc': 'clojure',
977
+ '.fs': 'csharp', '.fsx': 'csharp', '.fsi': 'csharp',
978
+ '.ml': 'ocaml', '.mli': 'ocaml',
979
+ '.pl': 'perl', '.pm': 'perl',
980
+ '.r': 'r', '.R': 'r',
981
+ '.jl': 'julia',
982
+ '.lua': 'lua',
983
+ '.sql': 'sql',
984
+ '.tf': 'terraform',
985
+ '.sh': 'shell', '.bash': 'shell', '.zsh': 'shell',
986
+ };
987
+ return langMap[ext] ?? 'unknown';
681
988
  }
682
989
 
683
990
  function extractReturnType(ext: string, defNode: any, nodeText: string): string {