@getmikk/core 2.0.10 → 2.0.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,13 +1,11 @@
1
1
  import * as path from 'node:path'
2
+ import { TreeSitterResolver } from './resolver.js'
2
3
  import { createRequire } from 'node:module'
3
4
  import { hashContent } from '../../hash/file-hasher.js'
4
5
  import { BaseParser } from '../base-parser.js'
5
- import type { ParsedFile, ParsedFunction, ParsedClass, ParsedParam, ParsedImport } from '../types.js'
6
+ import type { ParsedFile, ParsedFunction, ParsedClass, ParsedParam, ParsedImport, ParsedGeneric } from '../types.js'
6
7
  import * as Queries from './queries.js'
7
8
 
8
- // Safely require web-tree-sitter via CJS.
9
- // Wrapped in try/catch so that importing this module never throws when the
10
- // package is absent — callers receive an empty ParsedFile instead.
11
9
  const getRequire = () => {
12
10
  if (typeof require !== 'undefined') return require
13
11
  return createRequire(import.meta.url)
@@ -15,62 +13,68 @@ const getRequire = () => {
15
13
  const _require = getRequire()
16
14
 
17
15
  let Parser: any = null
16
+ let Language: any = null
17
+ let initialized = false
18
+ let initPromise: Promise<void> | null = null
19
+
18
20
  try {
19
21
  const ParserModule = _require('web-tree-sitter')
20
- Parser = ParserModule.Parser ?? ParserModule
21
- } catch { /* web-tree-sitter not installed — Parser stays null */ }
22
-
23
- // ---------------------------------------------------------------------------
24
- // Language-specific export visibility rules
25
- // ---------------------------------------------------------------------------
26
-
27
- /**
28
- * Determine whether a function node is exported based on language conventions.
29
- * Python: public if name does not start with underscore.
30
- * Java/C#/Rust: requires an explicit visibility keyword in the node text.
31
- * Go: exported if name starts with an uppercase letter.
32
- * All others (C, C++, PHP, Ruby): default to false (no reliable static rule).
33
- */
22
+ Parser = ParserModule
23
+ if (ParserModule.init) {
24
+ initPromise = ParserModule.init().then(() => {
25
+ Language = ParserModule.Language
26
+ initialized = true
27
+ }).catch(() => { /* ignore */ })
28
+ } else if (ParserModule.default?.Language) {
29
+ Language = ParserModule.default.Language
30
+ }
31
+ } catch { /* web-tree-sitter not installed */ }
32
+
34
33
  function isExportedByLanguage(ext: string, name: string, nodeText: string): boolean {
35
34
  switch (ext) {
36
35
  case '.py':
37
36
  return !name.startsWith('_')
38
37
  case '.java':
39
- case '.cs':
40
38
  return /\bpublic\b/.test(nodeText)
39
+ case '.cs':
40
+ return /\bpublic\b/.test(nodeText) && !/\binternal\b/.test(nodeText)
41
41
  case '.go':
42
42
  return name.length > 0 && name[0] === name[0].toUpperCase() && name[0] !== name[0].toLowerCase()
43
43
  case '.rs':
44
- return /\bpub\b/.test(nodeText)
44
+ return /\bpub\b/.test(nodeText) || /\bpub\s*\(crate\)/.test(nodeText)
45
+ case '.php':
46
+ return !/\bprivate\b/.test(nodeText) && !/\bprotected\b/.test(nodeText)
47
+ case '.rb':
48
+ if (name.startsWith('private_') || name.startsWith('protected_')) return false
49
+ if (/\bprivate\b/.test(nodeText.split('\n')[0] || '')) return false
50
+ if (/\bprotected\b/.test(nodeText.split('\n')[0] || '')) return false
51
+ return true
52
+ case '.c':
53
+ case '.h':
54
+ return true
55
+ case '.cpp':
56
+ case '.cc':
57
+ case '.hpp':
58
+ case '.hh':
59
+ if (/\bprivate\b/.test(nodeText) || /\bprotected\b/.test(nodeText)) return false
60
+ return true
45
61
  default:
46
62
  return false
47
63
  }
48
64
  }
49
65
 
50
- // ---------------------------------------------------------------------------
51
- // Parameter extraction from tree-sitter nodes
52
- // ---------------------------------------------------------------------------
53
-
54
- /**
55
- * Best-effort parameter extraction from a function definition node.
56
- * Walks child nodes looking for parameter/formal_parameter identifiers.
57
- * Returns an empty array on failure — never throws.
58
- */
59
66
  function extractParamsFromNode(defNode: any): ParsedParam[] {
60
67
  const params: ParsedParam[] = []
61
68
  if (!defNode || !defNode.children) return params
62
69
 
63
- // Walk all descendants looking for parameter-like nodes
64
70
  const walk = (node: any) => {
65
71
  if (!node) return
66
72
  const t = node.type ?? ''
67
- // Common parameter node type names across tree-sitter grammars
68
73
  if (
69
74
  t === 'parameter' || t === 'formal_parameter' || t === 'simple_parameter' ||
70
75
  t === 'variadic_parameter' || t === 'typed_parameter' || t === 'typed_default_parameter' ||
71
76
  t === 'keyword_argument' || t === 'field_declaration'
72
77
  ) {
73
- // Try to find the identifier within this param node
74
78
  const identNode = findFirstChild(node, n => n.type === 'identifier' || n.type === 'name')
75
79
  const typeNode = findFirstChild(node, n =>
76
80
  n.type === 'type' || n.type === 'type_annotation' ||
@@ -79,9 +83,9 @@ function extractParamsFromNode(defNode: any): ParsedParam[] {
79
83
  const name = identNode?.text ?? node.text ?? ''
80
84
  const type = typeNode?.text ?? 'any'
81
85
  if (name && name !== '' && !params.some(p => p.name === name)) {
82
- params.push({ name, type, optional: false })
86
+ params.push({ name, type, optional: /\?/.test(type) })
83
87
  }
84
- return // Don't recurse into parameter children
88
+ return
85
89
  }
86
90
  if (node.children) {
87
91
  for (const child of node.children) walk(child)
@@ -100,25 +104,54 @@ function findFirstChild(node: any, predicate: (n: any) => boolean): any {
100
104
  return null
101
105
  }
102
106
 
103
- // ---------------------------------------------------------------------------
104
- // Scope-aware call resolver
105
- // ---------------------------------------------------------------------------
106
-
107
- /**
108
- * Given the ordered list of functions (with startLine/endLine already set)
109
- * and a map of callName → line, assign each call to the innermost function
110
- * whose line range contains that call's line.
111
- *
112
- * Returns an array of call names that were NOT assigned to any function scope
113
- * (these are module-scope calls).
114
- */
107
+ function findAllChildren(node: any, predicate: (n: any) => boolean): any[] {
108
+ const results: any[] = []
109
+ if (!node?.children) return results
110
+ for (const child of node.children) {
111
+ if (predicate(child)) results.push(child)
112
+ results.push(...findAllChildren(child, predicate))
113
+ }
114
+ return results
115
+ }
116
+
117
+ function extractGenericsFromNode(defNode: any, filePath: string): ParsedGeneric[] {
118
+ const generics: ParsedGeneric[] = []
119
+ if (!defNode) return generics
120
+
121
+ const typeParamNodes = findAllChildren(defNode, n =>
122
+ n.type === 'type_parameter' || n.type === 'type_parameters'
123
+ )
124
+
125
+ for (const tpNode of typeParamNodes) {
126
+ if (tpNode.type === 'type_parameters') {
127
+ const params = findAllChildren(tpNode, n => n.type === 'type_parameter')
128
+ for (const param of params) {
129
+ const paramName = findFirstChild(param, n => n.type === 'type_identifier' || n.type === 'identifier')
130
+ if (paramName) {
131
+ generics.push({
132
+ id: `generic:${filePath}:${paramName.text}`,
133
+ name: paramName.text,
134
+ type: 'type',
135
+ file: filePath,
136
+ startLine: param.startPosition?.row + 1 || 0,
137
+ endLine: param.endPosition?.row + 1 || 0,
138
+ isExported: false,
139
+ hash: '',
140
+ })
141
+ }
142
+ }
143
+ }
144
+ }
145
+
146
+ return generics
147
+ }
148
+
115
149
  function assignCallsToFunctions(
116
150
  functions: ParsedFunction[],
117
151
  callEntries: Array<{ name: string; line: number }>
118
152
  ): Array<{ name: string; line: number }> {
119
153
  const unassigned: Array<{ name: string; line: number }> = []
120
154
  for (const { name, line } of callEntries) {
121
- // Find the innermost (smallest range) function that contains this line
122
155
  let best: ParsedFunction | null = null
123
156
  let bestRange = Infinity
124
157
  for (const fn of functions) {
@@ -141,14 +174,11 @@ function assignCallsToFunctions(
141
174
  return unassigned
142
175
  }
143
176
 
144
- // ---------------------------------------------------------------------------
145
- // Main parser class
146
- // ---------------------------------------------------------------------------
147
-
148
177
  export class TreeSitterParser extends BaseParser {
149
178
  private parser: any = null
150
179
  private languages = new Map<string, any>()
151
180
  private nameCounter = new Map<string, number>()
181
+ private wasmLoadError = false
152
182
 
153
183
  getSupportedExtensions(): string[] {
154
184
  return ['.py', '.java', '.c', '.cpp', '.cc', '.h', '.hpp', '.cs', '.go', '.rs', '.php', '.rb']
@@ -156,8 +186,9 @@ export class TreeSitterParser extends BaseParser {
156
186
 
157
187
  private async init() {
158
188
  if (!this.parser) {
159
- if (!Parser) return // web-tree-sitter not available
160
- await Parser.init()
189
+ if (!Parser || !initPromise) return
190
+ await initPromise.catch(() => {})
191
+ if (!Language) return
161
192
  this.parser = new Parser()
162
193
  }
163
194
  }
@@ -168,7 +199,6 @@ export class TreeSitterParser extends BaseParser {
168
199
  const ext = path.extname(filePath).toLowerCase()
169
200
 
170
201
  if (!this.parser) {
171
- // web-tree-sitter unavailable — return structurally valid empty file
172
202
  return this.buildEmptyFile(filePath, content, ext)
173
203
  }
174
204
 
@@ -178,18 +208,32 @@ export class TreeSitterParser extends BaseParser {
178
208
  return this.buildEmptyFile(filePath, content, ext)
179
209
  }
180
210
 
211
+ try {
212
+ return this.parseWithConfig(filePath, content, ext, config)
213
+ } catch (err) {
214
+ console.warn(`Parse error for ${filePath}:`, err)
215
+ return this.buildEmptyFile(filePath, content, ext)
216
+ }
217
+ }
218
+
219
+ private async parseWithConfig(filePath: string, content: string, ext: string, config: any): Promise<ParsedFile> {
181
220
  this.parser!.setLanguage(config.lang)
182
221
  const tree = this.parser!.parse(content)
183
222
  const query = config.lang.query(config.query)
223
+
224
+ if (!query) {
225
+ return this.buildEmptyFile(filePath, content, ext)
226
+ }
227
+
184
228
  const matches = query.matches(tree.rootNode)
185
229
 
186
230
  const functions: ParsedFunction[] = []
187
231
  const classesMap = new Map<string, ParsedClass>()
188
232
  const imports: ParsedImport[] = []
189
- // callEntries stores name + line so we can scope them to the right function
233
+ const generics: ParsedGeneric[] = []
190
234
  const callEntries: Array<{ name: string; line: number }> = []
191
- // Track processed function IDs to avoid collisions from overloads
192
235
  const seenFnIds = new Set<string>()
236
+ const routes: Array<{ method: string; path: string; handler: string; line: number }> = []
193
237
 
194
238
  for (const match of matches) {
195
239
  const captures: Record<string, any> = {}
@@ -197,11 +241,53 @@ export class TreeSitterParser extends BaseParser {
197
241
  captures[c.name] = c.node
198
242
  }
199
243
 
200
- // --- Calls: record name and line position ---
244
+ // --- Routes ---
245
+ if (captures['route.name'] || captures['call.name']) {
246
+ let routeName = ''
247
+ let routePath = '/'
248
+ let method = 'GET'
249
+ let routeLine = 0
250
+
251
+ if (captures['route.name']) {
252
+ routeName = captures['route.name'].text ?? ''
253
+ routeLine = (captures['route.name'].startPosition?.row ?? 0) + 1
254
+ } else if (captures['call.name']) {
255
+ routeName = captures['call.name'].text ?? ''
256
+ routeLine = (captures['call.name'].startPosition?.row ?? 0) + 1
257
+ }
258
+
259
+ if (routeName && /^(get|post|put|delete|patch|options|head|resource|apiResource|any)$/i.test(routeName)) {
260
+ method = routeName.toUpperCase()
261
+
262
+ if (captures['route.path']) {
263
+ routePath = captures['route.path'].text?.replace(/['"]/g, '') || '/'
264
+ } else {
265
+ const args = findAllChildren(match.node, n => n.type === 'argument_list')
266
+ for (const arg of args) {
267
+ const str = findFirstChild(arg, n => n.type === 'string' || n.type === 'string_content')
268
+ if (str) {
269
+ routePath = str.text?.replace(/['"]/g, '') || '/'
270
+ break
271
+ }
272
+ }
273
+ }
274
+
275
+ if (routePath !== '/' && routePath !== '') {
276
+ routes.push({ method, path: routePath, handler: '', line: routeLine })
277
+ }
278
+ }
279
+ }
280
+
281
+ // --- Base routes for class-level route ---
282
+ if (captures['route.basepath']) {
283
+ // Store base path for class-level routes
284
+ }
285
+
286
+ // --- Calls ---
201
287
  if (captures['call.name']) {
202
288
  const callNode = captures['call.name']
203
- const name = callNode.text
204
- if (name) {
289
+ const name = callNode?.text
290
+ if (name && !/^(get|post|put|delete|patch|options|head|resource)$/i.test(name)) {
205
291
  callEntries.push({
206
292
  name,
207
293
  line: (callNode.startPosition?.row ?? 0) + 1,
@@ -212,7 +298,7 @@ export class TreeSitterParser extends BaseParser {
212
298
 
213
299
  // --- Imports ---
214
300
  if (captures['import.source']) {
215
- const src = captures['import.source'].text.replace(/['"]/g, '')
301
+ const src = captures['import.source'].text?.replace(/['"]/g, '') || ''
216
302
  imports.push({
217
303
  source: src,
218
304
  resolvedPath: '',
@@ -223,6 +309,24 @@ export class TreeSitterParser extends BaseParser {
223
309
  continue
224
310
  }
225
311
 
312
+ // --- Generic types ---
313
+ if (captures['generic.name'] || captures['generic.arg']) {
314
+ const genName = captures['generic.name']?.text || ''
315
+ const genArg = captures['generic.arg']?.text || ''
316
+ if (genArg && !generics.some(g => g.name === genArg)) {
317
+ generics.push({
318
+ id: `generic:${filePath}:${genArg}`,
319
+ name: genArg,
320
+ type: 'type',
321
+ file: filePath,
322
+ startLine: (captures['generic.arg']?.startPosition?.row ?? 0) + 1,
323
+ endLine: (captures['generic.arg']?.endPosition?.row ?? 0) + 1,
324
+ isExported: false,
325
+ hash: '',
326
+ })
327
+ }
328
+ }
329
+
226
330
  // --- Functions / Methods ---
227
331
  if (captures['definition.function'] || captures['definition.method']) {
228
332
  const nameNode = captures['name']
@@ -236,7 +340,6 @@ export class TreeSitterParser extends BaseParser {
236
340
  const count = (this.nameCounter.get(fnName) ?? 0) + 1
237
341
  this.nameCounter.set(fnName, count)
238
342
 
239
- // Unique ID: use stable format with counter for collisions
240
343
  const fnId = count === 1 ? `fn:${filePath}:${fnName}` : `fn:${filePath}:${fnName}#${count}`
241
344
  if (seenFnIds.has(fnId)) {
242
345
  continue
@@ -246,9 +349,7 @@ export class TreeSitterParser extends BaseParser {
246
349
  const exported = isExportedByLanguage(ext, fnName, nodeText)
247
350
  const isAsync = /\basync\b/.test(nodeText)
248
351
 
249
- // Detect return type language-specific heuristics
250
- const returnType = extractReturnType(ext, defNode)
251
-
352
+ const returnType = extractReturnType(ext, defNode, nodeText)
252
353
  const params = extractParamsFromNode(defNode)
253
354
 
254
355
  functions.push({
@@ -261,7 +362,7 @@ export class TreeSitterParser extends BaseParser {
261
362
  returnType,
262
363
  isExported: exported,
263
364
  isAsync,
264
- calls: [], // populated after all functions are collected
365
+ calls: [],
265
366
  hash: hashContent(nodeText),
266
367
  purpose: extractDocComment(content, startLine),
267
368
  edgeCasesHandled: [],
@@ -271,46 +372,49 @@ export class TreeSitterParser extends BaseParser {
271
372
  }
272
373
  }
273
374
 
274
- // --- Classes / Structs / Interfaces ---
275
- if (
276
- captures['definition.class'] ||
277
- captures['definition.struct'] ||
278
- captures['definition.interface']
279
- ) {
280
- const nameNode = captures['name']
281
- const defNode =
282
- captures['definition.class'] ||
283
- captures['definition.struct'] ||
284
- captures['definition.interface']
285
-
286
- if (nameNode && defNode) {
287
- const clsName = nameNode.text
288
- const startLine = defNode.startPosition.row + 1
289
- const endLine = defNode.endPosition.row + 1
290
- const nodeText = defNode.text ?? ''
291
- const clsId = `class:${filePath}:${clsName}` // consistent with ts-extractor
292
-
293
- if (!classesMap.has(clsId)) {
294
- classesMap.set(clsId, {
295
- id: clsId,
296
- name: clsName,
297
- file: filePath,
298
- startLine,
299
- endLine,
300
- methods: [],
301
- properties: [],
302
- isExported: isExportedByLanguage(ext, clsName, nodeText),
303
- hash: hashContent(nodeText),
304
- })
375
+ // --- Classes / Structs / Interfaces / Enums / Unions ---
376
+ const classTypes = [
377
+ 'definition.class', 'definition.struct', 'definition.interface',
378
+ 'definition.enum', 'definition.union', 'definition.trait',
379
+ 'definition.record', 'definition.module', 'definition.namespace'
380
+ ]
381
+
382
+ for (const type of classTypes) {
383
+ if (captures[type]) {
384
+ const nameNode = captures['name']
385
+ const defNode = captures[type]
386
+
387
+ if (nameNode && defNode) {
388
+ const clsName = nameNode.text
389
+ const startLine = defNode.startPosition.row + 1
390
+ const endLine = defNode.endPosition.row + 1
391
+ const nodeText = defNode.text ?? ''
392
+ const clsId = `class:${filePath}:${clsName}`
393
+
394
+ if (!classesMap.has(clsId)) {
395
+ const isEnum = type === 'definition.enum'
396
+ const isStruct = type === 'definition.struct'
397
+ const isUnion = type === 'definition.union'
398
+
399
+ classesMap.set(clsId, {
400
+ id: clsId,
401
+ name: clsName,
402
+ file: filePath,
403
+ startLine,
404
+ endLine,
405
+ methods: [],
406
+ properties: [],
407
+ isExported: isExportedByLanguage(ext, clsName, nodeText),
408
+ hash: hashContent(nodeText),
409
+ })
410
+ }
305
411
  }
306
412
  }
307
413
  }
308
414
  }
309
415
 
310
- // Assign calls to their enclosing function scopes.
311
416
  const unassignedCalls = assignCallsToFunctions(functions, callEntries)
312
417
 
313
- // Only add a synthetic module-level function if there are actually calls made outside any function.
314
418
  if (unassignedCalls.length > 0) {
315
419
  const lineCount = content.split('\n').length
316
420
  functions.push({
@@ -321,7 +425,7 @@ export class TreeSitterParser extends BaseParser {
321
425
  endLine: lineCount || 1,
322
426
  params: [],
323
427
  returnType: 'void',
324
- isExported: false, // Don't export the synthetic module function
428
+ isExported: false,
325
429
  isAsync: false,
326
430
  calls: unassignedCalls.map(c => ({ name: c.name, line: c.line, type: 'function' })),
327
431
  hash: '',
@@ -333,9 +437,6 @@ export class TreeSitterParser extends BaseParser {
333
437
  }
334
438
 
335
439
  const finalLang = extensionToLanguage(ext)
336
-
337
- // Link methods: functions whose names contain '.' belong to a class
338
- // (Go receiver methods, Java/C# member methods detected via method capture)
339
440
  linkMethodsToClasses(functions, classesMap)
340
441
 
341
442
  return {
@@ -343,14 +444,21 @@ export class TreeSitterParser extends BaseParser {
343
444
  language: finalLang,
344
445
  functions,
345
446
  classes: Array.from(classesMap.values()),
346
- generics: [],
447
+ generics,
347
448
  imports,
348
449
  exports: functions.filter(f => f.isExported).map(f => ({
349
450
  name: f.name,
350
451
  type: 'function' as const,
351
452
  file: filePath,
352
453
  })),
353
- routes: [],
454
+ routes: routes.map(r => ({
455
+ method: r.method,
456
+ path: r.path,
457
+ handler: r.handler || '',
458
+ middlewares: [],
459
+ file: filePath,
460
+ line: r.line,
461
+ })),
354
462
  variables: [],
355
463
  calls: [],
356
464
  hash: hashContent(content),
@@ -358,10 +466,22 @@ export class TreeSitterParser extends BaseParser {
358
466
  }
359
467
  }
360
468
 
361
- async resolveImports(files: ParsedFile[], _projectRoot: string): Promise<ParsedFile[]> {
362
- // Tree-sitter resolver: no cross-file resolution implemented.
363
- // Imports are left with resolvedPath = '' which signals unresolved to the graph builder.
364
- // A future pass can resolve Go/Python/Java imports using language-specific rules.
469
+ async resolveImports(files: ParsedFile[], projectRoot: string): Promise<ParsedFile[]> {
470
+ if (files.length === 0) return files
471
+
472
+ const ext = path.extname(files[0].path).toLowerCase()
473
+ const language = extensionToLanguage(ext)
474
+ const resolver = new TreeSitterResolver(projectRoot, language)
475
+
476
+ const allFiles = files.map(f => f.path)
477
+
478
+ for (const file of files) {
479
+ if (file.imports.length > 0) {
480
+ const resolved = resolver.resolveAll(file.imports, file.path, allFiles)
481
+ file.imports = resolved
482
+ }
483
+ }
484
+
365
485
  return files
366
486
  }
367
487
 
@@ -384,13 +504,82 @@ export class TreeSitterParser extends BaseParser {
384
504
 
385
505
  private async loadLang(name: string): Promise<any> {
386
506
  if (this.languages.has(name)) return this.languages.get(name)
507
+ if (this.wasmLoadError) return null
508
+
387
509
  try {
388
- const tcPath = _require.resolve('tree-sitter-wasms/package.json')
389
- const wasmPath = path.join(path.dirname(tcPath), 'out', `tree-sitter-${name}.wasm`)
390
- const lang = await Parser.Language.load(wasmPath)
391
- this.languages.set(name, lang)
392
- return lang
510
+ const nameForFile = name.replace(/-/g, '_')
511
+
512
+ // Try multiple possible WASM locations
513
+ const possiblePaths = [
514
+ path.resolve('node_modules/tree-sitter-wasms/out', `tree-sitter-${nameForFile}.wasm`),
515
+ path.resolve('./node_modules/tree-sitter-wasms/out', `tree-sitter-${nameForFile}.wasm`),
516
+ path.resolve(process.cwd(), 'node_modules/tree-sitter-wasms/out', `tree-sitter-${nameForFile}.wasm`),
517
+ path.resolve(process.cwd(), 'node_modules', 'tree-sitter-wasms', 'out', `tree-sitter-${nameForFile}.wasm`),
518
+ ]
519
+
520
+ let wasmPath = ''
521
+ for (const p of possiblePaths) {
522
+ try {
523
+ const fs = await import('node:fs')
524
+ if (fs.existsSync(p)) {
525
+ wasmPath = p
526
+ break
527
+ }
528
+ } catch { /* skip */ }
529
+ }
530
+
531
+ if (!wasmPath) {
532
+ // Try common variations of the language name
533
+ const variations = [
534
+ nameForFile,
535
+ name.replace(/_/g, '-'),
536
+ name,
537
+ ]
538
+
539
+ for (const variant of variations) {
540
+ for (const base of possiblePaths) {
541
+ const testPath = base.replace(/tree-sitter-[^/]+\.wasm/, `tree-sitter-${variant}.wasm`)
542
+ try {
543
+ const fs = await import('node:fs')
544
+ if (fs.existsSync(testPath)) {
545
+ wasmPath = testPath
546
+ break
547
+ }
548
+ } catch { /* skip */ }
549
+ }
550
+ if (wasmPath) break
551
+ }
552
+ }
553
+
554
+ if (!wasmPath) {
555
+ // WASM not found - but don't mark as permanent error, just skip this language
556
+ console.warn(`Tree-sitter WASM not found for ${name}`)
557
+ return null
558
+ }
559
+
560
+ // Try to load the WASM file with error handling
561
+ let lang: any = null
562
+ try {
563
+ lang = await Language.load(wasmPath)
564
+ } catch (loadErr) {
565
+ console.warn(`Failed to load WASM for ${name} at ${wasmPath}:`, loadErr)
566
+ // Try with dynamic import as fallback
567
+ try {
568
+ const wasmModule = await import(wasmPath)
569
+ if (wasmModule.default) {
570
+ lang = await wasmModule.default()
571
+ }
572
+ } catch { /* skip */ }
573
+ }
574
+
575
+ if (lang) {
576
+ this.languages.set(name, lang)
577
+ return lang
578
+ }
579
+
580
+ return null
393
581
  } catch (err) {
582
+ // Only mark as permanent error after all retries exhausted
394
583
  console.warn(`Failed to load Tree-sitter WASM for ${name}:`, err)
395
584
  return null
396
585
  }
@@ -408,6 +597,7 @@ export class TreeSitterParser extends BaseParser {
408
597
  case '.cpp':
409
598
  case '.cc':
410
599
  case '.hpp':
600
+ case '.hh':
411
601
  return { lang: await this.loadLang('cpp'), query: Queries.CPP_QUERIES }
412
602
  case '.cs':
413
603
  return { lang: await this.loadLang('c-sharp'), query: Queries.CSHARP_QUERIES }
@@ -425,10 +615,6 @@ export class TreeSitterParser extends BaseParser {
425
615
  }
426
616
  }
427
617
 
428
- // ---------------------------------------------------------------------------
429
- // Helpers
430
- // ---------------------------------------------------------------------------
431
-
432
618
  function extensionToLanguage(ext: string): ParsedFile['language'] {
433
619
  switch (ext) {
434
620
  case '.py': return 'python'
@@ -444,45 +630,88 @@ function extensionToLanguage(ext: string): ParsedFile['language'] {
444
630
  }
445
631
  }
446
632
 
447
- /**
448
- * Extract a simple return type hint from the function node text.
449
- * Falls back to 'unknown' rather than 'any' to distinguish "not parsed"
450
- * from "genuinely untyped".
451
- */
452
- function extractReturnType(ext: string, defNode: any): string {
453
- const text: string = defNode?.text ?? ''
454
- // TypeScript/Go/Rust: look for "-> Type" or ": Type" after parameters
455
- const arrowMatch = text.match(/\)\s*->\s*([^\s{]+)/)
456
- if (arrowMatch) return arrowMatch[1].trim()
457
- // Java/C# style: "public int foo(" — type precedes the name
458
- // This is too fragile to do reliably here; return 'unknown'
633
+ function extractReturnType(ext: string, defNode: any, nodeText: string): string {
634
+ if (!defNode && !nodeText) return 'unknown'
635
+ const text = nodeText || defNode?.text || ''
636
+
637
+ // Try to find return type from AST node directly first
638
+ if (defNode?.children) {
639
+ const returnTypeNode = findFirstChild(defNode, n =>
640
+ n.type === 'type' ||
641
+ n.type === 'type_annotation' ||
642
+ n.type === 'return_type' ||
643
+ n.type === 'result_type'
644
+ )
645
+ if (returnTypeNode?.text) {
646
+ return returnTypeNode.text.trim()
647
+ }
648
+ }
649
+
650
+ // Arrow return type (Rust, TS, Go)
651
+ const arrowMatch = text.match(/\)\s*(->|=>)\s*([^\s{;]+)/)
652
+ if (arrowMatch && arrowMatch[3]) {
653
+ const ret = arrowMatch[3].trim()
654
+ if (ret && ret !== 'void' && ret !== 'null') return ret
655
+ }
656
+
657
+ // Go: "func foo() (int, error)" or "func foo() error"
459
658
  if (ext === '.go') {
460
- // Go: "func foo() (int, error)" or "func foo() error"
461
659
  const goReturnTuple = text.match(/\)\s+(\([^)]+\))/)
462
- if (goReturnTuple) return goReturnTuple[1].trim()
660
+ if (goReturnTuple && goReturnTuple[1]) return goReturnTuple[1].trim()
463
661
  const goReturn = text.match(/\)\s+([^\s{(]+)/)
464
- if (goReturn) return goReturn[1].trim()
662
+ if (goReturn && goReturn[1]) return goReturn[1].trim()
465
663
  }
664
+
665
+ // Java/C#/TypeScript: "public int foo(" - type before name
666
+ const javaMatch = text.match(/(?:public|private|protected|internal)?\s*(?:static\s*)?(?:async\s*)?([\w<>[\],\s]+?)\s+\w+\s*\(/)
667
+ if (javaMatch && javaMatch[1]) {
668
+ const ret = javaMatch[1].trim()
669
+ if (ret && ret !== 'void' && ret !== 'public' && ret !== 'private' && ret !== 'protected') {
670
+ return ret
671
+ }
672
+ }
673
+
674
+ // Python type annotations
675
+ const pyMatch = text.match(/def\s+\w+.*?\)\s*->\s*([^\s:]+)/)
676
+ if (pyMatch && pyMatch[1]) return pyMatch[1].trim()
677
+
678
+ // Python: try to find return type from type comment
679
+ const pyTypeComment = text.match(/#\s*type:\s*([^\n]+)/)
680
+ if (pyTypeComment && pyTypeComment[1]) return pyTypeComment[1].trim()
681
+
682
+ // PHP return type
683
+ const phpMatch = text.match(/function\s+\w+.*?\)\s*:\s*(\??[\w\\]+)/)
684
+ if (phpMatch && phpMatch[1]) return phpMatch[1].trim()
685
+
686
+ // Ruby return type
687
+ const rubyMatch = text.match(/def\s+\w+.*?\s+(->\s*[\w?]+)?/)
688
+ if (rubyMatch && rubyMatch[1]) return rubyMatch[1].replace('->', '').trim()
689
+
690
+ // C/C++ return type
691
+ const cMatch = text.match(/^[\w*&\s]+\s+(\w+)\s*\(/m)
692
+ if (cMatch && cMatch[1] && cMatch[1] !== 'if' && cMatch[1] !== 'while') {
693
+ return cMatch[1]
694
+ }
695
+
696
+ // Rust: try to find return type from node directly
697
+ if (ext === '.rs') {
698
+ const rustMatch = text.match(/fn\s+\w+.*?\s*->\s*([^\s{]+)/)
699
+ if (rustMatch && rustMatch[1]) return rustMatch[1].trim()
700
+ }
701
+
466
702
  return 'unknown'
467
703
  }
468
704
 
469
- /**
470
- * Extract a single-line doc comment immediately preceding the given line.
471
- * Scans backwards from startLine looking for `#`, `//`, `/**`, or `"""` comments.
472
- */
473
705
  function extractDocComment(content: string, startLine: number): string {
474
706
  const lines = content.split('\n')
475
- const targetIdx = startLine - 2 // 0-indexed line before the function
707
+ const targetIdx = startLine - 2
476
708
  if (targetIdx < 0) return ''
477
709
 
478
710
  const prev = lines[targetIdx]?.trim() ?? ''
479
- // Single-line comment styles
480
711
  for (const prefix of ['# ', '// ', '/// ']) {
481
712
  if (prev.startsWith(prefix)) return prev.slice(prefix.length).trim()
482
713
  }
483
- // JSDoc / block comment end
484
714
  if (prev === '*/') {
485
- // Walk back to find the first meaningful JSDoc line
486
715
  for (let i = targetIdx - 1; i >= 0; i--) {
487
716
  const line = lines[i].trim()
488
717
  const cleaned = line.replace(/^\*+\s?/, '')
@@ -492,12 +721,6 @@ function extractDocComment(content: string, startLine: number): string {
492
721
  return ''
493
722
  }
494
723
 
495
- /**
496
- * Move functions that are class methods (identified by having a receiver or
497
- * by being within the line range of a class) into the class's methods array.
498
- * This is a best-effort heuristic; direct tree-sitter capture of method
499
- * declarations already places them correctly in most languages.
500
- */
501
724
  function linkMethodsToClasses(
502
725
  functions: ParsedFunction[],
503
726
  classesMap: Map<string, ParsedClass>
@@ -506,18 +729,14 @@ function linkMethodsToClasses(
506
729
  if (classes.length === 0) return
507
730
 
508
731
  for (const fn of functions) {
509
- // Already categorised if name contains "." (e.g. "MyClass.method")
510
- // and never link the synthetic <module> function to a class.
511
732
  if (fn.name === '<module>' || fn.name.includes('.')) continue
512
733
 
513
- // Skip functions nested inside other functions (local helpers)
514
734
  const isNestedInFunction = functions.some(f =>
515
735
  f.id !== fn.id &&
516
736
  fn.startLine >= f.startLine && fn.endLine <= f.endLine
517
737
  )
518
738
  if (isNestedInFunction) continue
519
739
 
520
- // Find the innermost (smallest range) class that contains this function
521
740
  let bestCls: ParsedClass | null = null
522
741
  let bestRange = Infinity
523
742
  for (const cls of classes) {