@getmikk/core 2.0.0 → 2.0.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,70 +1,80 @@
1
1
  import * as path from 'node:path'
2
+ import { TreeSitterResolver } from './resolver.js'
2
3
  import { createRequire } from 'node:module'
3
4
  import { hashContent } from '../../hash/file-hasher.js'
4
5
  import { BaseParser } from '../base-parser.js'
5
- import type { ParsedFile, ParsedFunction, ParsedClass, ParsedParam, ParsedImport } from '../types.js'
6
+ import type { ParsedFile, ParsedFunction, ParsedClass, ParsedParam, ParsedImport, ParsedGeneric } from '../types.js'
6
7
  import * as Queries from './queries.js'
7
8
 
8
- // Safely require web-tree-sitter via CJS
9
9
  const getRequire = () => {
10
10
  if (typeof require !== 'undefined') return require
11
11
  return createRequire(import.meta.url)
12
12
  }
13
13
  const _require = getRequire()
14
- const ParserModule = _require('web-tree-sitter')
15
- const Parser = ParserModule.Parser || ParserModule
16
-
17
- // ---------------------------------------------------------------------------
18
- // Language-specific export visibility rules
19
- // ---------------------------------------------------------------------------
20
-
21
- /**
22
- * Determine whether a function node is exported based on language conventions.
23
- * Python: public if name does not start with underscore.
24
- * Java/C#/Rust: requires an explicit visibility keyword in the node text.
25
- * Go: exported if name starts with an uppercase letter.
26
- * All others (C, C++, PHP, Ruby): default to false (no reliable static rule).
27
- */
14
+
15
+ let Parser: any = null
16
+ let Language: any = null
17
+ let initialized = false
18
+ let initPromise: Promise<void> | null = null
19
+
20
+ try {
21
+ const ParserModule = _require('web-tree-sitter')
22
+ Parser = ParserModule
23
+ if (ParserModule.init) {
24
+ initPromise = ParserModule.init().then(() => {
25
+ Language = ParserModule.Language
26
+ initialized = true
27
+ }).catch(() => { /* ignore */ })
28
+ } else if (ParserModule.default?.Language) {
29
+ Language = ParserModule.default.Language
30
+ }
31
+ } catch { /* web-tree-sitter not installed */ }
32
+
28
33
  function isExportedByLanguage(ext: string, name: string, nodeText: string): boolean {
29
34
  switch (ext) {
30
35
  case '.py':
31
36
  return !name.startsWith('_')
32
37
  case '.java':
33
- case '.cs':
34
38
  return /\bpublic\b/.test(nodeText)
39
+ case '.cs':
40
+ return /\bpublic\b/.test(nodeText) && !/\binternal\b/.test(nodeText)
35
41
  case '.go':
36
42
  return name.length > 0 && name[0] === name[0].toUpperCase() && name[0] !== name[0].toLowerCase()
37
43
  case '.rs':
38
- return /\bpub\b/.test(nodeText)
44
+ return /\bpub\b/.test(nodeText) || /\bpub\s*\(crate\)/.test(nodeText)
45
+ case '.php':
46
+ return !/\bprivate\b/.test(nodeText) && !/\bprotected\b/.test(nodeText)
47
+ case '.rb':
48
+ if (name.startsWith('private_') || name.startsWith('protected_')) return false
49
+ if (/\bprivate\b/.test(nodeText.split('\n')[0] || '')) return false
50
+ if (/\bprotected\b/.test(nodeText.split('\n')[0] || '')) return false
51
+ return true
52
+ case '.c':
53
+ case '.h':
54
+ return true
55
+ case '.cpp':
56
+ case '.cc':
57
+ case '.hpp':
58
+ case '.hh':
59
+ if (/\bprivate\b/.test(nodeText) || /\bprotected\b/.test(nodeText)) return false
60
+ return true
39
61
  default:
40
62
  return false
41
63
  }
42
64
  }
43
65
 
44
- // ---------------------------------------------------------------------------
45
- // Parameter extraction from tree-sitter nodes
46
- // ---------------------------------------------------------------------------
47
-
48
- /**
49
- * Best-effort parameter extraction from a function definition node.
50
- * Walks child nodes looking for parameter/formal_parameter identifiers.
51
- * Returns an empty array on failure — never throws.
52
- */
53
66
  function extractParamsFromNode(defNode: any): ParsedParam[] {
54
67
  const params: ParsedParam[] = []
55
68
  if (!defNode || !defNode.children) return params
56
69
 
57
- // Walk all descendants looking for parameter-like nodes
58
70
  const walk = (node: any) => {
59
71
  if (!node) return
60
72
  const t = node.type ?? ''
61
- // Common parameter node type names across tree-sitter grammars
62
73
  if (
63
74
  t === 'parameter' || t === 'formal_parameter' || t === 'simple_parameter' ||
64
75
  t === 'variadic_parameter' || t === 'typed_parameter' || t === 'typed_default_parameter' ||
65
76
  t === 'keyword_argument' || t === 'field_declaration'
66
77
  ) {
67
- // Try to find the identifier within this param node
68
78
  const identNode = findFirstChild(node, n => n.type === 'identifier' || n.type === 'name')
69
79
  const typeNode = findFirstChild(node, n =>
70
80
  n.type === 'type' || n.type === 'type_annotation' ||
@@ -73,9 +83,9 @@ function extractParamsFromNode(defNode: any): ParsedParam[] {
73
83
  const name = identNode?.text ?? node.text ?? ''
74
84
  const type = typeNode?.text ?? 'any'
75
85
  if (name && name !== '' && !params.some(p => p.name === name)) {
76
- params.push({ name, type, optional: false })
86
+ params.push({ name, type, optional: /\?/.test(type) })
77
87
  }
78
- return // Don't recurse into parameter children
88
+ return
79
89
  }
80
90
  if (node.children) {
81
91
  for (const child of node.children) walk(child)
@@ -94,25 +104,54 @@ function findFirstChild(node: any, predicate: (n: any) => boolean): any {
94
104
  return null
95
105
  }
96
106
 
97
- // ---------------------------------------------------------------------------
98
- // Scope-aware call resolver
99
- // ---------------------------------------------------------------------------
100
-
101
- /**
102
- * Given the ordered list of functions (with startLine/endLine already set)
103
- * and a map of callName → line, assign each call to the innermost function
104
- * whose line range contains that call's line.
105
- *
106
- * Returns an array of call names that were NOT assigned to any function scope
107
- * (these are module-scope calls).
108
- */
107
+ function findAllChildren(node: any, predicate: (n: any) => boolean): any[] {
108
+ const results: any[] = []
109
+ if (!node?.children) return results
110
+ for (const child of node.children) {
111
+ if (predicate(child)) results.push(child)
112
+ results.push(...findAllChildren(child, predicate))
113
+ }
114
+ return results
115
+ }
116
+
117
+ function extractGenericsFromNode(defNode: any, filePath: string): ParsedGeneric[] {
118
+ const generics: ParsedGeneric[] = []
119
+ if (!defNode) return generics
120
+
121
+ const typeParamNodes = findAllChildren(defNode, n =>
122
+ n.type === 'type_parameter' || n.type === 'type_parameters'
123
+ )
124
+
125
+ for (const tpNode of typeParamNodes) {
126
+ if (tpNode.type === 'type_parameters') {
127
+ const params = findAllChildren(tpNode, n => n.type === 'type_parameter')
128
+ for (const param of params) {
129
+ const paramName = findFirstChild(param, n => n.type === 'type_identifier' || n.type === 'identifier')
130
+ if (paramName) {
131
+ generics.push({
132
+ id: `generic:${filePath}:${paramName.text}`,
133
+ name: paramName.text,
134
+ type: 'type',
135
+ file: filePath,
136
+ startLine: param.startPosition?.row + 1 || 0,
137
+ endLine: param.endPosition?.row + 1 || 0,
138
+ isExported: false,
139
+ hash: '',
140
+ })
141
+ }
142
+ }
143
+ }
144
+ }
145
+
146
+ return generics
147
+ }
148
+
109
149
  function assignCallsToFunctions(
110
150
  functions: ParsedFunction[],
111
151
  callEntries: Array<{ name: string; line: number }>
112
152
  ): Array<{ name: string; line: number }> {
113
153
  const unassigned: Array<{ name: string; line: number }> = []
114
154
  for (const { name, line } of callEntries) {
115
- // Find the innermost (smallest range) function that contains this line
116
155
  let best: ParsedFunction | null = null
117
156
  let bestRange = Infinity
118
157
  for (const fn of functions) {
@@ -135,14 +174,11 @@ function assignCallsToFunctions(
135
174
  return unassigned
136
175
  }
137
176
 
138
- // ---------------------------------------------------------------------------
139
- // Main parser class
140
- // ---------------------------------------------------------------------------
141
-
142
177
  export class TreeSitterParser extends BaseParser {
143
178
  private parser: any = null
144
179
  private languages = new Map<string, any>()
145
180
  private nameCounter = new Map<string, number>()
181
+ private wasmLoadError = false
146
182
 
147
183
  getSupportedExtensions(): string[] {
148
184
  return ['.py', '.java', '.c', '.cpp', '.cc', '.h', '.hpp', '.cs', '.go', '.rs', '.php', '.rb']
@@ -150,7 +186,9 @@ export class TreeSitterParser extends BaseParser {
150
186
 
151
187
  private async init() {
152
188
  if (!this.parser) {
153
- await Parser.init()
189
+ if (!Parser || !initPromise) return
190
+ await initPromise.catch(() => {})
191
+ if (!Language) return
154
192
  this.parser = new Parser()
155
193
  }
156
194
  }
@@ -159,24 +197,43 @@ export class TreeSitterParser extends BaseParser {
159
197
  this.nameCounter.clear()
160
198
  await this.init()
161
199
  const ext = path.extname(filePath).toLowerCase()
200
+
201
+ if (!this.parser) {
202
+ return this.buildEmptyFile(filePath, content, ext)
203
+ }
204
+
162
205
  const config = await this.getLanguageConfig(ext)
163
206
 
164
207
  if (!config || !config.lang) {
165
208
  return this.buildEmptyFile(filePath, content, ext)
166
209
  }
167
210
 
211
+ try {
212
+ return this.parseWithConfig(filePath, content, ext, config)
213
+ } catch (err) {
214
+ console.warn(`Parse error for ${filePath}:`, err)
215
+ return this.buildEmptyFile(filePath, content, ext)
216
+ }
217
+ }
218
+
219
+ private async parseWithConfig(filePath: string, content: string, ext: string, config: any): Promise<ParsedFile> {
168
220
  this.parser!.setLanguage(config.lang)
169
221
  const tree = this.parser!.parse(content)
170
222
  const query = config.lang.query(config.query)
223
+
224
+ if (!query) {
225
+ return this.buildEmptyFile(filePath, content, ext)
226
+ }
227
+
171
228
  const matches = query.matches(tree.rootNode)
172
229
 
173
230
  const functions: ParsedFunction[] = []
174
231
  const classesMap = new Map<string, ParsedClass>()
175
232
  const imports: ParsedImport[] = []
176
- // callEntries stores name + line so we can scope them to the right function
233
+ const generics: ParsedGeneric[] = []
177
234
  const callEntries: Array<{ name: string; line: number }> = []
178
- // Track processed function IDs to avoid collisions from overloads
179
235
  const seenFnIds = new Set<string>()
236
+ const routes: Array<{ method: string; path: string; handler: string; line: number }> = []
180
237
 
181
238
  for (const match of matches) {
182
239
  const captures: Record<string, any> = {}
@@ -184,11 +241,53 @@ export class TreeSitterParser extends BaseParser {
184
241
  captures[c.name] = c.node
185
242
  }
186
243
 
187
- // --- Calls: record name and line position ---
244
+ // --- Routes ---
245
+ if (captures['route.name'] || captures['call.name']) {
246
+ let routeName = ''
247
+ let routePath = '/'
248
+ let method = 'GET'
249
+ let routeLine = 0
250
+
251
+ if (captures['route.name']) {
252
+ routeName = captures['route.name'].text ?? ''
253
+ routeLine = (captures['route.name'].startPosition?.row ?? 0) + 1
254
+ } else if (captures['call.name']) {
255
+ routeName = captures['call.name'].text ?? ''
256
+ routeLine = (captures['call.name'].startPosition?.row ?? 0) + 1
257
+ }
258
+
259
+ if (routeName && /^(get|post|put|delete|patch|options|head|resource|apiResource|any)$/i.test(routeName)) {
260
+ method = routeName.toUpperCase()
261
+
262
+ if (captures['route.path']) {
263
+ routePath = captures['route.path'].text?.replace(/['"]/g, '') || '/'
264
+ } else {
265
+ const args = findAllChildren(match.node, n => n.type === 'argument_list')
266
+ for (const arg of args) {
267
+ const str = findFirstChild(arg, n => n.type === 'string' || n.type === 'string_content')
268
+ if (str) {
269
+ routePath = str.text?.replace(/['"]/g, '') || '/'
270
+ break
271
+ }
272
+ }
273
+ }
274
+
275
+ if (routePath !== '/' && routePath !== '') {
276
+ routes.push({ method, path: routePath, handler: '', line: routeLine })
277
+ }
278
+ }
279
+ }
280
+
281
+ // --- Base routes for class-level route ---
282
+ if (captures['route.basepath']) {
283
+ // Store base path for class-level routes
284
+ }
285
+
286
+ // --- Calls ---
188
287
  if (captures['call.name']) {
189
288
  const callNode = captures['call.name']
190
- const name = callNode.text
191
- if (name) {
289
+ const name = callNode?.text
290
+ if (name && !/^(get|post|put|delete|patch|options|head|resource)$/i.test(name)) {
192
291
  callEntries.push({
193
292
  name,
194
293
  line: (callNode.startPosition?.row ?? 0) + 1,
@@ -199,7 +298,7 @@ export class TreeSitterParser extends BaseParser {
199
298
 
200
299
  // --- Imports ---
201
300
  if (captures['import.source']) {
202
- const src = captures['import.source'].text.replace(/['"]/g, '')
301
+ const src = captures['import.source'].text?.replace(/['"]/g, '') || ''
203
302
  imports.push({
204
303
  source: src,
205
304
  resolvedPath: '',
@@ -210,6 +309,24 @@ export class TreeSitterParser extends BaseParser {
210
309
  continue
211
310
  }
212
311
 
312
+ // --- Generic types ---
313
+ if (captures['generic.name'] || captures['generic.arg']) {
314
+ const genName = captures['generic.name']?.text || ''
315
+ const genArg = captures['generic.arg']?.text || ''
316
+ if (genArg && !generics.some(g => g.name === genArg)) {
317
+ generics.push({
318
+ id: `generic:${filePath}:${genArg}`,
319
+ name: genArg,
320
+ type: 'type',
321
+ file: filePath,
322
+ startLine: (captures['generic.arg']?.startPosition?.row ?? 0) + 1,
323
+ endLine: (captures['generic.arg']?.endPosition?.row ?? 0) + 1,
324
+ isExported: false,
325
+ hash: '',
326
+ })
327
+ }
328
+ }
329
+
213
330
  // --- Functions / Methods ---
214
331
  if (captures['definition.function'] || captures['definition.method']) {
215
332
  const nameNode = captures['name']
@@ -223,8 +340,7 @@ export class TreeSitterParser extends BaseParser {
223
340
  const count = (this.nameCounter.get(fnName) ?? 0) + 1
224
341
  this.nameCounter.set(fnName, count)
225
342
 
226
- // Unique ID: use stable format with counter for collisions
227
- let fnId = count === 1 ? `fn:${filePath}:${fnName}` : `fn:${filePath}:${fnName}#${count}`
343
+ const fnId = count === 1 ? `fn:${filePath}:${fnName}` : `fn:${filePath}:${fnName}#${count}`
228
344
  if (seenFnIds.has(fnId)) {
229
345
  continue
230
346
  }
@@ -233,9 +349,7 @@ export class TreeSitterParser extends BaseParser {
233
349
  const exported = isExportedByLanguage(ext, fnName, nodeText)
234
350
  const isAsync = /\basync\b/.test(nodeText)
235
351
 
236
- // Detect return type language-specific heuristics
237
- const returnType = extractReturnType(ext, defNode)
238
-
352
+ const returnType = extractReturnType(ext, defNode, nodeText)
239
353
  const params = extractParamsFromNode(defNode)
240
354
 
241
355
  functions.push({
@@ -248,7 +362,7 @@ export class TreeSitterParser extends BaseParser {
248
362
  returnType,
249
363
  isExported: exported,
250
364
  isAsync,
251
- calls: [], // populated after all functions are collected
365
+ calls: [],
252
366
  hash: hashContent(nodeText),
253
367
  purpose: extractDocComment(content, startLine),
254
368
  edgeCasesHandled: [],
@@ -258,46 +372,49 @@ export class TreeSitterParser extends BaseParser {
258
372
  }
259
373
  }
260
374
 
261
- // --- Classes / Structs / Interfaces ---
262
- if (
263
- captures['definition.class'] ||
264
- captures['definition.struct'] ||
265
- captures['definition.interface']
266
- ) {
267
- const nameNode = captures['name']
268
- const defNode =
269
- captures['definition.class'] ||
270
- captures['definition.struct'] ||
271
- captures['definition.interface']
272
-
273
- if (nameNode && defNode) {
274
- const clsName = nameNode.text
275
- const startLine = defNode.startPosition.row + 1
276
- const endLine = defNode.endPosition.row + 1
277
- const nodeText = defNode.text ?? ''
278
- const clsId = `class:${filePath}:${clsName}` // consistent with ts-extractor
279
-
280
- if (!classesMap.has(clsId)) {
281
- classesMap.set(clsId, {
282
- id: clsId,
283
- name: clsName,
284
- file: filePath,
285
- startLine,
286
- endLine,
287
- methods: [],
288
- properties: [],
289
- isExported: isExportedByLanguage(ext, clsName, nodeText),
290
- hash: hashContent(nodeText),
291
- })
375
+ // --- Classes / Structs / Interfaces / Enums / Unions ---
376
+ const classTypes = [
377
+ 'definition.class', 'definition.struct', 'definition.interface',
378
+ 'definition.enum', 'definition.union', 'definition.trait',
379
+ 'definition.record', 'definition.module', 'definition.namespace'
380
+ ]
381
+
382
+ for (const type of classTypes) {
383
+ if (captures[type]) {
384
+ const nameNode = captures['name']
385
+ const defNode = captures[type]
386
+
387
+ if (nameNode && defNode) {
388
+ const clsName = nameNode.text
389
+ const startLine = defNode.startPosition.row + 1
390
+ const endLine = defNode.endPosition.row + 1
391
+ const nodeText = defNode.text ?? ''
392
+ const clsId = `class:${filePath}:${clsName}`
393
+
394
+ if (!classesMap.has(clsId)) {
395
+ const isEnum = type === 'definition.enum'
396
+ const isStruct = type === 'definition.struct'
397
+ const isUnion = type === 'definition.union'
398
+
399
+ classesMap.set(clsId, {
400
+ id: clsId,
401
+ name: clsName,
402
+ file: filePath,
403
+ startLine,
404
+ endLine,
405
+ methods: [],
406
+ properties: [],
407
+ isExported: isExportedByLanguage(ext, clsName, nodeText),
408
+ hash: hashContent(nodeText),
409
+ })
410
+ }
292
411
  }
293
412
  }
294
413
  }
295
414
  }
296
415
 
297
- // Assign calls to their enclosing function scopes.
298
416
  const unassignedCalls = assignCallsToFunctions(functions, callEntries)
299
417
 
300
- // Only add a synthetic module-level function if there are actually calls made outside any function.
301
418
  if (unassignedCalls.length > 0) {
302
419
  const lineCount = content.split('\n').length
303
420
  functions.push({
@@ -308,7 +425,7 @@ export class TreeSitterParser extends BaseParser {
308
425
  endLine: lineCount || 1,
309
426
  params: [],
310
427
  returnType: 'void',
311
- isExported: false, // Don't export the synthetic module function
428
+ isExported: false,
312
429
  isAsync: false,
313
430
  calls: unassignedCalls.map(c => ({ name: c.name, line: c.line, type: 'function' })),
314
431
  hash: '',
@@ -320,9 +437,6 @@ export class TreeSitterParser extends BaseParser {
320
437
  }
321
438
 
322
439
  const finalLang = extensionToLanguage(ext)
323
-
324
- // Link methods: functions whose names contain '.' belong to a class
325
- // (Go receiver methods, Java/C# member methods detected via method capture)
326
440
  linkMethodsToClasses(functions, classesMap)
327
441
 
328
442
  return {
@@ -330,14 +444,21 @@ export class TreeSitterParser extends BaseParser {
330
444
  language: finalLang,
331
445
  functions,
332
446
  classes: Array.from(classesMap.values()),
333
- generics: [],
447
+ generics,
334
448
  imports,
335
449
  exports: functions.filter(f => f.isExported).map(f => ({
336
450
  name: f.name,
337
451
  type: 'function' as const,
338
452
  file: filePath,
339
453
  })),
340
- routes: [],
454
+ routes: routes.map(r => ({
455
+ method: r.method,
456
+ path: r.path,
457
+ handler: r.handler || '',
458
+ middlewares: [],
459
+ file: filePath,
460
+ line: r.line,
461
+ })),
341
462
  variables: [],
342
463
  calls: [],
343
464
  hash: hashContent(content),
@@ -345,10 +466,22 @@ export class TreeSitterParser extends BaseParser {
345
466
  }
346
467
  }
347
468
 
348
- resolveImports(files: ParsedFile[], _projectRoot: string): ParsedFile[] {
349
- // Tree-sitter resolver: no cross-file resolution implemented.
350
- // Imports are left with resolvedPath = '' which signals unresolved to the graph builder.
351
- // A future pass can resolve Go/Python/Java imports using language-specific rules.
469
+ async resolveImports(files: ParsedFile[], projectRoot: string): Promise<ParsedFile[]> {
470
+ if (files.length === 0) return files
471
+
472
+ const ext = path.extname(files[0].path).toLowerCase()
473
+ const language = extensionToLanguage(ext)
474
+ const resolver = new TreeSitterResolver(projectRoot, language)
475
+
476
+ const allFiles = files.map(f => f.path)
477
+
478
+ for (const file of files) {
479
+ if (file.imports.length > 0) {
480
+ const resolved = resolver.resolveAll(file.imports, file.path, allFiles)
481
+ file.imports = resolved
482
+ }
483
+ }
484
+
352
485
  return files
353
486
  }
354
487
 
@@ -371,13 +504,82 @@ export class TreeSitterParser extends BaseParser {
371
504
 
372
505
  private async loadLang(name: string): Promise<any> {
373
506
  if (this.languages.has(name)) return this.languages.get(name)
507
+ if (this.wasmLoadError) return null
508
+
374
509
  try {
375
- const tcPath = _require.resolve('tree-sitter-wasms/package.json')
376
- const wasmPath = path.join(path.dirname(tcPath), 'out', `tree-sitter-${name}.wasm`)
377
- const lang = await Parser.Language.load(wasmPath)
378
- this.languages.set(name, lang)
379
- return lang
510
+ const nameForFile = name.replace(/-/g, '_')
511
+
512
+ // Try multiple possible WASM locations
513
+ const possiblePaths = [
514
+ path.resolve('node_modules/tree-sitter-wasms/out', `tree-sitter-${nameForFile}.wasm`),
515
+ path.resolve('./node_modules/tree-sitter-wasms/out', `tree-sitter-${nameForFile}.wasm`),
516
+ path.resolve(process.cwd(), 'node_modules/tree-sitter-wasms/out', `tree-sitter-${nameForFile}.wasm`),
517
+ path.resolve(process.cwd(), 'node_modules', 'tree-sitter-wasms', 'out', `tree-sitter-${nameForFile}.wasm`),
518
+ ]
519
+
520
+ let wasmPath = ''
521
+ for (const p of possiblePaths) {
522
+ try {
523
+ const fs = await import('node:fs')
524
+ if (fs.existsSync(p)) {
525
+ wasmPath = p
526
+ break
527
+ }
528
+ } catch { /* skip */ }
529
+ }
530
+
531
+ if (!wasmPath) {
532
+ // Try common variations of the language name
533
+ const variations = [
534
+ nameForFile,
535
+ name.replace(/_/g, '-'),
536
+ name,
537
+ ]
538
+
539
+ for (const variant of variations) {
540
+ for (const base of possiblePaths) {
541
+ const testPath = base.replace(/tree-sitter-[^/]+\.wasm/, `tree-sitter-${variant}.wasm`)
542
+ try {
543
+ const fs = await import('node:fs')
544
+ if (fs.existsSync(testPath)) {
545
+ wasmPath = testPath
546
+ break
547
+ }
548
+ } catch { /* skip */ }
549
+ }
550
+ if (wasmPath) break
551
+ }
552
+ }
553
+
554
+ if (!wasmPath) {
555
+ // WASM not found - but don't mark as permanent error, just skip this language
556
+ console.warn(`Tree-sitter WASM not found for ${name}`)
557
+ return null
558
+ }
559
+
560
+ // Try to load the WASM file with error handling
561
+ let lang: any = null
562
+ try {
563
+ lang = await Language.load(wasmPath)
564
+ } catch (loadErr) {
565
+ console.warn(`Failed to load WASM for ${name} at ${wasmPath}:`, loadErr)
566
+ // Try with dynamic import as fallback
567
+ try {
568
+ const wasmModule = await import(wasmPath)
569
+ if (wasmModule.default) {
570
+ lang = await wasmModule.default()
571
+ }
572
+ } catch { /* skip */ }
573
+ }
574
+
575
+ if (lang) {
576
+ this.languages.set(name, lang)
577
+ return lang
578
+ }
579
+
580
+ return null
380
581
  } catch (err) {
582
+ // Only mark as permanent error after all retries exhausted
381
583
  console.warn(`Failed to load Tree-sitter WASM for ${name}:`, err)
382
584
  return null
383
585
  }
@@ -395,6 +597,7 @@ export class TreeSitterParser extends BaseParser {
395
597
  case '.cpp':
396
598
  case '.cc':
397
599
  case '.hpp':
600
+ case '.hh':
398
601
  return { lang: await this.loadLang('cpp'), query: Queries.CPP_QUERIES }
399
602
  case '.cs':
400
603
  return { lang: await this.loadLang('c-sharp'), query: Queries.CSHARP_QUERIES }
@@ -412,10 +615,6 @@ export class TreeSitterParser extends BaseParser {
412
615
  }
413
616
  }
414
617
 
415
- // ---------------------------------------------------------------------------
416
- // Helpers
417
- // ---------------------------------------------------------------------------
418
-
419
618
  function extensionToLanguage(ext: string): ParsedFile['language'] {
420
619
  switch (ext) {
421
620
  case '.py': return 'python'
@@ -431,61 +630,97 @@ function extensionToLanguage(ext: string): ParsedFile['language'] {
431
630
  }
432
631
  }
433
632
 
434
- /**
435
- * Extract a simple return type hint from the function node text.
436
- * Falls back to 'unknown' rather than 'any' to distinguish "not parsed"
437
- * from "genuinely untyped".
438
- */
439
- function extractReturnType(ext: string, defNode: any): string {
440
- const text: string = defNode?.text ?? ''
441
- // TypeScript/Go/Rust: look for "-> Type" or ": Type" after parameters
442
- const arrowMatch = text.match(/\)\s*->\s*([^\s{]+)/)
443
- if (arrowMatch) return arrowMatch[1].trim()
444
- // Java/C# style: "public int foo(" — type precedes the name
445
- // This is too fragile to do reliably here; return 'unknown'
633
+ function extractReturnType(ext: string, defNode: any, nodeText: string): string {
634
+ if (!defNode && !nodeText) return 'unknown'
635
+ const text = nodeText || defNode?.text || ''
636
+
637
+ // Try to find return type from AST node directly first
638
+ if (defNode?.children) {
639
+ const returnTypeNode = findFirstChild(defNode, n =>
640
+ n.type === 'type' ||
641
+ n.type === 'type_annotation' ||
642
+ n.type === 'return_type' ||
643
+ n.type === 'result_type'
644
+ )
645
+ if (returnTypeNode?.text) {
646
+ return returnTypeNode.text.trim()
647
+ }
648
+ }
649
+
650
+ // Arrow return type (Rust, TS, Go)
651
+ const arrowMatch = text.match(/\)\s*(->|=>)\s*([^\s{;]+)/)
652
+ if (arrowMatch && arrowMatch[3]) {
653
+ const ret = arrowMatch[3].trim()
654
+ if (ret && ret !== 'void' && ret !== 'null') return ret
655
+ }
656
+
657
+ // Go: "func foo() (int, error)" or "func foo() error"
446
658
  if (ext === '.go') {
447
- // Go: "func foo() (int, error)" or "func foo() error"
448
659
  const goReturnTuple = text.match(/\)\s+(\([^)]+\))/)
449
- if (goReturnTuple) return goReturnTuple[1].trim()
660
+ if (goReturnTuple && goReturnTuple[1]) return goReturnTuple[1].trim()
450
661
  const goReturn = text.match(/\)\s+([^\s{(]+)/)
451
- if (goReturn) return goReturn[1].trim()
662
+ if (goReturn && goReturn[1]) return goReturn[1].trim()
452
663
  }
664
+
665
+ // Java/C#/TypeScript: "public int foo(" - type before name
666
+ const javaMatch = text.match(/(?:public|private|protected|internal)?\s*(?:static\s*)?(?:async\s*)?([\w<>[\],\s]+?)\s+\w+\s*\(/)
667
+ if (javaMatch && javaMatch[1]) {
668
+ const ret = javaMatch[1].trim()
669
+ if (ret && ret !== 'void' && ret !== 'public' && ret !== 'private' && ret !== 'protected') {
670
+ return ret
671
+ }
672
+ }
673
+
674
+ // Python type annotations
675
+ const pyMatch = text.match(/def\s+\w+.*?\)\s*->\s*([^\s:]+)/)
676
+ if (pyMatch && pyMatch[1]) return pyMatch[1].trim()
677
+
678
+ // Python: try to find return type from type comment
679
+ const pyTypeComment = text.match(/#\s*type:\s*([^\n]+)/)
680
+ if (pyTypeComment && pyTypeComment[1]) return pyTypeComment[1].trim()
681
+
682
+ // PHP return type
683
+ const phpMatch = text.match(/function\s+\w+.*?\)\s*:\s*(\??[\w\\]+)/)
684
+ if (phpMatch && phpMatch[1]) return phpMatch[1].trim()
685
+
686
+ // Ruby return type
687
+ const rubyMatch = text.match(/def\s+\w+.*?\s+(->\s*[\w?]+)?/)
688
+ if (rubyMatch && rubyMatch[1]) return rubyMatch[1].replace('->', '').trim()
689
+
690
+ // C/C++ return type
691
+ const cMatch = text.match(/^[\w*&\s]+\s+(\w+)\s*\(/m)
692
+ if (cMatch && cMatch[1] && cMatch[1] !== 'if' && cMatch[1] !== 'while') {
693
+ return cMatch[1]
694
+ }
695
+
696
+ // Rust: try to find return type from node directly
697
+ if (ext === '.rs') {
698
+ const rustMatch = text.match(/fn\s+\w+.*?\s*->\s*([^\s{]+)/)
699
+ if (rustMatch && rustMatch[1]) return rustMatch[1].trim()
700
+ }
701
+
453
702
  return 'unknown'
454
703
  }
455
704
 
456
- /**
457
- * Extract a single-line doc comment immediately preceding the given line.
458
- * Scans backwards from startLine looking for `#`, `//`, `/**`, or `"""` comments.
459
- */
460
705
  function extractDocComment(content: string, startLine: number): string {
461
706
  const lines = content.split('\n')
462
- const targetIdx = startLine - 2 // 0-indexed line before the function
707
+ const targetIdx = startLine - 2
463
708
  if (targetIdx < 0) return ''
464
709
 
465
710
  const prev = lines[targetIdx]?.trim() ?? ''
466
- // Single-line comment styles
467
711
  for (const prefix of ['# ', '// ', '/// ']) {
468
712
  if (prev.startsWith(prefix)) return prev.slice(prefix.length).trim()
469
713
  }
470
- // JSDoc / block comment end
471
714
  if (prev === '*/') {
472
- // Walk back to find the first meaningful JSDoc line
473
715
  for (let i = targetIdx - 1; i >= 0; i--) {
474
716
  const line = lines[i].trim()
475
- if (line.startsWith('/*') || line.startsWith('/**')) break
476
717
  const cleaned = line.replace(/^\*+\s?/, '')
477
- if (cleaned && !/^[\-_=*]{3,}$/.test(cleaned)) return cleaned
718
+ if (cleaned && !/^[ \-_=*]{3,}$/.test(cleaned)) return cleaned
478
719
  }
479
720
  }
480
721
  return ''
481
722
  }
482
723
 
483
- /**
484
- * Move functions that are class methods (identified by having a receiver or
485
- * by being within the line range of a class) into the class's methods array.
486
- * This is a best-effort heuristic; direct tree-sitter capture of method
487
- * declarations already places them correctly in most languages.
488
- */
489
724
  function linkMethodsToClasses(
490
725
  functions: ParsedFunction[],
491
726
  classesMap: Map<string, ParsedClass>
@@ -494,18 +729,14 @@ function linkMethodsToClasses(
494
729
  if (classes.length === 0) return
495
730
 
496
731
  for (const fn of functions) {
497
- // Already categorised if name contains "." (e.g. "MyClass.method")
498
- // and never link the synthetic <module> function to a class.
499
732
  if (fn.name === '<module>' || fn.name.includes('.')) continue
500
733
 
501
- // Skip functions nested inside other functions (local helpers)
502
734
  const isNestedInFunction = functions.some(f =>
503
735
  f.id !== fn.id &&
504
736
  fn.startLine >= f.startLine && fn.endLine <= f.endLine
505
737
  )
506
738
  if (isNestedInFunction) continue
507
739
 
508
- // Find the innermost (smallest range) class that contains this function
509
740
  let bestCls: ParsedClass | null = null
510
741
  let bestRange = Infinity
511
742
  for (const cls of classes) {