codesynapt 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. package/CHANGELOG.md +17 -0
  2. package/LICENSE +686 -0
  3. package/LICENSES.md +141 -0
  4. package/README.md +331 -0
  5. package/electron/main.cjs +2849 -0
  6. package/electron/plugin-loader.cjs +184 -0
  7. package/electron/preload.cjs +108 -0
  8. package/package.json +216 -0
  9. package/packages/core/bin/codesynapt-mcp.cjs +611 -0
  10. package/packages/core/bin/codesynapt.cjs +1933 -0
  11. package/packages/core/legacy.js +300 -0
  12. package/packages/core/lib/control-server.cjs +1539 -0
  13. package/packages/core/lib/embedding.cjs +89 -0
  14. package/packages/core/lib/logger.cjs +63 -0
  15. package/packages/core/lib/search-cache.cjs +140 -0
  16. package/packages/core/lib/search-worker.cjs +255 -0
  17. package/packages/core/lib/search.cjs +211 -0
  18. package/packages/core/lib/symbol-graph.cjs +402 -0
  19. package/packages/core/lib/symbol-parser-js.cjs +542 -0
  20. package/packages/core/lib/symbol-parser-misc.cjs +394 -0
  21. package/packages/core/lib/symbol-parser-py.cjs +215 -0
  22. package/packages/core/lib/symbol-parser-treesitter.cjs +658 -0
  23. package/packages/core/lib/symbol-parser-tsc.cjs +332 -0
  24. package/packages/core/monorepo.js +310 -0
  25. package/packages/core/parser.js +2234 -0
  26. package/packages/core/scanner.js +623 -0
  27. package/plugin-api/LICENSE +21 -0
  28. package/plugin-api/README.md +114 -0
  29. package/plugin-api/docs/01-getting-started.md +197 -0
  30. package/plugin-api/docs/02-concepts.md +269 -0
  31. package/plugin-api/docs/api-reference.md +463 -0
  32. package/plugin-api/docs/troubleshooting.md +332 -0
  33. package/plugin-api/docs/types/exporter.md +377 -0
  34. package/plugin-api/docs/types/theme.md +312 -0
  35. package/plugin-api/examples/hello-world-plugin/README.md +70 -0
  36. package/plugin-api/examples/hello-world-plugin/main.js +36 -0
  37. package/plugin-api/examples/hello-world-plugin/manifest.json +12 -0
  38. package/plugin-api/examples/mermaid-exporter/README.md +125 -0
  39. package/plugin-api/examples/mermaid-exporter/main.js +58 -0
  40. package/plugin-api/examples/mermaid-exporter/manifest.json +12 -0
  41. package/plugin-api/examples/rust-parser/README.md +71 -0
  42. package/plugin-api/examples/rust-parser/main.js +123 -0
  43. package/plugin-api/examples/rust-parser/manifest.json +12 -0
  44. package/plugin-api/examples/sunset-theme/README.md +95 -0
  45. package/plugin-api/examples/sunset-theme/manifest.json +12 -0
  46. package/plugin-api/examples/sunset-theme/theme.css +31 -0
  47. package/plugin-api/package.json +20 -0
  48. package/plugin-api/types.d.ts +395 -0
  49. package/public/app.js +6837 -0
  50. package/public/backend.js +285 -0
  51. package/public/index.html +647 -0
  52. package/public/plugin-host.js +321 -0
  53. package/public/style.css +4359 -0
  54. package/public/vendor/three.module.js +53044 -0
  55. package/scripts/competitor-watch.mjs +144 -0
  56. package/scripts/copy-vendor.js +21 -0
  57. package/scripts/download-bundled-node.cjs +53 -0
  58. package/scripts/fuses-after-pack.cjs +34 -0
  59. package/scripts/license-check.js +119 -0
  60. package/scripts/perf-test.js +200 -0
  61. package/server.js +132 -0
@@ -0,0 +1,658 @@
1
+ // Tree-sitter symbol parsers — exact AST instead of regex.
2
+ // Reaches parity with codegraph for the supported languages.
3
+ //
4
+ // One generic walker; per-language config tells it which AST node types
5
+ // represent functions / classes / methods / calls, and what field gives
6
+ // the symbol name. Resolver is the same name-based lookup the other
7
+ // parsers use (file-mode imports get folded in later by Stage 3.5).
8
+ //
9
+ // Loaded lazily on first /symbol/scan after a project loads; per-
10
+ // language Parser instances are cached across scans.
11
+
12
+ 'use strict'
13
+
14
+ const fs = require('fs')
15
+ const path = require('path')
16
+
17
// Module-level memo for the web-tree-sitter runtime.
let _Parser = null       // the Parser class, set once init() has resolved
let _initPromise = null  // the one shared initialisation promise

/**
 * Resolve the initialised web-tree-sitter `Parser` class.
 *
 * The module is required and `init()` is awaited at most once: the first
 * caller creates the initialisation promise, every caller that arrives
 * while it is pending receives that same promise, and once it has
 * resolved the cached class is returned directly.
 *
 * @returns {Promise<Function>} the `Parser` class exported by web-tree-sitter
 */
async function getParser() {
  if (_Parser) return _Parser
  if (_initPromise === null) {
    const boot = async () => {
      const runtime = require('web-tree-sitter')
      await runtime.init()
      _Parser = runtime
      return runtime
    }
    _initPromise = boot()
  }
  return _initPromise
}
32
+
33
// Directory of prebuilt grammar binaries: the `out` folder of the
// `tree-sitter-wasms` package, located in the node_modules three levels
// above this file.
const WASM_DIR = path.join(__dirname, '..', '..', '..', 'node_modules', 'tree-sitter-wasms', 'out')

/**
 * Path of the wasm binary for one grammar.
 * @param {string} name grammar name, e.g. `javascript`
 * @returns {string} `<WASM_DIR>/tree-sitter-<name>.wasm`
 */
function wasmPath(name) {
  const fileName = `tree-sitter-${name}.wasm`
  return path.join(WASM_DIR, fileName)
}
35
+
36
// Grammar → file extensions served by that grammar, in registration
// order. Notes from the original table:
//  - ts/tsx use the JavaScript grammar because TypeScript wasms aren't in
//    tree-sitter-wasms@0.1.13 (type annotations are handled as stripped
//    syntax).
//  - Everything from c_sharp onwards is the "Phase B-3" wider-language
//    batch; tree-sitter-wasms@0.1.13 ships grammars for all of them.
const GRAMMAR_EXTENSIONS = [
  ['javascript', ['js', 'jsx', 'mjs', 'cjs', 'ts', 'tsx']],
  ['python', ['py']],
  ['go', ['go']],
  ['rust', ['rs']],
  ['java', ['java']],
  ['kotlin', ['kt', 'kts']],
  ['swift', ['swift']],
  ['c_sharp', ['cs']],
  ['ruby', ['rb']],
  ['php', ['php']],
  ['scala', ['scala']],
  ['lua', ['lua']],
  ['bash', ['sh', 'bash']],
  ['dart', ['dart']],
  ['elm', ['elm']],
  ['elixir', ['ex', 'exs']],
  ['c', ['c', 'h']],
  ['cpp', ['cpp', 'cc', 'hpp', 'hh']],
]

// Per-language config. Keys are CodeSynapt file extensions; each value is
// its own `{ grammar }` object naming the grammar wasm for that extension.
const LANG_CONFIG = Object.fromEntries(
  GRAMMAR_EXTENSIONS.flatMap(([grammar, exts]) => exts.map((ext) => [ext, { grammar }]))
)
74
+
75
// AST node types per grammar. For each grammar:
//   fn     — nodes recorded as function symbols
//   method — nodes recorded as method symbols (only where listed)
//   cls    — class-like nodes (class / struct / interface / enum …)
//   impl   — blocks that qualify nested functions without being symbols
//   call   — nodes treated as call sites
const NODE_TYPES = {
  javascript: {
    fn: [
      'function_declaration',
      'function',
      'arrow_function',
      'generator_function_declaration',
    ],
    method: ['method_definition'],
    cls: ['class_declaration'],
    call: ['call_expression', 'new_expression'],
  },

  python: {
    fn: ['function_definition'],
    cls: ['class_definition'],
    call: ['call'],
  },

  go: {
    fn: ['function_declaration'],
    method: ['method_declaration'],
    // `type_spec` carries the name and kind; the enclosing
    // `type_declaration` wrapper does not (its only named child is the
    // type_spec, so nameOf() finds nothing on it). Using the wrapper
    // yielded no struct/interface symbols at all for Go files.
    cls: ['type_spec'],
    call: ['call_expression'],
  },

  rust: {
    fn: ['function_item'],
    cls: ['struct_item', 'enum_item', 'trait_item'],
    impl: ['impl_item'],
    call: ['call_expression', 'macro_invocation'],
  },

  java: {
    fn: ['method_declaration', 'constructor_declaration'],
    cls: [
      'class_declaration',
      'interface_declaration',
      'record_declaration',
      'enum_declaration',
    ],
    call: ['method_invocation', 'object_creation_expression'],
  },

  kotlin: {
    // Secondary constructors, property accessors (getter/setter bodies)
    // and anonymous initializers are enrolled too, because their bodies
    // contain call sites.
    fn: [
      'function_declaration',
      'secondary_constructor',
      'getter',
      'setter',
      'anonymous_initializer',
    ],
    cls: ['class_declaration', 'object_declaration', 'interface_declaration'],
    call: ['call_expression', 'infix_expression'],
  },

  swift: {
    // tree-sitter-swift represents `extension X { ... }` as a
    // `class_declaration` (target type wrapped in `user_type`), so
    // `extension_declaration` is deliberately not listed — it would
    // double-count and break enclosing scopes.
    fn: [
      'function_declaration',
      'init_declaration',
      'deinit_declaration',
      'subscript_declaration',
      'computed_property',
    ],
    cls: ['class_declaration', 'protocol_declaration'],
    call: ['call_expression'],
  },

  c_sharp: {
    fn: ['method_declaration', 'constructor_declaration', 'local_function_statement'],
    cls: [
      'class_declaration',
      'interface_declaration',
      'struct_declaration',
      'record_declaration',
      'enum_declaration',
    ],
    call: ['invocation_expression', 'object_creation_expression'],
  },

  ruby: {
    fn: ['method', 'singleton_method'],
    cls: ['class', 'module'],
    // Bare identifiers are listed because Ruby calls often look like one.
    call: ['call', 'method_call', 'identifier'],
  },

  php: {
    fn: ['function_definition', 'method_declaration'],
    cls: ['class_declaration', 'interface_declaration', 'trait_declaration'],
    call: [
      'function_call_expression',
      'method_call_expression',
      'object_creation_expression',
    ],
  },

  scala: {
    fn: ['function_definition', 'function_declaration'],
    cls: ['class_definition', 'object_definition', 'trait_definition', 'enum_definition'],
    call: ['call_expression', 'generic_function'],
  },

  lua: {
    fn: ['function_declaration', 'function_definition', 'local_function'],
    cls: [], // Lua has no classes (table-based OOP)
    call: ['function_call'],
  },

  bash: {
    fn: ['function_definition'],
    cls: [],
    call: ['command'],
  },

  dart: {
    fn: ['function_signature', 'function_body'],
    cls: ['class_definition', 'mixin_declaration', 'extension_declaration'],
    call: ['method_invocation'],
  },

  elm: {
    fn: ['function_declaration_left'],
    cls: ['type_declaration', 'type_alias_declaration'],
    call: ['function_call_expr'],
  },

  elixir: {
    fn: ['call'], // Elixir uses macros for `def`
    cls: [],
    call: ['call'],
  },

  c: {
    fn: ['function_definition'],
    cls: ['struct_specifier', 'union_specifier', 'enum_specifier'],
    call: ['call_expression'],
  },

  cpp: {
    fn: ['function_definition', 'declaration'],
    cls: ['class_specifier', 'struct_specifier'],
    call: ['call_expression'],
  },
}
186
+
187
// Grammar name → loaded Parser.Language (web-tree-sitter). Filled lazily.
const _langCache = new Map()

/**
 * Return the Language object for `grammar`, loading it on first use.
 *
 * On a cache miss the wasm file at wasmPath(grammar) is read
 * synchronously, handed to `Parser.Language.load`, and the result is
 * stored so later calls skip both steps.
 *
 * @param {string} grammar grammar name, e.g. `python`
 * @returns {Promise<object>} the loaded Language
 */
async function loadLang(grammar) {
  if (!_langCache.has(grammar)) {
    const runtime = await getParser()
    const wasmBytes = fs.readFileSync(wasmPath(grammar))
    const language = await runtime.Language.load(wasmBytes)
    _langCache.set(grammar, language)
  }
  return _langCache.get(grammar)
}
197
+
198
// Grammar name → Parser instance. One instance is created per grammar;
// its language is set once at creation and the instance is then reused.
const _parserCache = new Map()

/**
 * Return the cached Parser for `grammar`, creating and configuring it on
 * first use.
 *
 * @param {string} grammar grammar name, e.g. `go`
 * @returns {Promise<object>} a Parser whose language is already set
 */
async function parserFor(grammar) {
  if (!_parserCache.has(grammar)) {
    const ParserClass = await getParser()
    const instance = new ParserClass()
    instance.setLanguage(await loadLang(grammar))
    _parserCache.set(grammar, instance)
  }
  return _parserCache.get(grammar)
}
209
+
210
/**
 * Build a symbol id of the form `<file>#<name>@<line>`.
 * @param {string} file file id the symbol lives in
 * @param {string} name (qualified) symbol name
 * @param {number} line 1-based start line
 * @returns {string}
 */
function mkId(file, name, line) {
  return ''.concat(file, '#', name, '@', line)
}
211
+
212
/**
 * Find the declared name of a declaration node.
 *
 * Lookup order:
 *  1. the grammar's `name` field, when the node exposes one;
 *  2. otherwise the first direct child that is identifier-shaped
 *     (Kotlin & Swift function declarations put the name there instead
 *     of in a field);
 *  3. while scanning, a `user_type` child is looked through one level
 *     for a type/simple identifier (Swift `extension X { … }`).
 *
 * @param {object} node tree-sitter node
 * @returns {string|null} the name text, or null when nothing matched
 */
function nameOf(node) {
  const viaField = node.childForFieldName?.('name')
  if (viaField) return viaField.text

  for (let idx = 0; idx < node.childCount; idx++) {
    const kid = node.child(idx)
    switch (kid.type) {
      case 'identifier':
      case 'simple_identifier':
      case 'type_identifier':
      case 'field_identifier':
      case 'property_identifier':
        return kid.text
      case 'user_type':
        // user_type → type_identifier (Swift extension's target type).
        for (let k = 0; k < kid.childCount; k++) {
          const inner = kid.child(k)
          if (inner.type === 'type_identifier' || inner.type === 'simple_identifier') {
            return inner.text
          }
        }
        break
      default:
        break
    }
  }
  return null
}
238
+
239
/**
 * Produce a one-line signature for a declaration node.
 *
 * The signature is the source text from the node's start up to (not
 * including) the first `{`, capped at `maxLen` characters, trimmed and
 * with whitespace runs collapsed to single spaces.
 *
 * The slice never extends past the node's own end. Without that bound a
 * short declaration with no `{` (for example a one-line Python `def`)
 * would pull the following, unrelated source into its signature.
 *
 * @param {object|null} node tree-sitter node; `startIndex` / `endIndex`
 *   are used when present (defaults: 0 and end of `content`)
 * @param {string} content full source text the node was parsed from
 * @param {number} [maxLen=200] maximum number of source characters read
 * @returns {string} the signature, or '' when `node` is falsy
 */
function signatureOf(node, content, maxLen = 200) {
  if (!node) return ''
  const start = node.startIndex ?? 0
  // Clamp to the node (and to the text) so we never read a neighbour.
  const nodeEnd = Math.min(node.endIndex ?? content.length, content.length)
  const limit = Math.min(start + maxLen, Math.max(nodeEnd, start))
  const brace = content.indexOf('{', start)
  const end = brace >= 0 && brace <= limit ? brace : limit
  return content.slice(start, end).trim().replace(/\s+/g, ' ')
}
246
+
247
/**
 * Collect the comment text sitting immediately before a node.
 *
 * Walks backwards through previous siblings for as long as they are
 * comment nodes (`comment`, `line_comment`, `block_comment`) and returns
 * their text in source order, with comment markers removed, whitespace
 * collapsed and the result capped at 400 characters.
 *
 * Each comment is cleaned on its own before joining. Stripping after the
 * join only removed the markers of the first comment, because the
 * line-anchored pattern cannot see markers that end up mid-line
 * (`// a` + `// b` came out as `a // b`). A closing `*/` at the end of a
 * line is removed as well, so `/** Foo *\/`-style one-liners yield `Foo`.
 *
 * @param {object} node tree-sitter node
 * @param {string} content source text (unused; kept for call-site compatibility)
 * @returns {string} cleaned doc text, or '' when no comment precedes the node
 */
function docOf(node, content) {
  const comments = []
  let prev = node.previousSibling
  while (prev && (prev.type === 'comment' || prev.type === 'line_comment' || prev.type === 'block_comment')) {
    comments.unshift(prev.text)
    prev = prev.previousSibling
  }
  if (!comments.length) return ''
  return comments
    .map((text) => text
      .replace(/^\s*[/*#]+/gm, '')      // leading `//`, `/*`, `*`, `#` on every line
      .replace(/\*+\/[ \t]*$/gm, ''))   // trailing `*/` of a block comment
    .join(' ')
    .replace(/\s+/g, ' ')
    .trim()
    .slice(0, 400)
}
259
+
260
// Node types that count as plain identifier references in pass 2.
const REFERENCE_NODE_TYPES = new Set([
  'identifier', 'simple_identifier', 'type_identifier', 'field_identifier',
])

/**
 * Generic depth-first walker used by both passes.
 *
 * For every node it (1) opens a declaration scope when the node is a
 * class-like, impl or function/method node, (2) in pass 2 only
 * (`ctx.passTwo`) records call and reference edges, (3) recurses into all
 * children, and (4) closes whatever scope step 1 opened.
 *
 * `ctx` fields read here and in the helpers: `types` (per-grammar node
 * types), `lang`, `fileId`, `content`, `symbols`, `classStack`,
 * `fnStack`, `passTwo`, and in pass 2 also `edges`, `seen`, `kwSet` and
 * `resolve`.
 *
 * @param {object|null} node tree-sitter node (falsy nodes are ignored)
 * @param {object} ctx mutable walk state; symbols/edges are appended in place
 */
function walk(node, ctx) {
  if (!node) return
  const opened = openDeclaration(node, ctx)

  if (ctx.passTwo) {
    addCallEdge(node, ctx)
    addReferenceEdge(node, ctx)
  }

  // Recurse
  for (let i = 0; i < node.childCount; i++) walk(node.child(i), ctx)

  if (opened.fn) ctx.fnStack.pop()
  if (opened.scope) ctx.classStack.pop()
}

// Dispatch on node type: class-like, then impl, then function/method.
// Reports which stacks were pushed so walk() can pop them afterwards.
function openDeclaration(node, ctx) {
  const cfg = ctx.types
  const t = node.type
  const opened = { fn: false, scope: false }
  if (cfg.cls?.includes(t)) {
    opened.scope = openClassScope(node, ctx)
  } else if (cfg.impl?.includes(t)) {
    opened.scope = openImplScope(node, ctx)
  } else if (cfg.fn?.includes(t) || cfg.method?.includes(t)) {
    opened.fn = openFunctionScope(node, ctx)
  }
  return opened
}

// Append an edge unless the same source/target/kind triple was already seen.
function addEdgeOnce(ctx, source, target, kind, node) {
  const key = source + '|' + target + '|' + kind
  if (ctx.seen.has(key)) return
  ctx.seen.add(key)
  ctx.edges.push({
    source, target, kind,
    line: node.startPosition.row + 1,
  })
}

// Record a class-like symbol, push it on classStack and (pass 2) emit its
// inheritance edges. Returns true when a scope was pushed.
function openClassScope(node, ctx) {
  const name = nameOf(node)
  if (!name) return false

  const startLine = node.startPosition.row + 1
  const sym = {
    id: mkId(ctx.fileId, name, startLine),
    name,
    qualifiedName: name,
    kind: classKind(node.type, node),
    file: ctx.fileId,
    startLine,
    endLine: node.endPosition.row + 1,
    signature: signatureOf(node, ctx.content),
    doc: docOf(node, ctx.content),
    exported: isExported(node, ctx.content),
  }
  ctx.symbols.push(sym)
  ctx.classStack.push({ name, sym })

  // Inheritance edges are pass-2 only: every symbol has to be indexed
  // before a parent name can be resolved.
  if (ctx.passTwo) {
    for (const { name: parentName, kind } of extractInheritance(node, ctx.lang)) {
      const target = ctx.resolve(parentName, { forCall: true })
      if (!target || target.id === sym.id) continue
      addEdgeOnce(ctx, sym.id, target.id, kind, node)
    }
  }
  return true
}

// Rust impl blocks: push the target type so nested functions get
// qualified. No symbol is recorded for the block itself.
function openImplScope(node, ctx) {
  const targetType = node.childForFieldName?.('type')?.text
    || node.children.find((c) => c.type === 'type_identifier')?.text
  if (!targetType) return false
  ctx.classStack.push({ name: targetType, sym: null })
  return true
}

// Record a function/method symbol and push its id on fnStack. Returns
// true when the stack was pushed.
function openFunctionScope(node, ctx) {
  const name = nameOf(node)
  if (!name) return false

  const cfg = ctx.types
  const t = node.type
  const isMethodNode = cfg.method?.includes(t)

  // Go methods name their owner in a `receiver` field instead of being
  // nested inside the type, so the owner is read from there.
  const methodOwner = (ctx.lang === 'go' && isMethodNode) ? extractGoReceiver(node) : null

  // Otherwise a function is a method when it sits inside a class/impl
  // scope and either the grammar marks it as a method node or the
  // language is one of those listed here.
  const enclosing = ctx.classStack[ctx.classStack.length - 1]
  const lexicallyMethod = Boolean(enclosing && (isMethodNode
    || ctx.lang === 'python' || ctx.lang === 'kotlin'
    || ctx.lang === 'swift' || ctx.lang === 'rust'))

  let qualifiedName = name
  if (methodOwner) qualifiedName = `${methodOwner}.${name}`
  else if (lexicallyMethod) qualifiedName = `${enclosing.name}.${name}`

  const startLine = node.startPosition.row + 1
  const sym = {
    id: mkId(ctx.fileId, qualifiedName, startLine),
    name,
    qualifiedName,
    kind: (methodOwner || lexicallyMethod) ? 'method' : 'function',
    file: ctx.fileId,
    startLine,
    endLine: node.endPosition.row + 1,
    signature: signatureOf(node, ctx.content),
    doc: docOf(node, ctx.content),
    exported: isExported(node, ctx.content),
  }
  ctx.symbols.push(sym)
  ctx.fnStack.push(sym.id)
  return true
}

// Pass 2: emit a `call` edge from the enclosing function to the callee.
function addCallEdge(node, ctx) {
  if (!ctx.types.call?.includes(node.type)) return
  const src = ctx.fnStack[ctx.fnStack.length - 1]
  if (!src) return
  const calleeName = extractCalleeName(node)
  if (!calleeName || ctx.kwSet?.has(calleeName)) return
  // Calls use the loose any-file fallback (`foo()` is a strong signal);
  // plain references stay strict.
  const target = ctx.resolve(calleeName, { forCall: true })
  if (!target || target.id === src) return
  addEdgeOnce(ctx, src, target.id, 'call', node)
}

// Pass 2: emit a `ref` edge (or `type-ref` for type identifiers, so type
// annotations can be told apart from value references) from the
// enclosing function to whatever the identifier resolves to.
function addReferenceEdge(node, ctx) {
  const t = node.type
  if (!REFERENCE_NODE_TYPES.has(t)) return
  const src = ctx.fnStack[ctx.fnStack.length - 1]
  if (!src) return
  const name = node.text
  if (!name || ctx.kwSet?.has(name) || name.length <= 1) return

  const cfg = ctx.types
  const parent = node.parent
  // Skip identifiers owned by a declaration or parameter-like parent.
  const isDeclaration =
    parent && (
      cfg.fn?.includes(parent.type) ||
      cfg.method?.includes(parent.type) ||
      cfg.cls?.includes(parent.type) ||
      parent.type === 'parameter' ||
      parent.type === 'function_value_parameters' ||
      parent.type === 'value_definition' ||
      parent.type === 'simple_value_definition'
    )
  // Skip direct children of a call node — the call pass handles those.
  const isCallee = parent && cfg.call?.includes(parent.type)
  if (isDeclaration || isCallee) return

  const target = ctx.resolve(name)
  if (!target || target.id === src) return
  const edgeKind = t === 'type_identifier' ? 'type-ref' : 'ref'
  addEdgeOnce(ctx, src, target.id, edgeKind, node)
}
420
+
421
/**
 * Collect the parent types named by a class-like node.
 *
 * Best-effort: node shapes differ per grammar and unknown shapes are
 * skipped silently. A parent can be reported more than once when a
 * grammar exposes it both as a field and as a child node.
 *
 * @param {object} node class-like tree-sitter node
 * @param {string} lang grammar name (gates the Python / Go cases)
 * @returns {Array<{name: string, kind: 'extends'|'implements'}>}
 */
function extractInheritance(node, lang) {
  const found = []

  // Name of a type expression: the node's own text when it is an
  // identifier, otherwise the rightmost identifier found beneath it
  // (member / qualified names resolve to their last segment).
  const typeNameOf = (n) => {
    if (!n) return null
    switch (n.type) {
      case 'type_identifier':
      case 'identifier':
      case 'simple_identifier':
        return n.text
      default:
        break
    }
    for (let i = n.namedChildCount - 1; i >= 0; i--) {
      const hit = typeNameOf(n.namedChild(i))
      if (hit) return hit
    }
    return null
  }

  const add = (name, kind) => {
    if (name) found.push({ name, kind })
  }
  // One entry per named child of `container`.
  const addEachChild = (container, kind) => {
    for (let j = 0; j < container.namedChildCount; j++) {
      add(typeNameOf(container.namedChild(j)), kind)
    }
  }

  // Standard fields, when the grammar exposes them.
  const superField = node.childForFieldName?.('superclass')
    || node.childForFieldName?.('parent_class')
  if (superField) add(typeNameOf(superField), 'extends')

  // Inheritance-related child nodes.
  for (let i = 0; i < node.namedChildCount; i++) {
    const child = node.namedChild(i)
    switch (child.type) {
      case 'superclass':
      case 'extends_type_clause':
      case 'class_inheritance_modifiers':
      case 'inheritance_specifier': // Swift — single base type or protocol
        add(typeNameOf(child), 'extends')
        break
      case 'super_interfaces':
      case 'implements_clause':
      case 'super_interface_specification':
        // Java/Kotlin — may hold several types
        addEachChild(child, 'implements')
        break
      case 'argument_list':
        // Python `class Foo(Bar, Baz):` — bases arrive as an argument_list
        if (lang === 'python') addEachChild(child, 'extends')
        break
      case 'type_spec_list':
        // Go interface embedding: `type Foo interface { Bar }`
        if (lang === 'go') addEachChild(child, 'extends')
        break
      default:
        break
    }
  }
  return found
}
476
+
477
/**
 * Find the type a Go method is declared on.
 *
 * Reads the node's `receiver` field — shapes like `(e *Engine)` or
 * `(c Context)`, where the type may sit inside a `pointer_type` — and
 * returns the first `type_identifier` found in a depth-first search of
 * its named children.
 *
 * @param {object} node Go `method_declaration` node
 * @returns {string|null} receiver type name, or null when there is no
 *   receiver or no type identifier inside it
 */
function extractGoReceiver(node) {
  const firstTypeIdentifier = (n) => {
    if (!n) return null
    if (n.type === 'type_identifier') return n.text
    for (let i = 0; i < n.namedChildCount; i++) {
      const hit = firstTypeIdentifier(n.namedChild(i))
      if (hit) return hit
    }
    return null
  }

  const receiver = node.childForFieldName?.('receiver')
  return receiver ? firstTypeIdentifier(receiver) : null
}
494
+
495
// Substring of the node type → symbol kind, checked in this order.
const CLASS_KIND_BY_SUBSTRING = [
  ['interface', 'interface'],
  ['trait', 'interface'],
  ['protocol', 'interface'],
  ['struct', 'struct'],
  ['enum', 'enum'],
  ['record', 'class'],
]

/**
 * Map a class-like node type to a symbol kind.
 *
 * The node type is matched against a fixed list of substrings first. A
 * Go `type_spec` says nothing about its kind by name, so when a node is
 * supplied its direct children are inspected: `struct_type` gives
 * 'struct', `interface_type` gives 'interface'. Everything else is
 * 'class'.
 *
 * @param {string} nodeType tree-sitter node type
 * @param {object|null} [node=null] the node itself (only used for `type_spec`)
 * @returns {'interface'|'struct'|'enum'|'class'}
 */
function classKind(nodeType, node = null) {
  for (const [needle, kind] of CLASS_KIND_BY_SUBSTRING) {
    if (nodeType.includes(needle)) return kind
  }
  if (nodeType === 'type_spec' && node) {
    for (let i = 0; i < node.childCount; i++) {
      const childType = node.child(i).type
      if (childType === 'struct_type') return 'struct'
      if (childType === 'interface_type') return 'interface'
    }
  }
  return 'class'
}
515
+
516
/**
 * Heuristic export check shared by all languages.
 *
 * Looks only at the head of the node — its first 120 characters, or the
 * whole node when shorter — and reports true when either
 *  - the head contains one of the words `pub`, `public`, `export`,
 *    `open`, or
 *  - the first identifier-shaped word of the head starts with an
 *    uppercase ASCII letter (the Go convention for exported names).
 *
 * @param {object} node tree-sitter node (`startIndex` / `endIndex` are read)
 * @param {string} content full source text
 * @returns {boolean}
 */
function isExported(node, content) {
  const from = node.startIndex
  const to = Math.min(from + 120, node.endIndex)
  const head = content.slice(from, to)

  if (/\b(pub|public|export|open)\b/.test(head)) return true

  const firstWord = /\b([A-Za-z_][A-Za-z0-9_]*)\b/.exec(head)
  return firstWord !== null && /^[A-Z]/.test(firstWord[1])
}
526
+
527
// Identifier-shaped node names across grammars.
const IDENT_TYPES = new Set([
  'identifier', 'simple_identifier', 'field_identifier', 'type_identifier',
  'property_identifier', 'shorthand_property_identifier',
])

// Node types that wrap a dotted / scoped callee; the callee name is the
// rightmost identifier inside them.
const NAV_TYPES = new Set([
  'member_expression', 'selector_expression', 'field_expression',
  'navigation_expression', 'method_invocation',
  // Python `obj.method` and Rust `Type::func` callees. Without these the
  // callee of `self.helper()` / `Foo::new()` was never extracted, so no
  // call edge could be produced for them.
  'attribute', 'scoped_identifier',
])

/**
 * Text of the rightmost identifier-shaped node at or below `node`
 * (`foo.bar.baz` → "baz").
 *
 * @param {object|null} node tree-sitter node
 * @returns {string|null} identifier text, or null when `node` is falsy or
 *   contains no identifier-shaped node
 */
function lastIdentText(node) {
  if (!node) return null
  if (IDENT_TYPES.has(node.type)) return node.text
  // Search named children right-to-left so the last segment wins.
  for (let i = node.namedChildCount - 1; i >= 0; i--) {
    const t = lastIdentText(node.namedChild(i))
    if (t) return t
  }
  return null
}
547
+
548
/**
 * Work out the name being called by a call-like node.
 *
 * Tried in order:
 *  1. the node's `function` field, else its `name` field — used when that
 *     node is identifier-shaped or a navigation wrapper;
 *  2. the `name` field on its own, when identifier-shaped (covers a
 *     `function` field of some other shape next to a usable `name`);
 *  3. the first named child that is identifier-shaped or a navigation
 *     wrapper (Swift/Kotlin call expressions expose no field).
 *
 * For navigation wrappers the rightmost identifier is returned.
 *
 * @param {object} callNode tree-sitter call-like node
 * @returns {string|null} callee name, or null when none could be found
 */
function extractCalleeName(callNode) {
  // Name carried by `n`, or undefined when `n` is neither an identifier
  // nor a navigation wrapper.
  const calleeFrom = (n) => {
    if (IDENT_TYPES.has(n.type)) return n.text
    if (NAV_TYPES.has(n.type)) return lastIdentText(n)
    return undefined
  }

  const primary = callNode.childForFieldName?.('function')
    || callNode.childForFieldName?.('name')
  if (primary) {
    const hit = calleeFrom(primary)
    if (hit !== undefined) return hit
  }

  const nameField = callNode.childForFieldName?.('name')
  if (nameField && IDENT_TYPES.has(nameField.type)) return nameField.text

  for (let i = 0; i < callNode.namedChildCount; i++) {
    const hit = calleeFrom(callNode.namedChild(i))
    if (hit !== undefined) return hit
  }
  return null
}
572
+
573
// Names skipped when matching call expressions and identifier
// references, keyed by grammar. Grammars without an entry skip nothing.
// NOTE(review): the Rust entries ending in `!` only match if the callee
// text includes the bang — confirm against what extractCalleeName returns
// for macro invocations.
const KEYWORDS = {
  javascript: new Set([
    'if', 'else', 'for', 'while', 'return', 'new', 'typeof', 'instanceof',
    'await', 'async', 'function', 'class', 'const', 'let', 'var', 'true',
    'false', 'null', 'undefined', 'console', 'require', 'import', 'export',
  ]),
  python: new Set([
    'if', 'elif', 'else', 'while', 'for', 'return', 'def', 'class', 'import',
    'from', 'None', 'True', 'False', 'self', 'print', 'len', 'range', 'int',
    'str', 'float', 'bool', 'list', 'dict', 'set', 'tuple', 'isinstance',
    'type', 'super', 'open', 'sorted', 'enumerate', 'zip', 'map', 'filter',
    'any', 'all', 'sum', 'min', 'max', 'abs', 'round', 'getattr', 'setattr',
    'hasattr', 'format', 'repr', 'hash', 'id', 'object', 'property',
    'staticmethod', 'classmethod',
  ]),
  go: new Set([
    'if', 'else', 'for', 'range', 'return', 'break', 'continue', 'switch',
    'case', 'func', 'type', 'var', 'const', 'package', 'import', 'interface',
    'struct', 'map', 'chan', 'make', 'new', 'len', 'cap', 'append', 'copy',
    'delete', 'panic', 'recover', 'close', 'true', 'false', 'nil', 'print',
    'println',
  ]),
  rust: new Set([
    'fn', 'let', 'mut', 'if', 'else', 'while', 'for', 'loop', 'match',
    'return', 'break', 'continue', 'use', 'mod', 'pub', 'crate', 'self',
    'super', 'impl', 'trait', 'struct', 'enum', 'type', 'as', 'where',
    'async', 'await', 'dyn', 'ref', 'move', 'Some', 'None', 'Ok', 'Err',
    'true', 'false', 'unsafe', 'extern', 'static', 'const', 'Box', 'Vec',
    'String', 'format!', 'println!', 'print!', 'vec!',
  ]),
  java: new Set([
    'if', 'else', 'while', 'for', 'do', 'switch', 'case', 'break', 'continue',
    'return', 'new', 'this', 'super', 'try', 'catch', 'finally', 'throw',
    'throws', 'class', 'interface', 'enum', 'extends', 'implements', 'public',
    'private', 'protected', 'static', 'final', 'abstract', 'synchronized',
    'void', 'int', 'long', 'short', 'byte', 'char', 'boolean', 'float',
    'double', 'String', 'true', 'false', 'null', 'import', 'package', 'var',
  ]),
  kotlin: new Set([
    'if', 'else', 'for', 'while', 'do', 'when', 'return', 'break', 'continue',
    'fun', 'val', 'var', 'class', 'object', 'interface', 'enum', 'sealed',
    'data', 'companion', 'public', 'private', 'internal', 'protected', 'open',
    'final', 'abstract', 'override', 'suspend', 'inline', 'crossinline',
    'noinline', 'this', 'super', 'it', 'true', 'false', 'null',
  ]),
  swift: new Set([
    'if', 'else', 'for', 'in', 'while', 'repeat', 'do', 'switch', 'case',
    'break', 'continue', 'return', 'throw', 'throws', 'try', 'catch',
    'rethrows', 'defer', 'guard', 'where', 'as', 'is', 'let', 'var', 'func',
    'class', 'struct', 'enum', 'protocol', 'extension', 'import', 'self',
    'super', 'init', 'deinit', 'static', 'final', 'public', 'private',
    'internal', 'open', 'fileprivate', 'true', 'false', 'nil', 'some', 'any',
    'Self', 'Optional', 'print', 'String', 'Int', 'Bool', 'Double', 'Float',
    'Array', 'Dictionary',
  ]),
}
583
+
584
/**
 * Build the name resolver used while walking one file.
 *
 * The returned function forwards to `index.resolveCall(fileId, name,
 * { allowAny })` and yields null when the index has no `resolveCall`.
 *
 * Two modes, chosen per lookup:
 *  - `forCall: true` — sets `allowAny`, permitting the loose "any
 *    same-named symbol" fallback. The `foo()` syntax is a strong enough
 *    hint that the name is a real call target, and without the fallback
 *    too many cross-file calls are missed.
 *  - `forCall: false` (default, for plain identifier references) — stays
 *    strict (same file or a file the caller actually imports), so a local
 *    variable that shares its name with an unrelated function elsewhere
 *    doesn't produce a noise edge.
 *
 * @param {string} fileId id of the file being walked
 * @param {object} index symbol index; `resolveCall` is optional
 * @returns {function(string, {forCall?: boolean}=): (object|null)}
 */
function makeResolver(fileId, index) {
  function resolve(name, options = {}) {
    const { forCall = false } = options
    if (!index.resolveCall) return null
    return index.resolveCall(fileId, name, { allowAny: forCall })
  }
  return resolve
}
599
+
600
/**
 * Public per-extension wrapper used by symbol-graph's parser registry.
 *
 * @param {string} ext file extension without the dot (a key of LANG_CONFIG)
 * @returns {object|null} null for an unregistered extension, otherwise an
 *   object with:
 *   - `extractSymbolsAsync(content, fileId)` → Promise of symbol records
 *   - `extractReferencesAsync(content, fileId, index)` → Promise of edges
 *   - `extractSymbols` / `extractReferences` — same results; they return
 *     the promise of the async variant
 *   Both extractors are best-effort: any failure while loading the
 *   grammar, parsing or walking yields an empty array.
 */
function makeParser(ext) {
  // Own-property check so names inherited from Object.prototype (e.g. an
  // extension spelled `constructor`) are not mistaken for a config entry.
  if (!Object.prototype.hasOwnProperty.call(LANG_CONFIG, ext)) return null
  const cfg = LANG_CONFIG[ext]
  if (!cfg) return null
  const lang = cfg.grammar
  const types = NODE_TYPES[lang]
  const kwSet = KEYWORDS[lang]

  // Parse `content` and run one walk() over it with `ctx`. Resolves to
  // true on success and false on any failure. The tree is released in
  // `finally`, so it is freed even when walk() throws part-way through.
  async function runWalk(content, ctx) {
    let tree = null
    try {
      const parser = await parserFor(lang)
      tree = parser.parse(content)
      walk(tree.rootNode, ctx)
      return true
    } catch (e) {
      // Deliberate best-effort: callers receive an empty result instead.
      return false
    } finally {
      try {
        tree?.delete?.()
      } catch (e) {
        // A failed cleanup must not discard an otherwise good result.
      }
    }
  }

  return {
    async extractSymbolsAsync(content, fileId) {
      const ctx = {
        fileId, content, types, lang,
        symbols: [], classStack: [], fnStack: [],
        passTwo: false,
      }
      return (await runWalk(content, ctx)) ? ctx.symbols : []
    },
    async extractReferencesAsync(content, fileId, index) {
      const ctx = {
        fileId, content, types, lang,
        symbols: [], classStack: [], fnStack: [],
        edges: [], seen: new Set(),
        kwSet,
        resolve: makeResolver(fileId, index),
        passTwo: true,
      }
      return (await runWalk(content, ctx)) ? ctx.edges : []
    },
    // The registry expects extractSymbols / extractReferences; these
    // return the promise of the async variant, which SymbolGraph.build()
    // can await since it is already async.
    extractSymbols(content, fileId) { return this.extractSymbolsAsync(content, fileId) },
    extractReferences(content, fileId, index) { return this.extractReferencesAsync(content, fileId, index) },
  }
}
646
+
647
/**
 * List the registered extensions whose grammar wasm file exists on disk
 * (some grammars are optional in tree-sitter-wasms).
 *
 * @returns {string[]} extensions from LANG_CONFIG, in registration order
 */
function availableExtensions() {
  return Object.keys(LANG_CONFIG).filter((ext) => {
    const { grammar } = LANG_CONFIG[ext]
    return fs.existsSync(wasmPath(grammar))
  })
}
657
+
658
+ module.exports = { makeParser, availableExtensions, LANG_CONFIG }