codesynapt 0.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +17 -0
- package/LICENSE +686 -0
- package/LICENSES.md +141 -0
- package/README.md +331 -0
- package/electron/main.cjs +2849 -0
- package/electron/plugin-loader.cjs +184 -0
- package/electron/preload.cjs +108 -0
- package/package.json +216 -0
- package/packages/core/bin/codesynapt-mcp.cjs +611 -0
- package/packages/core/bin/codesynapt.cjs +1933 -0
- package/packages/core/legacy.js +300 -0
- package/packages/core/lib/control-server.cjs +1539 -0
- package/packages/core/lib/embedding.cjs +89 -0
- package/packages/core/lib/logger.cjs +63 -0
- package/packages/core/lib/search-cache.cjs +140 -0
- package/packages/core/lib/search-worker.cjs +255 -0
- package/packages/core/lib/search.cjs +211 -0
- package/packages/core/lib/symbol-graph.cjs +402 -0
- package/packages/core/lib/symbol-parser-js.cjs +542 -0
- package/packages/core/lib/symbol-parser-misc.cjs +394 -0
- package/packages/core/lib/symbol-parser-py.cjs +215 -0
- package/packages/core/lib/symbol-parser-treesitter.cjs +658 -0
- package/packages/core/lib/symbol-parser-tsc.cjs +332 -0
- package/packages/core/monorepo.js +310 -0
- package/packages/core/parser.js +2234 -0
- package/packages/core/scanner.js +623 -0
- package/plugin-api/LICENSE +21 -0
- package/plugin-api/README.md +114 -0
- package/plugin-api/docs/01-getting-started.md +197 -0
- package/plugin-api/docs/02-concepts.md +269 -0
- package/plugin-api/docs/api-reference.md +463 -0
- package/plugin-api/docs/troubleshooting.md +332 -0
- package/plugin-api/docs/types/exporter.md +377 -0
- package/plugin-api/docs/types/theme.md +312 -0
- package/plugin-api/examples/hello-world-plugin/README.md +70 -0
- package/plugin-api/examples/hello-world-plugin/main.js +36 -0
- package/plugin-api/examples/hello-world-plugin/manifest.json +12 -0
- package/plugin-api/examples/mermaid-exporter/README.md +125 -0
- package/plugin-api/examples/mermaid-exporter/main.js +58 -0
- package/plugin-api/examples/mermaid-exporter/manifest.json +12 -0
- package/plugin-api/examples/rust-parser/README.md +71 -0
- package/plugin-api/examples/rust-parser/main.js +123 -0
- package/plugin-api/examples/rust-parser/manifest.json +12 -0
- package/plugin-api/examples/sunset-theme/README.md +95 -0
- package/plugin-api/examples/sunset-theme/manifest.json +12 -0
- package/plugin-api/examples/sunset-theme/theme.css +31 -0
- package/plugin-api/package.json +20 -0
- package/plugin-api/types.d.ts +395 -0
- package/public/app.js +6837 -0
- package/public/backend.js +285 -0
- package/public/index.html +647 -0
- package/public/plugin-host.js +321 -0
- package/public/style.css +4359 -0
- package/public/vendor/three.module.js +53044 -0
- package/scripts/competitor-watch.mjs +144 -0
- package/scripts/copy-vendor.js +21 -0
- package/scripts/download-bundled-node.cjs +53 -0
- package/scripts/fuses-after-pack.cjs +34 -0
- package/scripts/license-check.js +119 -0
- package/scripts/perf-test.js +200 -0
- package/server.js +132 -0
|
@@ -0,0 +1,658 @@
|
|
|
1
|
+
// Tree-sitter symbol parsers — exact AST instead of regex.
|
|
2
|
+
// Reaches parity with codegraph for the supported languages.
|
|
3
|
+
//
|
|
4
|
+
// One generic walker; per-language config tells it which AST node types
|
|
5
|
+
// represent functions / classes / methods / calls, and what field gives
|
|
6
|
+
// the symbol name. Resolver is the same name-based lookup the other
|
|
7
|
+
// parsers use (file-mode imports get folded in later by Stage 3.5).
|
|
8
|
+
//
|
|
9
|
+
// Loaded lazily on first /symbol/scan after a project loads; per-
|
|
10
|
+
// language Parser instances are cached across scans.
|
|
11
|
+
|
|
12
|
+
'use strict'
|
|
13
|
+
|
|
14
|
+
const fs = require('fs')
|
|
15
|
+
const path = require('path')
|
|
16
|
+
|
|
17
|
+
let _Parser = null // web-tree-sitter Parser class (after init)
|
|
18
|
+
let _initPromise = null
|
|
19
|
+
|
|
20
|
+
async function getParser() {
|
|
21
|
+
if (_Parser) return _Parser
|
|
22
|
+
if (!_initPromise) {
|
|
23
|
+
_initPromise = (async () => {
|
|
24
|
+
const Parser = require('web-tree-sitter')
|
|
25
|
+
await Parser.init()
|
|
26
|
+
_Parser = Parser
|
|
27
|
+
return Parser
|
|
28
|
+
})()
|
|
29
|
+
}
|
|
30
|
+
return _initPromise
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
const WASM_DIR = path.join(__dirname, '..', '..', '..', 'node_modules', 'tree-sitter-wasms', 'out')
|
|
34
|
+
// Absolute path of the grammar wasm for `name` inside WASM_DIR.
function wasmPath(name) {
  const fileName = 'tree-sitter-' + name + '.wasm'
  return path.join(WASM_DIR, fileName)
}
|
|
35
|
+
|
|
36
|
+
// Per-language config. Keys are CodeSynapt file extensions; value is
|
|
37
|
+
// the grammar wasm name + the AST node types we care about.
|
|
38
|
+
const LANG_CONFIG = {
|
|
39
|
+
js: { grammar: 'javascript' },
|
|
40
|
+
jsx: { grammar: 'javascript' },
|
|
41
|
+
mjs: { grammar: 'javascript' },
|
|
42
|
+
cjs: { grammar: 'javascript' },
|
|
43
|
+
// TypeScript wasms aren't in tree-sitter-wasms@0.1.13; fall back to
|
|
44
|
+
// the JS grammar (works for type annotations as stripped syntax).
|
|
45
|
+
ts: { grammar: 'javascript' },
|
|
46
|
+
tsx: { grammar: 'javascript' },
|
|
47
|
+
py: { grammar: 'python' },
|
|
48
|
+
go: { grammar: 'go' },
|
|
49
|
+
rs: { grammar: 'rust' },
|
|
50
|
+
java: { grammar: 'java' },
|
|
51
|
+
kt: { grammar: 'kotlin' },
|
|
52
|
+
kts: { grammar: 'kotlin' },
|
|
53
|
+
swift: { grammar: 'swift' },
|
|
54
|
+
// Phase B-3 — wider language reach. tree-sitter-wasms@0.1.13 ships
|
|
55
|
+
// grammars for all of these; we just register them.
|
|
56
|
+
cs: { grammar: 'c_sharp' },
|
|
57
|
+
rb: { grammar: 'ruby' },
|
|
58
|
+
php: { grammar: 'php' },
|
|
59
|
+
scala: { grammar: 'scala' },
|
|
60
|
+
lua: { grammar: 'lua' },
|
|
61
|
+
sh: { grammar: 'bash' },
|
|
62
|
+
bash: { grammar: 'bash' },
|
|
63
|
+
dart: { grammar: 'dart' },
|
|
64
|
+
elm: { grammar: 'elm' },
|
|
65
|
+
ex: { grammar: 'elixir' },
|
|
66
|
+
exs: { grammar: 'elixir' },
|
|
67
|
+
c: { grammar: 'c' },
|
|
68
|
+
h: { grammar: 'c' },
|
|
69
|
+
cpp: { grammar: 'cpp' },
|
|
70
|
+
cc: { grammar: 'cpp' },
|
|
71
|
+
hpp: { grammar: 'cpp' },
|
|
72
|
+
hh: { grammar: 'cpp' },
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
// Node types per grammar.
|
|
76
|
+
const NODE_TYPES = {
|
|
77
|
+
javascript: {
|
|
78
|
+
fn: ['function_declaration', 'function', 'arrow_function', 'generator_function_declaration'],
|
|
79
|
+
method: ['method_definition'],
|
|
80
|
+
cls: ['class_declaration'],
|
|
81
|
+
call: ['call_expression', 'new_expression'],
|
|
82
|
+
},
|
|
83
|
+
python: {
|
|
84
|
+
fn: ['function_definition'],
|
|
85
|
+
cls: ['class_definition'],
|
|
86
|
+
call: ['call'],
|
|
87
|
+
},
|
|
88
|
+
go: {
|
|
89
|
+
fn: ['function_declaration'],
|
|
90
|
+
method: ['method_declaration'],
|
|
91
|
+
// Use `type_spec` (the actual name+kind carrier), not its outer
|
|
92
|
+
// `type_declaration` wrapper. nameOf() can't extract a name from
|
|
93
|
+
// the wrapper (its only named child is the type_spec), so before
|
|
94
|
+
// this change every Go file produced zero struct/interface symbols
|
|
95
|
+
// and gin showed `byKind: { function: 1311 }` — no methods, no
|
|
96
|
+
// structs.
|
|
97
|
+
cls: ['type_spec'],
|
|
98
|
+
call: ['call_expression'],
|
|
99
|
+
},
|
|
100
|
+
rust: {
|
|
101
|
+
fn: ['function_item'],
|
|
102
|
+
cls: ['struct_item', 'enum_item', 'trait_item'],
|
|
103
|
+
impl: ['impl_item'],
|
|
104
|
+
call: ['call_expression', 'macro_invocation'],
|
|
105
|
+
},
|
|
106
|
+
java: {
|
|
107
|
+
fn: ['method_declaration', 'constructor_declaration'],
|
|
108
|
+
cls: ['class_declaration', 'interface_declaration', 'record_declaration', 'enum_declaration'],
|
|
109
|
+
call: ['method_invocation', 'object_creation_expression'],
|
|
110
|
+
},
|
|
111
|
+
kotlin: {
|
|
112
|
+
// Includes secondary constructors, property accessors (getter/setter
|
|
113
|
+
// bodies), and anonymous-initializer bodies — all of which contain
|
|
114
|
+
// call sites we were missing before.
|
|
115
|
+
fn: ['function_declaration', 'secondary_constructor',
|
|
116
|
+
'getter', 'setter', 'anonymous_initializer'],
|
|
117
|
+
cls: ['class_declaration', 'object_declaration', 'interface_declaration'],
|
|
118
|
+
call: ['call_expression', 'infix_expression'],
|
|
119
|
+
},
|
|
120
|
+
swift: {
|
|
121
|
+
// tree-sitter-swift already maps `extension X { ... }` to a
|
|
122
|
+
// `class_declaration` node (with a `user_type` wrapper for the
|
|
123
|
+
// target type), so we don't enroll extension_declaration
|
|
124
|
+
// separately — it would double-count and break enclosing scopes.
|
|
125
|
+
fn: ['function_declaration', 'init_declaration', 'deinit_declaration',
|
|
126
|
+
'subscript_declaration', 'computed_property'],
|
|
127
|
+
cls: ['class_declaration', 'protocol_declaration'],
|
|
128
|
+
call: ['call_expression'],
|
|
129
|
+
},
|
|
130
|
+
c_sharp: {
|
|
131
|
+
fn: ['method_declaration', 'constructor_declaration', 'local_function_statement'],
|
|
132
|
+
cls: ['class_declaration', 'interface_declaration', 'struct_declaration', 'record_declaration', 'enum_declaration'],
|
|
133
|
+
call: ['invocation_expression', 'object_creation_expression'],
|
|
134
|
+
},
|
|
135
|
+
ruby: {
|
|
136
|
+
fn: ['method', 'singleton_method'],
|
|
137
|
+
cls: ['class', 'module'],
|
|
138
|
+
call: ['call', 'method_call', 'identifier'], // Ruby calls often look like identifiers
|
|
139
|
+
},
|
|
140
|
+
php: {
|
|
141
|
+
fn: ['function_definition', 'method_declaration'],
|
|
142
|
+
cls: ['class_declaration', 'interface_declaration', 'trait_declaration'],
|
|
143
|
+
call: ['function_call_expression', 'method_call_expression', 'object_creation_expression'],
|
|
144
|
+
},
|
|
145
|
+
scala: {
|
|
146
|
+
fn: ['function_definition', 'function_declaration'],
|
|
147
|
+
cls: ['class_definition', 'object_definition', 'trait_definition', 'enum_definition'],
|
|
148
|
+
call: ['call_expression', 'generic_function'],
|
|
149
|
+
},
|
|
150
|
+
lua: {
|
|
151
|
+
fn: ['function_declaration', 'function_definition', 'local_function'],
|
|
152
|
+
cls: [], // Lua has no classes (table-based OOP)
|
|
153
|
+
call: ['function_call'],
|
|
154
|
+
},
|
|
155
|
+
bash: {
|
|
156
|
+
fn: ['function_definition'],
|
|
157
|
+
cls: [],
|
|
158
|
+
call: ['command'],
|
|
159
|
+
},
|
|
160
|
+
dart: {
|
|
161
|
+
fn: ['function_signature', 'function_body'],
|
|
162
|
+
cls: ['class_definition', 'mixin_declaration', 'extension_declaration'],
|
|
163
|
+
call: ['method_invocation'],
|
|
164
|
+
},
|
|
165
|
+
elm: {
|
|
166
|
+
fn: ['function_declaration_left'],
|
|
167
|
+
cls: ['type_declaration', 'type_alias_declaration'],
|
|
168
|
+
call: ['function_call_expr'],
|
|
169
|
+
},
|
|
170
|
+
elixir: {
|
|
171
|
+
fn: ['call'], // Elixir uses macros for `def`
|
|
172
|
+
cls: [],
|
|
173
|
+
call: ['call'],
|
|
174
|
+
},
|
|
175
|
+
c: {
|
|
176
|
+
fn: ['function_definition'],
|
|
177
|
+
cls: ['struct_specifier', 'union_specifier', 'enum_specifier'],
|
|
178
|
+
call: ['call_expression'],
|
|
179
|
+
},
|
|
180
|
+
cpp: {
|
|
181
|
+
fn: ['function_definition', 'declaration'],
|
|
182
|
+
cls: ['class_specifier', 'struct_specifier'],
|
|
183
|
+
call: ['call_expression'],
|
|
184
|
+
},
|
|
185
|
+
}
|
|
186
|
+
|
|
187
|
+
// Cache: grammar name → promise for the loaded Parser.Language
// (web-tree-sitter). Storing the promise rather than the resolved value
// means concurrent first-time callers share one read + compile.
const _langCache = new Map()

// Load (once) the Language for `grammar`.
// Rejects if the wasm file cannot be read or compiled; a failed load is
// evicted from the cache so a later call can retry.
async function loadLang(grammar) {
  let pending = _langCache.get(grammar)
  if (!pending) {
    pending = (async () => {
      const Parser = await getParser()
      const buf = await fs.promises.readFile(wasmPath(grammar))
      return Parser.Language.load(buf)
    })()
    _langCache.set(grammar, pending)
    pending.catch(() => {
      // Only evict our own entry, never a newer attempt.
      if (_langCache.get(grammar) === pending) _langCache.delete(grammar)
    })
  }
  return pending
}
|
|
197
|
+
|
|
198
|
+
// Cache: grammar name → Parser instance. Each instance has its language
// set once, when it is created, and is then reused for that grammar.
const _parserCache = new Map()

// Return the cached Parser for `grammar`, creating and configuring it on
// first use.
async function parserFor(grammar) {
  if (!_parserCache.has(grammar)) {
    const ParserClass = await getParser()
    const instance = new ParserClass()
    const language = await loadLang(grammar)
    instance.setLanguage(language)
    _parserCache.set(grammar, instance)
    return instance
  }
  return _parserCache.get(grammar)
}
|
|
209
|
+
|
|
210
|
+
// Build a symbol id of the form `<file>#<name>@<line>`.
function mkId(file, name, line) {
  return [file, '#', name, '@', line].join('')
}
|
|
211
|
+
|
|
212
|
+
// Best-effort name of a declaration node.
//
// Order of preference:
//   1. the grammar's `name` field, when the node exposes one;
//   2. the first direct child that is identifier-shaped;
//   3. for a `user_type` child, its first type/simple identifier (one
//      level of unwrapping only).
// Returns null when none of these yields a name.
function nameOf(node) {
  const viaField = node.childForFieldName?.('name')
  if (viaField) return viaField.text

  const DIRECT = ['identifier', 'simple_identifier', 'type_identifier',
    'field_identifier', 'property_identifier']
  const UNWRAPPED = ['type_identifier', 'simple_identifier']

  for (let idx = 0; idx < node.childCount; idx += 1) {
    const kid = node.child(idx)
    if (DIRECT.includes(kid.type)) return kid.text
    if (kid.type !== 'user_type') continue
    // Peek one level into the wrapper; keep scanning siblings if empty.
    for (let k = 0; k < kid.childCount; k += 1) {
      const inner = kid.child(k)
      if (UNWRAPPED.includes(inner.type)) return inner.text
    }
  }
  return null
}
|
|
238
|
+
|
|
239
|
+
// One-line signature for a declaration: the source text from the node's
// start up to (not including) its first `{`, whitespace-collapsed.
//
// The slice is capped at `maxLen` characters and never runs past the
// node's own end. Without that bound, a brace-less declaration such as
// an abstract method would borrow text from the following declaration.
// Returns '' for a missing node.
function signatureOf(node, content, maxLen = 200) {
  if (!node) return ''
  const start = node.startIndex ?? 0
  // Nodes without an endIndex fall back to the plain length cap.
  const limit = Math.min(start + maxLen, node.endIndex ?? Infinity)
  const brace = content.indexOf('{', start)
  const end = brace >= 0 && brace <= limit ? brace : limit
  return content.slice(start, end).trim().replace(/\s+/g, ' ')
}
|
|
246
|
+
|
|
247
|
+
// Collect the run of comment siblings immediately preceding `node` and
// return their text with comment markers stripped, whitespace collapsed,
// and length capped at 400 characters. Returns '' when the previous
// sibling is not a comment. `content` is accepted for signature parity
// with the other helpers and is not read.
function docOf(node, content) {
  const COMMENT_TYPES = ['comment', 'line_comment', 'block_comment']
  const blocks = []
  for (let prev = node.previousSibling;
    prev && COMMENT_TYPES.includes(prev.type);
    prev = prev.previousSibling) {
    blocks.unshift(prev.text)
  }
  if (!blocks.length) return ''
  // Join with newlines so the per-line (/m) marker strip sees the start
  // of every comment; joining with spaces only stripped the first one.
  return blocks.join('\n')
    .replace(/^\s*[/*#]+/gm, '')   // leading `//`, `/*`, `*`, `#`
    .replace(/\s*\*+\/\s*$/gm, '') // closing `*/` on one-line block comments
    .replace(/\s+/g, ' ')
    .trim()
    .slice(0, 400)
}
|
|
259
|
+
|
|
260
|
+
// Generic walker. Tracks the enclosing class/impl for method
// qualification and the enclosing function for call attribution.
//
// Pass one (ctx.passTwo === false) only collects symbols. Pass two also
// emits edges: inheritance from class-like nodes, `call` from call
// nodes, and `ref` / `type-ref` from plain identifiers.

// Identifier node types that may produce a reference edge.
const REFERENCE_NODE_TYPES = new Set([
  'identifier', 'simple_identifier', 'type_identifier', 'field_identifier',
])
// Parent node types (beyond fn/method/cls) that own the identifier as a
// declaration rather than a use.
const DECLARATION_PARENT_TYPES = new Set([
  'parameter', 'function_value_parameters', 'value_definition', 'simple_value_definition',
])
// Languages where any function nested in a class-like scope is a method.
const LEXICAL_METHOD_LANGS = new Set(['python', 'kotlin', 'swift', 'rust'])

// Last element of a stack array (undefined when empty).
function topOf(stack) {
  return stack[stack.length - 1]
}

// Push an edge unless the same (source, target, kind) was already seen.
function addEdgeOnce(ctx, source, target, kind, node) {
  const key = source + '|' + target + '|' + kind
  if (ctx.seen.has(key)) return
  ctx.seen.add(key)
  ctx.edges.push({ source, target, kind, line: node.startPosition.row + 1 })
}

// Assemble the symbol record shared by class-like and function symbols.
function buildSymbol(node, ctx, name, qualifiedName, kind) {
  const startLine = node.startPosition.row + 1
  return {
    id: mkId(ctx.fileId, qualifiedName, startLine),
    name,
    qualifiedName,
    kind,
    file: ctx.fileId,
    startLine,
    endLine: node.endPosition.row + 1,
    signature: signatureOf(node, ctx.content),
    doc: docOf(node, ctx.content),
    exported: isExported(node, ctx.content),
  }
}

// Record a class-like declaration; returns true if a scope was pushed.
function enterClass(node, ctx) {
  const name = nameOf(node)
  if (!name) return false
  const sym = buildSymbol(node, ctx, name, name, classKind(node.type, node))
  ctx.symbols.push(sym)
  ctx.classStack.push({ name, sym })
  // Inheritance edges only in pass two, once every symbol is indexed
  // and the parent name can be resolved.
  if (ctx.passTwo) {
    for (const { name: parentName, kind } of extractInheritance(node, ctx.lang)) {
      const target = ctx.resolve(parentName, { forCall: true })
      if (!target || target.id === sym.id) continue
      addEdgeOnce(ctx, sym.id, target.id, kind, node)
    }
  }
  return true
}

// Rust impl blocks: push the target type so nested methods are qualified.
function enterImpl(node, ctx) {
  const targetType = node.childForFieldName?.('type')?.text
    || node.children.find((c) => c.type === 'type_identifier')?.text
  if (!targetType) return false
  ctx.classStack.push({ name: targetType, sym: null })
  return true
}

// Record a function/method declaration; returns true if it was pushed
// on the function stack.
function enterFunction(node, ctx) {
  const name = nameOf(node)
  if (!name) return false
  const declaredAsMethod = Boolean(ctx.types.method?.includes(node.type))
  // Go methods name their owner in a `receiver` field instead of being
  // nested inside the type.
  const methodOwner = ctx.lang === 'go' && declaredAsMethod
    ? extractGoReceiver(node)
    : null
  const enclosing = topOf(ctx.classStack)
  const lexicallyMethod = Boolean(enclosing)
    && (declaredAsMethod || LEXICAL_METHOD_LANGS.has(ctx.lang))
  let qualifiedName = name
  if (methodOwner) qualifiedName = `${methodOwner}.${name}`
  else if (lexicallyMethod) qualifiedName = `${enclosing.name}.${name}`
  const kind = methodOwner || lexicallyMethod ? 'method' : 'function'
  const sym = buildSymbol(node, ctx, name, qualifiedName, kind)
  ctx.symbols.push(sym)
  ctx.fnStack.push(sym.id)
  return true
}

// Emit a `call` edge from the enclosing function to the resolved callee.
function linkCall(node, ctx) {
  const src = topOf(ctx.fnStack)
  if (!src) return
  const calleeName = extractCalleeName(node)
  if (!calleeName || ctx.kwSet?.has(calleeName)) return
  // Calls use the loose any-file fallback; plain references stay strict.
  const target = ctx.resolve(calleeName, { forCall: true })
  if (!target || target.id === src) return
  addEdgeOnce(ctx, src, target.id, 'call', node)
}

// Emit a `ref` / `type-ref` edge for a plain identifier use.
function linkReference(node, ctx) {
  const src = topOf(ctx.fnStack)
  if (!src) return
  const name = node.text
  if (!name || ctx.kwSet?.has(name) || name.length <= 1) return
  const parent = node.parent
  if (parent) {
    const cfg = ctx.types
    const pt = parent.type
    // The parent declares this identifier.
    if (cfg.fn?.includes(pt) || cfg.method?.includes(pt) || cfg.cls?.includes(pt)
      || DECLARATION_PARENT_TYPES.has(pt)) return
    // The identifier is a direct child of a call node (the callee).
    if (cfg.call?.includes(pt)) return
  }
  const target = ctx.resolve(name)
  if (!target || target.id === src) return
  const edgeKind = node.type === 'type_identifier' ? 'type-ref' : 'ref'
  addEdgeOnce(ctx, src, target.id, edgeKind, node)
}

function walk(node, ctx) {
  if (!node) return
  const type = node.type
  const cfg = ctx.types

  // A node is classified as at most one of class / impl / function.
  let scopePushed = false
  let fnPushed = false
  if (cfg.cls?.includes(type)) {
    scopePushed = enterClass(node, ctx)
  } else if (cfg.impl?.includes(type)) {
    scopePushed = enterImpl(node, ctx)
  } else if (cfg.fn?.includes(type) || cfg.method?.includes(type)) {
    fnPushed = enterFunction(node, ctx)
  }

  // Edge collection is independent of the classification above.
  if (ctx.passTwo) {
    if (cfg.call?.includes(type)) linkCall(node, ctx)
    if (REFERENCE_NODE_TYPES.has(type)) linkReference(node, ctx)
  }

  for (let i = 0; i < node.childCount; i += 1) walk(node.child(i), ctx)

  if (fnPushed) ctx.fnStack.pop()
  if (scopePushed) ctx.classStack.pop()
}
|
|
420
|
+
|
|
421
|
+
// Pull inheritance targets from a class-like node. Per-language node
// names vary; this is best-effort and silently skips unknown shapes.
// Returns [{ name: 'Bar', kind: 'extends' | 'implements' }], with each
// (name, kind) pair listed once, in discovery order.
//
// Shapes handled beyond the grammar-provided `superclass` /
// `parent_class` fields:
//   - JavaScript `class_heritage` (`class A extends B`);
//   - qualified bases (`ns.Base` → "Base", not "ns");
//   - generic bases (`Base<T>` → "Base"; type arguments are skipped);
//   - interface lists wrapped in `type_list` / `interface_type_list`
//     (every entry is reported, not just the last);
//   - Python keyword arguments (`metaclass=Meta`) are not base classes.
function extractInheritance(node, lang) {
  const out = []
  const emitted = new Set()
  const add = (name, kind) => {
    if (!name) return
    const key = kind + '|' + name
    if (emitted.has(key)) return
    emitted.add(key)
    out.push({ name, kind })
  }

  const NAME_LEAVES = new Set(['type_identifier', 'identifier', 'simple_identifier', 'property_identifier'])
  // Subtrees that never name the base type itself.
  const NOT_A_BASE = new Set(['type_arguments', 'type_argument_list', 'keyword_argument'])
  const LIST_WRAPPERS = new Set(['type_list', 'interface_type_list'])
  const EXTENDS_NODES = new Set([
    'superclass', 'extends_type_clause', 'class_inheritance_modifiers',
    'class_heritage', 'inheritance_specifier',
  ])
  const IMPLEMENTS_NODES = new Set(['super_interfaces', 'implements_clause', 'super_interface_specification'])

  // Member / qualified — take the rightmost name-like leaf.
  const walkType = (n) => {
    if (!n || NOT_A_BASE.has(n.type)) return null
    if (NAME_LEAVES.has(n.type)) return n.text
    for (let i = n.namedChildCount - 1; i >= 0; i--) {
      const r = walkType(n.namedChild(i))
      if (r) return r
    }
    return null
  }

  // Report every type directly inside `container`, looking through one
  // list wrapper where the grammar inserts it.
  const addEach = (container, kind) => {
    for (let j = 0; j < container.namedChildCount; j++) {
      const item = container.namedChild(j)
      if (LIST_WRAPPERS.has(item.type)) addEach(item, kind)
      else add(walkType(item), kind)
    }
  }

  // Standard fields when grammars expose them.
  const superField = node.childForFieldName?.('superclass')
    || node.childForFieldName?.('parent_class')
  if (superField) add(walkType(superField), 'extends')

  // Walk named children for inheritance-related sub-nodes.
  for (let i = 0; i < node.namedChildCount; i++) {
    const c = node.namedChild(i)
    const ct = c.type
    if (EXTENDS_NODES.has(ct)) {
      add(walkType(c), 'extends')
    } else if (IMPLEMENTS_NODES.has(ct)) {
      addEach(c, 'implements')
    } else if ((ct === 'argument_list' && lang === 'python')
      || (ct === 'type_spec_list' && lang === 'go')) {
      // Python `class Foo(Bar, Baz):` / Go embedded types.
      addEach(c, 'extends')
    }
  }
  return out
}
|
|
476
|
+
|
|
477
|
+
// Find the type a Go method hangs off by searching its `receiver`
// field. Receivers look like `(e *Engine)` or `(c Context)`; the type
// can be wrapped (e.g. in a `pointer_type`), so this does a depth-first,
// left-to-right search for the first `type_identifier`.
// Returns its text, or null when there is no receiver or no such node.
function extractGoReceiver(node) {
  const receiver = node.childForFieldName?.('receiver')
  if (!receiver) return null

  const pending = [receiver]
  while (pending.length > 0) {
    const current = pending.pop()
    if (!current) continue
    if (current.type === 'type_identifier') return current.text
    // Push right-to-left so the leftmost child is visited first.
    for (let i = current.namedChildCount - 1; i >= 0; i -= 1) {
      pending.push(current.namedChild(i))
    }
  }
  return null
}
|
|
494
|
+
|
|
495
|
+
// Map a class-like node type to a symbol kind ('interface', 'struct',
// 'enum' or 'class').
//
// The kind is first guessed from substrings of the node type, checked in
// the order listed below. Go's `type_spec` carries no such hint, so its
// direct children are inspected for `struct_type` / `interface_type`.
// Anything unrecognised is reported as 'class'.
function classKind(nodeType, node = null) {
  const BY_SUBSTRING = [
    ['interface', 'interface'],
    ['trait', 'interface'],
    ['protocol', 'interface'],
    ['struct', 'struct'],
    ['enum', 'enum'],
    ['record', 'class'],
  ]
  for (const [needle, kind] of BY_SUBSTRING) {
    if (nodeType.includes(needle)) return kind
  }

  if (nodeType !== 'type_spec' || !node) return 'class'

  // Descend one level to recover the real kind of the Go type.
  for (let idx = 0; idx < node.childCount; idx += 1) {
    switch (node.child(idx).type) {
      case 'struct_type': return 'struct'
      case 'interface_type': return 'interface'
      default: break
    }
  }
  return 'class'
}
|
|
515
|
+
|
|
516
|
+
// Heuristic "is this symbol visible outside its file?" check.
//
// Returns true when any of the following holds:
//   - one of the words pub / public / export / open appears in the
//     first ~120 characters of the node's text;
//   - the node is the direct child of an `export_statement` (in
//     JavaScript the `export` keyword belongs to that parent, so it is
//     not part of the declaration's own text);
//   - the declaration's name starts with an upper-case ASCII letter,
//     where the name is the grammar's `name` field for text starting
//     with `func` (Go functions and methods), otherwise the first word
//     of the text (e.g. a Go `type_spec`, which starts at the name).
// NOTE(review): the `func` rule also applies to any other language
// whose declarations begin with `func` and expose a `name` field.
function isExported(node, content) {
  const head = content.slice(node.startIndex, Math.min(node.startIndex + 120, node.endIndex))
  if (/\b(pub|public|export|open)\b/.test(head)) return true
  if (node.parent?.type === 'export_statement') return true
  // Go: PascalCase identifiers are exported. For `func …` the first
  // word is the keyword itself, so take the declared name instead.
  const declaredName = /^func\b/.test(head)
    ? node.childForFieldName?.('name')?.text
    : head.match(/\b([A-Za-z_][A-Za-z0-9_]*)\b/)?.[1]
  return Boolean(declaredName && /^[A-Z]/.test(declaredName))
}
|
|
526
|
+
|
|
527
|
+
// Identifier-shaped node names across grammars.
const IDENT_TYPES = new Set([
  'identifier', 'simple_identifier', 'field_identifier', 'type_identifier',
  'property_identifier', 'shorthand_property_identifier',
])
// Node types for a dotted / qualified callee (`a.b.c`, `a::b`); the
// callee name is the rightmost identifier inside such a node.
const NAV_TYPES = new Set([
  'member_expression', 'selector_expression', 'field_expression',
  'navigation_expression', 'method_invocation',
  // Qualified-callee shapes from further grammars. Without these,
  // calls such as Python `obj.method()` or Rust `Type::new()` produce
  // no callee name at all.
  'attribute',                // Python  obj.method
  'scoped_identifier',        // Rust    path::name
  'member_access_expression', // C#      obj.Member
  'qualified_identifier',     // C++     ns::name
])
|
|
536
|
+
|
|
537
|
+
// Text of the rightmost identifier-shaped node in `node`'s subtree —
// `foo.bar.baz` → "baz". Named children are searched depth-first from
// right to left; returns null for a missing node or when the subtree
// holds no identifier.
function lastIdentText(node) {
  const pending = [node]
  while (pending.length > 0) {
    const current = pending.pop()
    if (!current) continue
    if (IDENT_TYPES.has(current.type)) return current.text
    // Push left-to-right so the rightmost child is popped first.
    for (let i = 0; i < current.namedChildCount; i += 1) {
      pending.push(current.namedChild(i))
    }
  }
  return null
}
|
|
547
|
+
|
|
548
|
+
// Name of the function/method a call node invokes, or null.
//
// 1) Named fields, tried in order: `function`, `name`, then `method`.
//    A field whose node is identifier-shaped yields its text; a
//    navigation node (`a.b.c`) yields its rightmost identifier. Checking
//    `method` stops a receiver from being reported as the callee when
//    the grammar puts the method name in that field.
// 2) Fallback for grammars without such fields: the first named child
//    that is an identifier or a navigation node.
function extractCalleeName(callNode) {
  for (const field of ['function', 'name', 'method']) {
    const callee = callNode.childForFieldName?.(field)
    if (!callee) continue
    if (IDENT_TYPES.has(callee.type)) return callee.text
    if (NAV_TYPES.has(callee.type)) return lastIdentText(callee)
    // Unrecognised shape (e.g. a call returning a function) — try the
    // next field rather than guessing.
  }
  for (let i = 0; i < callNode.namedChildCount; i++) {
    const c = callNode.namedChild(i)
    if (IDENT_TYPES.has(c.type)) return c.text
    if (NAV_TYPES.has(c.type)) return lastIdentText(c)
  }
  return null
}
|
|
572
|
+
|
|
573
|
+
// Keywords to skip when matching call expressions.
|
|
574
|
+
const KEYWORDS = {
|
|
575
|
+
javascript: new Set(['if','else','for','while','return','new','typeof','instanceof','await','async','function','class','const','let','var','true','false','null','undefined','console','require','import','export']),
|
|
576
|
+
python: new Set(['if','elif','else','while','for','return','def','class','import','from','None','True','False','self','print','len','range','int','str','float','bool','list','dict','set','tuple','isinstance','type','super','open','sorted','enumerate','zip','map','filter','any','all','sum','min','max','abs','round','getattr','setattr','hasattr','format','repr','hash','id','object','property','staticmethod','classmethod']),
|
|
577
|
+
go: new Set(['if','else','for','range','return','break','continue','switch','case','func','type','var','const','package','import','interface','struct','map','chan','make','new','len','cap','append','copy','delete','panic','recover','close','true','false','nil','print','println']),
|
|
578
|
+
rust: new Set(['fn','let','mut','if','else','while','for','loop','match','return','break','continue','use','mod','pub','crate','self','super','impl','trait','struct','enum','type','as','where','async','await','dyn','ref','move','Some','None','Ok','Err','true','false','unsafe','extern','static','const','Box','Vec','String','format!','println!','print!','vec!']),
|
|
579
|
+
java: new Set(['if','else','while','for','do','switch','case','break','continue','return','new','this','super','try','catch','finally','throw','throws','class','interface','enum','extends','implements','public','private','protected','static','final','abstract','synchronized','void','int','long','short','byte','char','boolean','float','double','String','true','false','null','import','package','var']),
|
|
580
|
+
kotlin: new Set(['if','else','for','while','do','when','return','break','continue','fun','val','var','class','object','interface','enum','sealed','data','companion','public','private','internal','protected','open','final','abstract','override','suspend','inline','crossinline','noinline','this','super','it','true','false','null']),
|
|
581
|
+
swift: new Set(['if','else','for','in','while','repeat','do','switch','case','break','continue','return','throw','throws','try','catch','rethrows','defer','guard','where','as','is','let','var','func','class','struct','enum','protocol','extension','import','self','super','init','deinit','static','final','public','private','internal','open','fileprivate','true','false','nil','some','any','Self','Optional','print','String','Int','Bool','Double','Float','Array','Dictionary']),
|
|
582
|
+
}
|
|
583
|
+
|
|
584
|
+
// Build the name → symbol resolver used during pass two.
//
// The returned `resolve(name, { forCall })` delegates to
// `index.resolveCall(fileId, name, { allowAny })`, where `allowAny`
// mirrors `forCall`:
//   - forCall = true  → loose mode. A `foo()` call is a strong enough
//     hint that any same-named symbol may be accepted; being stricter
//     misses too many cross-file calls.
//   - forCall = false → strict mode (the default, for plain identifier
//     references), so a local variable that merely shares a name with an
//     unrelated function does not produce a noise edge.
// Resolves to null when the index has no `resolveCall`.
function makeResolver(fileId, index) {
  return function resolve(name, { forCall = false } = {}) {
    if (!index.resolveCall) return null
    return index.resolveCall(fileId, name, { allowAny: forCall })
  }
}
|
|
599
|
+
|
|
600
|
+
// Public per-extension wrapper used by symbol-graph's parser registry.
//
// Returns null for an extension that has no LANG_CONFIG entry. Otherwise
// returns an object with:
//   extractSymbolsAsync(content, fileId)           → Promise<symbol[]>
//   extractReferencesAsync(content, fileId, index) → Promise<edge[]>
//   extractSymbols / extractReferences             → aliases of the above
// Extraction is best-effort: any failure (grammar load, parse, walk)
// yields an empty array rather than a rejection.
function makeParser(ext) {
  const cfg = LANG_CONFIG[ext]
  if (!cfg) return null
  const lang = cfg.grammar
  const types = NODE_TYPES[lang]
  const kwSet = KEYWORDS[lang]

  // Parse `content`, walk it with a fresh context extended by
  // `makeExtra()`, and return that context (or null on any failure).
  // The tree is always deleted, including when the walk throws, so a
  // failing file cannot leak it.
  async function runWalk(content, fileId, makeExtra) {
    let tree = null
    try {
      const parser = await parserFor(lang)
      tree = parser.parse(content)
      const ctx = {
        fileId, content, types, lang,
        symbols: [], classStack: [], fnStack: [],
        ...makeExtra(),
      }
      walk(tree.rootNode, ctx)
      return ctx
    } catch (e) {
      // Deliberate best-effort: one unparseable file must not abort a scan.
      return null
    } finally {
      tree?.delete?.()
    }
  }

  return {
    async extractSymbolsAsync(content, fileId) {
      const ctx = await runWalk(content, fileId, () => ({ passTwo: false }))
      return ctx ? ctx.symbols : []
    },
    async extractReferencesAsync(content, fileId, index) {
      const ctx = await runWalk(content, fileId, () => ({
        edges: [], seen: new Set(),
        kwSet,
        resolve: makeResolver(fileId, index),
        passTwo: true,
      }))
      return ctx ? ctx.edges : []
    },
    // Sync-named stubs — registry expects extractSymbols/extractReferences
    // but SymbolGraph.build() can also await them since it's already async.
    extractSymbols(content, fileId) { return this.extractSymbolsAsync(content, fileId) },
    extractReferences(content, fileId, index) { return this.extractReferencesAsync(content, fileId, index) },
  }
}
|
|
646
|
+
|
|
647
|
+
// Probe which grammars actually ship in tree-sitter-wasms (some are
// optional). Returns the LANG_CONFIG extensions, in declaration order,
// whose grammar wasm file exists on disk.
function availableExtensions() {
  return Object.keys(LANG_CONFIG).filter((ext) => {
    const { grammar } = LANG_CONFIG[ext]
    return fs.existsSync(wasmPath(grammar))
  })
}
|
|
657
|
+
|
|
658
|
+
module.exports = { makeParser, availableExtensions, LANG_CONFIG }
|