codesynapt 0.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +17 -0
- package/LICENSE +686 -0
- package/LICENSES.md +141 -0
- package/README.md +331 -0
- package/electron/main.cjs +2849 -0
- package/electron/plugin-loader.cjs +184 -0
- package/electron/preload.cjs +108 -0
- package/package.json +216 -0
- package/packages/core/bin/codesynapt-mcp.cjs +611 -0
- package/packages/core/bin/codesynapt.cjs +1933 -0
- package/packages/core/legacy.js +300 -0
- package/packages/core/lib/control-server.cjs +1539 -0
- package/packages/core/lib/embedding.cjs +89 -0
- package/packages/core/lib/logger.cjs +63 -0
- package/packages/core/lib/search-cache.cjs +140 -0
- package/packages/core/lib/search-worker.cjs +255 -0
- package/packages/core/lib/search.cjs +211 -0
- package/packages/core/lib/symbol-graph.cjs +402 -0
- package/packages/core/lib/symbol-parser-js.cjs +542 -0
- package/packages/core/lib/symbol-parser-misc.cjs +394 -0
- package/packages/core/lib/symbol-parser-py.cjs +215 -0
- package/packages/core/lib/symbol-parser-treesitter.cjs +658 -0
- package/packages/core/lib/symbol-parser-tsc.cjs +332 -0
- package/packages/core/monorepo.js +310 -0
- package/packages/core/parser.js +2234 -0
- package/packages/core/scanner.js +623 -0
- package/plugin-api/LICENSE +21 -0
- package/plugin-api/README.md +114 -0
- package/plugin-api/docs/01-getting-started.md +197 -0
- package/plugin-api/docs/02-concepts.md +269 -0
- package/plugin-api/docs/api-reference.md +463 -0
- package/plugin-api/docs/troubleshooting.md +332 -0
- package/plugin-api/docs/types/exporter.md +377 -0
- package/plugin-api/docs/types/theme.md +312 -0
- package/plugin-api/examples/hello-world-plugin/README.md +70 -0
- package/plugin-api/examples/hello-world-plugin/main.js +36 -0
- package/plugin-api/examples/hello-world-plugin/manifest.json +12 -0
- package/plugin-api/examples/mermaid-exporter/README.md +125 -0
- package/plugin-api/examples/mermaid-exporter/main.js +58 -0
- package/plugin-api/examples/mermaid-exporter/manifest.json +12 -0
- package/plugin-api/examples/rust-parser/README.md +71 -0
- package/plugin-api/examples/rust-parser/main.js +123 -0
- package/plugin-api/examples/rust-parser/manifest.json +12 -0
- package/plugin-api/examples/sunset-theme/README.md +95 -0
- package/plugin-api/examples/sunset-theme/manifest.json +12 -0
- package/plugin-api/examples/sunset-theme/theme.css +31 -0
- package/plugin-api/package.json +20 -0
- package/plugin-api/types.d.ts +395 -0
- package/public/app.js +6837 -0
- package/public/backend.js +285 -0
- package/public/index.html +647 -0
- package/public/plugin-host.js +321 -0
- package/public/style.css +4359 -0
- package/public/vendor/three.module.js +53044 -0
- package/scripts/competitor-watch.mjs +144 -0
- package/scripts/copy-vendor.js +21 -0
- package/scripts/download-bundled-node.cjs +53 -0
- package/scripts/fuses-after-pack.cjs +34 -0
- package/scripts/license-check.js +119 -0
- package/scripts/perf-test.js +200 -0
- package/server.js +132 -0
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
// search.cjs — full-text search over the scanner's file list.
|
|
2
|
+
//
|
|
3
|
+
// Concurrency / safety:
|
|
4
|
+
// - concurrency-limited (default 32) instead of all-at-once Promise.all
|
|
5
|
+
// → bounds libuv thread-pool queue depth, avoids stall on big repos.
|
|
6
|
+
// - per-file read timeout (default 5s) → if a single file's read hangs (e.g. an
|
|
7
|
+
// OS lock), we skip it and continue; the synchronous scan is not covered.
|
|
8
|
+
// - skipped files are reported in the response so the caller knows what
|
|
9
|
+
// wasn't searched.
|
|
10
|
+
|
|
11
|
+
// Cap on the total number of matches returned when the caller passes no `max`.
const DEFAULT_MAX = 100
// Number of files processed in parallel when the caller passes no `concurrency`.
const DEFAULT_CONCURRENCY = 32
// Per-file read timeout in milliseconds (5 seconds) when no `fileTimeoutMs` is given.
const DEFAULT_FILE_TIMEOUT_MS = 5 * 1000
// Characters of surrounding text kept on each side of a match in its snippet.
const SNIPPET_CONTEXT = 50
|
|
15
|
+
|
|
16
|
+
/**
 * Find occurrences of `q` in `text` and describe each one.
 *
 * Both modes walk the text once, front to back. Line and column are derived
 * from a forward-only newline tracker, so the cost is linear in the text
 * size instead of re-scanning the file for every line or every hit.
 *
 * @param {string} text - file content to search
 * @param {string} q - literal needle, or regex source when `opts.regex` is set
 * @param {{regex?: boolean, caseSensitive?: boolean}} opts - search mode flags
 * @param {number} maxPerFile - stop after this many matches (checked after
 *   each push, so at least one match is returned when any exists)
 * @returns {Array<{line: number, col: number, snippet: string}>} 1-based
 *   line/col of each match start plus a snippet with SNIPPET_CONTEXT chars of
 *   context per side and newlines flattened to spaces. Returns [] for an
 *   invalid regex or an empty literal needle.
 */
function scanContent(text, q, opts, maxPerFile) {
  const matches = []

  // Snippet around [idx, idx + len): context on both sides, newlines → spaces.
  const snippetAt = (idx, len) => {
    const from = Math.max(0, idx - SNIPPET_CONTEXT)
    const to = Math.min(text.length, idx + len + SNIPPET_CONTEXT)
    return text.slice(from, to).replace(/\r?\n/g, ' ')
  }

  // Forward-only line tracker over `source`. Offsets must be passed in
  // non-decreasing order (true for both scan loops below); each newline is
  // visited at most once across all calls.
  const makeLocator = (source) => {
    let line = 1
    let lineStart = 0
    let nextNl = source.indexOf('\n')
    return (idx) => {
      while (nextNl !== -1 && nextNl < idx) {
        line++
        lineStart = nextNl + 1
        nextNl = source.indexOf('\n', lineStart)
      }
      return { line, col: idx - lineStart + 1 }
    }
  }

  if (opts.regex) {
    let re
    try { re = new RegExp(q, opts.caseSensitive ? 'g' : 'gi') }
    catch { return [] } // invalid pattern: nothing to report for this file
    const locate = makeLocator(text)
    let m
    while ((m = re.exec(text)) !== null) {
      const { line, col } = locate(m.index)
      matches.push({ line, col, snippet: snippetAt(m.index, m[0].length) })
      if (matches.length >= maxPerFile) break
      // Zero-length match: step forward manually or exec() would loop forever.
      if (m.index === re.lastIndex) re.lastIndex++
    }
    return matches
  }

  const needle = opts.caseSensitive ? q : q.toLowerCase()
  // An empty needle "matches" at every offset; treat it as no matches.
  if (!needle) return matches

  // NOTE: toLowerCase() can change the length of a string for a few code
  // points (e.g. U+0130), in which case offsets found in `hay` drift relative
  // to `text` and the snippet may be shifted by a character or two.
  const hay = opts.caseSensitive ? text : text.toLowerCase()
  const locate = makeLocator(hay)
  let idx = hay.indexOf(needle)
  while (idx !== -1) {
    const { line, col } = locate(idx)
    matches.push({ line, col, snippet: snippetAt(idx, needle.length) })
    if (matches.length >= maxPerFile) break
    // Non-overlapping matches, same as the regex path.
    idx = hay.indexOf(needle, idx + needle.length)
  }
  return matches
}
|
|
67
|
+
|
|
68
|
+
/**
 * Settle with `promise`, or reject once `ms` milliseconds have elapsed.
 *
 * @param {Promise<*>} promise - the operation to wait for
 * @param {number} ms - time budget in milliseconds
 * @param {string} label - included in the timeout error message
 * @returns {Promise<*>} the value of `promise`; rejects with
 *   Error(`timeout <ms>ms: <label>`) if the timer fires first
 */
async function withTimeout(promise, ms, label) {
  let timer
  const expiry = new Promise((_resolve, reject) => {
    timer = setTimeout(() => {
      reject(new Error(`timeout ${ms}ms: ${label}`))
    }, ms)
  })
  try {
    return await Promise.race([promise, expiry])
  } finally {
    // Always release the timer so it cannot keep the process alive.
    clearTimeout(timer)
  }
}
|
|
75
|
+
|
|
76
|
+
// Run `tasks` with at most `concurrency` in flight at once.
|
|
77
|
+
// Stops accepting new tasks once `shouldStop()` returns true.
|
|
78
|
+
/**
 * Run `tasks` with a bounded number in flight at once.
 *
 * @param {Array<() => Promise<*>>} tasks - thunks started in array order
 * @param {number} concurrency - maximum tasks in flight; values below 1 (or
 *   NaN) are treated as 1 so the work is never silently dropped
 * @param {() => boolean} [shouldStop] - polled before each task is started;
 *   once it returns true no further tasks are started (running ones finish)
 * @returns {Promise<void>} resolves when every worker has drained; rejects
 *   with the first task rejection
 */
async function runConcurrent(tasks, concurrency, shouldStop) {
  // A limit of 0 / negative / NaN would otherwise spawn zero workers and
  // return without running anything.
  const limit = concurrency >= 1 ? Math.floor(concurrency) : 1
  let next = 0

  async function worker() {
    while (next < tasks.length) {
      if (shouldStop && shouldStop()) return
      const idx = next++
      await tasks[idx]()
    }
  }

  const workers = Array.from({ length: Math.min(limit, tasks.length) }, () => worker())
  await Promise.all(workers)
}
|
|
90
|
+
|
|
91
|
+
/**
 * Summarise a list of per-file durations (ms) for the debug payload.
 * Sorts once and derives every statistic from the sorted copy; no argument
 * spreading, so the sample count is not limited by the engine's call-argument
 * limit.
 *
 * @param {number[]} samples - durations in milliseconds
 * @returns {{count: number, sumMs: number, avgMs: number, p50: number,
 *   p95: number, p99: number, max: number}}
 */
function summarizeTimings(samples) {
  const sorted = [...samples].sort((a, b) => a - b)
  const count = sorted.length
  const sumMs = sorted.reduce((acc, x) => acc + x, 0)
  const pctile = (p) => sorted[Math.min(count - 1, Math.floor(p * count))] || 0
  return {
    count,
    sumMs,
    avgMs: count ? +(sumMs / count).toFixed(2) : 0,
    p50: pctile(0.5),
    p95: pctile(0.95),
    p99: pctile(0.99),
    max: count ? Math.max(0, sorted[count - 1]) : 0,
  }
}

/**
 * Full-text search over every file known to `scanner`.
 *
 * Files are read through `cache.getText` with bounded concurrency and a
 * per-file read timeout; files that fail to read or scan are listed in
 * `skipped` instead of failing the whole search. Only the read is covered by
 * the timeout — the scan itself runs synchronously.
 *
 * @param {{files: Map<*, {id: string, absPath: string}>}} scanner - file index
 * @param {{getText: Function, stats: Function}} cache - text cache; getText is
 *   called as getText(id, absPath) and is expected to return a promise
 * @param {object} opts
 * @param {string} opts.q - query (literal text, or regex source with `regex`)
 * @param {number} [opts.max] - total match cap (default DEFAULT_MAX)
 * @param {number} [opts.concurrency] - parallel reads (default DEFAULT_CONCURRENCY)
 * @param {number} [opts.fileTimeoutMs] - per-file read timeout (default DEFAULT_FILE_TIMEOUT_MS)
 * @param {boolean} [opts.regex] - treat `q` as a regular expression
 * @param {boolean} [opts.caseSensitive] - match case exactly
 * @param {number} [opts.maxPerFile] - match cap per file (default 10)
 * @param {boolean} [opts.debug] - attach read/scan timing statistics
 * @returns {Promise<object>} result with matches, skipped files, counters,
 *   `truncated` (true when `max` was reached) and cache statistics
 * @throws {Error} when `q` is missing / not a string, or is an invalid regex
 */
async function search(scanner, cache, opts = {}) {
  const t0 = Date.now()
  const q = opts.q
  if (!q || typeof q !== 'string') throw new Error('q (query string) is required')

  const max = opts.max ?? DEFAULT_MAX
  const concurrency = opts.concurrency ?? DEFAULT_CONCURRENCY
  const fileTimeoutMs = opts.fileTimeoutMs ?? DEFAULT_FILE_TIMEOUT_MS
  const regex = !!opts.regex
  const caseSensitive = !!opts.caseSensitive
  const maxPerFile = opts.maxPerFile ?? 10
  const debug = !!opts.debug

  // Validate the pattern once up front so the caller gets a clear error
  // instead of an empty result.
  if (regex) {
    try { new RegExp(q) }
    catch (e) { throw new Error(`invalid regex: ${e.message}`) }
  }

  const tFilesStart = Date.now()
  const files = [...scanner.files.values()]
  const totalFiles = files.length
  const filesEnumMs = Date.now() - tFilesStart

  const matches = []
  const skipped = []
  let filesScanned = 0
  let filesMatched = 0
  let stopFlag = false

  // Timing buckets per file (debug only)
  const timings = debug ? { readMs: [], scanMs: [] } : null

  const tasks = files.map((f) => async () => {
    if (stopFlag) return
    let text
    const tRead = debug ? Date.now() : 0
    try {
      text = await withTimeout(
        cache.getText(f.id, f.absPath),
        fileTimeoutMs,
        f.id,
      )
    } catch (e) {
      // The rejection value is not guaranteed to be an Error; never let the
      // classification itself throw and abort the whole search.
      const message = typeof e?.message === 'string' ? e.message : ''
      skipped.push({ id: f.id, reason: message.startsWith('timeout') ? 'timeout' : 'read-error' })
      return
    }
    if (debug) timings.readMs.push(Date.now() - tRead)
    // The cap may have been reached while this file was being read.
    if (stopFlag) return

    filesScanned++
    const tScan = debug ? Date.now() : 0
    let fileMatches
    try {
      fileMatches = scanContent(text, q, { regex, caseSensitive }, maxPerFile)
    } catch {
      skipped.push({ id: f.id, reason: 'scan-error' })
      return
    }
    if (debug) timings.scanMs.push(Date.now() - tScan)
    if (fileMatches.length === 0) return

    filesMatched++
    for (const m of fileMatches) {
      matches.push({ id: f.id, line: m.line, col: m.col, snippet: m.snippet, totalInFile: fileMatches.length })
      if (matches.length >= max) {
        stopFlag = true
        break
      }
    }
  })

  const tConcStart = Date.now()
  await runConcurrent(tasks, concurrency, () => stopFlag)
  const concMs = Date.now() - tConcStart

  const result = {
    query: q,
    regex, caseSensitive,
    totalFiles,
    filesScanned,
    filesMatched,
    matches,
    skipped,
    truncated: stopFlag,
    ms: Date.now() - t0,
    cacheStats: cache.stats(),
  }

  if (debug) {
    result.debug = {
      concurrency,
      filesEnumMs,
      runConcurrentMs: concMs,
      reads: summarizeTimings(timings.readMs),
      scans: summarizeTimings(timings.scanMs),
    }
  }

  return result
}
|
|
210
|
+
|
|
211
|
+
module.exports = { search }
|
|
@@ -0,0 +1,402 @@
|
|
|
1
|
+
// CodeSynapt symbol mode — in-memory symbol graph that lives alongside
|
|
2
|
+
// the file-graph Scanner. Built lazily; first /symbol/* request triggers
|
|
3
|
+
// the scan against the currently-loaded file set.
|
|
4
|
+
//
|
|
5
|
+
// Data model and design notes live in docs/SYMBOL-MODE-PLAN.md.
|
|
6
|
+
|
|
7
|
+
'use strict'
|
|
8
|
+
|
|
9
|
+
const fs = require('fs')
|
|
10
|
+
const path = require('path')
|
|
11
|
+
|
|
12
|
+
// Parser registry — extended per-language in Stage 1 / Stage 2.
|
|
13
|
+
// Each entry: { extractSymbols(content, fileId) → SymbolNode[],
|
|
14
|
+
// extractReferences(content, fileId, index) → SymbolEdge[] }
|
|
15
|
+
// Created with a null prototype so a lookup by extension can only ever
// return a parser that was explicitly registered, never an inherited
// Object.prototype member.
const PARSERS = Object.create(null)
|
|
16
|
+
|
|
17
|
+
// Heuristic: a file at one of these path segments isn't usually called
|
|
18
|
+
// from production code. Affects resolveCall — when a name has matches
|
|
19
|
+
// in both production and auxiliary paths, production wins. Doesn't
|
|
20
|
+
// hide aux symbols, just deprioritises them as call targets.
|
|
21
|
+
// Directory names treated as "auxiliary" (not production code), grouped by
// purpose. Matching is exact and case-sensitive on a single path segment.
const AUX_PATH_SEGMENTS = new Set([
  // tooling
  ...['scripts', 'script', 'tools', 'tool'],
  // tests
  ...['tests', 'test', '__tests__', 'spec', 'specs'],
  // examples and demos
  ...['examples', 'example', 'samples', 'sample', 'demo', 'demos'],
  // build output
  ...['build', 'dist', 'out', 'bin'],
  // documentation
  ...['docs', 'doc'],
  // test fixtures
  ...['fixtures', 'fixture'],
  // benchmarks
  ...['benchmarks', 'benchmark', 'bench'],
  // Vendored / prebuilt bundles that ship inside source dirs
  // (Next.js's packages/next/src/compiled/* is the canonical case).
  // File mode ignores top-level node_modules, but vendored copies
  // inside src/ slip through; deprioritise them as call targets.
  ...['compiled', 'vendored', 'vendor'],
])
|
|
35
|
+
/**
 * Tell whether a file id lies under an auxiliary directory.
 *
 * The id is split on '/' and every segment (including the last one) is
 * compared against AUX_PATH_SEGMENTS, so `tests/foo.ts`,
 * `packages/x/scripts/y.ts` and `build/x.js` all count as auxiliary.
 *
 * @param {string} fileId - '/'-separated file id
 * @returns {boolean} true when any segment is an auxiliary name; false for an
 *   empty or missing id
 */
function isAuxPath(fileId) {
  if (!fileId) return false
  for (const segment of fileId.split('/')) {
    if (AUX_PATH_SEGMENTS.has(segment)) return true
  }
  return false
}
|
|
42
|
+
|
|
43
|
+
/**
 * Register `parser` for one extension or for each extension in a list.
 * A later registration for the same extension replaces the earlier one.
 *
 * @param {string|string[]} extOrExts - extension key(s) used to index PARSERS
 * @param {object} parser - parser object stored under each key
 */
function registerParser(extOrExts, parser) {
  if (!Array.isArray(extOrExts)) {
    PARSERS[extOrExts] = parser
    return
  }
  for (let k = 0; k < extOrExts.length; k++) {
    PARSERS[extOrExts[k]] = parser
  }
}
|
|
47
|
+
|
|
48
|
+
/**
 * Lower-cased extension of `filePath` without the leading dot.
 *
 * @param {string} filePath - path or file id
 * @returns {string} e.g. 'ts' for 'src/App.TS'; '' when there is no extension
 */
function extFor(filePath) {
  const dotted = path.extname(filePath)
  return dotted.slice(1).toLowerCase()
}
|
|
52
|
+
|
|
53
|
+
class SymbolGraph {
|
|
54
|
+
constructor() {
|
|
55
|
+
this.nodes = new Map() // id → SymbolNode
|
|
56
|
+
this.edges = [] // SymbolEdge[]
|
|
57
|
+
this.byFile = new Map() // fileId → Set<symbolId>
|
|
58
|
+
this.byName = new Map() // lowercased name → Set<symbolId>
|
|
59
|
+
// Adjacency for fast callers/callees lookup.
|
|
60
|
+
this.outAdj = new Map() // symbolId → Set<targetId>
|
|
61
|
+
this.inAdj = new Map() // symbolId → Set<sourceId>
|
|
62
|
+
// File-mode imports — fed in from the host (scanner.edges). Lets
|
|
63
|
+
// call resolution disambiguate same-name symbols across files
|
|
64
|
+
// by preferring targets in files the caller actually imports.
|
|
65
|
+
this.fileImports = new Map() // fileId → Set<importedFileId>
|
|
66
|
+
this.builtAt = 0
|
|
67
|
+
this.fileCount = 0
|
|
68
|
+
this.scanMs = 0
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
clear() {
|
|
72
|
+
this.nodes.clear()
|
|
73
|
+
this.edges.length = 0
|
|
74
|
+
this.byFile.clear()
|
|
75
|
+
this.byName.clear()
|
|
76
|
+
this.outAdj.clear()
|
|
77
|
+
this.inAdj.clear()
|
|
78
|
+
this.fileImports.clear()
|
|
79
|
+
this.builtAt = 0
|
|
80
|
+
this.fileCount = 0
|
|
81
|
+
this.scanMs = 0
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
// Best symbol match for `name` called from `fromFileId`. Preference:
|
|
85
|
+
// 1) same file
|
|
86
|
+
// 2) a file directly imported by `fromFileId`
|
|
87
|
+
// We deliberately *do not* fall back to "any file with that name"
|
|
88
|
+
// — that would link a local `request` variable in utils.ts to an
|
|
89
|
+
// unrelated `request()` function in some other file just because
|
|
90
|
+
// they share a name. AI agents downstream would get noise edges
|
|
91
|
+
// and follow false trails. Conservative beats clever here.
|
|
92
|
+
// If the host wants the loose match, set `allowAny: true`.
|
|
93
|
+
resolveCall(fromFileId, name, { allowAny = false } = {}) {
|
|
94
|
+
if (!name) return null
|
|
95
|
+
// Type-aware lookup: `User.method` matches a symbol whose
|
|
96
|
+
// qualifiedName === 'User.method' exactly. Higher priority than
|
|
97
|
+
// name-only matches because it narrows from "any method named X"
|
|
98
|
+
// to "X defined on this class".
|
|
99
|
+
if (name.includes('.')) {
|
|
100
|
+
const tail = name.split('.').pop()
|
|
101
|
+
const set = this.byName.get(tail.toLowerCase())
|
|
102
|
+
if (set) {
|
|
103
|
+
for (const id of set) {
|
|
104
|
+
const node = this.nodes.get(id)
|
|
105
|
+
if (node?.qualifiedName === name) return node
|
|
106
|
+
}
|
|
107
|
+
}
|
|
108
|
+
// No qualifiedName match — fall back to the bare method name
|
|
109
|
+
// through the regular path below.
|
|
110
|
+
name = tail
|
|
111
|
+
}
|
|
112
|
+
const set = this.byName.get(name.toLowerCase())
|
|
113
|
+
if (!set || !set.size) return null
|
|
114
|
+
let sameFile = null, imported = null
|
|
115
|
+
// Two-bucket fallback: prefer a production-path candidate
|
|
116
|
+
// over an auxiliary-path one (scripts/, test/, build/, examples/
|
|
117
|
+
// etc.) when nothing imported matches. Stops the case where
|
|
118
|
+
// production code's call to `fetch(...)` lands on a helper named
|
|
119
|
+
// `fetch` defined in scripts/.
|
|
120
|
+
let prodAny = null, auxAny = null
|
|
121
|
+
const callerIsAux = isAuxPath(fromFileId)
|
|
122
|
+
const importsOf = this.fileImports.get(fromFileId)
|
|
123
|
+
for (const id of set) {
|
|
124
|
+
const node = this.nodes.get(id)
|
|
125
|
+
if (!node) continue
|
|
126
|
+
if (node.file === fromFileId) { sameFile = node; break }
|
|
127
|
+
if (!imported && importsOf && importsOf.has(node.file)) imported = node
|
|
128
|
+
if (isAuxPath(node.file)) {
|
|
129
|
+
if (!auxAny) auxAny = node
|
|
130
|
+
} else {
|
|
131
|
+
if (!prodAny) prodAny = node
|
|
132
|
+
}
|
|
133
|
+
}
|
|
134
|
+
if (sameFile) return sameFile
|
|
135
|
+
if (imported) return imported
|
|
136
|
+
if (!allowAny) return null
|
|
137
|
+
// Prefer production over auxiliary unless the caller itself is
|
|
138
|
+
// already aux (in which case linking back into scripts/ is fine).
|
|
139
|
+
if (callerIsAux) return prodAny || auxAny
|
|
140
|
+
return prodAny || auxAny
|
|
141
|
+
}
|
|
142
|
+
|
|
143
|
+
addNode(node) {
|
|
144
|
+
this.nodes.set(node.id, node)
|
|
145
|
+
if (!this.byFile.has(node.file)) this.byFile.set(node.file, new Set())
|
|
146
|
+
this.byFile.get(node.file).add(node.id)
|
|
147
|
+
const key = (node.name || '').toLowerCase()
|
|
148
|
+
if (key) {
|
|
149
|
+
if (!this.byName.has(key)) this.byName.set(key, new Set())
|
|
150
|
+
this.byName.get(key).add(node.id)
|
|
151
|
+
}
|
|
152
|
+
}
|
|
153
|
+
|
|
154
|
+
addEdge(edge) {
|
|
155
|
+
this.edges.push(edge)
|
|
156
|
+
if (!this.outAdj.has(edge.source)) this.outAdj.set(edge.source, new Set())
|
|
157
|
+
this.outAdj.get(edge.source).add(edge.target)
|
|
158
|
+
if (!this.inAdj.has(edge.target)) this.inAdj.set(edge.target, new Set())
|
|
159
|
+
this.inAdj.get(edge.target).add(edge.source)
|
|
160
|
+
}
|
|
161
|
+
|
|
162
|
+
// Index every symbol as a 384-d MiniLM embedding so /symbol/explore
|
|
163
|
+
// can rerank by semantic similarity (auth ↔ login synonyms etc).
|
|
164
|
+
// Runs in batches of 32 to keep peak memory bounded; the caller
|
|
165
|
+
// typically fires this off without awaiting so the build finishes
|
|
166
|
+
// quickly and the embeddings populate in the background.
|
|
167
|
+
//
|
|
168
|
+
// Each symbol gets a `_embedding` Float64-style Array assigned in
|
|
169
|
+
// place. Falls back silently if `embedBatchFn` returns null (e.g.
|
|
170
|
+
// the @xenova/transformers dep isn't installed).
|
|
171
|
+
async embedAllSymbols(embedBatchFn, { chunkSize = 32 } = {}) {
|
|
172
|
+
if (this._embedded || this._embedding) return // idempotent
|
|
173
|
+
this._embedding = true
|
|
174
|
+
const ids = []
|
|
175
|
+
const texts = []
|
|
176
|
+
for (const n of this.nodes.values()) {
|
|
177
|
+
ids.push(n.id)
|
|
178
|
+
// Keep text short — MiniLM is a 128-token model, longer input
|
|
179
|
+
// gets truncated. name + qn + kind + first 100 chars of doc is
|
|
180
|
+
// enough signal to distinguish auth-shaped from db-shaped.
|
|
181
|
+
const doc = (n.doc || '').slice(0, 100)
|
|
182
|
+
texts.push(`${n.name || ''} ${n.qualifiedName || ''} ${n.kind || ''} ${doc}`.trim())
|
|
183
|
+
}
|
|
184
|
+
const start = Date.now()
|
|
185
|
+
let done = 0
|
|
186
|
+
for (let i = 0; i < texts.length; i += chunkSize) {
|
|
187
|
+
const batch = texts.slice(i, i + chunkSize)
|
|
188
|
+
const vecs = await embedBatchFn(batch)
|
|
189
|
+
if (!vecs) { // embed failed — give up cleanly
|
|
190
|
+
this._embedding = false
|
|
191
|
+
return false
|
|
192
|
+
}
|
|
193
|
+
for (let j = 0; j < vecs.length; j++) {
|
|
194
|
+
const node = this.nodes.get(ids[i + j])
|
|
195
|
+
if (node) node._embedding = vecs[j]
|
|
196
|
+
}
|
|
197
|
+
done += vecs.length
|
|
198
|
+
// Yield to the event loop between batches so concurrent HTTP
|
|
199
|
+
// requests (the desktop UI, the bench harness, MCP tools)
|
|
200
|
+
// aren't starved while a multi-second indexing pass runs.
|
|
201
|
+
// ONNX inference inside embedBatchFn pegs the main thread, so
|
|
202
|
+
// a `setImmediate` after each batch is the difference between
|
|
203
|
+
// "queries time out" and "queries respond within 50 ms".
|
|
204
|
+
await new Promise((r) => setImmediate(r))
|
|
205
|
+
}
|
|
206
|
+
this._embedded = true
|
|
207
|
+
this._embedding = false
|
|
208
|
+
this.embedMs = Date.now() - start
|
|
209
|
+
return true
|
|
210
|
+
}
|
|
211
|
+
|
|
212
|
+
// BFS along outAdj from every symbol the host considers a public
|
|
213
|
+
// entry (main, route handler, exported CLI bin, etc). Symbols not
|
|
214
|
+
// reachable from any entry are likely dead code. The host passes
|
|
215
|
+
// its own isEntry predicate so this module stays parser-agnostic.
|
|
216
|
+
//
|
|
217
|
+
// Important caveat documented in the explore code: our entry
|
|
218
|
+
// heuristic (name + path patterns) misses some real entries
|
|
219
|
+
// (React components, framework callbacks, decorator-bound
|
|
220
|
+
// handlers), so a `reachable: false` flag is a *hint*, not a
|
|
221
|
+
// verdict — we expose it as data and let the ranker / UI choose
|
|
222
|
+
// how strongly to weight it.
|
|
223
|
+
computeReachability(isEntry) {
|
|
224
|
+
const reachable = new Set()
|
|
225
|
+
const queue = []
|
|
226
|
+
for (const node of this.nodes.values()) {
|
|
227
|
+
try {
|
|
228
|
+
if (isEntry(node)) { reachable.add(node.id); queue.push(node.id) }
|
|
229
|
+
} catch {}
|
|
230
|
+
}
|
|
231
|
+
while (queue.length) {
|
|
232
|
+
const id = queue.shift()
|
|
233
|
+
const callees = this.outAdj.get(id)
|
|
234
|
+
if (!callees) continue
|
|
235
|
+
for (const c of callees) {
|
|
236
|
+
if (!reachable.has(c)) { reachable.add(c); queue.push(c) }
|
|
237
|
+
}
|
|
238
|
+
}
|
|
239
|
+
this._reachable = reachable
|
|
240
|
+
return reachable
|
|
241
|
+
}
|
|
242
|
+
|
|
243
|
+
callersOf(id) {
|
|
244
|
+
const set = this.inAdj.get(id)
|
|
245
|
+
if (!set) return []
|
|
246
|
+
return [...set].map((sid) => this.nodes.get(sid)).filter(Boolean)
|
|
247
|
+
}
|
|
248
|
+
calleesOf(id) {
|
|
249
|
+
const set = this.outAdj.get(id)
|
|
250
|
+
if (!set) return []
|
|
251
|
+
return [...set].map((tid) => this.nodes.get(tid)).filter(Boolean)
|
|
252
|
+
}
|
|
253
|
+
findByName(query, limit = 50) {
|
|
254
|
+
const q = (query || '').toLowerCase()
|
|
255
|
+
if (!q) return []
|
|
256
|
+
const matches = []
|
|
257
|
+
for (const [name, ids] of this.byName) {
|
|
258
|
+
if (name.includes(q)) {
|
|
259
|
+
for (const id of ids) {
|
|
260
|
+
const n = this.nodes.get(id)
|
|
261
|
+
if (n) matches.push(n)
|
|
262
|
+
if (matches.length >= limit) return matches
|
|
263
|
+
}
|
|
264
|
+
}
|
|
265
|
+
}
|
|
266
|
+
return matches
|
|
267
|
+
}
|
|
268
|
+
|
|
269
|
+
// ─── Scanning ──────────────────────────────────────────────────
|
|
270
|
+
// `fileEntries` is an iterable of { id, absPath, ext } — typically
|
|
271
|
+
// derived from the file-mode Scanner's `files` map.
|
|
272
|
+
// `fileImports` (optional) is a Map<fileId, Set<importedFileId>>
|
|
273
|
+
// built from the file-mode edge list; lets resolveCall prefer
|
|
274
|
+
// imported targets.
|
|
275
|
+
async build(fileEntries, fileImports = null, options = {}) {
|
|
276
|
+
const start = Date.now()
|
|
277
|
+
this.clear()
|
|
278
|
+
// Set imports *after* clear so the host-provided map survives.
|
|
279
|
+
if (fileImports) this.fileImports = fileImports
|
|
280
|
+
// Big-repo safety knobs. None of them block — they cap the work
|
|
281
|
+
// so a runaway monorepo (100k+ symbols) doesn't OOM the process.
|
|
282
|
+
const MAX_SYMBOLS = options.maxSymbols
|
|
283
|
+
|| parseInt(process.env.CS_MAX_SYMBOLS || '200000', 10)
|
|
284
|
+
const MAX_EDGES = options.maxEdges
|
|
285
|
+
|| parseInt(process.env.CS_MAX_EDGES || '1000000', 10)
|
|
286
|
+
const MAX_FILE_BYTES = options.maxFileBytes
|
|
287
|
+
|| parseInt(process.env.CS_MAX_FILE_BYTES || '524288', 10) // 512KB
|
|
288
|
+
let abortedAt = null
|
|
289
|
+
// Pass 1 — symbols. We need every symbol indexed before we can
|
|
290
|
+
// resolve references in pass 2.
|
|
291
|
+
let fileCount = 0
|
|
292
|
+
const fileContents = new Map() // fileId → content (kept for pass 2)
|
|
293
|
+
for (const entry of fileEntries) {
|
|
294
|
+
if (this.nodes.size >= MAX_SYMBOLS) { abortedAt = 'symbols'; break }
|
|
295
|
+
const parser = PARSERS[entry.ext]
|
|
296
|
+
if (!parser) continue
|
|
297
|
+
let content, fileMtimeMs = 0
|
|
298
|
+
try {
|
|
299
|
+
const stat = fs.statSync(entry.absPath)
|
|
300
|
+
if (stat.size > MAX_FILE_BYTES) continue // skip giant files (minified bundles, vendored libs)
|
|
301
|
+
content = fs.readFileSync(entry.absPath, 'utf8')
|
|
302
|
+
fileMtimeMs = stat.mtimeMs
|
|
303
|
+
} catch { continue }
|
|
304
|
+
fileContents.set(entry.id, content)
|
|
305
|
+
let symbols
|
|
306
|
+
try {
|
|
307
|
+
const ret = parser.extractSymbols(content, entry.id)
|
|
308
|
+
symbols = (await ret) || []
|
|
309
|
+
} catch (e) { symbols = [] }
|
|
310
|
+
// Lazy split per file — used by the deprecated probe below.
|
|
311
|
+
// Most files have no deprecated marker, so the .test() short-
|
|
312
|
+
// circuits and we never pay the split cost.
|
|
313
|
+
let _lines = null
|
|
314
|
+
const lines = () => _lines ??= content.split('\n')
|
|
315
|
+
const DEPRECATED_RE = /@?deprecated\b|todo\s*[:_-]?\s*remove|fixme\s*[:_-]?\s*remove/i
|
|
316
|
+
// Stricter pattern — used ONLY against the file header so
|
|
317
|
+
// common code-body words like "wip"/"work in progress" don't
|
|
318
|
+
// false-positive entire files. The body-level probe sticks to
|
|
319
|
+
// the precise @deprecated / TODO remove patterns above.
|
|
320
|
+
const HEAD_DEPRECATED_RE = /@?deprecated\b|do\s+not\s+use\b|work\s+in\s+progress\b|\bwip[\s:]/i
|
|
321
|
+
const fileHasDeprecated = DEPRECATED_RE.test(content)
|
|
322
|
+
// File-level deprecated marker — if the first 5 lines flag the
|
|
323
|
+
// whole file as deprecated (common pattern: file header with
|
|
324
|
+
// `// @deprecated — moved to …`), tag every symbol the file
|
|
325
|
+
// exports. Avoids the case where the file header is far above
|
|
326
|
+
// any declaration's 5-line probe window. Header check uses
|
|
327
|
+
// HEAD_DEPRECATED_RE so a body-only deprecated marker can't
|
|
328
|
+
// promote a single-symbol file to "everything deprecated".
|
|
329
|
+
let fileLevelDeprecated = false
|
|
330
|
+
const head = lines().slice(0, 5).join('\n')
|
|
331
|
+
if (HEAD_DEPRECATED_RE.test(head)) fileLevelDeprecated = true
|
|
332
|
+
for (const s of symbols) {
|
|
333
|
+
if (this.nodes.size >= MAX_SYMBOLS) { abortedAt = 'symbols'; break }
|
|
334
|
+
// Stamp every symbol with the file's mtime — explore uses it
|
|
335
|
+
// for the `legacy` classification (old + low in-degree). Cost
|
|
336
|
+
// is one extra Map allocation per symbol; the stat call was
|
|
337
|
+
// already happening above.
|
|
338
|
+
s.mtimeMs = fileMtimeMs
|
|
339
|
+
// Deprecated marker — look at the 5 lines directly above the
|
|
340
|
+
// symbol declaration. Cheaper + more precise than fighting
|
|
341
|
+
// babel's export-wrapper leading-comment attachment quirk.
|
|
342
|
+
if (fileLevelDeprecated) {
|
|
343
|
+
s.deprecated = true
|
|
344
|
+
} else if (fileHasDeprecated && s.startLine) {
|
|
345
|
+
const start = Math.max(0, s.startLine - 1 - 5)
|
|
346
|
+
const prelude = lines().slice(start, s.startLine - 1).join('\n')
|
|
347
|
+
if (DEPRECATED_RE.test(prelude)) s.deprecated = true
|
|
348
|
+
}
|
|
349
|
+
this.addNode(s)
|
|
350
|
+
}
|
|
351
|
+
fileCount++
|
|
352
|
+
}
|
|
353
|
+
// Pass 2 — references. Per-file, ask the language parser to find
|
|
354
|
+
// call/extends/implements edges. Parsers consult `this` (the
|
|
355
|
+
// symbol index) to resolve names.
|
|
356
|
+
for (const [fileId, content] of fileContents) {
|
|
357
|
+
if (this.edges.length >= MAX_EDGES) { abortedAt = abortedAt || 'edges'; break }
|
|
358
|
+
const ext = extFor(fileId)
|
|
359
|
+
const parser = PARSERS[ext]
|
|
360
|
+
if (!parser || !parser.extractReferences) continue
|
|
361
|
+
let refs
|
|
362
|
+
try {
|
|
363
|
+
const ret = parser.extractReferences(content, fileId, this)
|
|
364
|
+
refs = (await ret) || []
|
|
365
|
+
} catch (e) { refs = [] }
|
|
366
|
+
for (const r of refs) {
|
|
367
|
+
if (this.edges.length >= MAX_EDGES) { abortedAt = abortedAt || 'edges'; break }
|
|
368
|
+
if (this.nodes.has(r.source) && this.nodes.has(r.target)) {
|
|
369
|
+
this.addEdge(r)
|
|
370
|
+
}
|
|
371
|
+
}
|
|
372
|
+
}
|
|
373
|
+
this.fileCount = fileCount
|
|
374
|
+
this.builtAt = Date.now()
|
|
375
|
+
this.scanMs = this.builtAt - start
|
|
376
|
+
this.abortedAt = abortedAt // null or 'symbols'/'edges'
|
|
377
|
+
return this.stats()
|
|
378
|
+
}
|
|
379
|
+
|
|
380
|
+
stats() {
|
|
381
|
+
const byKind = {}
|
|
382
|
+
for (const n of this.nodes.values()) {
|
|
383
|
+
byKind[n.kind] = (byKind[n.kind] || 0) + 1
|
|
384
|
+
}
|
|
385
|
+
const byEdgeKind = {}
|
|
386
|
+
for (const e of this.edges) {
|
|
387
|
+
byEdgeKind[e.kind] = (byEdgeKind[e.kind] || 0) + 1
|
|
388
|
+
}
|
|
389
|
+
return {
|
|
390
|
+
fileCount: this.fileCount,
|
|
391
|
+
symbolCount: this.nodes.size,
|
|
392
|
+
edgeCount: this.edges.length,
|
|
393
|
+
byKind,
|
|
394
|
+
byEdgeKind,
|
|
395
|
+
scanMs: this.scanMs,
|
|
396
|
+
builtAt: this.builtAt,
|
|
397
|
+
abortedAt: this.abortedAt || null, // null | 'symbols' | 'edges'
|
|
398
|
+
}
|
|
399
|
+
}
|
|
400
|
+
}
|
|
401
|
+
|
|
402
|
+
module.exports = { SymbolGraph, registerParser, extFor, PARSERS }
|