codesynapt 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. package/CHANGELOG.md +17 -0
  2. package/LICENSE +686 -0
  3. package/LICENSES.md +141 -0
  4. package/README.md +331 -0
  5. package/electron/main.cjs +2849 -0
  6. package/electron/plugin-loader.cjs +184 -0
  7. package/electron/preload.cjs +108 -0
  8. package/package.json +216 -0
  9. package/packages/core/bin/codesynapt-mcp.cjs +611 -0
  10. package/packages/core/bin/codesynapt.cjs +1933 -0
  11. package/packages/core/legacy.js +300 -0
  12. package/packages/core/lib/control-server.cjs +1539 -0
  13. package/packages/core/lib/embedding.cjs +89 -0
  14. package/packages/core/lib/logger.cjs +63 -0
  15. package/packages/core/lib/search-cache.cjs +140 -0
  16. package/packages/core/lib/search-worker.cjs +255 -0
  17. package/packages/core/lib/search.cjs +211 -0
  18. package/packages/core/lib/symbol-graph.cjs +402 -0
  19. package/packages/core/lib/symbol-parser-js.cjs +542 -0
  20. package/packages/core/lib/symbol-parser-misc.cjs +394 -0
  21. package/packages/core/lib/symbol-parser-py.cjs +215 -0
  22. package/packages/core/lib/symbol-parser-treesitter.cjs +658 -0
  23. package/packages/core/lib/symbol-parser-tsc.cjs +332 -0
  24. package/packages/core/monorepo.js +310 -0
  25. package/packages/core/parser.js +2234 -0
  26. package/packages/core/scanner.js +623 -0
  27. package/plugin-api/LICENSE +21 -0
  28. package/plugin-api/README.md +114 -0
  29. package/plugin-api/docs/01-getting-started.md +197 -0
  30. package/plugin-api/docs/02-concepts.md +269 -0
  31. package/plugin-api/docs/api-reference.md +463 -0
  32. package/plugin-api/docs/troubleshooting.md +332 -0
  33. package/plugin-api/docs/types/exporter.md +377 -0
  34. package/plugin-api/docs/types/theme.md +312 -0
  35. package/plugin-api/examples/hello-world-plugin/README.md +70 -0
  36. package/plugin-api/examples/hello-world-plugin/main.js +36 -0
  37. package/plugin-api/examples/hello-world-plugin/manifest.json +12 -0
  38. package/plugin-api/examples/mermaid-exporter/README.md +125 -0
  39. package/plugin-api/examples/mermaid-exporter/main.js +58 -0
  40. package/plugin-api/examples/mermaid-exporter/manifest.json +12 -0
  41. package/plugin-api/examples/rust-parser/README.md +71 -0
  42. package/plugin-api/examples/rust-parser/main.js +123 -0
  43. package/plugin-api/examples/rust-parser/manifest.json +12 -0
  44. package/plugin-api/examples/sunset-theme/README.md +95 -0
  45. package/plugin-api/examples/sunset-theme/manifest.json +12 -0
  46. package/plugin-api/examples/sunset-theme/theme.css +31 -0
  47. package/plugin-api/package.json +20 -0
  48. package/plugin-api/types.d.ts +395 -0
  49. package/public/app.js +6837 -0
  50. package/public/backend.js +285 -0
  51. package/public/index.html +647 -0
  52. package/public/plugin-host.js +321 -0
  53. package/public/style.css +4359 -0
  54. package/public/vendor/three.module.js +53044 -0
  55. package/scripts/competitor-watch.mjs +144 -0
  56. package/scripts/copy-vendor.js +21 -0
  57. package/scripts/download-bundled-node.cjs +53 -0
  58. package/scripts/fuses-after-pack.cjs +34 -0
  59. package/scripts/license-check.js +119 -0
  60. package/scripts/perf-test.js +200 -0
  61. package/server.js +132 -0
@@ -0,0 +1,211 @@
1
+ // search.cjs — full-text search over the scanner's file list.
2
+ //
3
+ // Concurrency / safety:
4
+ // - concurrency-limited (default 32) instead of all-at-once Promise.all
5
+ // → bounds libuv thread-pool queue depth, avoids stall on big repos.
6
+ // - per-file timeout (default 5s) → if a single file's read hangs (e.g. an OS
7
+ // lock), we skip it and continue. The scan itself runs synchronously, so the timeout cannot interrupt a pathological regex.
8
+ // - skipped files are reported in the response so the caller knows what
9
+ // wasn't searched.
10
+
11
// Cap on the total number of matches a single search() call returns.
const DEFAULT_MAX = 100
// Number of per-file tasks search() keeps in flight at the same time.
const DEFAULT_CONCURRENCY = 32
// Milliseconds a single file read may take before the file is skipped.
const DEFAULT_FILE_TIMEOUT_MS = 5_000
// Characters of surrounding text kept on each side of a match in a snippet.
const SNIPPET_CONTEXT = 50
15
+
16
/**
 * Find occurrences of `q` inside `text` and describe each one.
 *
 * Both modes walk the text front to back exactly once and keep a running
 * line counter, so the cost is linear in the file size even when the file
 * contains no match at all.
 *
 * @param {string} text        File content to search.
 * @param {string} q           Query: a regex source when `opts.regex` is set,
 *                             otherwise a literal substring.
 * @param {{regex?: boolean, caseSensitive?: boolean}} opts
 * @param {number} maxPerFile  Stop after this many matches.
 * @returns {{line: number, col: number, snippet: string}[]} 1-based line and
 *   column of each match start, plus up to SNIPPET_CONTEXT characters of
 *   context on each side with line breaks flattened to spaces. An invalid
 *   regex or an empty literal query yields an empty array.
 */
function scanContent(text, q, opts, maxPerFile) {
  const matches = []

  // Running line bookkeeping. Match offsets only ever increase, so the
  // counters are advanced incrementally instead of recounting from offset 0.
  let lineNo = 1
  let lineStart = 0
  // Moves lineNo/lineStart to the line containing `idx` and returns the
  // offset of that line's terminating '\n' (or the string length).
  const locate = (str, idx) => {
    let nl = str.indexOf('\n', lineStart)
    while (nl !== -1 && nl < idx) {
      lineNo++
      lineStart = nl + 1
      nl = str.indexOf('\n', lineStart)
    }
    return nl === -1 ? str.length : nl
  }
  const snippetAt = (idx, length) => {
    const sStart = Math.max(0, idx - SNIPPET_CONTEXT)
    const sEnd = Math.min(text.length, idx + length + SNIPPET_CONTEXT)
    return text.slice(sStart, sEnd).replace(/\r?\n/g, ' ')
  }

  if (opts.regex) {
    const flags = opts.caseSensitive ? 'g' : 'gi'
    let re
    try { re = new RegExp(q, flags) }
    catch { return [] }
    let m
    while ((m = re.exec(text)) !== null) {
      const idx = m.index
      locate(text, idx)
      matches.push({
        line: lineNo,
        col: idx - lineStart + 1,
        snippet: snippetAt(idx, m[0].length),
      })
      if (matches.length >= maxPerFile) break
      // A zero-length match does not advance lastIndex; bump it manually so
      // exec() cannot return the same position forever.
      if (m.index === re.lastIndex) re.lastIndex++
    }
    return matches
  }

  const needle = opts.caseSensitive ? q : q.toLowerCase()
  // indexOf('') matches at every offset; there is nothing meaningful to report.
  if (needle.length === 0) return matches
  // NOTE(review): toLowerCase() can change a string's length for a few code
  // points (e.g. U+0130); offsets into `hay` then no longer line up with
  // `text` and snippets may be shifted for such files.
  const hay = opts.caseSensitive ? text : text.toLowerCase()
  let from = 0
  while (true) {
    const idx = hay.indexOf(needle, from)
    if (idx === -1) break
    const lineEnd = locate(hay, idx)
    matches.push({
      line: lineNo,
      col: idx - lineStart + 1,
      snippet: snippetAt(idx, needle.length),
    })
    if (matches.length >= maxPerFile) break
    // Matches on one line never overlap. A needle that spans a line break
    // must not hide matches that start on the following line, so the search
    // resumes no later than the start of the next line.
    from = Math.min(idx + needle.length, lineEnd + 1)
  }
  return matches
}
67
+
68
/**
 * Race `promise` against a timer.
 *
 * @param {Promise} promise  Work to wait for.
 * @param {number} ms        Time budget in milliseconds.
 * @param {string} label     Appended to the timeout error message.
 * @returns {Promise} Settles like `promise` if it finishes first; otherwise
 *   rejects with an Error whose message is `timeout <ms>ms: <label>`.
 *   The timer is cleared once the race settles either way.
 */
function withTimeout(promise, ms, label) {
  let timer
  const expiry = new Promise((_resolve, reject) => {
    const onExpire = () => { reject(new Error(`timeout ${ms}ms: ${label}`)) }
    timer = setTimeout(onExpire, ms)
  })
  const cancelTimer = () => { clearTimeout(timer) }
  return Promise.race([promise, expiry]).finally(cancelTimer)
}
75
+
76
/**
 * Run `tasks` with at most `concurrency` of them in flight at once.
 *
 * @param {Array<() => Promise<any>>} tasks  Thunks, started in array order.
 * @param {number} concurrency  Desired number of parallel lanes. Fractions are
 *   floored; anything that is not a number >= 1 (0, negative, NaN) falls back
 *   to a single lane so the tasks still run instead of being silently dropped.
 * @param {() => boolean} [shouldStop]  Polled before each task is started;
 *   once it returns true no further tasks are picked up (tasks already
 *   running are not interrupted).
 * @returns {Promise<void>} Resolves when every lane has drained; rejects with
 *   the first task rejection.
 */
async function runConcurrent(tasks, concurrency, shouldStop) {
  const requested = Math.floor(Number(concurrency))
  const lanes = Math.min(tasks.length, requested >= 1 ? requested : 1)
  // Shared cursor: every lane claims the next unstarted task.
  let next = 0
  const lane = async () => {
    while (next < tasks.length) {
      if (shouldStop && shouldStop()) return
      const idx = next++
      await tasks[idx]()
    }
  }
  await Promise.all(Array.from({ length: lanes }, () => lane()))
}
90
+
91
/**
 * Full-text search across every file known to `scanner`.
 *
 * Each file is read through `cache.getText(id, absPath)` under a per-file
 * timeout and then scanned synchronously with scanContent(). Files whose read
 * fails or times out, or whose scan throws, are listed in `skipped` instead
 * of failing the whole search.
 *
 * @param {{files: Map<any, {id: string, absPath: string}>}} scanner
 * @param {{getText: Function, stats: Function}} cache
 * @param {object} opts
 * @param {string} opts.q                 Query string (required, non-empty).
 * @param {boolean} [opts.regex]          Treat `q` as a regular expression.
 * @param {boolean} [opts.caseSensitive]
 * @param {number} [opts.max]             Total match cap (default DEFAULT_MAX).
 * @param {number} [opts.maxPerFile]      Per-file match cap (default 10).
 * @param {number} [opts.concurrency]     Files in flight (default DEFAULT_CONCURRENCY).
 * @param {number} [opts.fileTimeoutMs]   Read budget per file (default DEFAULT_FILE_TIMEOUT_MS).
 * @param {boolean} [opts.debug]          Attach read/scan timing statistics.
 * @returns {Promise<object>} Summary with `matches`, `skipped`, counters,
 *   `truncated` (true once `max` was reached), elapsed `ms`, `cacheStats`
 *   and, in debug mode, `debug`. Each match's `totalInFile` is the number of
 *   matches collected for that file, which is itself capped by `maxPerFile`.
 * @throws {Error} When `q` is missing or not a string, or is an invalid regex.
 */
async function search(scanner, cache, opts) {
  const t0 = Date.now()
  const q = opts.q
  if (!q || typeof q !== 'string') throw new Error('q (query string) is required')

  const max = opts.max ?? DEFAULT_MAX
  const concurrency = opts.concurrency ?? DEFAULT_CONCURRENCY
  const fileTimeoutMs = opts.fileTimeoutMs ?? DEFAULT_FILE_TIMEOUT_MS
  const regex = !!opts.regex
  const caseSensitive = !!opts.caseSensitive
  const maxPerFile = opts.maxPerFile ?? 10
  const debug = !!opts.debug

  // Validate the pattern up front so the caller gets one clear error rather
  // than an empty result from every file.
  if (regex) {
    try { new RegExp(q) }
    catch (e) { throw new Error(`invalid regex: ${e.message}`) }
  }

  const tFilesStart = Date.now()
  const files = [...scanner.files.values()]
  const totalFiles = files.length
  const filesEnumMs = Date.now() - tFilesStart

  const matches = []
  const skipped = []
  let filesScanned = 0
  let filesMatched = 0
  let stopFlag = false

  // Per-file timing samples, collected only in debug mode.
  const timings = debug ? { readMs: [], scanMs: [] } : null

  const tasks = files.map((f) => async () => {
    if (stopFlag) return
    let text
    const tRead = debug ? Date.now() : 0
    try {
      text = await withTimeout(
        cache.getText(f.id, f.absPath),
        fileTimeoutMs,
        f.id,
      )
    } catch (e) {
      // The rejection value is not guaranteed to be an Error; never let the
      // classification itself throw and abort the whole search.
      const message = typeof e?.message === 'string' ? e.message : String(e)
      skipped.push({ id: f.id, reason: message.startsWith('timeout') ? 'timeout' : 'read-error' })
      return
    }
    if (debug) timings.readMs.push(Date.now() - tRead)
    // Another task may have hit the cap while this read was pending.
    if (stopFlag) return

    filesScanned++
    const tScan = debug ? Date.now() : 0
    let fileMatches
    try {
      fileMatches = scanContent(text, q, { regex, caseSensitive }, maxPerFile)
    } catch (e) {
      skipped.push({ id: f.id, reason: 'scan-error' })
      return
    }
    if (debug) timings.scanMs.push(Date.now() - tScan)
    if (fileMatches.length === 0) return

    filesMatched++
    for (const m of fileMatches) {
      matches.push({ id: f.id, line: m.line, col: m.col, snippet: m.snippet, totalInFile: fileMatches.length })
      if (matches.length >= max) {
        stopFlag = true
        break
      }
    }
  })

  const tConcStart = Date.now()
  await runConcurrent(tasks, concurrency, () => stopFlag)
  const concMs = Date.now() - tConcStart

  const result = {
    query: q,
    regex, caseSensitive,
    totalFiles,
    filesScanned,
    filesMatched,
    matches,
    skipped,
    truncated: stopFlag,
    ms: Date.now() - t0,
    cacheStats: cache.stats(),
  }

  if (debug) {
    result.debug = {
      concurrency,
      filesEnumMs,
      runConcurrentMs: concMs,
      reads: summarizeTimings(timings.readMs),
      scans: summarizeTimings(timings.scanMs),
    }
  }

  return result
}

// Count / sum / average / percentiles / max for a list of millisecond samples.
// Sorts once and reads the maximum from the sorted copy, so it neither
// re-sorts per percentile nor spreads a huge array into Math.max().
function summarizeTimings(samples) {
  const sorted = [...samples].sort((x, y) => x - y)
  const count = sorted.length
  const sumMs = sorted.reduce((s, x) => s + x, 0)
  const pctile = (p) => sorted[Math.min(count - 1, Math.floor(p * count))] || 0
  return {
    count,
    sumMs,
    avgMs: count ? +(sumMs / count).toFixed(2) : 0,
    p50: pctile(0.5),
    p95: pctile(0.95),
    p99: pctile(0.99),
    max: count ? Math.max(0, sorted[count - 1]) : 0,
  }
}
210
+
211
+ module.exports = { search }
@@ -0,0 +1,402 @@
1
+ // CodeSynapt symbol mode — in-memory symbol graph that lives alongside
2
+ // the file-graph Scanner. Built lazily; first /symbol/* request triggers
3
+ // the scan against the currently-loaded file set.
4
+ //
5
+ // Data model and design notes live in docs/SYMBOL-MODE-PLAN.md.
6
+
7
+ 'use strict'
8
+
9
+ const fs = require('fs')
10
+ const path = require('path')
11
+
12
+ // Parser registry — extended per-language in Stage 1 / Stage 2.
13
+ // Each entry: { extractSymbols(content, fileId) → SymbolNode[],
14
+ // extractReferences(content, fileId, index) → SymbolEdge[] }
15
+ const PARSERS = Object.create(null)
16
+
17
+ // Heuristic: a file at one of these path segments isn't usually called
18
+ // from production code. Affects resolveCall — when a name has matches
19
+ // in both production and auxiliary paths, production wins. Doesn't
20
+ // hide aux symbols, just deprioritises them as call targets.
21
+ const AUX_PATH_SEGMENTS = new Set([
22
+ 'scripts', 'script', 'tools', 'tool',
23
+ 'tests', 'test', '__tests__', 'spec', 'specs',
24
+ 'examples', 'example', 'samples', 'sample', 'demo', 'demos',
25
+ 'build', 'dist', 'out', 'bin',
26
+ 'docs', 'doc',
27
+ 'fixtures', 'fixture',
28
+ 'benchmarks', 'benchmark', 'bench',
29
+ // Vendored / prebuilt bundles that ship inside source dirs
30
+ // (Next.js's packages/next/src/compiled/* is the canonical case).
31
+ // file-mode ignores top-level node_modules, but vendored copies
32
+ // inside src/ slip through; deprioritise them as call targets.
33
+ 'compiled', 'vendored', 'vendor',
34
+ ])
35
+ function isAuxPath(fileId) {
36
+ if (!fileId) return false
37
+ // Check the first path segment + any segment whose name matches.
38
+ // `tests/foo.ts`, `packages/x/scripts/y.ts`, `build/x.js` all match.
39
+ const parts = fileId.split('/')
40
+ return parts.some((p) => AUX_PATH_SEGMENTS.has(p))
41
+ }
42
+
43
/**
 * Register `parser` for one extension or a list of extensions.
 *
 * Every key is stored exactly as given. In addition, string keys are also
 * registered in the form extFor() produces (no leading dot, lower-case),
 * because SymbolGraph.build looks parsers up by that form when resolving
 * references — a parser registered as '.TS' would otherwise never be found
 * there. A later registration for the same key replaces the earlier one.
 *
 * @param {string|string[]} extOrExts
 * @param {{extractSymbols: Function, extractReferences?: Function}} parser
 */
function registerParser(extOrExts, parser) {
  const exts = Array.isArray(extOrExts) ? extOrExts : [extOrExts]
  for (const ext of exts) {
    PARSERS[ext] = parser
    if (typeof ext !== 'string') continue
    const normalized = ext.replace(/^\./, '').toLowerCase()
    if (normalized && normalized !== ext) PARSERS[normalized] = parser
  }
}
47
+
48
/**
 * Extension of `filePath` as a parser-registry key: the result of
 * path.extname() without its leading dot, lower-cased. Paths without an
 * extension give the empty string.
 *
 * @param {string} filePath
 * @returns {string}
 */
function extFor(filePath) {
  const dotted = path.extname(filePath)
  return dotted.slice(1).toLowerCase()
}
52
+
53
/**
 * In-memory symbol graph: symbol nodes, reference edges and the lookup
 * indexes (by file, by lower-cased name, in/out adjacency) built from them.
 */
class SymbolGraph {
  constructor() {
    this.nodes = new Map() // id → SymbolNode
    this.edges = [] // SymbolEdge[]
    this.byFile = new Map() // fileId → Set<symbolId>
    this.byName = new Map() // lowercased name → Set<symbolId>
    // Adjacency for fast callers/callees lookup.
    this.outAdj = new Map() // symbolId → Set<targetId>
    this.inAdj = new Map() // symbolId → Set<sourceId>
    // File-level imports supplied by the host; lets resolveCall prefer
    // targets in files the caller actually imports.
    this.fileImports = new Map() // fileId → Set<importedFileId>
    this.builtAt = 0
    this.fileCount = 0
    this.scanMs = 0
    this.abortedAt = null // null | 'symbols' | 'edges'
    // Derived-state bookkeeping, all reset by clear().
    this._embedded = false // embedAllSymbols completed for the current nodes
    this._embedding = false // an embedAllSymbols run is in progress
    this._reachable = null // result of the last computeReachability()
    this._generation = 0 // bumped by clear(); lets async work detect a rebuild
  }

  /**
   * Drop all nodes, edges, indexes and derived state.
   *
   * `fileImports` is replaced with a fresh Map rather than emptied in place:
   * after build() it refers to the Map supplied by the host, and emptying
   * that object would wipe the host's data (and, when the host passes the
   * same Map to the next build(), the imports of the new build as well).
   */
  clear() {
    this.nodes.clear()
    this.edges.length = 0
    this.byFile.clear()
    this.byName.clear()
    this.outAdj.clear()
    this.inAdj.clear()
    this.fileImports = new Map()
    this.builtAt = 0
    this.fileCount = 0
    this.scanMs = 0
    this.abortedAt = null
    // Embeddings and reachability describe the nodes that were just dropped.
    this._embedded = false
    this._embedding = false
    this._reachable = null
    this._generation++
  }

  /**
   * Best symbol match for `name` called from `fromFileId`.
   *
   * Order of preference:
   *   0) for a dotted name (`User.method`), any symbol whose `qualifiedName`
   *      equals it exactly; otherwise the lookup continues with the last
   *      dotted component only
   *   1) a symbol defined in `fromFileId`
   *   2) a symbol in a file listed in `fileImports` for `fromFileId`
   *   3) only with `allowAny: true`: the first candidate outside auxiliary
   *      paths, else the first candidate inside them
   *
   * Without `allowAny` there is deliberately no "any file with that name"
   * fallback, to avoid linking unrelated same-named symbols.
   *
   * @param {string} fromFileId
   * @param {string} name
   * @param {{allowAny?: boolean}} [options]
   * @returns {object|null} The matching node or null.
   */
  resolveCall(fromFileId, name, { allowAny = false } = {}) {
    if (!name) return null
    if (name.includes('.')) {
      const tail = name.split('.').pop()
      const qualified = this.byName.get(tail.toLowerCase())
      if (qualified) {
        for (const id of qualified) {
          const node = this.nodes.get(id)
          if (node?.qualifiedName === name) return node
        }
      }
      // No qualifiedName match — continue with the bare member name.
      name = tail
    }
    const candidates = this.byName.get(name.toLowerCase())
    if (!candidates || !candidates.size) return null
    let sameFile = null
    let imported = null
    let prodAny = null
    let auxAny = null
    const importsOf = this.fileImports.get(fromFileId)
    for (const id of candidates) {
      const node = this.nodes.get(id)
      if (!node) continue
      if (node.file === fromFileId) { sameFile = node; break }
      if (!imported && importsOf && importsOf.has(node.file)) imported = node
      if (isAuxPath(node.file)) {
        if (!auxAny) auxAny = node
      } else if (!prodAny) {
        prodAny = node
      }
    }
    if (sameFile) return sameFile
    if (imported) return imported
    if (!allowAny) return null
    // NOTE(review): the original had a separate branch for callers that are
    // themselves in an auxiliary path, but it returned this same expression,
    // so aux callers have always been treated like production callers. If aux
    // callers were meant to prefer aux targets, that still needs implementing.
    return prodAny || auxAny
  }

  /** Insert `node` and index it by file and by lower-cased name. */
  addNode(node) {
    this.nodes.set(node.id, node)
    if (!this.byFile.has(node.file)) this.byFile.set(node.file, new Set())
    this.byFile.get(node.file).add(node.id)
    const key = (node.name || '').toLowerCase()
    if (key) {
      if (!this.byName.has(key)) this.byName.set(key, new Set())
      this.byName.get(key).add(node.id)
    }
  }

  /** Append `edge` and record it in both adjacency indexes. */
  addEdge(edge) {
    this.edges.push(edge)
    if (!this.outAdj.has(edge.source)) this.outAdj.set(edge.source, new Set())
    this.outAdj.get(edge.source).add(edge.target)
    if (!this.inAdj.has(edge.target)) this.inAdj.set(edge.target, new Set())
    this.inAdj.get(edge.target).add(edge.source)
  }

  /**
   * Attach an embedding vector to every node as `node._embedding`.
   *
   * Texts are sent to `embedBatchFn` in chunks, with a setImmediate yield
   * between chunks so other event-loop work is not starved.
   *
   * @param {(texts: string[]) => Promise<Array|null|undefined>} embedBatchFn
   *   Returns one vector per text, or a falsy value to signal that embedding
   *   is unavailable.
   * @param {{chunkSize?: number}} [options]  Texts per batch (default 32;
   *   values below 1 or non-numeric fall back to 32, fractions are floored).
   * @returns {Promise<boolean|undefined>} true when all nodes were embedded;
   *   false when `embedBatchFn` gave up or the graph was cleared/rebuilt
   *   while the run was in progress; undefined when a run already completed
   *   or is still in progress (the call is a no-op).
   *   If `embedBatchFn` throws, the error propagates and the in-progress
   *   flag is released so a later call can retry.
   */
  async embedAllSymbols(embedBatchFn, { chunkSize = 32 } = {}) {
    if (this._embedded || this._embedding) return // idempotent
    const generation = this._generation
    const size = Math.floor(chunkSize) >= 1 ? Math.floor(chunkSize) : 32
    this._embedding = true
    let finished = false
    try {
      const ids = []
      const texts = []
      for (const n of this.nodes.values()) {
        ids.push(n.id)
        // Short text only: name, qualified name, kind and the first 100
        // characters of the doc string.
        const doc = (n.doc || '').slice(0, 100)
        texts.push(`${n.name || ''} ${n.qualifiedName || ''} ${n.kind || ''} ${doc}`.trim())
      }
      const start = Date.now()
      for (let i = 0; i < texts.length; i += size) {
        const vecs = await embedBatchFn(texts.slice(i, i + size))
        // The graph was cleared while we were waiting: these ids belong to
        // the previous build, so stop without touching the new state.
        if (this._generation !== generation) return false
        if (!vecs) return false // embed unavailable — give up cleanly
        for (let j = 0; j < vecs.length; j++) {
          const node = this.nodes.get(ids[i + j])
          if (node) node._embedding = vecs[j]
        }
        // Yield to the event loop between batches.
        await new Promise((r) => setImmediate(r))
        if (this._generation !== generation) return false
      }
      this.embedMs = Date.now() - start
      finished = true
      return true
    } finally {
      // Only the run that owns the current generation may update the flags.
      if (this._generation === generation) {
        this._embedding = false
        this._embedded = finished
      }
    }
  }

  /**
   * Breadth-first walk along outgoing edges from every node for which
   * `isEntry(node)` is truthy. The result is stored on `this._reachable`.
   *
   * @param {(node: object) => boolean} isEntry  Host-supplied predicate; a
   *   predicate that throws for a node is treated as "not an entry".
   * @returns {Set<string>} Ids of all entry nodes and everything reachable
   *   from them.
   */
  computeReachability(isEntry) {
    const reachable = new Set()
    const queue = []
    for (const node of this.nodes.values()) {
      try {
        if (isEntry(node)) { reachable.add(node.id); queue.push(node.id) }
      } catch {
        // Best effort: a failing predicate must not abort the whole walk.
      }
    }
    // Index cursor instead of Array#shift(), which is linear per call.
    for (let head = 0; head < queue.length; head++) {
      const callees = this.outAdj.get(queue[head])
      if (!callees) continue
      for (const c of callees) {
        if (!reachable.has(c)) { reachable.add(c); queue.push(c) }
      }
    }
    this._reachable = reachable
    return reachable
  }

  /** Nodes with an edge pointing at `id` (unknown ids are dropped). */
  callersOf(id) {
    const sources = this.inAdj.get(id)
    if (!sources) return []
    return [...sources].map((sid) => this.nodes.get(sid)).filter(Boolean)
  }

  /** Nodes that `id` has an edge to (unknown ids are dropped). */
  calleesOf(id) {
    const targets = this.outAdj.get(id)
    if (!targets) return []
    return [...targets].map((tid) => this.nodes.get(tid)).filter(Boolean)
  }

  /**
   * Case-insensitive substring search over symbol names.
   *
   * @param {string} query
   * @param {number} [limit=50]  Maximum number of nodes returned; a limit
   *   of 0 or less returns nothing.
   * @returns {object[]}
   */
  findByName(query, limit = 50) {
    const q = (query || '').toLowerCase()
    if (!q || limit <= 0) return []
    const matches = []
    for (const [name, ids] of this.byName) {
      if (!name.includes(q)) continue
      for (const id of ids) {
        const n = this.nodes.get(id)
        if (n) matches.push(n)
        if (matches.length >= limit) return matches
      }
    }
    return matches
  }

  // ─── Scanning ──────────────────────────────────────────────────
  /**
   * Rebuild the graph from scratch.
   *
   * Pass 1 reads every file that has a registered parser and indexes its
   * symbols; pass 2 asks the same parser for references and keeps the edges
   * whose source and target are both known symbols.
   *
   * @param {Iterable<{id: string, absPath: string, ext: string}>} fileEntries
   * @param {Map<string, Set<string>>|null} [fileImports]  Optional
   *   fileId → imported fileIds map used by resolveCall. It is stored by
   *   reference and never modified by this class.
   * @param {{maxSymbols?: number, maxEdges?: number, maxFileBytes?: number}} [options]
   *   Caps; each falls back to the CS_MAX_SYMBOLS / CS_MAX_EDGES /
   *   CS_MAX_FILE_BYTES environment variable, then to 200000 / 1000000 /
   *   524288. A non-numeric environment value is ignored.
   * @returns {Promise<object>} The stats() snapshot after the build.
   */
  async build(fileEntries, fileImports = null, options = {}) {
    const start = Date.now()
    this.clear()
    // Assigned *after* clear so the host-provided map is the one in use.
    if (fileImports) this.fileImports = fileImports

    // Caps that bound the work on very large repositories.
    const pickLimit = (explicit, envName, fallback) => {
      if (explicit) return explicit
      const parsed = parseInt(process.env[envName] || String(fallback), 10)
      // A NaN limit would make every `>=` comparison false and silently
      // remove the cap.
      return Number.isNaN(parsed) ? fallback : parsed
    }
    const MAX_SYMBOLS = pickLimit(options?.maxSymbols, 'CS_MAX_SYMBOLS', 200000)
    const MAX_EDGES = pickLimit(options?.maxEdges, 'CS_MAX_EDGES', 1000000)
    const MAX_FILE_BYTES = pickLimit(options?.maxFileBytes, 'CS_MAX_FILE_BYTES', 524288) // 512KB

    // Marker used for the per-symbol probe (the lines just above a declaration).
    const DEPRECATED_RE = /@?deprecated\b|todo\s*[:_-]?\s*remove|fixme\s*[:_-]?\s*remove/i
    // Marker used ONLY against the first lines of a file.
    const HEAD_DEPRECATED_RE = /@?deprecated\b|do\s+not\s+use\b|work\s+in\s+progress\b|\bwip[\s:]/i

    let abortedAt = null
    // Pass 1 — symbols. Every symbol must be indexed before references can
    // be resolved in pass 2.
    let fileCount = 0
    const parsedFiles = new Map() // fileId → { content, parser } (kept for pass 2)
    for (const entry of fileEntries) {
      if (this.nodes.size >= MAX_SYMBOLS) { abortedAt = 'symbols'; break }
      const parser = PARSERS[entry.ext]
      if (!parser) continue
      let content
      let fileMtimeMs = 0
      try {
        const stat = fs.statSync(entry.absPath)
        if (stat.size > MAX_FILE_BYTES) continue // skip giant files
        content = fs.readFileSync(entry.absPath, 'utf8')
        fileMtimeMs = stat.mtimeMs
      } catch { continue }
      parsedFiles.set(entry.id, { content, parser })
      let symbols
      try {
        symbols = (await parser.extractSymbols(content, entry.id)) || []
      } catch (e) { symbols = [] }

      // Split into lines only if a symbol-level probe actually needs it.
      let cachedLines = null
      const lines = () => cachedLines ??= content.split('\n')
      const fileHasDeprecated = DEPRECATED_RE.test(content)

      // File-level marker: if the first 5 lines flag the file, every symbol
      // in it is tagged. The header is cut out by scanning for the fifth
      // line break, so this check does not force the full split above.
      let headEnd = -1
      for (let k = 0; k < 5; k++) {
        headEnd = content.indexOf('\n', headEnd + 1)
        if (headEnd === -1) break
      }
      const head = headEnd === -1 ? content : content.slice(0, headEnd)
      const fileLevelDeprecated = HEAD_DEPRECATED_RE.test(head)

      for (const s of symbols) {
        if (this.nodes.size >= MAX_SYMBOLS) { abortedAt = 'symbols'; break }
        // Stamp every symbol with the file's modification time.
        s.mtimeMs = fileMtimeMs
        if (fileLevelDeprecated) {
          s.deprecated = true
        } else if (fileHasDeprecated && s.startLine) {
          // Symbol-level marker: the 5 lines directly above the declaration.
          const from = Math.max(0, s.startLine - 1 - 5)
          const prelude = lines().slice(from, s.startLine - 1).join('\n')
          if (DEPRECATED_RE.test(prelude)) s.deprecated = true
        }
        this.addNode(s)
      }
      fileCount++
    }

    // Pass 2 — references. The parser that produced a file's symbols also
    // resolves its references; it receives `this` as the symbol index.
    for (const [fileId, { content, parser }] of parsedFiles) {
      if (this.edges.length >= MAX_EDGES) { abortedAt = abortedAt || 'edges'; break }
      if (!parser.extractReferences) continue
      let refs
      try {
        refs = (await parser.extractReferences(content, fileId, this)) || []
      } catch (e) { refs = [] }
      for (const r of refs) {
        if (this.edges.length >= MAX_EDGES) { abortedAt = abortedAt || 'edges'; break }
        if (this.nodes.has(r.source) && this.nodes.has(r.target)) {
          this.addEdge(r)
        }
      }
    }
    this.fileCount = fileCount
    this.builtAt = Date.now()
    this.scanMs = this.builtAt - start
    this.abortedAt = abortedAt // null or 'symbols'/'edges'
    return this.stats()
  }

  /** Snapshot of counts (overall, per node kind, per edge kind) and timings. */
  stats() {
    const byKind = {}
    for (const n of this.nodes.values()) {
      byKind[n.kind] = (byKind[n.kind] || 0) + 1
    }
    const byEdgeKind = {}
    for (const e of this.edges) {
      byEdgeKind[e.kind] = (byEdgeKind[e.kind] || 0) + 1
    }
    return {
      fileCount: this.fileCount,
      symbolCount: this.nodes.size,
      edgeCount: this.edges.length,
      byKind,
      byEdgeKind,
      scanMs: this.scanMs,
      builtAt: this.builtAt,
      abortedAt: this.abortedAt || null, // null | 'symbols' | 'edges'
    }
  }
}
401
+
402
+ module.exports = { SymbolGraph, registerParser, extFor, PARSERS }