seer-mcp 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (371) hide show
  1. package/.vscode/settings.json +3 -0
  2. package/LICENSE +176 -0
  3. package/README.md +272 -0
  4. package/README_dev.md +199 -0
  5. package/dist/bundle/ci.d.ts +47 -0
  6. package/dist/bundle/ci.d.ts.map +1 -0
  7. package/dist/bundle/ci.js +113 -0
  8. package/dist/bundle/ci.js.map +1 -0
  9. package/dist/bundle/contract.d.ts +111 -0
  10. package/dist/bundle/contract.d.ts.map +1 -0
  11. package/dist/bundle/contract.js +352 -0
  12. package/dist/bundle/contract.js.map +1 -0
  13. package/dist/bundle/export.d.ts +36 -0
  14. package/dist/bundle/export.d.ts.map +1 -0
  15. package/dist/bundle/export.js +152 -0
  16. package/dist/bundle/export.js.map +1 -0
  17. package/dist/bundle/external.d.ts +66 -0
  18. package/dist/bundle/external.d.ts.map +1 -0
  19. package/dist/bundle/external.js +238 -0
  20. package/dist/bundle/external.js.map +1 -0
  21. package/dist/bundle/format.d.ts +94 -0
  22. package/dist/bundle/format.d.ts.map +1 -0
  23. package/dist/bundle/format.js +42 -0
  24. package/dist/bundle/format.js.map +1 -0
  25. package/dist/bundle/import.d.ts +49 -0
  26. package/dist/bundle/import.d.ts.map +1 -0
  27. package/dist/bundle/import.js +116 -0
  28. package/dist/bundle/import.js.map +1 -0
  29. package/dist/cli/index.d.ts +3 -0
  30. package/dist/cli/index.d.ts.map +1 -0
  31. package/dist/cli/index.js +1402 -0
  32. package/dist/cli/index.js.map +1 -0
  33. package/dist/cli/init.d.ts +48 -0
  34. package/dist/cli/init.d.ts.map +1 -0
  35. package/dist/cli/init.js +284 -0
  36. package/dist/cli/init.js.map +1 -0
  37. package/dist/db/schema.d.ts +3 -0
  38. package/dist/db/schema.d.ts.map +1 -0
  39. package/dist/db/schema.js +616 -0
  40. package/dist/db/schema.js.map +1 -0
  41. package/dist/db/store.d.ts +1011 -0
  42. package/dist/db/store.d.ts.map +1 -0
  43. package/dist/db/store.js +3888 -0
  44. package/dist/db/store.js.map +1 -0
  45. package/dist/graph/pagerank.d.ts +9 -0
  46. package/dist/graph/pagerank.d.ts.map +1 -0
  47. package/dist/graph/pagerank.js +47 -0
  48. package/dist/graph/pagerank.js.map +1 -0
  49. package/dist/indexer/architecture.d.ts +72 -0
  50. package/dist/indexer/architecture.d.ts.map +1 -0
  51. package/dist/indexer/architecture.js +112 -0
  52. package/dist/indexer/architecture.js.map +1 -0
  53. package/dist/indexer/behavior.d.ts +75 -0
  54. package/dist/indexer/behavior.d.ts.map +1 -0
  55. package/dist/indexer/behavior.js +395 -0
  56. package/dist/indexer/behavior.js.map +1 -0
  57. package/dist/indexer/boundaries.d.ts +60 -0
  58. package/dist/indexer/boundaries.d.ts.map +1 -0
  59. package/dist/indexer/boundaries.js +366 -0
  60. package/dist/indexer/boundaries.js.map +1 -0
  61. package/dist/indexer/churn.d.ts +15 -0
  62. package/dist/indexer/churn.d.ts.map +1 -0
  63. package/dist/indexer/churn.js +49 -0
  64. package/dist/indexer/churn.js.map +1 -0
  65. package/dist/indexer/classify.d.ts +9 -0
  66. package/dist/indexer/classify.d.ts.map +1 -0
  67. package/dist/indexer/classify.js +90 -0
  68. package/dist/indexer/classify.js.map +1 -0
  69. package/dist/indexer/context.d.ts +176 -0
  70. package/dist/indexer/context.d.ts.map +1 -0
  71. package/dist/indexer/context.js +193 -0
  72. package/dist/indexer/context.js.map +1 -0
  73. package/dist/indexer/continuity.d.ts +67 -0
  74. package/dist/indexer/continuity.d.ts.map +1 -0
  75. package/dist/indexer/continuity.js +288 -0
  76. package/dist/indexer/continuity.js.map +1 -0
  77. package/dist/indexer/detectchanges.d.ts +32 -0
  78. package/dist/indexer/detectchanges.d.ts.map +1 -0
  79. package/dist/indexer/detectchanges.js +74 -0
  80. package/dist/indexer/detectchanges.js.map +1 -0
  81. package/dist/indexer/discovery.d.ts +37 -0
  82. package/dist/indexer/discovery.d.ts.map +1 -0
  83. package/dist/indexer/discovery.js +136 -0
  84. package/dist/indexer/discovery.js.map +1 -0
  85. package/dist/indexer/externaldeps.d.ts +18 -0
  86. package/dist/indexer/externaldeps.d.ts.map +1 -0
  87. package/dist/indexer/externaldeps.js +288 -0
  88. package/dist/indexer/externaldeps.js.map +1 -0
  89. package/dist/indexer/freshness.d.ts +48 -0
  90. package/dist/indexer/freshness.d.ts.map +1 -0
  91. package/dist/indexer/freshness.js +128 -0
  92. package/dist/indexer/freshness.js.map +1 -0
  93. package/dist/indexer/git.d.ts +144 -0
  94. package/dist/indexer/git.d.ts.map +1 -0
  95. package/dist/indexer/git.js +444 -0
  96. package/dist/indexer/git.js.map +1 -0
  97. package/dist/indexer/index.d.ts +145 -0
  98. package/dist/indexer/index.d.ts.map +1 -0
  99. package/dist/indexer/index.js +930 -0
  100. package/dist/indexer/index.js.map +1 -0
  101. package/dist/indexer/modules.d.ts +62 -0
  102. package/dist/indexer/modules.d.ts.map +1 -0
  103. package/dist/indexer/modules.js +293 -0
  104. package/dist/indexer/modules.js.map +1 -0
  105. package/dist/indexer/preflight.d.ts +154 -0
  106. package/dist/indexer/preflight.d.ts.map +1 -0
  107. package/dist/indexer/preflight.js +399 -0
  108. package/dist/indexer/preflight.js.map +1 -0
  109. package/dist/indexer/protoScanner.d.ts +34 -0
  110. package/dist/indexer/protoScanner.d.ts.map +1 -0
  111. package/dist/indexer/protoScanner.js +133 -0
  112. package/dist/indexer/protoScanner.js.map +1 -0
  113. package/dist/indexer/risk.d.ts +115 -0
  114. package/dist/indexer/risk.d.ts.map +1 -0
  115. package/dist/indexer/risk.js +194 -0
  116. package/dist/indexer/risk.js.map +1 -0
  117. package/dist/indexer/serviceHostScanner.d.ts +25 -0
  118. package/dist/indexer/serviceHostScanner.d.ts.map +1 -0
  119. package/dist/indexer/serviceHostScanner.js +95 -0
  120. package/dist/indexer/serviceHostScanner.js.map +1 -0
  121. package/dist/indexer/serviceLinks.d.ts +105 -0
  122. package/dist/indexer/serviceLinks.d.ts.map +1 -0
  123. package/dist/indexer/serviceLinks.js +509 -0
  124. package/dist/indexer/serviceLinks.js.map +1 -0
  125. package/dist/indexer/shapehash.d.ts +98 -0
  126. package/dist/indexer/shapehash.d.ts.map +1 -0
  127. package/dist/indexer/shapehash.js +354 -0
  128. package/dist/indexer/shapehash.js.map +1 -0
  129. package/dist/indexer/skeleton.d.ts +15 -0
  130. package/dist/indexer/skeleton.d.ts.map +1 -0
  131. package/dist/indexer/skeleton.js +136 -0
  132. package/dist/indexer/skeleton.js.map +1 -0
  133. package/dist/indexer/symbolhistory.d.ts +41 -0
  134. package/dist/indexer/symbolhistory.d.ts.map +1 -0
  135. package/dist/indexer/symbolhistory.js +124 -0
  136. package/dist/indexer/symbolhistory.js.map +1 -0
  137. package/dist/indexer/watcher.d.ts +68 -0
  138. package/dist/indexer/watcher.d.ts.map +1 -0
  139. package/dist/indexer/watcher.js +179 -0
  140. package/dist/indexer/watcher.js.map +1 -0
  141. package/dist/mcp/server.d.ts +80 -0
  142. package/dist/mcp/server.d.ts.map +1 -0
  143. package/dist/mcp/server.js +1610 -0
  144. package/dist/mcp/server.js.map +1 -0
  145. package/dist/parser/index.d.ts +8 -0
  146. package/dist/parser/index.d.ts.map +1 -0
  147. package/dist/parser/index.js +33 -0
  148. package/dist/parser/index.js.map +1 -0
  149. package/dist/parser/languages/cpp.d.ts +3 -0
  150. package/dist/parser/languages/cpp.d.ts.map +1 -0
  151. package/dist/parser/languages/cpp.js +350 -0
  152. package/dist/parser/languages/cpp.js.map +1 -0
  153. package/dist/parser/languages/csharp.d.ts +3 -0
  154. package/dist/parser/languages/csharp.d.ts.map +1 -0
  155. package/dist/parser/languages/csharp.js +239 -0
  156. package/dist/parser/languages/csharp.js.map +1 -0
  157. package/dist/parser/languages/go.d.ts +3 -0
  158. package/dist/parser/languages/go.d.ts.map +1 -0
  159. package/dist/parser/languages/go.js +259 -0
  160. package/dist/parser/languages/go.js.map +1 -0
  161. package/dist/parser/languages/java.d.ts +3 -0
  162. package/dist/parser/languages/java.d.ts.map +1 -0
  163. package/dist/parser/languages/java.js +391 -0
  164. package/dist/parser/languages/java.js.map +1 -0
  165. package/dist/parser/languages/python.d.ts +3 -0
  166. package/dist/parser/languages/python.d.ts.map +1 -0
  167. package/dist/parser/languages/python.js +396 -0
  168. package/dist/parser/languages/python.js.map +1 -0
  169. package/dist/parser/languages/rust.d.ts +3 -0
  170. package/dist/parser/languages/rust.d.ts.map +1 -0
  171. package/dist/parser/languages/rust.js +159 -0
  172. package/dist/parser/languages/rust.js.map +1 -0
  173. package/dist/parser/languages/typescript.d.ts +3 -0
  174. package/dist/parser/languages/typescript.d.ts.map +1 -0
  175. package/dist/parser/languages/typescript.js +1442 -0
  176. package/dist/parser/languages/typescript.js.map +1 -0
  177. package/dist/parser/parserContext.d.ts +77 -0
  178. package/dist/parser/parserContext.d.ts.map +1 -0
  179. package/dist/parser/parserContext.js +354 -0
  180. package/dist/parser/parserContext.js.map +1 -0
  181. package/dist/parser/walker.d.ts +81 -0
  182. package/dist/parser/walker.d.ts.map +1 -0
  183. package/dist/parser/walker.js +217 -0
  184. package/dist/parser/walker.js.map +1 -0
  185. package/dist/parser/worker.d.ts +66 -0
  186. package/dist/parser/worker.d.ts.map +1 -0
  187. package/dist/parser/worker.js +129 -0
  188. package/dist/parser/worker.js.map +1 -0
  189. package/dist/parser/workerpool.d.ts +107 -0
  190. package/dist/parser/workerpool.d.ts.map +1 -0
  191. package/dist/parser/workerpool.js +383 -0
  192. package/dist/parser/workerpool.js.map +1 -0
  193. package/dist/scip/format.d.ts +87 -0
  194. package/dist/scip/format.d.ts.map +1 -0
  195. package/dist/scip/format.js +31 -0
  196. package/dist/scip/format.js.map +1 -0
  197. package/dist/scip/import.d.ts +37 -0
  198. package/dist/scip/import.d.ts.map +1 -0
  199. package/dist/scip/import.js +180 -0
  200. package/dist/scip/import.js.map +1 -0
  201. package/dist/types.d.ts +392 -0
  202. package/dist/types.d.ts.map +1 -0
  203. package/dist/types.js +4 -0
  204. package/dist/types.js.map +1 -0
  205. package/docs/architecture.md +105 -0
  206. package/docs/benchmarks/methodology.md +134 -0
  207. package/docs/benchmarks/raw-results.md +71 -0
  208. package/docs/benchmarks.md +74 -0
  209. package/docs/cli.md +148 -0
  210. package/docs/examples/behavior-tests.md +70 -0
  211. package/docs/examples/change-history.md +85 -0
  212. package/docs/examples/pre-edit-context.md +81 -0
  213. package/docs/examples/service-links.md +88 -0
  214. package/docs/examples.md +80 -0
  215. package/docs/faq.md +70 -0
  216. package/docs/internals.md +104 -0
  217. package/docs/languages.md +70 -0
  218. package/docs/limits.md +52 -0
  219. package/docs/mcp.md +199 -0
  220. package/docs/quickstart.md +119 -0
  221. package/docs/testing.md +123 -0
  222. package/docs/tools.md +115 -0
  223. package/package.json +52 -0
  224. package/research-codebase.md +578 -0
  225. package/seer-cli-docs.md +326 -0
  226. package/seer-master-guide.md +246 -0
  227. package/src/bundle/ci.ts +141 -0
  228. package/src/bundle/contract.ts +387 -0
  229. package/src/bundle/export.ts +175 -0
  230. package/src/bundle/external.ts +285 -0
  231. package/src/bundle/format.ts +92 -0
  232. package/src/bundle/import.ts +157 -0
  233. package/src/cli/index.ts +1249 -0
  234. package/src/cli/init.ts +389 -0
  235. package/src/db/schema.ts +614 -0
  236. package/src/db/store.ts +4306 -0
  237. package/src/graph/pagerank.ts +53 -0
  238. package/src/indexer/architecture.ts +148 -0
  239. package/src/indexer/behavior.ts +466 -0
  240. package/src/indexer/boundaries.ts +374 -0
  241. package/src/indexer/churn.ts +58 -0
  242. package/src/indexer/classify.ts +96 -0
  243. package/src/indexer/context.ts +340 -0
  244. package/src/indexer/continuity.ts +322 -0
  245. package/src/indexer/detectchanges.ts +94 -0
  246. package/src/indexer/discovery.ts +176 -0
  247. package/src/indexer/externaldeps.ts +243 -0
  248. package/src/indexer/freshness.ts +166 -0
  249. package/src/indexer/git.ts +453 -0
  250. package/src/indexer/index.ts +1092 -0
  251. package/src/indexer/modules.ts +358 -0
  252. package/src/indexer/preflight.ts +548 -0
  253. package/src/indexer/protoScanner.ts +147 -0
  254. package/src/indexer/risk.ts +304 -0
  255. package/src/indexer/serviceHostScanner.ts +92 -0
  256. package/src/indexer/serviceLinks.ts +543 -0
  257. package/src/indexer/shapehash.ts +370 -0
  258. package/src/indexer/skeleton.ts +169 -0
  259. package/src/indexer/symbolhistory.ts +172 -0
  260. package/src/indexer/watcher.ts +206 -0
  261. package/src/mcp/server.ts +1659 -0
  262. package/src/parser/index.ts +37 -0
  263. package/src/parser/languages/cpp.ts +361 -0
  264. package/src/parser/languages/csharp.ts +235 -0
  265. package/src/parser/languages/go.ts +259 -0
  266. package/src/parser/languages/java.ts +382 -0
  267. package/src/parser/languages/python.ts +370 -0
  268. package/src/parser/languages/rust.ts +164 -0
  269. package/src/parser/languages/typescript.ts +1435 -0
  270. package/src/parser/parserContext.ts +392 -0
  271. package/src/parser/walker.ts +306 -0
  272. package/src/parser/worker.ts +181 -0
  273. package/src/parser/workerpool.ts +448 -0
  274. package/src/scip/format.ts +83 -0
  275. package/src/scip/import.ts +216 -0
  276. package/src/types.ts +457 -0
  277. package/tests/benchmark-service-links.ts +244 -0
  278. package/tests/bug-regressions.ts +626 -0
  279. package/tests/filters.ts +264 -0
  280. package/tests/fixtures/Counter.tsx +38 -0
  281. package/tests/fixtures/caller.ts +7 -0
  282. package/tests/fixtures/collisions.ts +23 -0
  283. package/tests/fixtures/local_helper.ts +5 -0
  284. package/tests/fixtures/overloads.java +17 -0
  285. package/tests/fixtures/remote_helper.ts +4 -0
  286. package/tests/fixtures/sample.c +15 -0
  287. package/tests/fixtures/sample.cpp +47 -0
  288. package/tests/fixtures/sample.cs +62 -0
  289. package/tests/fixtures/sample.go +68 -0
  290. package/tests/fixtures/sample.h +30 -0
  291. package/tests/fixtures/sample.java +85 -0
  292. package/tests/fixtures/sample.py +46 -0
  293. package/tests/fixtures/sample.rs +78 -0
  294. package/tests/fixtures/sample.ts +76 -0
  295. package/tests/fixtures-service/HttpClients.cs +30 -0
  296. package/tests/fixtures-service/HttpClients.java +24 -0
  297. package/tests/fixtures-service/billing.ts +15 -0
  298. package/tests/fixtures-service/docker-compose.yml +15 -0
  299. package/tests/fixtures-service/gateway.ts +10 -0
  300. package/tests/fixtures-service/get_user.ts +11 -0
  301. package/tests/fixtures-service/graphql_client.ts +63 -0
  302. package/tests/fixtures-service/graphql_server.ts +30 -0
  303. package/tests/fixtures-service/grpc_client.go +30 -0
  304. package/tests/fixtures-service/http_clients.go +23 -0
  305. package/tests/fixtures-service/http_clients.py +38 -0
  306. package/tests/fixtures-service/http_clients.ts +49 -0
  307. package/tests/fixtures-service/k8s/payment-service.yaml +22 -0
  308. package/tests/fixtures-service/k8s_calls.ts +20 -0
  309. package/tests/fixtures-service/messaging.ts +87 -0
  310. package/tests/fixtures-service/trpc_client.ts +39 -0
  311. package/tests/fixtures-service/trpc_server.ts +39 -0
  312. package/tests/fixtures-service/user_service.proto +33 -0
  313. package/tests/fixtures-trackcd/Cargo.toml +11 -0
  314. package/tests/fixtures-trackcd/SpringController.java +36 -0
  315. package/tests/fixtures-trackcd/auth_service.ts +19 -0
  316. package/tests/fixtures-trackcd/complex_module.py +50 -0
  317. package/tests/fixtures-trackcd/express_app.js +30 -0
  318. package/tests/fixtures-trackcd/fastapi_app.py +49 -0
  319. package/tests/fixtures-trackcd/fastify_object_routes.js +32 -0
  320. package/tests/fixtures-trackcd/go.mod +8 -0
  321. package/tests/fixtures-trackcd/package.json +15 -0
  322. package/tests/fixtures-trackcd/requirements.txt +4 -0
  323. package/tests/fixtures-trackcd/tests/auth_service.test.ts +13 -0
  324. package/tests/fixtures-tracke/auth/AuthService.ts +23 -0
  325. package/tests/fixtures-tracke/auth/crypto.ts +7 -0
  326. package/tests/fixtures-tracke/billing/Billing.ts +20 -0
  327. package/tests/fixtures-tracke/billing/Invoice.ts +10 -0
  328. package/tests/fixtures-tracke/billing/server.ts +17 -0
  329. package/tests/fixtures-tracke/package.json +7 -0
  330. package/tests/fixtures-tracke/tests/auth.test.ts +23 -0
  331. package/tests/fixtures-tracke/tests/billing.test.ts +14 -0
  332. package/tests/fixtures-trackf/package.json +5 -0
  333. package/tests/fixtures-trackf/src/auth.ts +26 -0
  334. package/tests/fixtures-trackf/src/handlers.ts +35 -0
  335. package/tests/fixtures-tracki/billing/routes.ts +12 -0
  336. package/tests/fixtures-tracki/gateway/client.ts +13 -0
  337. package/tests/git-features.ts +267 -0
  338. package/tests/init.ts +141 -0
  339. package/tests/mcp-jit.ts +130 -0
  340. package/tests/mcp-smoke.ts +191 -0
  341. package/tests/mcp-trackcd.ts +169 -0
  342. package/tests/mcp-tracke.ts +229 -0
  343. package/tests/mcp-trackf.ts +330 -0
  344. package/tests/mcp-trackg.ts +219 -0
  345. package/tests/mcp-tracki.ts +174 -0
  346. package/tests/mcp-watcher.ts +126 -0
  347. package/tests/optspec.ts +194 -0
  348. package/tests/parallel-index.ts +333 -0
  349. package/tests/parallel-read.ts +125 -0
  350. package/tests/parallel-recovery.ts +241 -0
  351. package/tests/perf-callers.ts +145 -0
  352. package/tests/query-parity.ts +184 -0
  353. package/tests/query-perf.ts +55 -0
  354. package/tests/scale-parallel-parity.ts +225 -0
  355. package/tests/scale-test.ts +523 -0
  356. package/tests/smoke.ts +396 -0
  357. package/tests/trackcd.ts +325 -0
  358. package/tests/tracke-collisions.ts +255 -0
  359. package/tests/tracke.ts +314 -0
  360. package/tests/trackf-bugs.ts +406 -0
  361. package/tests/trackf.ts +390 -0
  362. package/tests/trackg.ts +1372 -0
  363. package/tests/tracki-boundaries.ts +202 -0
  364. package/tests/tracki-continuity.ts +253 -0
  365. package/tests/tracki-contract-diff.ts +249 -0
  366. package/tests/tracki-external-bundles.ts +341 -0
  367. package/tests/tracki-preflight.ts +251 -0
  368. package/tests/verify-roles.ts +51 -0
  369. package/tests/worker-parity.ts +286 -0
  370. package/tests/worker-pool.ts +262 -0
  371. package/tsconfig.json +20 -0
@@ -0,0 +1,4306 @@
1
+ import path from 'path';
2
+ import { DatabaseSync, StatementSync } from 'node:sqlite';
3
+ import { CURRENT_SCHEMA_VERSION, SCHEMA_SQL } from './schema.js';
4
+ import type {
5
+ SymbolDef, SymbolKind, SymbolRole, SymbolRow, CallerRow, CalleeRow, StatsRow,
6
+ RouteRow, ExternalDepRow, ConfigKeyRow, FileChurnRow, SymbolHistoryRow,
7
+ } from '../types.js';
8
+
9
/**
 * Symbol kinds that take part in PageRank, ranking, and the default symbol
 * list.
 *
 * Functions, methods, constructors and classes are call targets: edges flow
 * into them and that is where meaningful behavior lives. Structs, enums, type
 * aliases, interfaces and variables are type/state declarations rather than
 * call targets, so they are left out.
 *
 * Leaving them out is a correctness fix as well as an optimization. If they
 * were included, the graph would carry hundreds of thousands of isolated
 * zero-edge nodes (one per struct/enum row) that soak up the (1-d)/n mass on
 * every iteration without ever propagating it, diluting the score of every
 * real function and growing compute time linearly with the noise count.
 */
const RANKABLE_KINDS: ReadonlySet<SymbolKind> = new Set<SymbolKind>([
  'function',
  'method',
  'constructor',
  'class',
]);

/**
 * Value written under the `service_calls_backfilled` key of `_schema_meta`
 * by `markServiceCallsBackfilled()` and compared by
 * `needsServiceCallBackfill()` to decide whether the backfill already ran.
 */
const SERVICE_CALLS_BACKFILL_VERSION = '1';
27
+
28
/**
 * Report whether `kind` is one of the kinds listed in `RANKABLE_KINDS`.
 *
 * @param kind - Symbol kind as a plain string (any string is accepted).
 * @returns `true` when the kind is rankable, `false` otherwise.
 */
export function isRankableKind(kind: string): boolean {
  const candidate = kind as SymbolKind;
  return RANKABLE_KINDS.has(candidate);
}
31
+
32
+ // Typed wrapper around node:sqlite rows (which use null prototypes)
33
+ type Row = Record<string, unknown>;
34
+
35
/**
 * Coerce a raw row value to a number using `Number()` semantics
 * (so `null` becomes 0 and `undefined` becomes NaN).
 */
function toNum(v: unknown): number {
  const numeric = Number(v);
  return numeric;
}
36
/**
 * Backslash-escape the SQLite LIKE metacharacters (`%`, `_`, `\`) so the
 * result can be used in a pattern paired with `ESCAPE '\'`.
 *
 * This lets a literal filename such as `bom_crlf.ts` match exactly, without
 * `_` being treated as a single-character wildcard.
 *
 * @param s - Text to embed literally in a LIKE pattern.
 * @returns `s` with every `%`, `_` and `\` preceded by a backslash.
 */
function escapeLike(s: string): string {
  let escaped = '';
  for (const ch of s) {
    const isMeta = ch === '\\' || ch === '%' || ch === '_';
    escaped += isMeta ? '\\' + ch : ch;
  }
  return escaped;
}
42
/** Stringify a raw row value; `null` and `undefined` become the empty string. */
function toStr(v: unknown): string {
  const value = v ?? '';
  return String(value);
}

/** Stringify a raw row value, passing `null`/`undefined` through as `null`. */
function toNullStr(v: unknown): string | null {
  if (v == null) return null;
  return String(v);
}

/** Coerce a raw row value with `Number()`, passing `null`/`undefined` through as `null`. */
function toNullNum(v: unknown): number | null {
  if (v == null) return null;
  return Number(v);
}
45
+
46
/**
 * Map a 64-bit unsigned shape hash onto the signed range so it fits an SQLite
 * INTEGER column.
 *
 * The top bit is interpreted as the sign: values from
 * `0x8000_0000_0000_0000` upward become negative, everything below is
 * returned unchanged. `toUnsignedI64` performs the inverse mapping, so the
 * pair round-trips without loss.
 *
 * @param u - Unsigned hash value.
 * @returns The same bit pattern read as a signed 64-bit integer.
 */
function toSignedI64(u: bigint): bigint {
  const TWO_POW_63 = 1n << 63n;
  const TWO_POW_64 = 1n << 64n;
  if (u < TWO_POW_63) return u;
  return u - TWO_POW_64;
}
56
/**
 * Read a stored signed 64-bit value back as the unsigned hash it encodes
 * (the inverse of `toSignedI64`).
 *
 * - `null` / `undefined` yield `0n`.
 * - A `bigint` is used as-is.
 * - A string holding a plain decimal integer is parsed directly with
 *   `BigInt`, so values beyond 2^53 keep every digit. Going through
 *   `Number()` first would round them (e.g. "18446744073709551615" would
 *   come back as 2^64, which is not a valid 64-bit value).
 * - Anything else falls back to `BigInt(Number(v))`; a JS `number` beyond
 *   the safe-integer range has already lost precision before it gets here.
 *
 * @param v - Raw column value.
 * @returns The value with negatives shifted up by 2^64.
 * @throws RangeError when the fallback path sees a non-integer number (NaN,
 *   fractions), as `BigInt()` rejects those.
 */
function toUnsignedI64(v: unknown): bigint {
  if (v == null) return 0n;

  let b: bigint;
  if (typeof v === 'bigint') {
    b = v;
  } else if (typeof v === 'string' && /^-?\d+$/.test(v.trim())) {
    // Exact parse: never round-trip an integer string through a double.
    b = BigInt(v.trim());
  } else {
    b = BigInt(Number(v));
  }

  return b < 0n ? b + 0x10000000000000000n : b;
}
61
+
62
+ export interface EdgeResolutionStats {
63
+ sameFile: number;
64
+ imported: number;
65
+ global: number;
66
+ total: number;
67
+ }
68
+
69
+ /**
70
+ * What kind of code a file is. Used to keep project-owned code from being
71
+ * drowned out by vendored or generated boilerplate in ranking and search.
72
+ */
73
+ export type FileRole = 'project' | 'vendor' | 'generated' | 'test';
74
+
75
+ export interface FileClassification {
76
+ role: FileRole;
77
+ isVendor: 0 | 1;
78
+ isGenerated: 0 | 1;
79
+ }
80
+
81
+ export interface SymbolSearchOptions {
82
+ limit?: number;
83
+ includeVendor?: boolean;
84
+ includeGenerated?: boolean;
85
+ /**
86
+ * When false (default for agent-facing search/ranking), file-role=test
87
+ * symbols are filtered out. seer_behavior bypasses this because the test
88
+ * relationship IS its content; everything else (top symbols, search, deps,
89
+ * complexity) should default to non-test code so agents don't get drowned
90
+ * in test names. Indexer-wide test indexing stays on so seer_behavior keeps
91
+ * working — this filter is purely query-side.
92
+ */
93
+ includeTests?: boolean;
94
+ /**
95
+ * When false (default), rows where symbol_role='declaration' (forward
96
+ * declarations, C++ class-body method declarations whose bodies live
97
+ * out-of-line) are hidden. Pass true to include them — useful for
98
+ * "show me every place this method is announced" workflows.
99
+ */
100
+ includeDeclarations?: boolean;
101
+ /**
102
+ * When false (default), symbol_role='type_ref' rows stay hidden. Currently
103
+ * Seer's extractors never emit type-ref rows, so the flag is a forward-
104
+ * looking opt-in for future indexing modes that materialize them.
105
+ */
106
+ includeTypeRefs?: boolean;
107
+ }
108
+
109
/**
 * Assemble the SQL predicate fragment for the default project-first lens.
 *
 * `findSymbols` / `getDefinition` / `getTopSymbols` / `countSymbols` and the
 * MCP tool wrappers around them use it. Every `include*` flag switches OFF
 * the restriction it names.
 *
 * Older on-disk schemas are tolerated: when the file role columns
 * (`hasRoleColumns`) or the `symbol_role` column
 * (`options.hasSymbolRoleColumn`) are reported missing, the clauses that
 * need them are left out, so a read-only open of an old index still works.
 *
 * @param filePrefix - Text prepended to file column names (e.g. `f.`).
 * @param includeVendor - When false, adds `is_vendor = 0`.
 * @param includeGenerated - When false, adds `is_generated = 0`.
 * @param hasRoleColumns - Whether the file role columns exist.
 * @param options - Optional symbol-level switches. The test, declaration and
 *   type_ref clauses are only added when their flag is explicitly `false`;
 *   `symbolPrefix` defaults to `s.`.
 * @returns `''` when nothing is restricted, otherwise `AND a AND b ...`.
 */
function buildRoleFilter(
  filePrefix: string,
  includeVendor: boolean,
  includeGenerated: boolean,
  hasRoleColumns: boolean,
  options?: {
    symbolPrefix?: string;
    includeTests?: boolean;
    includeDeclarations?: boolean;
    includeTypeRefs?: boolean;
    hasSymbolRoleColumn?: boolean;
  },
): string {
  const predicates: string[] = [];
  const addWhen = (condition: boolean, sql: string): void => {
    if (condition) predicates.push(sql);
  };

  if (hasRoleColumns) {
    addWhen(!includeVendor, `${filePrefix}is_vendor = 0`);
    addWhen(!includeGenerated, `${filePrefix}is_generated = 0`);
    addWhen(options?.includeTests === false, `${filePrefix}role <> 'test'`);
  }

  if (options?.hasSymbolRoleColumn) {
    const symPrefix = options.symbolPrefix ?? 's.';
    addWhen(options.includeDeclarations === false, `${symPrefix}symbol_role <> 'declaration'`);
    addWhen(options.includeTypeRefs === false, `${symPrefix}symbol_role <> 'type_ref'`);
  }

  if (predicates.length === 0) return '';
  return 'AND ' + predicates.join(' AND ');
}
145
+
146
/**
 * Fill in the agent-facing defaults for the include-flags. Every flag that is
 * not supplied resolves to `false`:
 *
 *  - vendor / generated code stays hidden (existing behavior);
 *  - tests stay hidden for ranking/search tools, layered on top of the file
 *    role classification — seer_behavior opts back in with
 *    `includeTests=true` because tests ARE its content;
 *  - declarations stay hidden so callers/top-by-rank concentrate on real
 *    definition sites;
 *  - type_refs stay hidden (and are not produced yet).
 *
 * @param opts - Caller-supplied search options.
 * @returns All five flags as concrete booleans.
 */
function resolveSearchFlags(opts: SymbolSearchOptions): {
  includeVendor: boolean;
  includeGenerated: boolean;
  includeTests: boolean;
  includeDeclarations: boolean;
  includeTypeRefs: boolean;
} {
  // `??` (not a destructuring default) so that null is treated like undefined.
  const orHidden = (flag: boolean | undefined): boolean => flag ?? false;

  const includeVendor = orHidden(opts.includeVendor);
  const includeGenerated = orHidden(opts.includeGenerated);
  const includeTests = orHidden(opts.includeTests);
  const includeDeclarations = orHidden(opts.includeDeclarations);
  const includeTypeRefs = orHidden(opts.includeTypeRefs);

  return { includeVendor, includeGenerated, includeTests, includeDeclarations, includeTypeRefs };
}
171
+
172
+ export interface StoreOptions {
173
+ readonly?: boolean;
174
+ busyTimeoutMs?: number;
175
+ }
176
+
177
+ export interface SchemaInfo {
178
+ dbVersion: number;
179
+ buildVersion: number;
180
+ current: boolean;
181
+ }
182
+
183
/**
 * Break an identifier into lower-cased search tokens. Applied at FTS-insert
 * time so that a query for "auth" finds `AuthService`, `auth_service`,
 * `authService` and `AuthServiceImpl` alike.
 *
 * Token sources, in order:
 *  - the whole input, so prefix matches still work;
 *  - each segment between runs of `.`, `_`, `-`, `/` or `:`;
 *  - each camelCase / PascalCase word inside a segment, including
 *    consecutive-capital runs (`XMLParser` → `xml`, `parser`).
 *
 * Tokens are de-duplicated and kept in first-seen order.
 *
 * @param s - Identifier to tokenize.
 * @returns Space-separated lower-case tokens, or `''` for empty input.
 */
export function splitIdentifierTokens(s: string): string {
  if (!s) return '';

  const tokens = new Set<string>();
  const add = (raw: string): void => {
    if (raw) tokens.add(raw.toLowerCase());
  };

  add(s);
  // Separators: . _ - / : (a run of them counts as one boundary)
  s.split(/[._\-/:]+/).forEach(segment => {
    add(segment);
    // Insert a space at lower/digit→upper boundaries, then between an
    // upper-case run and a following Capitalized word (XMLParser → XML Parser).
    const spaced = segment
      .replace(/([a-z0-9])([A-Z])/g, '$1 $2')
      .replace(/([A-Z])([A-Z][a-z])/g, '$1 $2');
    spaced.split(/\s+/).forEach(word => add(word));
  });

  return [...tokens].join(' ');
}
210
+
211
+ export class Store {
212
+ private db: DatabaseSync;
213
+ private readonly readonly: boolean;
214
+ private cachedSchemaInfo: SchemaInfo;
215
+ private hasRoleColumns: boolean;
216
+ private hasComplexityColumns: boolean;
217
+ private hasV4Tables: boolean;
218
+ /**
219
+ * True when the v5 `symbols.symbol_role` column exists. Read-only opens
220
+ * against a pre-v5 DB transparently skip declaration/type_ref filtering;
221
+ * writer opens always have it since runMigrations() adds the column.
222
+ */
223
+ private hasSymbolRoleColumn: boolean;
224
+ /**
225
+ * True when the v6 module tables (modules / module_members / module_edges)
226
+ * exist. Read-only opens against a pre-v6 DB skip module queries gracefully
227
+ * (they return empty arrays); writer opens always have it.
228
+ */
229
+ private hasModuleTables: boolean;
230
+ /**
231
+ * True when the v7 provenance/shape_hash columns + scip_imports table exist.
232
+ * Read-only opens against a pre-v7 DB return empty arrays for SCIP / dup
233
+ * queries and skip the provenance column on selects.
234
+ */
235
+ private hasV7Columns: boolean;
236
+ /**
237
+ * True when v10 external_bundles / boundaries / symbol_history_continuity
238
+ * tables exist. Read-only opens against a pre-v10 DB return empty arrays.
239
+ */
240
+ private hasV10Tables: boolean;
241
+
242
+ // Prepared statements — initialized in constructor (writer path only)
243
+ private stmtUpsertFile!: StatementSync;
244
+ private stmtInsertSymbol!: StatementSync;
245
+ private stmtInsertEdge!: StatementSync;
246
+ private stmtInsertFileImport!: StatementSync;
247
+ private stmtInsertRoute!: StatementSync;
248
+ private stmtInsertConfigKey!: StatementSync;
249
+ private stmtInsertExternalDep!: StatementSync;
250
+ private stmtInsertServiceCall!: StatementSync;
251
+ private stmtInsertServiceLink!: StatementSync;
252
+ private stmtInsertSymbolsFts!: StatementSync;
253
+ private stmtInsertFilesFts!: StatementSync;
254
+ private stmtDeleteSymbolsFtsForFile!: StatementSync;
255
+ private stmtDeleteFilesFtsForFile!: StatementSync;
256
+
257
/**
 * Open the SQLite index at `dbPath`.
 *
 * Read-only mode opens the file with `readOnly: true` and, best effort, sets
 * `busy_timeout` and `query_only`. Writer mode sets `busy_timeout`, applies
 * `SCHEMA_SQL`, runs migrations and prepares the write statements.
 * In both modes the schema info and the feature flags (`has*`) are then
 * probed from the opened database.
 *
 * Differences from a naive open sequence, each deliberate:
 *  - `busy_timeout` is set BEFORE `SCHEMA_SQL` runs, so schema creation
 *    waits on a lock held by another connection instead of failing at once.
 *  - `busyTimeoutMs` is interpolated into PRAGMA text, so it is reduced to a
 *    finite, non-negative 32-bit integer first; anything else falls back to
 *    the 5000 ms default.
 *  - if any step after opening throws, the handle is closed before the
 *    error is re-thrown so a failed construction does not leak it.
 *
 * @param dbPath - Path handed to `DatabaseSync`.
 * @param options - `readonly` selects read-only mode; `busyTimeoutMs`
 *   overrides the 5000 ms busy timeout.
 * @throws Whatever `DatabaseSync`, schema execution, migrations or statement
 *   preparation throw.
 */
constructor(dbPath: string, options: StoreOptions = {}) {
  this.readonly = Boolean(options.readonly);

  const DEFAULT_BUSY_MS = 5000;
  const requestedMs = options.busyTimeoutMs ?? DEFAULT_BUSY_MS;
  const busyMs = Number.isFinite(requestedMs) && requestedMs >= 0
    ? Math.min(Math.floor(requestedMs), 0x7FFFFFFF)
    : DEFAULT_BUSY_MS;

  this.db = this.readonly
    ? new DatabaseSync(dbPath, { readOnly: true })
    : new DatabaseSync(dbPath);

  try {
    if (this.readonly) {
      try { this.db.exec(`PRAGMA busy_timeout = ${busyMs}; PRAGMA query_only = ON;`); }
      catch { /* best effort */ }
    } else {
      // Timeout first: SCHEMA_SQL and the migrations may need to wait on locks.
      try { this.db.exec(`PRAGMA busy_timeout = ${busyMs};`); }
      catch { /* best effort */ }
      this.db.exec(SCHEMA_SQL);
      this.runMigrations();
      this.prepare();
    }

    this.cachedSchemaInfo = this.readSchemaInfo();
    this.hasRoleColumns = this.checkHasRoleColumns();
    this.hasComplexityColumns = this.hasColumn('symbols', 'cyclomatic');
    this.hasV4Tables = this.checkHasV4Tables();
    this.hasSymbolRoleColumn = this.hasColumn('symbols', 'symbol_role');
    this.hasModuleTables = this.checkHasModuleTables();
    this.hasV7Columns = this.hasColumn('symbols', 'provenance') && this.hasColumn('symbols', 'shape_hash');
    this.hasV10Tables = this.checkHasV10Tables();
  } catch (err: unknown) {
    // Do not leak the handle when construction fails part-way.
    try { this.db.close(); }
    catch { /* handle already unusable; surface the original error */ }
    throw err;
  }
}
282
+
283
/**
 * Probe `sqlite_master` for the five v10 tables (external_bundles,
 * boundaries, boundary_members, boundary_edges, symbol_history_continuity).
 *
 * @returns `true` only when all five exist; `false` if any is missing or the
 *   probe itself throws.
 */
private checkHasV10Tables(): boolean {
  const sql =
    "SELECT name FROM sqlite_master WHERE type='table' AND name IN ('external_bundles','boundaries','boundary_members','boundary_edges','symbol_history_continuity')";
  let found: Row[];
  try {
    found = this.db.prepare(sql).all() as Row[];
  } catch {
    return false;
  }
  return found.length === 5;
}
293
+
294
/**
 * Probe `sqlite_master` for the three module tables (modules,
 * module_members, module_edges).
 *
 * @returns `true` only when all three exist; `false` if any is missing or
 *   the probe itself throws.
 */
private checkHasModuleTables(): boolean {
  const sql =
    "SELECT name FROM sqlite_master WHERE type='table' AND name IN ('modules','module_members','module_edges')";
  let found: Row[];
  try {
    found = this.db.prepare(sql).all() as Row[];
  } catch {
    return false;
  }
  return found.length === 3;
}
304
+
305
/**
 * Check via `PRAGMA table_info(files)` that the `files` table has the
 * `role`, `is_vendor` and `is_generated` columns.
 *
 * @returns `true` only when all three columns are present; `false` if any is
 *   missing or the probe itself throws.
 */
private checkHasRoleColumns(): boolean {
  try {
    const info = this.db.prepare('PRAGMA table_info(files)').all() as Row[];
    const present = new Set<string>();
    for (const column of info) present.add(toStr(column.name));
    return ['role', 'is_vendor', 'is_generated'].every(required => present.has(required));
  } catch {
    return false;
  }
}
314
+
315
/**
 * Probe `sqlite_master` for the six v4 tables (routes,
 * external_dependencies, config_keys, file_churn, symbol_history,
 * git_index_state).
 *
 * @returns `true` only when all six exist; `false` if any is missing or the
 *   probe itself throws.
 */
private checkHasV4Tables(): boolean {
  const sql =
    "SELECT name FROM sqlite_master WHERE type='table' AND name IN ('routes','external_dependencies','config_keys','file_churn','symbol_history','git_index_state')";
  let found: Row[];
  try {
    found = this.db.prepare(sql).all() as Row[];
  } catch {
    return false;
  }
  return found.length === 6;
}
325
+
326
/**
 * Convenience factory: constructs a Store with the `readonly` option set.
 *
 * @param dbPath        Path of the database file, passed to the constructor.
 * @param busyTimeoutMs Optional busy timeout, forwarded unchanged.
 */
static openReadOnly(dbPath: string, busyTimeoutMs?: number): Store {
  const store = new Store(dbPath, { readonly: true, busyTimeoutMs });
  return store;
}
329
+
330
/** Returns the value of this store's `readonly` flag. */
isReadOnly(): boolean {
  return this.readonly;
}
331
+
332
/**
 * Guard used by mutating methods.
 *
 * @throws Error when the store's `readonly` flag is set.
 */
private assertWritable(): void {
  if (!this.readonly) return;
  throw new Error('Store is read-only; open a writable Store to mutate the index');
}
337
+
338
/** Returns the cached schema info stored on this instance. */
schemaInfo(): SchemaInfo {
  return this.cachedSchemaInfo;
}
339
+
340
/**
 * v8 Track-G migration guard. Opening an existing v7 database with v8 code
 * creates service_calls/service_links empty, and a cached re-index skips
 * every unchanged file, so those tables would stay empty forever. Until an
 * index run records the current backfill version, the indexer must force
 * one full parse pass.
 *
 * @returns false when the backfill marker already matches
 *   SERVICE_CALLS_BACKFILL_VERSION, or when any query fails; otherwise true
 *   only if the files table holds at least one row.
 */
needsServiceCallBackfill(): boolean {
  try {
    const marker = this.db.prepare(
      "SELECT value FROM _schema_meta WHERE key = 'service_calls_backfilled'",
    ).get() as Row | undefined;
    const recordedVersion = marker ? toStr(marker.value) : null;
    if (recordedVersion === SERVICE_CALLS_BACKFILL_VERSION) return false;

    // No matching marker: a backfill only matters if there is something indexed.
    const fileCount = this.db.prepare('SELECT COUNT(*) AS c FROM files').get() as Row;
    return toNum(fileCount.c) > 0;
  } catch {
    return false;
  }
}
359
+
360
/**
 * Records SERVICE_CALLS_BACKFILL_VERSION under the
 * 'service_calls_backfilled' key of _schema_meta, inserting the row or
 * overwriting its value if the key already exists.
 *
 * @throws Error when the store is read-only.
 */
markServiceCallsBackfilled(): void {
  this.assertWritable();
  const upsertMarker = this.db.prepare(
    "INSERT INTO _schema_meta (key, value) VALUES ('service_calls_backfilled', ?) " +
    "ON CONFLICT(key) DO UPDATE SET value = excluded.value",
  );
  upsertMarker.run(SERVICE_CALLS_BACKFILL_VERSION);
}
367
+
368
/**
 * Reads the schema version stored in _schema_meta and compares it with the
 * version this build targets. A missing row, an unparsable value, or a
 * failing query all leave the stored version at 0.
 */
private readSchemaInfo(): SchemaInfo {
  let dbVersion = 0;
  try {
    const stored = this.db.prepare(
      "SELECT value FROM _schema_meta WHERE key = 'schema_version'",
    ).get() as Row | undefined;
    if (stored) {
      const parsed = parseInt(toStr(stored.value), 10);
      dbVersion = Number.isNaN(parsed) ? 0 : parsed;
    }
  } catch { /* */ }

  const current = dbVersion === CURRENT_SCHEMA_VERSION;
  return { dbVersion, buildVersion: CURRENT_SCHEMA_VERSION, current };
}
382
+
383
/**
 * Upgrades an existing database in place to CURRENT_SCHEMA_VERSION.
 *
 * Every step is idempotent (ADD COLUMN only when missing, CREATE ... IF NOT
 * EXISTS), so the whole sequence is safe to run on each writable open. The
 * steps run in a fixed order; the per-version helpers below only group the
 * statements and must not be reordered without checking column/index
 * dependencies.
 */
private runMigrations(): void {
  this.migrateBaselineColumns();
  this.migrateV3Rankable();

  // Sample this BEFORE the v4 step adds symbol_key; it decides whether the
  // symbol_key backfill runs further down.
  const isV4Migration = !this.hasColumn('symbols', 'symbol_key');
  this.migrateV4SymbolMetrics();
  this.migrateV5SymbolRole();
  this.migrateV7Provenance();
  this.migrateV6ModuleTables();
  this.migrateV8ServiceTables();
  this.migrateV9ProtocolColumns();
  this.migrateV10ExternalLayers();

  // v4 backfill — required because upsertFileWithCache() short-circuits on
  // an unchanged content hash, so a v3 DB upgraded to v4 would never get
  // symbol_key populated (nor FTS rebuilt) for files whose source did not
  // change. Both backfills are idempotent.
  if (isV4Migration) {
    this.backfillSymbolKeysFromExistingRows();
  }
  // FTS rebuild: triggers when an FTS table is empty while its source table
  // has rows (the v3→v4 upgrade, or a DB that lost its FTS rows).
  this.rebuildFtsIfStale();

  this.db.prepare(
    "INSERT INTO _schema_meta (key, value) VALUES ('schema_version', ?) " +
    "ON CONFLICT(key) DO UPDATE SET value = excluded.value",
  ).run(String(CURRENT_SCHEMA_VERSION));
}

/** Pre-v3 columns: qualified names, resolved imports, file classification. */
private migrateBaselineColumns(): void {
  this.addColumnIfMissing('symbols', 'qualified_name', 'TEXT');
  this.addColumnIfMissing(
    'file_imports',
    'resolved_file_id',
    'INTEGER REFERENCES files(id) ON DELETE SET NULL',
  );
  this.addColumnIfMissing('files', 'role', "TEXT NOT NULL DEFAULT 'project'");
  this.addColumnIfMissing('files', 'is_vendor', 'INTEGER NOT NULL DEFAULT 0');
  this.addColumnIfMissing('files', 'is_generated', 'INTEGER NOT NULL DEFAULT 0');
  this.db.exec('CREATE INDEX IF NOT EXISTS idx_files_role ON files(role)');
  this.db.exec('CREATE INDEX IF NOT EXISTS idx_files_is_vendor ON files(is_vendor)');
  this.db.exec('CREATE INDEX IF NOT EXISTS idx_files_is_generated ON files(is_generated)');
}

/** v3: is_rankable flag; on first introduction, demote non-callable kinds. */
private migrateV3Rankable(): void {
  const isV3Migration = !this.hasColumn('symbols', 'is_rankable');
  this.addColumnIfMissing('symbols', 'is_rankable', 'INTEGER NOT NULL DEFAULT 1');
  this.db.exec('CREATE INDEX IF NOT EXISTS idx_symbols_is_rankable ON symbols(is_rankable)');
  this.db.exec('CREATE INDEX IF NOT EXISTS idx_symbols_file_name ON symbols(file_id, name)');
  if (isV3Migration) {
    this.db.prepare(
      `UPDATE symbols SET is_rankable = 0 WHERE kind NOT IN ('function','method','constructor','class')`,
    ).run();
    this.db.prepare('UPDATE symbols SET pagerank = 0 WHERE is_rankable = 0').run();
  }
}

/** v4 / v4.1: complexity columns, symbol_key, edge indexes, history markers. */
private migrateV4SymbolMetrics(): void {
  this.addColumnIfMissing('symbols', 'loc', 'INTEGER');
  this.addColumnIfMissing('symbols', 'cyclomatic', 'INTEGER');
  this.addColumnIfMissing('symbols', 'cognitive', 'INTEGER');
  this.addColumnIfMissing('symbols', 'max_nesting', 'INTEGER');
  this.addColumnIfMissing('symbols', 'symbol_key', 'TEXT');
  this.db.exec('CREATE INDEX IF NOT EXISTS idx_symbols_symbol_key ON symbols(symbol_key)');
  this.db.exec('CREATE INDEX IF NOT EXISTS idx_edges_kind ON edges(kind)');
  this.db.exec('CREATE INDEX IF NOT EXISTS idx_edges_from_to_kind ON edges(from_id, to_id, kind)');

  // v4.1: separate history HEAD marker so churn doesn't poison the
  // skip-if-unchanged check used by buildSymbolHistory. Existing DBs get
  // NULL, which forces history to run on the next invocation.
  this.addColumnIfMissing('git_index_state', 'last_history_head_sha', 'TEXT');
  this.addColumnIfMissing('git_index_state', 'last_history_at', 'INTEGER');
}

/**
 * v5: symbol_role on symbols. The NOT NULL DEFAULT 'definition' gives every
 * pre-v5 row a value without an explicit UPDATE backfill.
 */
private migrateV5SymbolRole(): void {
  this.addColumnIfMissing('symbols', 'symbol_role', "TEXT NOT NULL DEFAULT 'definition'");
  this.db.exec('CREATE INDEX IF NOT EXISTS idx_symbols_symbol_role ON symbols(symbol_role)');
}

/**
 * v7 / v7.1: provenance + shape_hash on symbols/edges, scip_import_id links,
 * and the scip_imports table. The indexes are created after the ALTERs in
 * this same step, so the indexed columns exist by the time they run.
 */
private migrateV7Provenance(): void {
  this.addColumnIfMissing('symbols', 'provenance', "TEXT NOT NULL DEFAULT 'tree-sitter'");
  this.addColumnIfMissing('symbols', 'shape_hash', 'INTEGER');
  this.addColumnIfMissing('edges', 'provenance', "TEXT NOT NULL DEFAULT 'tree-sitter'");
  // v7.1 — scip_import_id links a SCIP-provenance row back to the
  // scip_imports entry that produced it, so re-importing or clearing ONE
  // SCIP layer doesn't remove rows contributed by sibling layers.
  this.addColumnIfMissing('symbols', 'scip_import_id', 'INTEGER');
  this.addColumnIfMissing('edges', 'scip_import_id', 'INTEGER');
  this.db.exec('CREATE INDEX IF NOT EXISTS idx_symbols_provenance ON symbols(provenance)');
  this.db.exec('CREATE INDEX IF NOT EXISTS idx_symbols_shape_hash ON symbols(shape_hash) WHERE shape_hash IS NOT NULL');
  this.db.exec('CREATE INDEX IF NOT EXISTS idx_edges_provenance ON edges(provenance)');
  this.db.exec('CREATE INDEX IF NOT EXISTS idx_symbols_scip_import ON symbols(scip_import_id)');
  this.db.exec('CREATE INDEX IF NOT EXISTS idx_edges_scip_import ON edges(scip_import_id)');
  this.db.exec(`
    CREATE TABLE IF NOT EXISTS scip_imports (
    id INTEGER PRIMARY KEY,
    path TEXT NOT NULL,
    sha256 TEXT NOT NULL,
    tool TEXT,
    project_root TEXT,
    imported_at INTEGER NOT NULL,
    symbol_count INTEGER NOT NULL DEFAULT 0,
    ref_count INTEGER NOT NULL DEFAULT 0,
    UNIQUE(path, sha256)
    );
    CREATE INDEX IF NOT EXISTS idx_scip_imports_path ON scip_imports(path);
  `);
}

/**
 * v6: modules + module_members + module_edges. CREATE TABLE IF NOT EXISTS is
 * the whole migration; no rows are backfilled here.
 */
private migrateV6ModuleTables(): void {
  this.db.exec(`
    CREATE TABLE IF NOT EXISTS modules (
    id INTEGER PRIMARY KEY,
    label TEXT NOT NULL,
    size_files INTEGER NOT NULL DEFAULT 0,
    size_symbols INTEGER NOT NULL DEFAULT 0,
    primary_language TEXT,
    cohesion REAL NOT NULL DEFAULT 0,
    centrality REAL NOT NULL DEFAULT 0,
    computed_at INTEGER NOT NULL DEFAULT 0,
    algorithm TEXT NOT NULL DEFAULT 'louvain'
    );
    CREATE INDEX IF NOT EXISTS idx_modules_label ON modules(label);
    CREATE INDEX IF NOT EXISTS idx_modules_centrality ON modules(centrality DESC);
    CREATE INDEX IF NOT EXISTS idx_modules_size ON modules(size_files DESC);
    CREATE TABLE IF NOT EXISTS module_members (
    file_id INTEGER PRIMARY KEY REFERENCES files(id) ON DELETE CASCADE,
    module_id INTEGER NOT NULL REFERENCES modules(id) ON DELETE CASCADE
    );
    CREATE INDEX IF NOT EXISTS idx_module_members_module ON module_members(module_id);
    CREATE TABLE IF NOT EXISTS module_edges (
    id INTEGER PRIMARY KEY,
    from_module_id INTEGER NOT NULL REFERENCES modules(id) ON DELETE CASCADE,
    to_module_id INTEGER NOT NULL REFERENCES modules(id) ON DELETE CASCADE,
    kind TEXT NOT NULL DEFAULT 'call',
    weight INTEGER NOT NULL DEFAULT 1,
    UNIQUE(from_module_id, to_module_id, kind)
    );
    CREATE INDEX IF NOT EXISTS idx_module_edges_from ON module_edges(from_module_id);
    CREATE INDEX IF NOT EXISTS idx_module_edges_to ON module_edges(to_module_id);
  `);
}

/**
 * v8: Track-G service_calls + service_links tables. The tables start empty;
 * needsServiceCallBackfill() is what triggers the one-off forced parse pass.
 */
private migrateV8ServiceTables(): void {
  this.db.exec(`
    CREATE TABLE IF NOT EXISTS service_calls (
    id INTEGER PRIMARY KEY,
    file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
    symbol_id INTEGER REFERENCES symbols(id) ON DELETE SET NULL,
    protocol TEXT NOT NULL,
    method TEXT,
    raw_target TEXT NOT NULL,
    normalized_path TEXT,
    host_hint TEXT,
    env_key TEXT,
    framework TEXT NOT NULL,
    line INTEGER NOT NULL DEFAULT 0,
    confidence REAL NOT NULL DEFAULT 0.5
    );
    CREATE INDEX IF NOT EXISTS idx_service_calls_symbol_id ON service_calls(symbol_id);
    CREATE INDEX IF NOT EXISTS idx_service_calls_path ON service_calls(normalized_path);
    CREATE INDEX IF NOT EXISTS idx_service_calls_protocol ON service_calls(protocol);
    CREATE INDEX IF NOT EXISTS idx_service_calls_file_id ON service_calls(file_id);

    CREATE TABLE IF NOT EXISTS service_links (
    id INTEGER PRIMARY KEY,
    call_id INTEGER NOT NULL REFERENCES service_calls(id) ON DELETE CASCADE,
    route_id INTEGER REFERENCES routes(id) ON DELETE CASCADE,
    caller_symbol_id INTEGER REFERENCES symbols(id) ON DELETE SET NULL,
    handler_symbol_id INTEGER REFERENCES symbols(id) ON DELETE SET NULL,
    protocol TEXT NOT NULL,
    match_kind TEXT NOT NULL,
    confidence REAL NOT NULL,
    evidence_json TEXT NOT NULL DEFAULT '{}'
    );
    CREATE INDEX IF NOT EXISTS idx_service_links_call_id ON service_links(call_id);
    CREATE INDEX IF NOT EXISTS idx_service_links_handler ON service_links(handler_symbol_id);
    CREATE INDEX IF NOT EXISTS idx_service_links_caller ON service_links(caller_symbol_id);
    CREATE INDEX IF NOT EXISTS idx_service_links_protocol ON service_links(protocol);
    CREATE INDEX IF NOT EXISTS idx_service_links_match_kind ON service_links(match_kind);
  `);
}

/**
 * v9: Track-H protocol expansion. Adds the same generalized nullable TEXT
 * columns to service_calls and routes (plus routes.protocol defaulting to
 * 'http'), with partial indexes on the columns that get looked up.
 */
private migrateV9ProtocolColumns(): void {
  const protocolColumns = ['operation', 'topic', 'queue', 'exchange', 'service', 'broker', 'metadata_json'];
  const indexedColumns = ['operation', 'topic', 'queue', 'service'];

  for (const column of protocolColumns) {
    this.addColumnIfMissing('service_calls', column, 'TEXT');
  }
  for (const column of indexedColumns) {
    this.db.exec(`CREATE INDEX IF NOT EXISTS idx_service_calls_${column} ON service_calls(${column}) WHERE ${column} IS NOT NULL`);
  }

  this.addColumnIfMissing('routes', 'protocol', "TEXT NOT NULL DEFAULT 'http'");
  for (const column of protocolColumns) {
    this.addColumnIfMissing('routes', column, 'TEXT');
  }
  this.db.exec('CREATE INDEX IF NOT EXISTS idx_routes_protocol ON routes(protocol)');
  for (const column of indexedColumns) {
    this.db.exec(`CREATE INDEX IF NOT EXISTS idx_routes_${column} ON routes(${column}) WHERE ${column} IS NOT NULL`);
  }
}

/**
 * v10: external bundle layers, monorepo boundaries, history continuity, and
 * the external_bundle_id columns (NULL = local row).
 */
private migrateV10ExternalLayers(): void {
  this.db.exec(`
    CREATE TABLE IF NOT EXISTS external_bundles (
    id INTEGER PRIMARY KEY,
    source_kind TEXT NOT NULL DEFAULT 'external-bundle',
    bundle_path TEXT NOT NULL,
    external_project TEXT,
    external_version TEXT,
    external_hash TEXT,
    schema_version INTEGER NOT NULL DEFAULT 0,
    imported_at INTEGER NOT NULL,
    routes_imported INTEGER NOT NULL DEFAULT 0,
    service_calls_imported INTEGER NOT NULL DEFAULT 0,
    service_links_imported INTEGER NOT NULL DEFAULT 0,
    UNIQUE(bundle_path)
    );
    CREATE INDEX IF NOT EXISTS idx_external_bundles_project ON external_bundles(external_project);
    CREATE TABLE IF NOT EXISTS boundaries (
    id INTEGER PRIMARY KEY,
    label TEXT NOT NULL,
    kind TEXT NOT NULL DEFAULT 'package',
    root_rel_path TEXT NOT NULL,
    manifest_path TEXT,
    ecosystem TEXT,
    size_files INTEGER NOT NULL DEFAULT 0,
    computed_at INTEGER NOT NULL DEFAULT 0,
    UNIQUE(root_rel_path)
    );
    CREATE INDEX IF NOT EXISTS idx_boundaries_label ON boundaries(label);
    CREATE INDEX IF NOT EXISTS idx_boundaries_kind ON boundaries(kind);
    CREATE TABLE IF NOT EXISTS boundary_members (
    file_id INTEGER PRIMARY KEY REFERENCES files(id) ON DELETE CASCADE,
    boundary_id INTEGER NOT NULL REFERENCES boundaries(id) ON DELETE CASCADE
    );
    CREATE INDEX IF NOT EXISTS idx_boundary_members_boundary ON boundary_members(boundary_id);
    CREATE TABLE IF NOT EXISTS boundary_edges (
    id INTEGER PRIMARY KEY,
    from_boundary_id INTEGER NOT NULL REFERENCES boundaries(id) ON DELETE CASCADE,
    to_boundary_id INTEGER NOT NULL REFERENCES boundaries(id) ON DELETE CASCADE,
    kind TEXT NOT NULL DEFAULT 'call',
    weight INTEGER NOT NULL DEFAULT 1,
    UNIQUE(from_boundary_id, to_boundary_id, kind)
    );
    CREATE INDEX IF NOT EXISTS idx_boundary_edges_from ON boundary_edges(from_boundary_id);
    CREATE INDEX IF NOT EXISTS idx_boundary_edges_to ON boundary_edges(to_boundary_id);
    CREATE TABLE IF NOT EXISTS symbol_history_continuity (
    id INTEGER PRIMARY KEY,
    symbol_id INTEGER NOT NULL REFERENCES symbols(id) ON DELETE CASCADE,
    symbol_key TEXT NOT NULL,
    previous_symbol_key TEXT,
    previous_name TEXT,
    previous_file TEXT,
    bridging_sha TEXT,
    confidence REAL NOT NULL DEFAULT 0.0,
    match_reasons TEXT NOT NULL DEFAULT '[]',
    recorded_at INTEGER NOT NULL,
    UNIQUE(symbol_id, previous_symbol_key)
    );
    CREATE INDEX IF NOT EXISTS idx_symbol_history_continuity_symbol ON symbol_history_continuity(symbol_id);
    CREATE INDEX IF NOT EXISTS idx_symbol_history_continuity_prev ON symbol_history_continuity(previous_symbol_key);
  `);
  // external_bundle_id on rows that can come from an external layer.
  this.addColumnIfMissing('routes', 'external_bundle_id', 'INTEGER');
  this.addColumnIfMissing('service_calls', 'external_bundle_id', 'INTEGER');
  this.addColumnIfMissing('service_links', 'external_bundle_id', 'INTEGER');
  this.db.exec('CREATE INDEX IF NOT EXISTS idx_routes_external_bundle ON routes(external_bundle_id) WHERE external_bundle_id IS NOT NULL');
  this.db.exec('CREATE INDEX IF NOT EXISTS idx_service_calls_external_bundle ON service_calls(external_bundle_id) WHERE external_bundle_id IS NOT NULL');
  this.db.exec('CREATE INDEX IF NOT EXISTS idx_service_links_external_bundle ON service_links(external_bundle_id) WHERE external_bundle_id IS NOT NULL');
}
671
+
672
/**
 * Fills symbols.symbol_key on every row where it is still NULL, using
 * `kind:qualified_name` (falling back to `kind:name` when qualified_name is
 * NULL). Without this, rows that predate the symbol_key column would keep a
 * NULL key after a v3→v4 upgrade. Failures are ignored.
 */
private backfillSymbolKeysFromExistingRows(): void {
  const backfillSql = `
    UPDATE symbols
    SET symbol_key = kind || ':' || COALESCE(qualified_name, name)
    WHERE symbol_key IS NULL
  `;
  try {
    this.db.exec(backfillSql);
  } catch { /* table may not exist on a brand-new DB; non-fatal */ }
}
688
+
689
/**
 * Repopulates symbols_fts and/or files_fts from symbols / files when the FTS
 * table is empty while its source table has rows. Each table is handled
 * independently; any error (for example a missing FTS table) is swallowed
 * for that table only.
 */
private rebuildFtsIfStale(): void {
  // SELECT COUNT(*) for one table, as a number.
  const countRows = (table: string): number => {
    const row = this.db.prepare(`SELECT COUNT(*) AS c FROM ${table}`).get() as Row;
    return toNum(row.c);
  };
  // Run `work` between BEGIN and COMMIT; roll back and rethrow on failure.
  const inTransaction = (work: () => void): void => {
    this.db.exec('BEGIN');
    try {
      work();
      this.db.exec('COMMIT');
    } catch (err) {
      this.db.exec('ROLLBACK');
      throw err;
    }
  };

  try {
    const symbolCount = countRows('symbols');
    const symbolFtsCount = countRows('symbols_fts');
    if (symbolCount > 0 && symbolFtsCount === 0) {
      const insertFts = this.db.prepare(
        'INSERT INTO symbols_fts(rowid, name, qualified_name, signature, split) VALUES (?, ?, ?, ?, ?)',
      );
      const symbolRows = this.db.prepare(
        'SELECT id, name, qualified_name, signature FROM symbols',
      ).all() as Row[];
      inTransaction(() => {
        for (const symbol of symbolRows) {
          const name = toStr(symbol.name);
          const qualified = toStr(symbol.qualified_name ?? symbol.name);
          const signature = toStr(symbol.signature ?? '');
          const split = splitIdentifierTokens(`${name} ${qualified}`);
          insertFts.run(toNum(symbol.id), name, qualified, signature, split);
        }
      });
    }
  } catch { /* FTS5 unavailable; non-fatal */ }

  try {
    const fileCount = countRows('files');
    const fileFtsCount = countRows('files_fts');
    if (fileCount > 0 && fileFtsCount === 0) {
      const insertFts = this.db.prepare('INSERT INTO files_fts(rowid, rel_path) VALUES (?, ?)');
      const fileRows = this.db.prepare('SELECT id, rel_path FROM files').all() as Row[];
      inTransaction(() => {
        for (const file of fileRows) {
          insertFts.run(toNum(file.id), splitIdentifierTokens(toStr(file.rel_path)));
        }
      });
    }
  } catch { /* FTS5 unavailable; non-fatal */ }
}
737
+
738
/**
 * Reports whether `table` has a column named `column`, based on
 * PRAGMA table_info. Returns false if the lookup throws.
 *
 * `table` is interpolated into the PRAGMA text, so it must be a trusted
 * identifier.
 */
private hasColumn(table: string, column: string): boolean {
  try {
    const columnRows = this.db.prepare(`PRAGMA table_info(${table})`).all() as Row[];
    for (const info of columnRows) {
      if (toStr(info.name) === column) return true;
    }
    return false;
  } catch {
    return false;
  }
}
746
+
747
/**
 * Adds `column` to `table` with definition `def` unless PRAGMA table_info
 * already lists it. Unlike hasColumn(), errors are not swallowed here.
 *
 * All three arguments are interpolated into SQL text and must be trusted.
 */
private addColumnIfMissing(table: string, column: string, def: string): void {
  const columnRows = this.db.prepare(`PRAGMA table_info(${table})`).all() as Row[];
  const existingNames = columnRows.map(info => toStr(info.name));
  if (!existingNames.includes(column)) {
    this.db.exec(`ALTER TABLE ${table} ADD COLUMN ${column} ${def}`);
  }
}
752
+
753
/**
 * Compiles the prepared statements used by the write operations and stores
 * them on the instance.
 */
private prepare(): void {
  const db = this.db;

  // ── files ──
  // Insert a file row, or refresh every mutable column when the path exists.
  this.stmtUpsertFile = db.prepare(`
    INSERT INTO files (path, rel_path, language, hash, lines, indexed_at, role, is_vendor, is_generated)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
    ON CONFLICT(path) DO UPDATE SET
    rel_path = excluded.rel_path,
    language = excluded.language,
    hash = excluded.hash,
    lines = excluded.lines,
    indexed_at = excluded.indexed_at,
    role = excluded.role,
    is_vendor = excluded.is_vendor,
    is_generated = excluded.is_generated
  `);

  // ── symbols / edges / imports ──
  this.stmtInsertSymbol = db.prepare(`
    INSERT INTO symbols
    (name, qualified_name, kind, file_id, line_start, line_end, col_start, col_end,
    signature, is_rankable, loc, cyclomatic, cognitive, max_nesting, symbol_key, symbol_role)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
  `);

  this.stmtInsertEdge = db.prepare(`
    INSERT INTO edges (from_id, to_name, kind, line) VALUES (?, ?, ?, ?)
  `);

  this.stmtInsertFileImport = db.prepare(`
    INSERT OR IGNORE INTO file_imports (from_file_id, import_name) VALUES (?, ?)
  `);

  // ── routes / config / dependencies ──
  this.stmtInsertRoute = db.prepare(`
    INSERT INTO routes
    (file_id, method, path, framework, handler_name, line,
    protocol, operation, topic, queue, exchange, service, broker, metadata_json)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
  `);

  this.stmtInsertConfigKey = db.prepare(`
    INSERT INTO config_keys (key, source, file_id, symbol_id, line)
    VALUES (?, ?, ?, ?, ?)
  `);

  this.stmtInsertExternalDep = db.prepare(`
    INSERT OR REPLACE INTO external_dependencies
    (ecosystem, name, version_range, manifest_path, is_dev)
    VALUES (?, ?, ?, ?, ?)
  `);

  // ── service calls / links ──
  this.stmtInsertServiceCall = db.prepare(`
    INSERT INTO service_calls
    (file_id, symbol_id, protocol, method, raw_target, normalized_path,
    host_hint, env_key, framework, line, confidence,
    operation, topic, queue, exchange, service, broker, metadata_json)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
  `);

  this.stmtInsertServiceLink = db.prepare(`
    INSERT INTO service_links
    (call_id, route_id, caller_symbol_id, handler_symbol_id,
    protocol, match_kind, confidence, evidence_json)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?)
  `);

  // ── full-text search maintenance ──
  this.stmtInsertSymbolsFts = db.prepare(
    'INSERT INTO symbols_fts(rowid, name, qualified_name, signature, split) VALUES (?, ?, ?, ?, ?)',
  );
  this.stmtInsertFilesFts = db.prepare(
    'INSERT INTO files_fts(rowid, rel_path) VALUES (?, ?)',
  );
  // Removes the FTS rows of every symbol that belongs to one file.
  this.stmtDeleteSymbolsFtsForFile = db.prepare(
    'DELETE FROM symbols_fts WHERE rowid IN (SELECT id FROM symbols WHERE file_id = ?)',
  );
  this.stmtDeleteFilesFtsForFile = db.prepare(
    'DELETE FROM files_fts WHERE rowid = ?',
  );
}
829
+
830
+ // ── Write operations ────────────────────────────────────────────────────────
831
+
832
/**
 * Deletes every files row whose id is not in `keepIds`, together with the
 * matching symbols_fts / files_fts rows.
 *
 * Ids returned by listExternalPhantomFileIds() are always added to the keep
 * set, so a regular re-index after importing an external bundle leaves the
 * external layer intact.
 *
 * @param keepIds Ids of the file rows that must survive.
 * @returns Number of files rows deleted.
 * @throws Error when the store is read-only, or when the delete pass fails
 *   (the transaction is rolled back first).
 */
pruneFilesNotIn(keepIds: Set<number>): number {
  this.assertWritable();
  const externalIds = this.listExternalPhantomFileIds();

  // Nothing to keep at all: wipe the table and both FTS tables outright.
  if (keepIds.size === 0 && externalIds.length === 0) {
    const res = this.db.prepare('DELETE FROM files').run();
    // FTS is contentless — wipe in parallel.
    try { this.db.exec('DELETE FROM symbols_fts'); this.db.exec('DELETE FROM files_fts'); } catch { /* */ }
    return toNum(res.changes);
  }

  this.db.exec('BEGIN');
  try {
    this.db.exec('CREATE TEMP TABLE IF NOT EXISTS _keep (id INTEGER PRIMARY KEY)');
    this.db.exec('DELETE FROM _keep');
    // OR IGNORE skips ids that are already present (an external id may also
    // be in keepIds) while still letting genuine insert failures abort the
    // transaction instead of silently dropping a keep entry.
    const insert = this.db.prepare('INSERT OR IGNORE INTO _keep (id) VALUES (?)');
    for (const id of keepIds) insert.run(id);
    for (const id of externalIds) insert.run(id);

    // Wipe FTS rows for files we're about to delete (pre-delete, before
    // their ids become unrecoverable). Best effort: FTS may be unavailable.
    try {
      this.db.exec(`
        DELETE FROM symbols_fts WHERE rowid IN (
        SELECT s.id FROM symbols s
        JOIN files f ON f.id = s.file_id
        WHERE f.id NOT IN (SELECT id FROM _keep)
        )
      `);
      this.db.exec(`
        DELETE FROM files_fts WHERE rowid IN (
        SELECT id FROM files WHERE id NOT IN (SELECT id FROM _keep)
        )
      `);
    } catch { /* */ }

    const res = this.db.prepare(
      'DELETE FROM files WHERE id NOT IN (SELECT id FROM _keep)'
    ).run();
    this.db.exec('COMMIT');
    return toNum(res.changes);
  } catch (err) {
    // A failing ROLLBACK must not replace the error that caused it.
    try { this.db.exec('ROLLBACK'); } catch { /* keep the original error */ }
    throw err;
  }
}
880
+
881
/**
 * Inserts or refreshes the files row for `path` unconditionally. When a row
 * already exists, its symbols, imports, routes, config keys, service calls
 * and FTS entries are removed first so the caller can re-insert fresh data.
 *
 * @returns The id of the files row.
 * @throws Error when the store is read-only.
 */
upsertFile(
  path: string,
  relPath: string,
  language: string,
  hash: string,
  lines: number,
  classification: FileClassification = { role: 'project', isVendor: 0, isGenerated: 0 },
): number {
  this.assertWritable();
  const existing = this.db.prepare('SELECT id FROM files WHERE path = ?').get(path) as Row | undefined;

  if (existing) {
    const staleId = toNum(existing.id);
    // Symbol FTS rows are located via the symbols table, so they must go
    // before the symbols themselves are deleted.
    try { this.stmtDeleteSymbolsFtsForFile.run(staleId); } catch { /* */ }
    const dependents: Array<[string, string]> = [
      ['symbols', 'file_id'],
      ['file_imports', 'from_file_id'],
      ['routes', 'file_id'],
      ['config_keys', 'file_id'],
      ['service_calls', 'file_id'],
    ];
    for (const [table, column] of dependents) {
      this.db.prepare(`DELETE FROM ${table} WHERE ${column} = ?`).run(staleId);
    }
    try { this.stmtDeleteFilesFtsForFile.run(staleId); } catch { /* */ }
  }

  const { role, isVendor, isGenerated } = classification;
  const result = this.stmtUpsertFile.run(
    path, relPath, language, hash, lines, Date.now(),
    role, isVendor, isGenerated,
  );
  // On the update path the pre-existing id is used rather than lastInsertRowid.
  const fileId = existing ? toNum(existing.id) : toNum(result.lastInsertRowid);
  try { this.stmtInsertFilesFts.run(fileId, splitIdentifierTokens(relPath)); } catch { /* */ }
  return fileId;
}
911
+
912
/**
 * Cache-aware variant of upsertFile. When a row for `path` exists with the
 * same content hash, only indexed_at (and the classification columns, if
 * they differ) are updated and `unchanged: true` is returned. Otherwise the
 * file's dependent rows are cleared and the row is upserted.
 *
 * @returns The file id plus whether the stored hash matched `hash`.
 * @throws Error when the store is read-only.
 */
upsertFileWithCache(
  path: string, relPath: string, language: string, hash: string, lines: number,
  classification: FileClassification = { role: 'project', isVendor: 0, isGenerated: 0 },
): { fileId: number; unchanged: boolean } {
  this.assertWritable();
  const lookup = this.db.prepare(
    'SELECT id, hash, role, is_vendor, is_generated FROM files WHERE path = ?',
  );
  const existing = lookup.get(path) as Row | undefined;
  const { role, isVendor, isGenerated } = classification;

  // Cache hit: content is unchanged, so only touch metadata.
  if (existing && toStr(existing.hash) === hash) {
    const fileId = toNum(existing.id);
    const classificationChanged =
      toStr(existing.role) !== role ||
      toNum(existing.is_vendor) !== isVendor ||
      toNum(existing.is_generated) !== isGenerated;
    if (classificationChanged) {
      this.db.prepare(
        'UPDATE files SET indexed_at = ?, role = ?, is_vendor = ?, is_generated = ? WHERE id = ?',
      ).run(Date.now(), role, isVendor, isGenerated, fileId);
    } else {
      this.db.prepare('UPDATE files SET indexed_at = ? WHERE id = ?')
        .run(Date.now(), fileId);
    }
    return { fileId, unchanged: true };
  }

  // Cache miss on a known path: drop everything derived from the old content.
  if (existing) {
    const staleId = toNum(existing.id);
    // Symbol FTS rows are found through symbols, so delete them first.
    try { this.stmtDeleteSymbolsFtsForFile.run(staleId); } catch { /* */ }
    const dependents: Array<[string, string]> = [
      ['symbols', 'file_id'],
      ['file_imports', 'from_file_id'],
      ['routes', 'file_id'],
      ['config_keys', 'file_id'],
      ['service_calls', 'file_id'],
    ];
    for (const [table, column] of dependents) {
      this.db.prepare(`DELETE FROM ${table} WHERE ${column} = ?`).run(staleId);
    }
    try { this.stmtDeleteFilesFtsForFile.run(staleId); } catch { /* */ }
  }

  const result = this.stmtUpsertFile.run(
    path, relPath, language, hash, lines, Date.now(),
    role, isVendor, isGenerated,
  );
  const fileId = existing ? toNum(existing.id) : toNum(result.lastInsertRowid);
  try { this.stmtInsertFilesFts.run(fileId, splitIdentifierTokens(relPath)); } catch { /* */ }
  return { fileId, unchanged: false };
}
964
+
965
/**
 * Inserts one symbol row for `fileId` and, best effort, its symbols_fts row.
 *
 * The signature is truncated to 240 characters (an empty or missing
 * signature is stored as NULL), the qualified name falls back to the plain
 * name, and the role defaults to 'definition'.
 *
 * @returns The id of the inserted symbols row.
 * @throws Error when the store is read-only.
 */
insertSymbol(fileId: number, def: SymbolDef): number {
  this.assertWritable();
  const signature = def.signature ? def.signature.slice(0, 240) : null;
  const qualifiedName = def.qualifiedName ?? def.name;
  const symbolRole: SymbolRole = def.symbolRole ?? 'definition';

  // Only definitions of a rankable kind take part in ranking; declarations
  // are excluded regardless of kind.
  let rankable = 0;
  if (symbolRole === 'definition' && isRankableKind(def.kind)) {
    rankable = 1;
  }

  const inserted = this.stmtInsertSymbol.run(
    def.name, qualifiedName, def.kind, fileId,
    def.lineStart, def.lineEnd,
    def.colStart, def.colEnd,
    signature,
    rankable,
    def.loc ?? null,
    def.cyclomatic ?? null,
    def.cognitive ?? null,
    def.maxNesting ?? null,
    makeSymbolKey(def.kind, qualifiedName),
    symbolRole,
  );
  const symbolId = toNum(inserted.lastInsertRowid);

  try {
    const splitTokens = splitIdentifierTokens(`${def.name} ${qualifiedName}`);
    this.stmtInsertSymbolsFts.run(symbolId, def.name, qualifiedName, signature ?? '', splitTokens);
  } catch { /* FTS5 unavailable; non-fatal */ }
  return symbolId;
}
1002
+
1003
/**
 * Record an edge of the given `kind` from `fromSymbolId` to the target named
 * `toName`, observed at `line`. Only the target name is stored here.
 */
insertEdge(fromSymbolId: number, toName: string, kind: string, line: number): void {
  this.assertWritable();
  const edgeParams = [fromSymbolId, toName, kind, line] as const;
  this.stmtInsertEdge.run(...edgeParams);
}
1007
+
1008
/**
 * Record that the file `fromFileId` imports `importName`. The import is
 * stored by name only.
 */
insertFileImport(fromFileId: number, importName: string): void {
  this.assertWritable();
  const importParams = [fromFileId, importName] as const;
  this.stmtInsertFileImport.run(...importParams);
}
1012
+
1013
/**
 * Insert a route row for `fileId`.
 *
 * `options` carries the protocol-specific columns; `protocol` defaults to
 * 'http' and every other omitted field is stored as NULL.
 */
insertRoute(
  fileId: number, method: string, routePath: string, framework: string,
  handlerName: string | null, line: number,
  options: {
    protocol?: string;
    operation?: string | null;
    topic?: string | null;
    queue?: string | null;
    exchange?: string | null;
    service?: string | null;
    broker?: string | null;
    metadataJson?: string | null;
  } = {},
): void {
  this.assertWritable();

  // Missing (undefined) and explicit null both end up as SQL NULL.
  const orNull = (value: string | null | undefined): string | null => value ?? null;
  const protocol = options.protocol ?? 'http';

  this.stmtInsertRoute.run(
    fileId,
    method,
    routePath,
    framework,
    handlerName,
    line,
    protocol,
    orNull(options.operation),
    orNull(options.topic),
    orNull(options.queue),
    orNull(options.exchange),
    orNull(options.service),
    orNull(options.broker),
    orNull(options.metadataJson),
  );
}
1040
+
1041
/**
 * Record a configuration-key reference found at `line` of `fileId`.
 * `symbolId` may be null when the enclosing symbol is not known yet.
 */
insertConfigKey(
  key: string, source: string, fileId: number,
  symbolId: number | null, line: number,
): void {
  this.assertWritable();
  const configParams = [key, source, fileId, symbolId, line] as const;
  this.stmtInsertConfigKey.run(...configParams);
}
1048
+
1049
/**
 * v8 Track G — build a closure that writes service_link rows.
 *
 * The closure captures the already-prepared insert statement, so the
 * resolver (`resolveServiceLinks(store)`) can stream many rows through it
 * without looking the statement up again for each row. Writability is
 * checked once, when the closure is created.
 */
makeServiceLinkInserter(): (args: {
  callId: number;
  routeId: number | null;
  callerSymbolId: number | null;
  handlerSymbolId: number | null;
  protocol: string;
  matchKind: string;
  confidence: number;
  evidenceJson: string;
}) => void {
  this.assertWritable();
  const insertLink = this.stmtInsertServiceLink;
  return ({
    callId,
    routeId,
    callerSymbolId,
    handlerSymbolId,
    protocol,
    matchKind,
    confidence,
    evidenceJson,
  }) => {
    insertLink.run(
      callId,
      routeId,
      callerSymbolId,
      handlerSymbolId,
      protocol,
      matchKind,
      confidence,
      evidenceJson,
    );
  };
}
1074
+
1075
/**
 * v8 Track G — store one outbound service call (HTTP or other client call).
 *
 * The post-index resolver later derives service_links from these rows and
 * from routes. `rawTarget` is truncated to 240 characters before storage.
 *
 * @returns the id of the new row, so callers can attach evidence to it in
 *          the same batch.
 */
insertServiceCall(args: {
  fileId: number;
  symbolId: number | null;
  protocol: string;
  method: string | null;
  rawTarget: string;
  normalizedPath: string | null;
  hostHint: string | null;
  envKey: string | null;
  framework: string;
  line: number;
  confidence: number;
  // v9 Track-H protocol expansion. Every field below is optional: an
  // extractor sets the ones meaningful for its protocol, the rest become NULL.
  operation?: string | null;
  topic?: string | null;
  queue?: string | null;
  exchange?: string | null;
  service?: string | null;
  broker?: string | null;
  metadataJson?: string | null;
}): number {
  this.assertWritable();

  // Missing (undefined) and explicit null both end up as SQL NULL.
  const orNull = (value: string | null | undefined): string | null => value ?? null;
  const truncatedTarget = args.rawTarget.slice(0, 240);

  const inserted = this.stmtInsertServiceCall.run(
    args.fileId,
    args.symbolId,
    args.protocol,
    args.method,
    truncatedTarget,
    args.normalizedPath,
    args.hostHint,
    args.envKey,
    args.framework,
    args.line,
    args.confidence,
    orNull(args.operation),
    orNull(args.topic),
    orNull(args.queue),
    orNull(args.exchange),
    orNull(args.service),
    orNull(args.broker),
    orNull(args.metadataJson),
  );
  return toNum(inserted.lastInsertRowid);
}
1125
+
1126
/**
 * Record one external dependency declared in the manifest at `manifestPath`.
 * `isDev` is the 0/1 flag passed through to the insert statement.
 */
insertExternalDep(
  ecosystem: string, name: string, versionRange: string | null,
  manifestPath: string, isDev: 0 | 1,
): void {
  this.assertWritable();
  const depParams = [ecosystem, name, versionRange, manifestPath, isDev] as const;
  this.stmtInsertExternalDep.run(...depParams);
}
1133
+
1134
/** Delete every row from the external_dependencies table. */
clearExternalDeps(): void {
  this.assertWritable();
  const wipeSql = 'DELETE FROM external_dependencies';
  this.db.exec(wipeSql);
}
1138
+
1139
+ // ── Import resolution ───────────────────────────────────────────────────────
1140
+
1141
/**
 * Link each still-unresolved file_imports row to the indexed file it refers
 * to, using `resolveImportToFileId` against a map of normalized file paths.
 *
 * All updates run inside one transaction, which is rolled back if any
 * resolution or update throws.
 *
 * @returns the number of imports that were resolved in this call.
 */
resolveImports(): number {
  const fileRows = this.db.prepare('SELECT id, path, language FROM files').all() as Row[];
  if (fileRows.length === 0) return 0;

  // Normalized path -> file id lookup; a later duplicate path overwrites an
  // earlier one, exactly as repeated Map#set calls would.
  const idByPath = new Map<string, number>(
    fileRows.map((f): [string, number] => [normalizePath(toStr(f.path)), toNum(f.id)]),
  );

  const pending = this.db.prepare(`
    SELECT fi.id, fi.from_file_id, fi.import_name, f.path AS from_path, f.language
    FROM file_imports fi
    JOIN files f ON f.id = fi.from_file_id
    WHERE fi.resolved_file_id IS NULL
  `).all() as Row[];

  const setResolved = this.db.prepare(
    'UPDATE file_imports SET resolved_file_id = ? WHERE id = ?',
  );

  let resolvedCount = 0;
  this.db.exec('BEGIN');
  try {
    for (const row of pending) {
      const targetId = resolveImportToFileId(
        toStr(row.from_path),
        toStr(row.language),
        toStr(row.import_name),
        idByPath,
      );
      if (targetId === null) continue;
      setResolved.run(targetId, toNum(row.id));
      resolvedCount += 1;
    }
    this.db.exec('COMMIT');
  } catch (err) {
    this.db.exec('ROLLBACK');
    throw err;
  }
  return resolvedCount;
}
1181
+
1182
+ // ── Edge resolution (scope-aware) ───────────────────────────────────────────
1183
+
1184
/**
 * Fill in `edges.to_id` for unresolved edges in three passes of decreasing
 * precision:
 *   1. a symbol with the target name in the caller's own file,
 *   2. a symbol with the target name in a file the caller's file imports,
 *   3. any symbol with the target name.
 *
 * Each pass only touches edges that are still unresolved and for which a
 * candidate exists. The per-pass counts are derived from how far the number
 * of unresolved edges drops after each pass.
 */
resolveEdges(): EdgeResolutionStats {
  const unresolvedCount = (): number =>
    toNum((this.db.prepare(
      'SELECT COUNT(*) AS c FROM edges WHERE to_id IS NULL'
    ).get() as Row).c);

  let remaining = unresolvedCount();

  // Run one UPDATE pass and report how many edges it resolved.
  const runPass = (sql: string): number => {
    this.db.prepare(sql).run();
    const left = unresolvedCount();
    const resolvedNow = remaining - left;
    remaining = left;
    return resolvedNow;
  };

  // Pass 1: target defined in the same file as the calling symbol.
  const sameFile = runPass(`
    UPDATE edges
    SET to_id = (
      SELECT t.id
      FROM symbols t, symbols s
      WHERE s.id = edges.from_id
      AND t.name = edges.to_name
      AND t.file_id = s.file_id
      LIMIT 1
    )
    WHERE to_id IS NULL
    AND EXISTS (
      SELECT 1
      FROM symbols t, symbols s
      WHERE s.id = edges.from_id
      AND t.name = edges.to_name
      AND t.file_id = s.file_id
    );
  `);

  // Pass 2: target defined in a file that the caller's file imports.
  const imported = runPass(`
    UPDATE edges
    SET to_id = (
      SELECT t.id
      FROM symbols t
      JOIN file_imports fi ON fi.resolved_file_id = t.file_id
      JOIN symbols s ON s.id = edges.from_id
      WHERE fi.from_file_id = s.file_id
      AND t.name = edges.to_name
      LIMIT 1
    )
    WHERE to_id IS NULL
    AND EXISTS (
      SELECT 1
      FROM symbols t
      JOIN file_imports fi ON fi.resolved_file_id = t.file_id
      JOIN symbols s ON s.id = edges.from_id
      WHERE fi.from_file_id = s.file_id
      AND t.name = edges.to_name
    );
  `);

  // Pass 3: fall back to any symbol carrying the target name.
  const global = runPass(`
    UPDATE edges
    SET to_id = (
      SELECT id FROM symbols WHERE name = edges.to_name LIMIT 1
    )
    WHERE to_id IS NULL
    AND EXISTS (SELECT 1 FROM symbols WHERE name = edges.to_name);
  `);

  return { sameFile, imported, global, total: sameFile + imported + global };
}
1259
+
1260
/**
 * After symbol IDs are known, link `routes.handler_id` by name.
 *
 * A route is matched to a symbol when `symbols.name = routes.handler_name`
 * AND the symbol lives in the same file as the route registration (handlers
 * nearly always do). Routes whose handler is not defined in the same file
 * keep `handler_id` NULL.
 *
 * The UPDATE is restricted with EXISTS to routes that actually have a
 * matching symbol. Without it every still-unlinked route would be rewritten
 * (to NULL) on each call and included in the change count.
 *
 * @returns the number of routes that were linked to a handler symbol.
 */
resolveRouteHandlers(): number {
  const res = this.db.prepare(`
    UPDATE routes
    SET handler_id = (
      SELECT s.id FROM symbols s
      WHERE s.file_id = routes.file_id
        AND s.name = routes.handler_name
      LIMIT 1
    )
    WHERE handler_id IS NULL
      AND handler_name IS NOT NULL
      AND EXISTS (
        SELECT 1 FROM symbols s
        WHERE s.file_id = routes.file_id
          AND s.name = routes.handler_name
      )
  `).run();
  return toNum(res.changes);
}
1280
+
1281
/**
 * Backfill `config_keys.symbol_id` by line span.
 *
 * The extractor doesn't always know the enclosing symbol id (extraction
 * precedes symbol insertion), so each key is attached to the smallest
 * function/method/constructor in the same file whose line range contains
 * the key's line.
 *
 * The UPDATE is restricted with EXISTS to keys that actually have an
 * enclosing symbol. Without it every key outside any function would be
 * rewritten (to NULL) on each call and included in the change count.
 *
 * @returns the number of config keys that received a symbol id.
 */
resolveConfigKeySymbols(): number {
  const res = this.db.prepare(`
    UPDATE config_keys
    SET symbol_id = (
      SELECT s.id FROM symbols s
      WHERE s.file_id = config_keys.file_id
        AND s.line_start <= config_keys.line
        AND s.line_end >= config_keys.line
        AND s.kind IN ('function','method','constructor')
      ORDER BY (s.line_end - s.line_start) ASC
      LIMIT 1
    )
    WHERE symbol_id IS NULL
      AND EXISTS (
        SELECT 1 FROM symbols s
        WHERE s.file_id = config_keys.file_id
          AND s.line_start <= config_keys.line
          AND s.line_end >= config_keys.line
          AND s.kind IN ('function','method','constructor')
      )
  `).run();
  return toNum(res.changes);
}
1302
+
1303
+ // ── Test-edge synthesis ─────────────────────────────────────────────────────
1304
+
1305
/**
 * For every resolved 'call' edge that goes from a symbol in a test file to a
 * symbol in a non-test file, add a companion edge of kind 'tests'.
 *
 * The original 'call' edge is kept, so caller/callee queries (which filter on
 * kind = 'call') are unaffected; the 'tests' kind lets consumers such as
 * `seer_behavior` read test relationships directly.
 *
 * The new edge reuses the source edge's `to_id` as-is. The call-edge
 * resolver has already chosen the target (same-file, imported, or global
 * fallback), and resolving again by short name with `LIMIT 1` would assign
 * every test edge for e.g. `Alpha.run` / `Beta.run` to whichever row came
 * first.
 *
 * Call edges whose (from_id, to_id) pair already has a 'tests' edge are
 * skipped. All inserts share one transaction.
 *
 * @returns the number of 'tests' edges inserted.
 */
synthesizeTestEdges(): number {
  // Test -> non-test call edges that have no 'tests' counterpart yet.
  const candidates = this.db.prepare(`
    SELECT e.from_id, e.to_id, e.to_name, e.line
    FROM edges e
    JOIN symbols s ON s.id = e.from_id
    JOIN files fs ON fs.id = s.file_id
    JOIN symbols t ON t.id = e.to_id
    JOIN files ft ON ft.id = t.file_id
    WHERE e.kind = 'call'
    AND fs.role = 'test'
    AND ft.role <> 'test'
    AND NOT EXISTS (
      SELECT 1 FROM edges e2
      WHERE e2.from_id = e.from_id
      AND e2.to_id = e.to_id
      AND e2.kind = 'tests'
    )
  `).all() as Row[];

  const pendingCount = candidates.length;
  if (pendingCount === 0) return 0;

  // to_id is written explicitly from the source edge; nothing is re-resolved.
  const insertTestEdge = this.db.prepare(
    "INSERT INTO edges (from_id, to_name, to_id, kind, line) VALUES (?, ?, ?, 'tests', ?)",
  );

  this.db.exec('BEGIN');
  try {
    candidates.forEach((edge) => {
      insertTestEdge.run(
        toNum(edge.from_id),
        toStr(edge.to_name),
        toNum(edge.to_id),
        toNum(edge.line),
      );
    });
    this.db.exec('COMMIT');
  } catch (err) {
    this.db.exec('ROLLBACK');
    throw err;
  }
  return pendingCount;
}
1363
+
1364
+ // ── Read operations ─────────────────────────────────────────────────────────
1365
+
1366
/**
 * List the 'call' edges whose target name equals `symbolName`, one row per
 * edge, ordered by caller file path and then call line.
 *
 * Matching is by `edges.to_name`, so symbols that share a short name are
 * not distinguished.
 *
 * @param symbolName target name to look up.
 * @param limit optional positive cap on the number of rows. When given, the
 *              result is the first `limit` rows of the ordered list. Ordering
 *              happens in SQL before the LIMIT, so the truncated result is
 *              stable rather than an arbitrary subset sorted afterwards.
 */
findCallers(symbolName: string, limit?: number): CallerRow[] {
  const hasLimit = typeof limit === 'number' && limit > 0;
  const stmt = this.db.prepare(`
    SELECT
      s.name AS callerName,
      s.qualified_name AS callerQualifiedName,
      s.kind AS callerKind,
      f.path AS callerFile,
      e.line AS callerLine,
      e.kind AS edgeKind
    FROM edges e
    JOIN symbols s ON s.id = e.from_id
    JOIN files f ON f.id = s.file_id
    WHERE e.to_name = ? AND e.kind = 'call'
    ORDER BY f.path, e.line
    ${hasLimit ? 'LIMIT ?' : ''}
  `);
  const rows = (hasLimit
    ? stmt.all(symbolName, limit)
    : stmt.all(symbolName)) as Row[];

  return rows.map(r => ({
    callerName: toStr(r.callerName),
    callerQualifiedName: toNullStr(r.callerQualifiedName),
    callerKind: toStr(r.callerKind),
    callerFile: toStr(r.callerFile),
    callerLine: toNum(r.callerLine),
    edgeKind: toStr(r.edgeKind),
  }));
}
1420
+
1421
/** Count the 'call' edges whose target name equals `symbolName`. */
countCallers(symbolName: string): number {
  const sql = "SELECT COUNT(*) AS c FROM edges WHERE to_name = ? AND kind = 'call'";
  const result = this.db.prepare(sql).get(symbolName) as Row;
  return toNum(result.c);
}
1427
+
1428
/**
 * Callers of a specific symbol id — never collapses short-name siblings.
 * Track E and any tool that already holds a resolved symbol id should use
 * this instead of `findCallers(name)`.
 *
 * Edges whose `to_id` is NULL (unresolved) are intentionally skipped: with
 * no resolved id there is no way to tell whether they target THIS symbol or
 * a sibling with the same short name.
 *
 * Rows are ordered by caller file path and then call line.
 *
 * @param symbolId id of the target symbol.
 * @param limit optional positive cap on the number of rows. When given, the
 *              result is the first `limit` rows of the ordered list. Ordering
 *              happens in SQL before the LIMIT, so the truncated result is
 *              stable rather than an arbitrary subset sorted afterwards.
 */
findCallersById(symbolId: number, limit?: number): CallerRow[] {
  const hasLimit = typeof limit === 'number' && limit > 0;
  const stmt = this.db.prepare(`
    SELECT
      s.name AS callerName,
      s.qualified_name AS callerQualifiedName,
      s.kind AS callerKind,
      f.path AS callerFile,
      e.line AS callerLine,
      e.kind AS edgeKind
    FROM edges e
    JOIN symbols s ON s.id = e.from_id
    JOIN files f ON f.id = s.file_id
    WHERE e.to_id = ? AND e.kind = 'call'
    ORDER BY f.path, e.line
    ${hasLimit ? 'LIMIT ?' : ''}
  `);
  const rows = (hasLimit ? stmt.all(symbolId, limit) : stmt.all(symbolId)) as Row[];

  return rows.map(r => ({
    callerName: toStr(r.callerName),
    callerQualifiedName: toNullStr(r.callerQualifiedName),
    callerKind: toStr(r.callerKind),
    callerFile: toStr(r.callerFile),
    callerLine: toNum(r.callerLine),
    edgeKind: toStr(r.edgeKind),
  }));
}
1486
+
1487
/** Number of 'call' edges resolved to the given symbol id. */
countCallersById(symbolId: number): number {
  const sql = "SELECT COUNT(*) AS c FROM edges WHERE to_id = ? AND kind = 'call'";
  const result = this.db.prepare(sql).get(symbolId) as Row;
  return toNum(result.c);
}
1494
+
1495
/**
 * List the 'call' edges emitted by one caller symbol id, ordered by call
 * line, one row per edge. Unlike `findCallees(name)` this cannot merge
 * callers that share a short name.
 *
 * The callee's kind, file and start line come from LEFT JOINs on the
 * resolved target, so they are null when the edge has no resolved `to_id`.
 */
findCalleesById(symbolId: number): CalleeRow[] {
  const sql = `
    SELECT
      e.to_name AS calleeName,
      s2.kind AS calleeKind,
      f2.path AS calleeFile,
      s2.line_start AS calleeLineStart,
      e.kind AS edgeKind
    FROM edges e
    LEFT JOIN symbols s2 ON s2.id = e.to_id
    LEFT JOIN files f2 ON f2.id = s2.file_id
    WHERE e.from_id = ? AND e.kind = 'call'
    ORDER BY e.line
  `;
  const edgeRows = this.db.prepare(sql).all(symbolId) as Row[];

  const callees: CalleeRow[] = [];
  for (const row of edgeRows) {
    callees.push({
      calleeName: toStr(row.calleeName),
      calleeKind: toNullStr(row.calleeKind),
      calleeFile: toNullStr(row.calleeFile),
      calleeLineStart: toNullNum(row.calleeLineStart),
      edgeKind: toStr(row.edgeKind),
    });
  }
  return callees;
}
1522
+
1523
/**
 * List the 'call' edges emitted by every symbol named `symbolName`, ordered
 * by call line. Callers are matched by short name, so same-named symbols
 * are combined.
 *
 * The callee's kind, file and start line come from LEFT JOINs on the
 * resolved target, so they are null when the edge has no resolved `to_id`.
 */
findCallees(symbolName: string): CalleeRow[] {
  const sql = `
    SELECT
      e.to_name AS calleeName,
      s2.kind AS calleeKind,
      f2.path AS calleeFile,
      s2.line_start AS calleeLineStart,
      e.kind AS edgeKind
    FROM edges e
    JOIN symbols s ON s.id = e.from_id
    LEFT JOIN symbols s2 ON s2.id = e.to_id
    LEFT JOIN files f2 ON f2.id = s2.file_id
    WHERE s.name = ? AND e.kind = 'call'
    ORDER BY e.line
  `;
  const edgeRows = this.db.prepare(sql).all(symbolName) as Row[];

  const callees: CalleeRow[] = [];
  for (const row of edgeRows) {
    callees.push({
      calleeName: toStr(row.calleeName),
      calleeKind: toNullStr(row.calleeKind),
      calleeFile: toNullStr(row.calleeFile),
      calleeLineStart: toNullNum(row.calleeLineStart),
      edgeKind: toStr(row.edgeKind),
    });
  }
  return callees;
}
1547
+
1548
/**
 * Produce the predicate fragment shared by findSymbols / getDefinition /
 * getTopSymbols / countSymbols. The result is an `AND …` suffix meant to be
 * appended to an existing WHERE clause; it never opens the WHERE itself, so
 * each caller keeps control over the rest of its query.
 *
 * Assumes the query aliases files as `f` and symbols as `s`.
 */
private filterClauseFromOptions(opts: SymbolSearchOptions): string {
  const flags = resolveSearchFlags(opts);
  const symbolLevel = {
    symbolPrefix: 's.',
    includeTests: flags.includeTests,
    includeDeclarations: flags.includeDeclarations,
    includeTypeRefs: flags.includeTypeRefs,
    hasSymbolRoleColumn: this.hasSymbolRoleColumn,
  };
  return buildRoleFilter(
    'f.',
    flags.includeVendor,
    flags.includeGenerated,
    this.hasRoleColumns,
    symbolLevel,
  );
}
1563
+
1564
/**
 * Substring search over symbol names and qualified names (SQL LIKE with
 * `%name%`), restricted by the role filter derived from `options` and
 * ordered by PageRank, highest first.
 *
 * `options.limit` defaults to 50 and is raised to at least 1.
 */
findSymbols(name: string, options: SymbolSearchOptions = {}): SymbolRow[] {
  const roleFilter = this.filterClauseFromOptions(options);
  const maxRows = Math.max(1, options.limit ?? 50);
  const pattern = `%${name}%`;
  const columns = symbolSelectCols(this.hasComplexityColumns, this.hasSymbolRoleColumn);

  const matches = this.db.prepare(`
    SELECT ${columns}
    FROM symbols s JOIN files f ON f.id = s.file_id
    WHERE (s.name LIKE ? OR s.qualified_name LIKE ?)
    ${roleFilter}
    ORDER BY s.pagerank DESC
    LIMIT ?
  `).all(pattern, pattern, maxRows) as Row[];

  return matches.map(toSymbolRow);
}
1578
+
1579
/**
 * Full-text (FTS5) symbol search over name / qualified_name / signature /
 * split form, ranked by BM25 with PageRank as the tie-breaker.
 *
 * Falls back to the LIKE-based `findSymbols` when the v4 tables are absent,
 * when `ftsQuery` yields no match expression, when the FTS query throws, or
 * when it returns no rows.
 */
searchSymbolsFts(query: string, options: SymbolSearchOptions = {}): SymbolRow[] {
  const likeFallback = (): SymbolRow[] => this.findSymbols(query, options);

  if (!this.hasV4Tables) return likeFallback();
  const matchExpr = ftsQuery(query);
  if (!matchExpr) return likeFallback();

  const maxRows = Math.max(1, options.limit ?? 50);
  const roleFilter = this.filterClauseFromOptions(options);
  try {
    const hits = this.db.prepare(`
      SELECT ${symbolSelectCols(this.hasComplexityColumns, this.hasSymbolRoleColumn)},
      bm25(symbols_fts) AS rank
      FROM symbols_fts
      JOIN symbols s ON s.id = symbols_fts.rowid
      JOIN files f ON f.id = s.file_id
      WHERE symbols_fts MATCH ?
      ${roleFilter}
      ORDER BY rank, s.pagerank DESC
      LIMIT ?
    `).all(matchExpr, maxRows) as Row[];
    if (hits.length > 0) return hits.map(toSymbolRow);
  } catch { /* fall through */ }

  return likeFallback();
}
1606
+
1607
/**
 * Full-text (FTS5) search over file paths, ranked by BM25.
 *
 * Twice `limit` rows are fetched, then vendor / generated / test files are
 * dropped unless the matching `include*` option is set, and the remainder is
 * cut to `limit`. Returns an empty list when the v4 tables are missing, the
 * query yields no match expression, or the FTS query throws.
 */
searchFilesFts(query: string, limit = 30, options: { includeTests?: boolean; includeVendor?: boolean; includeGenerated?: boolean } = {}): Array<{ id: number; path: string; relPath: string; language: string; role: string }> {
  if (!this.hasV4Tables) return [];
  const matchExpr = ftsQuery(query);
  if (!matchExpr) return [];

  // Roles hidden by default; each include* flag re-admits one of them.
  const hiddenRoles = new Set<string>();
  if (!(options.includeVendor ?? false)) hiddenRoles.add('vendor');
  if (!(options.includeGenerated ?? false)) hiddenRoles.add('generated');
  if (!(options.includeTests ?? false)) hiddenRoles.add('test');

  try {
    const hits = this.db.prepare(`
      SELECT f.id, f.path, f.rel_path AS relPath, f.language, f.role
      FROM files_fts
      JOIN files f ON f.id = files_fts.rowid
      WHERE files_fts MATCH ?
      ORDER BY bm25(files_fts)
      LIMIT ?
    `).all(matchExpr, limit * 2) as Row[];

    const files = hits.map(row => ({
      id: toNum(row.id),
      path: toStr(row.path),
      relPath: toStr(row.relPath),
      language: toStr(row.language),
      role: toStr(row.role),
    }));
    const visible = files.filter(file => !hiddenRoles.has(file.role));
    return visible.slice(0, limit);
  } catch {
    return [];
  }
}
1642
+
1643
/**
 * List up to `limit` symbols of one file, ordered by start line. The file
 * is matched when `filePath` equals either its stored path or its rel_path.
 */
listSymbolsInFile(filePath: string, limit = 200): SymbolRow[] {
  const columns = symbolSelectCols(this.hasComplexityColumns, this.hasSymbolRoleColumn);
  const symbolRows = this.db.prepare(`
    SELECT ${columns}
    FROM symbols s JOIN files f ON f.id = s.file_id
    WHERE f.path = ? OR f.rel_path = ?
    ORDER BY s.line_start
    LIMIT ?
  `).all(filePath, filePath, limit) as Row[];

  return symbolRows.map(toSymbolRow);
}
1654
+
1655
/**
 * Return the `limit` highest-PageRank symbols that pass the role filter
 * derived from `options`.
 */
getTopSymbols(limit = 20, options: SymbolSearchOptions = {}): SymbolRow[] {
  const roleFilter = this.filterClauseFromOptions(options);

  // The shared filter is an "AND …" suffix; this query has no other
  // predicate, so the leading AND is stripped and the rest becomes the WHERE.
  let whereClause = '';
  if (roleFilter) {
    whereClause = `WHERE ${roleFilter.replace(/^AND\s+/, '')}`;
  }

  const columns = symbolSelectCols(this.hasComplexityColumns, this.hasSymbolRoleColumn);
  const topRows = this.db.prepare(`
    SELECT ${columns}
    FROM symbols s JOIN files f ON f.id = s.file_id
    ${whereClause}
    ORDER BY s.pagerank DESC
    LIMIT ?
  `).all(limit) as Row[];

  return topRows.map(toSymbolRow);
}
1668
+
1669
/**
 * Look up symbols whose name or qualified name equals `name` exactly,
 * restricted by the role filter from `options`, ordered by PageRank
 * (highest first) and capped at 50 rows.
 *
 * `options.filePath`, when non-empty, narrows the result to one file.
 */
getDefinition(name: string, options: { filePath?: string } & SymbolSearchOptions = {}): SymbolRow[] {
  const roleFilter = this.filterClauseFromOptions(options);

  // The file hint may be an absolute path, the exact rel_path, or a trailing
  // path fragment aligned to a `/` boundary: `weird.c` matches `src/weird.c`
  // and `auth/service.ts` matches `packages/api/auth/service.ts`, but
  // `auth.ts` never matches `oauth.ts`. This spares callers from having to
  // know the full rel_path. LIKE metacharacters in the fragment are escaped
  // so that, for example, a `_` in a filename is not treated as a wildcard.
  const fileHint = options.filePath;
  const fileArgs: string[] = [];
  let fileClause = '';
  if (fileHint) {
    const normalized = fileHint
      .replace(/\\/g, '/')
      .replace(/^\.\//, '')
      .replace(/\/+$/,'');
    fileClause = "AND (f.path = ? OR f.rel_path = ? OR f.rel_path LIKE ? ESCAPE '\\')";
    fileArgs.push(fileHint, normalized, '%/' + escapeLike(normalized));
  }

  const columns = symbolSelectCols(this.hasComplexityColumns, this.hasSymbolRoleColumn);
  const definitionRows = this.db.prepare(`
    SELECT ${columns}
    FROM symbols s JOIN files f ON f.id = s.file_id
    WHERE (s.name = ? OR s.qualified_name = ?)
    ${roleFilter}
    ${fileClause}
    ORDER BY s.pagerank DESC
    LIMIT 50
  `).all(name, name, ...fileArgs) as Row[];

  return definitionRows.map(toSymbolRow);
}
1701
+
1702
/** Fetch one symbol by row id, or null when no such symbol exists. */
getSymbolById(id: number): SymbolRow | null {
  const columns = symbolSelectCols(this.hasComplexityColumns, this.hasSymbolRoleColumn);
  const found = this.db.prepare(`
    SELECT ${columns}
    FROM symbols s JOIN files f ON f.id = s.file_id
    WHERE s.id = ?
  `).get(id) as Row | undefined;

  if (!found) return null;
  return toSymbolRow(found);
}
1710
+
1711
/**
 * Count the symbols whose name or qualified name contains `name` (SQL LIKE
 * with `%name%`), applying the same role filter as `findSymbols`.
 */
countSymbols(name: string, options: SymbolSearchOptions = {}): number {
  const roleFilter = this.filterClauseFromOptions(options);
  const pattern = `%${name}%`;
  const result = this.db.prepare(`
    SELECT COUNT(*) AS c
    FROM symbols s JOIN files f ON f.id = s.file_id
    WHERE (s.name LIKE ? OR s.qualified_name LIKE ?) ${roleFilter}
  `).get(pattern, pattern) as Row;
  return toNum(result.c);
}
1720
+
1721
/**
 * Return every row of the files table with its path, language, content
 * hash, index timestamp and role classification flags.
 */
listFiles(): Array<{
  id: number; path: string; relPath: string; language: string; hash: string;
  indexedAt: number; role: string; isVendor: number; isGenerated: number;
}> {
  const sql = `
    SELECT id, path, rel_path AS relPath, language, hash, indexed_at AS indexedAt,
    role, is_vendor AS isVendor, is_generated AS isGenerated
    FROM files
  `;
  const fileRows = this.db.prepare(sql).all() as Row[];

  return fileRows.map((row) => {
    const id = toNum(row.id);
    const path = toStr(row.path);
    const relPath = toStr(row.relPath);
    const language = toStr(row.language);
    const hash = toStr(row.hash);
    const indexedAt = toNum(row.indexedAt);
    const role = toStr(row.role);
    const isVendor = toNum(row.isVendor);
    const isGenerated = toNum(row.isGenerated);
    return { id, path, relPath, language, hash, indexedAt, role, isVendor, isGenerated };
  });
}
1742
+
1743
/**
 * Count indexed files per role.
 *
 * Only the four known roles are reported; any other role value in the table
 * is ignored. If the query fails (for example because the role column is
 * missing), all counts are returned as 0.
 */
getRoleCounts(): { project: number; vendor: number; generated: number; test: number } {
  const out: Record<string, number> = { project: 0, vendor: 0, generated: 0, test: 0 };
  try {
    const rows = this.db.prepare(
      'SELECT role, COUNT(*) AS c FROM files GROUP BY role',
    ).all() as Row[];
    for (const r of rows) {
      const role = toStr(r.role);
      // Own-property check: the `in` operator also matches inherited keys
      // such as "constructor" or "toString", which would let an unexpected
      // role value add a stray property to the result.
      if (Object.prototype.hasOwnProperty.call(out, role)) out[role] = toNum(r.c);
    }
  } catch { /* best effort: keep the zeroed counts */ }
  return out as { project: number; vendor: number; generated: number; test: number };
}
1756
+
1757
+ // ── Routes ──────────────────────────────────────────────────────────────────
1758
+
1759
/**
 * List routes, each joined with its file and (when linked) its handler
 * symbol, ordered by path then method.
 *
 * All filters are optional and combined with AND; `method` is upper-cased
 * before comparison and `pathSubstr` is a substring match. The
 * protocol-related filters and output columns are only used when the routes
 * table has a `protocol` column; otherwise those output fields are null.
 * Returns an empty list when the v4 tables are absent. `limit` defaults to
 * 200.
 */
listRoutes(options: {
  method?: string;
  pathSubstr?: string;
  framework?: string;
  /** v9 Track-H — filter by protocol ('http' / 'trpc' / 'graphql' / 'grpc' / 'kafka' / ...). */
  protocol?: string;
  operation?: string;
  topic?: string;
  queue?: string;
  service?: string;
  limit?: number;
} = {}): RouteRow[] {
  if (!this.hasV4Tables) return [];
  const hasProtocol = this.hasColumn('routes', 'protocol');

  const clauses: string[] = [];
  const params: Array<string | number | null> = [];
  // Append `<column> = ?` only when a non-empty value was supplied.
  const addEquals = (column: string, value: string | undefined): void => {
    if (!value) return;
    clauses.push(`${column} = ?`);
    params.push(value);
  };

  addEquals('r.method', options.method?.toUpperCase());
  if (options.pathSubstr) {
    clauses.push('r.path LIKE ?');
    params.push(`%${options.pathSubstr}%`);
  }
  addEquals('r.framework', options.framework);
  if (hasProtocol) {
    addEquals('r.protocol', options.protocol);
    addEquals('r.operation', options.operation);
    addEquals('r.topic', options.topic);
    addEquals('r.queue', options.queue);
    addEquals('r.service', options.service);
  }

  const whereSql = clauses.length ? 'WHERE ' + clauses.join(' AND ') : '';
  const protocolCols = hasProtocol
    ? ', r.protocol, r.operation, r.topic, r.queue, r.exchange, r.service, r.broker, r.metadata_json AS metadataJson'
    : '';
  const sql = `
    SELECT r.id, r.method, r.path, r.framework, r.handler_name AS handlerName,
    r.handler_id AS handlerId,
    s.qualified_name AS handlerSymbol,
    sf.path AS handlerFile,
    f.path AS filePath, r.line
    ${protocolCols}
    FROM routes r
    JOIN files f ON f.id = r.file_id
    LEFT JOIN symbols s ON s.id = r.handler_id
    LEFT JOIN files sf ON sf.id = s.file_id
    ${whereSql}
    ORDER BY r.path, r.method
    LIMIT ?
  `;
  // The LIMIT placeholder is last in the SQL, so its value is bound last.
  params.push(options.limit ?? 200);

  const routeRows = this.db.prepare(sql).all(...params) as Row[];
  return routeRows.map(row => ({
    id: toNum(row.id),
    method: toStr(row.method),
    path: toStr(row.path),
    framework: toStr(row.framework),
    handlerName: toNullStr(row.handlerName),
    handlerId: toNullNum(row.handlerId),
    handlerSymbol: toNullStr(row.handlerSymbol),
    handlerFile: toNullStr(row.handlerFile),
    filePath: toStr(row.filePath),
    line: toNum(row.line),
    protocol: hasProtocol ? toNullStr(row.protocol) : null,
    operation: hasProtocol ? toNullStr(row.operation) : null,
    topic: hasProtocol ? toNullStr(row.topic) : null,
    queue: hasProtocol ? toNullStr(row.queue) : null,
    exchange: hasProtocol ? toNullStr(row.exchange) : null,
    service: hasProtocol ? toNullStr(row.service) : null,
    broker: hasProtocol ? toNullStr(row.broker) : null,
    metadataJson: hasProtocol ? toNullStr(row.metadataJson) : null,
  }));
}
1827
+
1828
/** Total number of rows in routes; 0 when the v4 tables are absent. */
countRoutes(): number {
  if (!this.hasV4Tables) return 0;
  const sql = 'SELECT COUNT(*) AS c FROM routes';
  const result = this.db.prepare(sql).get() as Row;
  return toNum(result.c);
}
1833
+
1834
+ // ── v8 Track-G service calls + links ────────────────────────────────────
1835
+
1836
/** Number of rows in service_calls; 0 if the query fails. */
countServiceCalls(): number {
  try {
    const sql = 'SELECT COUNT(*) AS c FROM service_calls';
    const result = this.db.prepare(sql).get() as Row;
    return toNum(result.c);
  } catch {
    return 0;
  }
}
1843
+
1844
/** Number of rows in service_links; 0 if the query fails. */
countServiceLinks(): number {
  try {
    const sql = 'SELECT COUNT(*) AS c FROM service_links';
    const result = this.db.prepare(sql).get() as Row;
    return toNum(result.c);
  } catch {
    return 0;
  }
}
1851
+
1852
/**
 * List `service_calls` rows with the owning file path and the attributed
 * caller symbol (left-joined, so it may be null) joined in.
 *
 * All supplied filters are AND-ed together. `method` is upper-cased before
 * comparison and `pathSubstr` is matched as `LIKE '%…%'` against
 * `normalized_path`. Rows are ordered by `sc.id` ascending.
 *
 * Paging: `limit` defaults to 100 and is capped at 1000; `offset` defaults
 * to 0. Both are truncated to integers. A negative or NaN `limit` is treated
 * as "as many as allowed" (the cap) because SQLite would otherwise interpret
 * a negative LIMIT as unbounded. A negative or NaN `offset` becomes 0.
 *
 * @returns the mapped rows, or `[]` if the query throws.
 */
listServiceCalls(options: {
  protocol?: string;
  method?: string;
  pathSubstr?: string;
  framework?: string;
  callerSymbolId?: number;
  minConfidence?: number;
  /** v9 Track-H — filter by tRPC procedure / GraphQL operation / gRPC method. */
  operation?: string;
  /** v9 Track-H — filter by Kafka / pubsub topic. */
  topic?: string;
  /** v9 Track-H — filter by SQS / RabbitMQ queue. */
  queue?: string;
  /** v9 Track-H — filter by gRPC service / k8s service host. */
  service?: string;
  limit?: number;
  offset?: number;
} = {}): import('../types.js').ServiceCallRow[] {
  const where: string[] = [];
  const args: Array<string | number | null> = [];
  // Keeps each WHERE fragment and its bound value in lock-step.
  const filter = (clause: string, value: string | number): void => {
    where.push(clause);
    args.push(value);
  };
  if (options.protocol) filter('sc.protocol = ?', options.protocol);
  if (options.method) filter('sc.method = ?', options.method.toUpperCase());
  if (options.framework) filter('sc.framework = ?', options.framework);
  if (options.pathSubstr) filter('sc.normalized_path LIKE ?', `%${options.pathSubstr}%`);
  if (options.callerSymbolId != null) filter('sc.symbol_id = ?', options.callerSymbolId);
  if (options.minConfidence != null) filter('sc.confidence >= ?', options.minConfidence);
  if (options.operation) filter('sc.operation = ?', options.operation);
  if (options.topic) filter('sc.topic = ?', options.topic);
  if (options.queue) filter('sc.queue = ?', options.queue);
  if (options.service) filter('sc.service = ?', options.service);

  // A negative LIMIT means "no upper bound" to SQLite, which would defeat
  // the 1000-row cap; NaN fails the `>= 0` test and lands on the cap too.
  const requestedLimit = options.limit ?? 100;
  const limit = requestedLimit >= 0 ? Math.min(Math.trunc(requestedLimit), 1000) : 1000;
  // `|| 0` folds NaN (from a NaN offset) back to zero.
  const offset = Math.max(0, Math.trunc(options.offset ?? 0)) || 0;
  args.push(limit, offset);

  const sql = `
    SELECT sc.id, sc.protocol, sc.method, sc.raw_target AS rawTarget,
    sc.normalized_path AS normalizedPath, sc.host_hint AS hostHint,
    sc.env_key AS envKey, sc.framework, sc.line, sc.confidence,
    sc.operation, sc.topic, sc.queue, sc.exchange, sc.service,
    sc.broker, sc.metadata_json AS metadataJson,
    f.rel_path AS filePath,
    sc.symbol_id AS callerSymbolId,
    s.name AS callerName, s.qualified_name AS callerQualifiedName,
    s.kind AS callerKind
    FROM service_calls sc
    JOIN files f ON f.id = sc.file_id
    LEFT JOIN symbols s ON s.id = sc.symbol_id
    ${where.length ? 'WHERE ' + where.join(' AND ') : ''}
    ORDER BY sc.id ASC
    LIMIT ? OFFSET ?
  `;
  try {
    const rows = this.db.prepare(sql).all(...args) as Row[];
    return rows.map(row => ({
      id: toNum(row.id),
      protocol: toStr(row.protocol),
      method: toNullStr(row.method),
      rawTarget: toStr(row.rawTarget),
      normalizedPath: toNullStr(row.normalizedPath),
      hostHint: toNullStr(row.hostHint),
      envKey: toNullStr(row.envKey),
      framework: toStr(row.framework),
      line: toNum(row.line),
      confidence: Number(row.confidence ?? 0),
      filePath: toStr(row.filePath),
      callerSymbolId: row.callerSymbolId == null ? null : toNum(row.callerSymbolId),
      callerName: toNullStr(row.callerName),
      callerQualifiedName: toNullStr(row.callerQualifiedName),
      callerKind: toNullStr(row.callerKind),
      operation: toNullStr(row.operation),
      topic: toNullStr(row.topic),
      queue: toNullStr(row.queue),
      exchange: toNullStr(row.exchange),
      service: toNullStr(row.service),
      broker: toNullStr(row.broker),
      metadataJson: toNullStr(row.metadataJson),
    }));
  } catch { return []; }
}
1931
+
1932
/**
 * List `service_links` rows with the originating call, caller symbol,
 * handler symbol and matched route joined in (all left joins, so any of the
 * joined columns may come back null).
 *
 * All supplied filters are AND-ed together. `method` is upper-cased and
 * compared against the call's method; `pathSubstr` matches as `LIKE '%…%'`
 * against either the call's normalized path or the route path. Rows are
 * ordered by `sl.id` ascending.
 *
 * Paging: `limit` defaults to 100 and is capped at 1000; `offset` defaults
 * to 0. Both are truncated to integers. A negative or NaN `limit` is treated
 * as "as many as allowed" (the cap) because SQLite would otherwise interpret
 * a negative LIMIT as unbounded. A negative or NaN `offset` becomes 0.
 *
 * @returns the mapped rows, or `[]` if the query throws.
 */
listServiceLinks(options: {
  protocol?: string;
  method?: string;
  pathSubstr?: string;
  callerSymbolId?: number;
  handlerSymbolId?: number;
  matchKind?: string;
  minConfidence?: number;
  limit?: number;
  offset?: number;
} = {}): import('../types.js').ServiceLinkRow[] {
  const where: string[] = [];
  const args: Array<string | number | null> = [];
  if (options.protocol) { where.push('sl.protocol = ?'); args.push(options.protocol); }
  if (options.matchKind) { where.push('sl.match_kind = ?'); args.push(options.matchKind); }
  if (options.minConfidence != null) { where.push('sl.confidence >= ?'); args.push(options.minConfidence); }
  if (options.callerSymbolId != null) { where.push('sl.caller_symbol_id = ?'); args.push(options.callerSymbolId); }
  if (options.handlerSymbolId != null) { where.push('sl.handler_symbol_id = ?'); args.push(options.handlerSymbolId); }
  if (options.method) { where.push('sc.method = ?'); args.push(options.method.toUpperCase()); }
  if (options.pathSubstr) {
    // The same pattern is bound twice: once per side of the OR.
    const pattern = `%${options.pathSubstr}%`;
    where.push('(sc.normalized_path LIKE ? OR r.path LIKE ?)');
    args.push(pattern, pattern);
  }

  // A negative LIMIT means "no upper bound" to SQLite, which would defeat
  // the 1000-row cap; NaN fails the `>= 0` test and lands on the cap too.
  const requestedLimit = options.limit ?? 100;
  const limit = requestedLimit >= 0 ? Math.min(Math.trunc(requestedLimit), 1000) : 1000;
  // `|| 0` folds NaN (from a NaN offset) back to zero.
  const offset = Math.max(0, Math.trunc(options.offset ?? 0)) || 0;
  args.push(limit, offset);

  const sql = `
    SELECT sl.id, sl.call_id AS callId, sl.route_id AS routeId,
    sl.protocol, sl.match_kind AS matchKind,
    sl.confidence, sl.evidence_json AS evidenceJson,
    sl.caller_symbol_id AS callerSymbolId,
    cs.name AS callerName, cs.qualified_name AS callerQualifiedName,
    cf.rel_path AS callerFile,
    sc.line AS callerLine,
    sc.method AS callMethod, sc.raw_target AS callRawTarget,
    sc.normalized_path AS callNormalizedPath, sc.framework AS callFramework,
    sc.env_key AS callEnvKey, sc.host_hint AS callHostHint,
    sc.operation AS callOperation, sc.topic AS callTopic,
    sc.queue AS callQueue, sc.service AS callService,
    sl.handler_symbol_id AS handlerSymbolId,
    hs.name AS handlerName, hs.qualified_name AS handlerQualifiedName,
    hf.rel_path AS handlerFile, hs.line_start AS handlerLine,
    r.method AS routeMethod, r.path AS routePath, r.framework AS routeFramework,
    r.operation AS routeOperation, r.topic AS routeTopic,
    r.queue AS routeQueue, r.service AS routeService
    FROM service_links sl
    LEFT JOIN service_calls sc ON sc.id = sl.call_id
    LEFT JOIN files cf ON cf.id = sc.file_id
    LEFT JOIN symbols cs ON cs.id = sl.caller_symbol_id
    LEFT JOIN symbols hs ON hs.id = sl.handler_symbol_id
    LEFT JOIN files hf ON hf.id = hs.file_id
    LEFT JOIN routes r ON r.id = sl.route_id
    ${where.length ? 'WHERE ' + where.join(' AND ') : ''}
    ORDER BY sl.id ASC
    LIMIT ? OFFSET ?
  `;
  try {
    const rows = this.db.prepare(sql).all(...args) as Row[];
    return rows.map(row => ({
      id: toNum(row.id),
      callId: toNum(row.callId),
      routeId: row.routeId == null ? null : toNum(row.routeId),
      protocol: toStr(row.protocol),
      matchKind: toStr(row.matchKind),
      confidence: Number(row.confidence ?? 0),
      evidenceJson: toStr(row.evidenceJson),
      callerSymbolId: row.callerSymbolId == null ? null : toNum(row.callerSymbolId),
      callerName: toNullStr(row.callerName),
      callerQualifiedName: toNullStr(row.callerQualifiedName),
      callerFile: toNullStr(row.callerFile),
      callerLine: toNum(row.callerLine ?? 0),
      callMethod: toNullStr(row.callMethod),
      callRawTarget: toStr(row.callRawTarget),
      callNormalizedPath: toNullStr(row.callNormalizedPath),
      callFramework: toStr(row.callFramework),
      callEnvKey: toNullStr(row.callEnvKey),
      callHostHint: toNullStr(row.callHostHint),
      callOperation: toNullStr(row.callOperation),
      callTopic: toNullStr(row.callTopic),
      callQueue: toNullStr(row.callQueue),
      callService: toNullStr(row.callService),
      handlerSymbolId: row.handlerSymbolId == null ? null : toNum(row.handlerSymbolId),
      handlerName: toNullStr(row.handlerName),
      handlerQualifiedName: toNullStr(row.handlerQualifiedName),
      handlerFile: toNullStr(row.handlerFile),
      handlerLine: row.handlerLine == null ? null : toNum(row.handlerLine),
      routeMethod: toNullStr(row.routeMethod),
      routePath: toNullStr(row.routePath),
      routeFramework: toNullStr(row.routeFramework),
      routeOperation: toNullStr(row.routeOperation),
      routeTopic: toNullStr(row.routeTopic),
      routeQueue: toNullStr(row.routeQueue),
      routeService: toNullStr(row.routeService),
    }));
  } catch { return []; }
}
2027
+
2028
/**
 * id-scoped helper: service_links whose caller is `symbolId`. Delegates to
 * `listServiceLinks`, so that method's default and maximum `limit` apply.
 */
serviceLinksForCaller(symbolId: number, options: { limit?: number } = {}): import('../types.js').ServiceLinkRow[] {
  const cap = options.limit;
  return this.listServiceLinks({ callerSymbolId: symbolId, limit: cap });
}
2032
+
2033
/**
 * id-scoped helper: service_links whose handler is `symbolId`. Delegates to
 * `listServiceLinks`, so that method's default and maximum `limit` apply.
 */
serviceLinksForHandler(symbolId: number, options: { limit?: number } = {}): import('../types.js').ServiceLinkRow[] {
  const cap = options.limit;
  return this.listServiceLinks({ handlerSymbolId: symbolId, limit: cap });
}
2037
+
2038
/**
 * Breadth-first search over service_links, reading each link as a directed
 * edge `caller_symbol_id → handler_symbol_id`, looking for `toSymbolId`
 * starting from `fromSymbolId`.
 *
 * Only service links are followed here; the ordinary call-graph trace lives
 * in `tracePath`.
 *
 * @param maxDepth maximum number of link hops to explore (default 6).
 * @returns the first path found by the level-order search as symbol ids
 *   (source first, target last); `[fromSymbolId]` when source and target are
 *   the same symbol; `[]` when the target is not reached within `maxDepth`
 *   or the query throws.
 */
traceServicePath(fromSymbolId: number, toSymbolId: number, maxDepth: number = 6): number[] {
  if (fromSymbolId === toSymbolId) return [fromSymbolId];
  if (maxDepth <= 0) return [];
  try {
    const outgoing = this.db.prepare(
      `SELECT DISTINCT handler_symbol_id AS h
       FROM service_links
       WHERE caller_symbol_id = ? AND handler_symbol_id IS NOT NULL`,
    );
    // cameFrom[x] is the symbol from which x was first discovered.
    const cameFrom = new Map<number, number>();
    const seen = new Set<number>([fromSymbolId]);
    let level = [fromSymbolId];
    let depth = 0;
    while (depth < maxDepth && level.length > 0) {
      const upcoming: number[] = [];
      for (const node of level) {
        const links = outgoing.all(node) as Array<{ h: unknown }>;
        for (const link of links) {
          const handler = toNum(link.h);
          if (seen.has(handler)) continue;
          seen.add(handler);
          cameFrom.set(handler, node);
          if (handler !== toSymbolId) {
            upcoming.push(handler);
            continue;
          }
          // Target found: walk the discovery chain back to the source.
          const backwards: number[] = [];
          let at = handler;
          while (at !== fromSymbolId) {
            backwards.push(at);
            at = cameFrom.get(at)!;
          }
          backwards.push(fromSymbolId);
          return backwards.reverse();
        }
      }
      level = upcoming;
      depth++;
    }
    return [];
  } catch { return []; }
}
2087
+
2088
/**
 * v9 Track-H — bounded service-link traversal from a single symbol.
 *
 * Explores the directed service-link graph level by level from
 * `fromSymbolId`, following `caller_symbol_id → handler_symbol_id` edges and
 * collecting, for each handler reached, the protocols and match kinds of the
 * links seen pointing at it plus the hop chain of its first discovery.
 *
 * Bounds (defaults in parentheses):
 *   - maxDepth   number of hops explored from the source (4)
 *   - maxNodes   traversal stops once the visited set exceeds this (200)
 *   - maxFanout  at most this many outgoing links are followed per node (20)
 *
 * @returns `reached` sorted by depth then symbol id; `cutoff` naming the
 *   bound that was hit (or null); `fromExpanded` = number of nodes whose
 *   outgoing links were queried. If a query throws, whatever was gathered so
 *   far is returned.
 */
traceServiceDependencies(
  fromSymbolId: number,
  options: { maxDepth?: number; maxNodes?: number; maxFanout?: number } = {},
): {
  reached: Array<{
    symbolId: number;
    depth: number;
    protocols: string[];
    matchKinds: string[];
    hops: number[];
  }>;
  cutoff: 'maxNodes' | 'maxDepth' | 'maxFanout' | null;
  fromExpanded: number;
} {
  type Visit = { depth: number; protocols: Set<string>; matchKinds: Set<string>; parent: number | null };
  const depthLimit = options.maxDepth ?? 4;
  const nodeLimit = options.maxNodes ?? 200;
  const fanoutLimit = options.maxFanout ?? 20;

  const visits = new Map<number, Visit>();
  let cutoff: 'maxNodes' | 'maxDepth' | 'maxFanout' | null = null;
  let expanded = 0;

  try {
    // Highest-confidence links first; ties broken by handler symbol id so
    // each step is deterministic.
    const outgoing = this.db.prepare(
      `SELECT handler_symbol_id AS h, protocol AS p, match_kind AS mk
       FROM service_links
       WHERE caller_symbol_id = ? AND handler_symbol_id IS NOT NULL
       ORDER BY confidence DESC, handler_symbol_id ASC
       LIMIT ?`,
    );

    visits.set(fromSymbolId, { depth: 0, protocols: new Set(), matchKinds: new Set(), parent: null });
    let level: number[] = [fromSymbolId];
    let deepestLevel: number[] = [];
    let nodeCapHit = false;
    for (let depth = 0; depth < depthLimit; depth++) {
      const upcoming: number[] = [];
      for (const node of level) {
        // Ask for one row more than allowed so a fanout overflow is detectable.
        const links = outgoing.all(node, fanoutLimit + 1) as Array<{ h: unknown; p: unknown; mk: unknown }>;
        if (links.length > fanoutLimit) cutoff = 'maxFanout';
        const usable = Math.min(links.length, fanoutLimit);
        for (let i = 0; i < usable && !nodeCapHit; i++) {
          const handler = toNum(links[i].h);
          const protocol = toStr(links[i].p);
          const matchKind = toStr(links[i].mk);
          const known = visits.get(handler);
          if (known) {
            // Already discovered: only enrich its protocol / match-kind sets.
            known.protocols.add(protocol);
            known.matchKinds.add(matchKind);
            continue;
          }
          visits.set(handler, {
            depth: depth + 1,
            protocols: new Set([protocol]),
            matchKinds: new Set([matchKind]),
            parent: node,
          });
          upcoming.push(handler);
          if (visits.size > nodeLimit) {
            cutoff = 'maxNodes';
            nodeCapHit = true;
          }
        }
        expanded++;
        if (nodeCapHit) break;
      }
      if (nodeCapHit || upcoming.length === 0) break;
      if (depth + 1 >= depthLimit) {
        // Remember the last level so we can probe whether it has more links.
        deepestLevel = upcoming;
        break;
      }
      level = upcoming;
    }
    if (!cutoff && visits.size >= nodeLimit) cutoff = 'maxNodes';
    if (!cutoff && deepestLevel.length > 0) {
      // The depth bound only counts as a cutoff if something lay beyond it.
      const placeholders = deepestLevel.map(() => '?').join(',');
      const beyond = this.db.prepare(
        `SELECT 1 AS ok
         FROM service_links
         WHERE caller_symbol_id IN (${placeholders})
         AND handler_symbol_id IS NOT NULL
         LIMIT 1`,
      ).get(...deepestLevel) as Row | undefined;
      if (beyond) cutoff = 'maxDepth';
    }
  } catch { /* fall through with what we have */ }

  // Build the source → handler hop chain for everything except the source.
  const out: Array<{
    symbolId: number; depth: number;
    protocols: string[]; matchKinds: string[]; hops: number[];
  }> = [];
  for (const [symbolId, visit] of visits) {
    if (symbolId === fromSymbolId) continue;
    const hops: number[] = [symbolId];
    let up = visit.parent;
    while (up !== null && up !== fromSymbolId) {
      hops.push(up);
      up = visits.get(up)?.parent ?? null;
    }
    hops.push(fromSymbolId);
    hops.reverse();
    out.push({
      symbolId,
      depth: visit.depth,
      protocols: [...visit.protocols].sort(),
      matchKinds: [...visit.matchKinds].sort(),
      hops,
    });
  }
  // Deterministic order: by depth ASC, then symbolId ASC.
  out.sort((a, b) => a.depth - b.depth || a.symbolId - b.symbolId);
  return { reached: out, cutoff, fromExpanded: expanded };
}
2219
+
2220
/**
 * v9 Track-H — bounded service-link traversal at module granularity.
 *
 * Aggregates service_links into module → module edges (caller module taken
 * from the call's file, handler module from the handler symbol's file,
 * same-module links excluded) and walks them breadth-first from
 * `fromModuleId`. For each reached module the result carries the hop depth
 * at which it was first discovered, the protocols of the edges seen leading
 * into it, and the summed link count of those edges.
 *
 * Useful for "which modules depend on `billing` through HTTP/Kafka/etc?".
 *
 * @param options.maxDepth hops explored from the source (default 3).
 * @param options.maxNodes traversal stops once the visited set (source
 *   included) exceeds this (default 50).
 * @returns `reached` sorted by depth then module id, and `cutoff` naming the
 *   bound that was hit (or null). Returns an empty result when
 *   `hasModuleTables` is false.
 */
traceModuleServiceDependencies(
  fromModuleId: number,
  options: { maxDepth?: number; maxNodes?: number } = {},
): {
  reached: Array<{ moduleId: number; depth: number; protocols: string[]; viaLinks: number }>;
  cutoff: 'maxNodes' | 'maxDepth' | null;
} {
  const maxDepth = options.maxDepth ?? 3;
  const maxNodes = options.maxNodes ?? 50;
  if (!this.hasModuleTables) return { reached: [], cutoff: null };

  // Materialize module → module service-link weights once for the BFS.
  type ModuleEdge = { from: number; to: number; protocol: string; n: number };
  const edges = this.db.prepare(
    `SELECT mm1.module_id AS f, mm2.module_id AS t, sl.protocol AS p, COUNT(*) AS n
     FROM service_links sl
     JOIN service_calls sc ON sc.id = sl.call_id
     JOIN module_members mm1 ON mm1.file_id = sc.file_id
     JOIN symbols hs ON hs.id = sl.handler_symbol_id
     JOIN module_members mm2 ON mm2.file_id = hs.file_id
     WHERE mm1.module_id <> mm2.module_id
     GROUP BY mm1.module_id, mm2.module_id, sl.protocol
     ORDER BY mm1.module_id ASC, mm2.module_id ASC, sl.protocol ASC`,
  ).all() as Array<{ f: unknown; t: unknown; p: unknown; n: unknown }>;
  const adj = new Map<number, ModuleEdge[]>();
  for (const e of edges) {
    const from = toNum(e.f);
    const list = adj.get(from) ?? [];
    list.push({ from, to: toNum(e.t), protocol: toStr(e.p), n: toNum(e.n) });
    adj.set(from, list);
  }

  type Reached = { depth: number; protocols: Set<string>; viaLinks: number };
  const reached = new Map<number, Reached>();
  reached.set(fromModuleId, { depth: 0, protocols: new Set(), viaLinks: 0 });
  let cutoff: 'maxNodes' | 'maxDepth' | null = null;

  let frontier: number[] = [fromModuleId];
  let maxDepthFrontier: number[] = [];
  for (let depth = 0; depth < maxDepth; depth++) {
    const next: number[] = [];
    for (const cur of frontier) {
      const outs = adj.get(cur) ?? [];
      for (const e of outs) {
        // Edges looping back to the source are not reported.
        if (e.to === fromModuleId) continue;
        let entry = reached.get(e.to);
        const firstVisit = entry === undefined;
        if (!entry) {
          entry = { depth: depth + 1, protocols: new Set(), viaLinks: 0 };
          reached.set(e.to, entry);
          next.push(e.to);
        }
        // Record the edge BEFORE testing the node cap: the module that trips
        // the cap is still returned, so it must carry the protocol and link
        // count of the edge that discovered it rather than empty values.
        entry.protocols.add(e.protocol);
        entry.viaLinks += e.n;
        if (firstVisit && reached.size > maxNodes) { cutoff = 'maxNodes'; break; }
      }
      if (cutoff) break;
    }
    if (cutoff) break;
    if (next.length === 0) break;
    if (depth + 1 >= maxDepth) {
      // Keep the final level so we can tell whether anything lies beyond it.
      maxDepthFrontier = next;
      break;
    }
    frontier = next;
  }
  if (!cutoff && maxDepthFrontier.some(id => (adj.get(id)?.length ?? 0) > 0)) {
    cutoff = 'maxDepth';
  }

  const out: Array<{ moduleId: number; depth: number; protocols: string[]; viaLinks: number }> = [];
  for (const [id, r] of reached) {
    if (id === fromModuleId) continue;
    out.push({
      moduleId: id, depth: r.depth,
      protocols: Array.from(r.protocols).sort(),
      viaLinks: r.viaLinks,
    });
  }
  // Deterministic order: by depth ASC, then moduleId ASC.
  out.sort((a, b) => a.depth - b.depth || a.moduleId - b.moduleId);
  return { reached: out, cutoff };
}
2312
+
2313
+ // ── v10 External bundle layers ─────────────────────────────────────────────
2314
+
2315
/**
 * Whether the v10 external/boundary/continuity tables are available, as
 * recorded in `hasV10Tables`.
 */
hasV10(): boolean {
  return this.hasV10Tables;
}
2317
+
2318
/**
 * Wipe and repopulate `boundaries`, `boundary_members` and `boundary_edges`
 * inside one BEGIN/COMMIT transaction; any failure rolls back and rethrows.
 *
 * `edges` refer to boundaries by their index in the `boundaries` argument;
 * an edge whose index does not resolve to an inserted boundary is skipped.
 * Does nothing when `hasV10Tables` is false.
 */
replaceBoundaries(
  boundaries: Array<{
    label: string;
    kind: string;
    rootRelPath: string;
    manifestPath: string | null;
    ecosystem: string | null;
    fileIds: number[];
  }>,
  edges: Array<{ fromIndex: number; toIndex: number; kind: string; weight: number }>,
): void {
  this.assertWritable();
  if (!this.hasV10Tables) return;
  this.db.exec('BEGIN');
  try {
    for (const table of ['boundary_edges', 'boundary_members', 'boundaries']) {
      this.db.exec('DELETE FROM ' + table);
    }
    const insBoundary = this.db.prepare(`
      INSERT INTO boundaries
      (label, kind, root_rel_path, manifest_path, ecosystem, size_files, computed_at)
      VALUES (?, ?, ?, ?, ?, ?, ?)
    `);
    const insMember = this.db.prepare(
      'INSERT OR REPLACE INTO boundary_members (file_id, boundary_id) VALUES (?, ?)',
    );
    const insEdge = this.db.prepare(
      'INSERT OR REPLACE INTO boundary_edges (from_boundary_id, to_boundary_id, kind, weight) VALUES (?, ?, ?, ?)',
    );
    const computedAt = Date.now();
    // Position i holds the row id assigned to boundaries[i].
    const assignedIds = boundaries.map(boundary => {
      const inserted = insBoundary.run(
        boundary.label, boundary.kind, boundary.rootRelPath,
        boundary.manifestPath, boundary.ecosystem,
        boundary.fileIds.length, computedAt,
      );
      const boundaryId = toNum(inserted.lastInsertRowid);
      for (const fileId of boundary.fileIds) insMember.run(fileId, boundaryId);
      return boundaryId;
    });
    for (const { fromIndex, toIndex, kind, weight } of edges) {
      const source = assignedIds[fromIndex];
      const target = assignedIds[toIndex];
      if (source != null && target != null) insEdge.run(source, target, kind, weight);
    }
    this.db.exec('COMMIT');
  } catch (err) {
    this.db.exec('ROLLBACK');
    throw err;
  }
}
2372
+
2373
/**
 * True iff the `boundaries` table currently holds at least one row. False
 * when the v10 tables are absent or the count query fails.
 */
hasBoundariesData(): boolean {
  // countBoundaries() already yields 0 for "no v10 tables" and for errors.
  return this.countBoundaries() > 0;
}
2381
+
2382
/** Row count of `boundaries`; 0 when the v10 tables are absent or the query fails. */
countBoundaries(): number {
  if (!this.hasV10Tables) return 0;
  try {
    const counted = this.db.prepare('SELECT COUNT(*) AS c FROM boundaries').get() as Row;
    return toNum(counted.c);
  } catch {
    return 0;
  }
}
2388
+
2389
/**
 * List boundaries, largest (by `size_files`) first and then by label, up to
 * `limit` rows (default 200). Returns `[]` when the v10 tables are absent or
 * the query fails.
 */
listBoundaries(limit = 200): Array<{
  id: number; label: string; kind: string; rootRelPath: string;
  manifestPath: string | null; ecosystem: string | null; sizeFiles: number;
}> {
  if (!this.hasV10Tables) return [];
  try {
    const query = this.db.prepare(`
      SELECT id, label, kind, root_rel_path AS rootRelPath,
      manifest_path AS manifestPath, ecosystem,
      size_files AS sizeFiles
      FROM boundaries
      ORDER BY size_files DESC, label
      LIMIT ?
    `);
    const found = query.all(limit) as Row[];
    return found.map(row => {
      const id = toNum(row.id);
      const label = toStr(row.label);
      const kind = toStr(row.kind);
      return {
        id, label, kind,
        rootRelPath: toStr(row.rootRelPath),
        manifestPath: toNullStr(row.manifestPath),
        ecosystem: toNullStr(row.ecosystem),
        sizeFiles: toNum(row.sizeFiles),
      };
    });
  } catch {
    return [];
  }
}
2412
+
2413
/**
 * Look up the boundary a file id is a member of. Returns null when no
 * membership row matches, the v10 tables are absent, or the query fails.
 */
boundaryForFile(fileId: number): { id: number; label: string; kind: string; rootRelPath: string } | null {
  if (!this.hasV10Tables) return null;
  try {
    const owner = this.db.prepare(`
      SELECT b.id, b.label, b.kind, b.root_rel_path AS rootRelPath
      FROM boundary_members bm JOIN boundaries b ON b.id = bm.boundary_id
      WHERE bm.file_id = ?
    `).get(fileId) as Row | undefined;
    return owner
      ? {
          id: toNum(owner.id),
          label: toStr(owner.label),
          kind: toStr(owner.kind),
          rootRelPath: toStr(owner.rootRelPath),
        }
      : null;
  } catch {
    return null;
  }
}
2431
+
2432
/**
 * Boundary edges touching `boundaryId`, heaviest first.
 *
 * With `direction: 'out'` (the default) the given boundary is the edge's
 * source and the returned rows describe the targets; any other direction
 * value flips the roles. At most `limit` rows (default 100) are returned.
 * Returns `[]` when the v10 tables are absent or the query fails.
 */
boundaryDependencies(
  boundaryId: number,
  options: { direction?: 'in' | 'out'; limit?: number } = {},
): Array<{ boundaryId: number; label: string; kind: string; weight: number }> {
  if (!this.hasV10Tables) return [];
  const outgoing = (options.direction ?? 'out') === 'out';
  const limit = options.limit ?? 100;
  // Column names come from this fixed pair only, never from caller input.
  const [sideThis, sideOther] = outgoing
    ? ['from_boundary_id', 'to_boundary_id']
    : ['to_boundary_id', 'from_boundary_id'];
  try {
    const found = this.db.prepare(`
      SELECT b.id AS boundaryId, b.label, be.kind, be.weight
      FROM boundary_edges be JOIN boundaries b ON b.id = be.${sideOther}
      WHERE be.${sideThis} = ?
      ORDER BY be.weight DESC
      LIMIT ?
    `).all(boundaryId, limit) as Row[];
    return found.map(row => ({
      boundaryId: toNum(row.boundaryId),
      label: toStr(row.label),
      kind: toStr(row.kind),
      weight: toNum(row.weight),
    }));
  } catch {
    return [];
  }
}
2458
+
2459
/**
 * For the `call` edges leaving `symbolId`, pair each callee symbol id with
 * the boundary that its file is a member of. Callees whose file has no
 * boundary membership are omitted by the inner joins. Returns `[]` when the
 * v10 tables are absent or the query fails.
 */
calleeBoundariesOf(symbolId: number): Array<{ calleeId: number; boundaryId: number }> {
  if (!this.hasV10Tables) return [];
  try {
    const query = this.db.prepare(`
      SELECT DISTINCT e.to_id AS calleeId, bm.boundary_id AS boundaryId
      FROM edges e
      JOIN symbols s ON s.id = e.to_id
      JOIN boundary_members bm ON bm.file_id = s.file_id
      WHERE e.from_id = ? AND e.kind = 'call' AND e.to_id IS NOT NULL
    `);
    const pairs = query.all(symbolId) as Row[];
    return pairs.map(pair => {
      const calleeId = toNum(pair.calleeId);
      const boundaryId = toNum(pair.boundaryId);
      return { calleeId, boundaryId };
    });
  } catch {
    return [];
  }
}
2475
+
2476
/**
 * Return every files.id that's actually a phantom file backing an external
 * bundle layer, i.e. whose `path` starts with `__external_bundle__/`. The
 * indexer's prune pass preserves these so a local re-index never drops
 * external-imported rows.
 *
 * The underscores in the prefix are escaped: in a LIKE pattern a bare `_`
 * matches any single character, which would also select unrelated paths
 * such as `xxexternalxbundlexx/...` and shield them from pruning.
 *
 * @returns the matching ids, or `[]` if the query throws.
 */
listExternalPhantomFileIds(): number[] {
  try {
    // SQL text seen by SQLite: LIKE '\_\_external\_bundle\_\_/%' ESCAPE '\'
    const rows = this.db.prepare(
      "SELECT id FROM files WHERE path LIKE '\\_\\_external\\_bundle\\_\\_/%' ESCAPE '\\'",
    ).all() as Row[];
    return rows.map(row => toNum(row.id));
  } catch { return []; }
}
2489
+
2490
/**
 * Record an external bundle import keyed by `bundlePath`.
 *
 * If a row with that path already exists it is updated in place (metadata,
 * counters and `imported_at` refreshed) and its id is returned; otherwise a
 * new row with `source_kind = 'external-bundle'` is inserted and the new id
 * is returned. Returns 0 without touching the database when the v10 tables
 * are absent.
 */
upsertExternalBundle(args: {
  bundlePath: string;
  externalProject: string | null;
  externalVersion: string | null;
  externalHash: string | null;
  schemaVersion: number;
  routesImported: number;
  serviceCallsImported: number;
  serviceLinksImported: number;
}): number {
  this.assertWritable();
  if (!this.hasV10Tables) return 0;
  const {
    bundlePath, externalProject, externalVersion, externalHash, schemaVersion,
    routesImported, serviceCallsImported, serviceLinksImported,
  } = args;
  const prior = this.db.prepare(
    'SELECT id FROM external_bundles WHERE bundle_path = ?',
  ).get(bundlePath) as Row | undefined;

  if (!prior) {
    // First import of this path: create the layer row.
    const inserted = this.db.prepare(`
      INSERT INTO external_bundles
      (source_kind, bundle_path, external_project, external_version, external_hash,
      schema_version, imported_at, routes_imported, service_calls_imported, service_links_imported)
      VALUES ('external-bundle', ?, ?, ?, ?, ?, ?, ?, ?, ?)
    `).run(
      bundlePath, externalProject, externalVersion, externalHash,
      schemaVersion, Date.now(),
      routesImported, serviceCallsImported, serviceLinksImported,
    );
    return toNum(inserted.lastInsertRowid);
  }

  // Re-import: keep the existing id and refresh everything else.
  const bundleId = toNum(prior.id);
  this.db.prepare(`
    UPDATE external_bundles
    SET external_project = ?, external_version = ?, external_hash = ?,
    schema_version = ?, imported_at = ?, routes_imported = ?,
    service_calls_imported = ?, service_links_imported = ?
    WHERE id = ?
  `).run(
    externalProject, externalVersion, externalHash,
    schemaVersion, Date.now(),
    routesImported, serviceCallsImported, serviceLinksImported,
    bundleId,
  );
  return bundleId;
}
2534
+
2535
/**
 * Fetch the external_bundles row registered for `bundlePath`.
 *
 * @returns its id, path, project, version and hash; or null when there is no
 *   such row, the v10 tables are absent, or the query fails.
 */
findExternalBundleByPath(bundlePath: string): {
  id: number; bundlePath: string; externalProject: string | null;
  externalVersion: string | null; externalHash: string | null;
} | null {
  if (!this.hasV10Tables) return null;
  try {
    const match = this.db.prepare(`
      SELECT id, bundle_path AS bundlePath, external_project AS externalProject,
      external_version AS externalVersion, external_hash AS externalHash
      FROM external_bundles WHERE bundle_path = ?
    `).get(bundlePath) as Row | undefined;
    return match
      ? {
          id: toNum(match.id),
          bundlePath: toStr(match.bundlePath),
          externalProject: toNullStr(match.externalProject),
          externalVersion: toNullStr(match.externalVersion),
          externalHash: toNullStr(match.externalHash),
        }
      : null;
  } catch {
    return null;
  }
}
2560
+
2561
/**
 * All external_bundles rows, most recently imported first. Returns `[]`
 * when the v10 tables are absent or the query fails.
 */
listExternalBundles(): Array<{
  id: number; sourceKind: string; bundlePath: string;
  externalProject: string | null; externalVersion: string | null;
  externalHash: string | null; schemaVersion: number; importedAt: number;
  routesImported: number; serviceCallsImported: number; serviceLinksImported: number;
}> {
  if (!this.hasV10Tables) return [];
  try {
    const query = this.db.prepare(`
      SELECT id, source_kind AS sourceKind, bundle_path AS bundlePath,
      external_project AS externalProject,
      external_version AS externalVersion,
      external_hash AS externalHash,
      schema_version AS schemaVersion,
      imported_at AS importedAt,
      routes_imported AS routesImported,
      service_calls_imported AS serviceCallsImported,
      service_links_imported AS serviceLinksImported
      FROM external_bundles
      ORDER BY imported_at DESC
    `);
    const bundles = query.all() as Row[];
    return bundles.map(bundle => ({
      id: toNum(bundle.id),
      sourceKind: toStr(bundle.sourceKind),
      bundlePath: toStr(bundle.bundlePath),
      externalProject: toNullStr(bundle.externalProject),
      externalVersion: toNullStr(bundle.externalVersion),
      externalHash: toNullStr(bundle.externalHash),
      schemaVersion: toNum(bundle.schemaVersion),
      importedAt: toNum(bundle.importedAt),
      routesImported: toNum(bundle.routesImported),
      serviceCallsImported: toNum(bundle.serviceCallsImported),
      serviceLinksImported: toNum(bundle.serviceLinksImported),
    }));
  } catch {
    return [];
  }
}
2598
+
2599
/**
 * Remove one external bundle layer: its routes, service_calls and
 * service_links rows, its phantom `files` row, and finally the
 * external_bundles row itself — all inside one BEGIN/COMMIT transaction.
 * Used during re-import so the fresh import fully replaces the previous
 * snapshot of that bundle.
 *
 * The per-table deletes are best-effort: if one throws, its count stays 0
 * and the remaining steps still run. A failure deleting the bundle row (or
 * committing) rolls back and rethrows.
 *
 * @returns how many rows were deleted from each of the three layer tables
 *   (all zeros when the v10 tables are absent).
 */
clearExternalBundle(bundleId: number): {
  routes: number; serviceCalls: number; serviceLinks: number;
} {
  this.assertWritable();
  if (!this.hasV10Tables) return { routes: 0, serviceCalls: 0, serviceLinks: 0 };

  // Runs one layer delete and reports rows removed; a failure counts as 0.
  const dropLayerRows = (sql: string): number => {
    try {
      return toNum(this.db.prepare(sql).run(bundleId).changes);
    } catch {
      return 0;
    }
  };

  let routes = 0;
  let serviceCalls = 0;
  let serviceLinks = 0;
  this.db.exec('BEGIN');
  try {
    routes = dropLayerRows('DELETE FROM routes WHERE external_bundle_id = ?');
    serviceCalls = dropLayerRows('DELETE FROM service_calls WHERE external_bundle_id = ?');
    serviceLinks = dropLayerRows('DELETE FROM service_links WHERE external_bundle_id = ?');
    // Drop the phantom file row that owned this layer's external routes so a
    // forced re-import (which mints a new bundle id + phantom path) does not
    // leak orphaned `__external_bundle__/...` rows alongside sibling layers.
    try {
      const dropPhantom = this.db.prepare(
        "DELETE FROM files WHERE hash = ? AND path LIKE '__external_bundle__/%'",
      );
      dropPhantom.run(`external:${bundleId}`);
    } catch { /* */ }
    this.db.prepare('DELETE FROM external_bundles WHERE id = ?').run(bundleId);
    this.db.exec('COMMIT');
  } catch (err) {
    this.db.exec('ROLLBACK');
    throw err;
  }
  return { routes, serviceCalls, serviceLinks };
}
2644
+
2645
/**
 * Insert a single route that originated from an external bundle.
 *
 * The row's `file_id` is bound to `args.externalFileId` — the per-bundle
 * "phantom" file row — because external routes do not belong to any local
 * file while the schema keeps `routes.file_id` NOT NULL. The row is also
 * tagged with `external_bundle_id` so the whole layer can be wiped at once.
 *
 * @param args Route fields. `protocol` defaults to `'http'`; every other
 *             optional field is stored as NULL when omitted.
 * @returns The new row id, or 0 when the v10 tables are not present.
 */
insertExternalRoute(args: {
  bundleId: number;
  externalFileId: number;
  method: string;
  path: string;
  framework: string;
  handlerName: string | null;
  line: number;
  protocol?: string;
  operation?: string | null;
  topic?: string | null;
  queue?: string | null;
  exchange?: string | null;
  service?: string | null;
  broker?: string | null;
  metadataJson?: string | null;
}): number {
  this.assertWritable();
  if (!this.hasV10Tables) return 0;

  // Bind values in the exact column order of the INSERT below.
  const bindings: Array<string | number | null> = [
    args.externalFileId,
    args.method,
    args.path,
    args.framework,
    args.handlerName,
    args.line,
    args.protocol ?? 'http',
    args.operation ?? null,
    args.topic ?? null,
    args.queue ?? null,
    args.exchange ?? null,
    args.service ?? null,
    args.broker ?? null,
    args.metadataJson ?? null,
    args.bundleId,
  ];
  const insert = this.db.prepare(`
    INSERT INTO routes
    (file_id, method, path, framework, handler_name, line,
    protocol, operation, topic, queue, exchange, service, broker, metadata_json,
    external_bundle_id)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
  `);
  const outcome = insert.run(...bindings);
  return toNum(outcome.lastInsertRowid);
}
2688
+
2689
/**
 * Return the id of the "phantom" file row that owns a bundle's external
 * rows, creating it on first use. The row's path is
 * `__external_bundle__/<externalProject>/<bundleId>`, so every bundle id
 * gets its own phantom and deleting one layer leaves siblings untouched.
 * The row is upserted with hash `external:<bundleId>` and the literal
 * values 'external' and 'vendor' (presumably language and role — the
 * column order of the shared upsert statement is defined elsewhere).
 *
 * @param bundleId        external_bundles.id the phantom belongs to.
 * @param externalProject Project name embedded in the phantom path.
 * @returns files.id of the existing or newly created phantom row.
 */
ensureExternalFile(bundleId: number, externalProject: string): number {
  this.assertWritable();
  const phantomPath = `__external_bundle__/${externalProject}/${bundleId}`;

  const lookup = this.db.prepare('SELECT id FROM files WHERE path = ?');
  const found = lookup.get(phantomPath) as Row | undefined;
  if (found) {
    return toNum(found.id);
  }

  const created = this.stmtUpsertFile.run(
    phantomPath, phantomPath, 'external',
    `external:${bundleId}`, 0, Date.now(),
    'vendor', 1, 0,
  );
  return toNum(created.lastInsertRowid);
}
2708
+
2709
/**
 * Number of `routes` rows that carry a non-NULL `external_bundle_id`.
 * Returns 0 when the v10 tables are absent or the query fails.
 */
countExternalRoutes(): number {
  if (!this.hasV10Tables) return 0;
  try {
    const counter = this.db.prepare(
      'SELECT COUNT(*) AS c FROM routes WHERE external_bundle_id IS NOT NULL',
    );
    const result = counter.get() as Row;
    return toNum(result.c);
  } catch {
    return 0;
  }
}
2719
+
2720
/**
 * List routes that came from external bundles, joined with their bundle's
 * project name. Useful for verifying that an external import landed and for
 * the seer_external_bundles MCP tool.
 *
 * @param options.bundleId   Restrict to one bundle.
 * @param options.method     Exact method match (upper-cased before comparing).
 * @param options.pathSubstr Substring match on the route path (SQL LIKE).
 * @param options.protocol   Exact protocol match.
 * @param options.limit      Maximum rows returned; defaults to 200.
 * @returns Matching routes ordered by path then method; an empty array when
 *          the v10 tables are absent or the query fails.
 */
listExternalRoutes(options: {
  bundleId?: number;
  method?: string;
  pathSubstr?: string;
  protocol?: string;
  limit?: number;
} = {}): Array<{
  id: number;
  method: string;
  path: string;
  framework: string;
  handlerName: string | null;
  line: number;
  protocol: string | null;
  operation: string | null;
  topic: string | null;
  queue: string | null;
  service: string | null;
  externalBundleId: number;
  externalProject: string | null;
}> {
  if (!this.hasV10Tables) return [];

  const clauses: string[] = ['r.external_bundle_id IS NOT NULL'];
  const params: Array<string | number | null> = [];
  // Register one predicate together with its bound value.
  const filter = (clause: string, value: string | number): void => {
    clauses.push(clause);
    params.push(value);
  };

  if (options.bundleId != null) filter('r.external_bundle_id = ?', options.bundleId);
  if (options.method) filter('r.method = ?', options.method.toUpperCase());
  if (options.pathSubstr) filter('r.path LIKE ?', `%${options.pathSubstr}%`);
  if (options.protocol) filter('r.protocol = ?', options.protocol);
  // LIMIT is always the last placeholder.
  params.push(options.limit ?? 200);

  try {
    const query = this.db.prepare(`
      SELECT r.id, r.method, r.path, r.framework, r.handler_name AS handlerName,
      r.line, r.protocol, r.operation, r.topic, r.queue, r.service,
      r.external_bundle_id AS externalBundleId,
      eb.external_project AS externalProject
      FROM routes r
      JOIN external_bundles eb ON eb.id = r.external_bundle_id
      WHERE ${clauses.join(' AND ')}
      ORDER BY r.path, r.method
      LIMIT ?
    `);
    const found = query.all(...params) as Row[];
    return found.map(row => ({
      id: toNum(row.id),
      method: toStr(row.method),
      path: toStr(row.path),
      framework: toStr(row.framework),
      handlerName: toNullStr(row.handlerName),
      line: toNum(row.line),
      protocol: toNullStr(row.protocol),
      operation: toNullStr(row.operation),
      topic: toNullStr(row.topic),
      queue: toNullStr(row.queue),
      service: toNullStr(row.service),
      externalBundleId: toNum(row.externalBundleId),
      externalProject: toNullStr(row.externalProject),
    }));
  } catch {
    return [];
  }
}
2783
+
2784
+ // ── External dependencies ───────────────────────────────────────────────────
2785
+
2786
/**
 * List rows of `external_dependencies`, ordered by ecosystem then name.
 *
 * @param options.ecosystem  Exact ecosystem match.
 * @param options.nameSubstr Substring match on the dependency name (SQL LIKE).
 * @param options.limit      Maximum rows returned; defaults to 500.
 * @returns Matching rows, or an empty array when the v4 tables are absent.
 */
listExternalDeps(options: { ecosystem?: string; nameSubstr?: string; limit?: number } = {}): ExternalDepRow[] {
  if (!this.hasV4Tables) return [];

  const clauses: string[] = [];
  const params: Array<string | number | null> = [];
  if (options.ecosystem) {
    clauses.push('ecosystem = ?');
    params.push(options.ecosystem);
  }
  if (options.nameSubstr) {
    clauses.push('name LIKE ?');
    params.push(`%${options.nameSubstr}%`);
  }
  params.push(options.limit ?? 500);

  // The WHERE clause is emitted only when at least one filter was supplied.
  const whereSql = clauses.length ? 'WHERE ' + clauses.join(' AND ') : '';
  const found = this.db.prepare(`
    SELECT id, ecosystem, name, version_range AS versionRange,
    manifest_path AS manifestPath, is_dev AS isDev
    FROM external_dependencies
    ${whereSql}
    ORDER BY ecosystem, name
    LIMIT ?
  `).all(...params) as Row[];

  return found.map(dep => ({
    id: toNum(dep.id),
    ecosystem: toStr(dep.ecosystem),
    name: toStr(dep.name),
    versionRange: toNullStr(dep.versionRange),
    manifestPath: toStr(dep.manifestPath),
    isDev: toNum(dep.isDev),
  }));
}
2812
+
2813
/** Total number of `external_dependencies` rows; 0 when the v4 tables are absent. */
countExternalDeps(): number {
  if (!this.hasV4Tables) return 0;
  const counter = this.db.prepare('SELECT COUNT(*) AS c FROM external_dependencies');
  const result = counter.get() as Row;
  return toNum(result.c);
}
2818
+
2819
+ // ── Config keys ─────────────────────────────────────────────────────────────
2820
+
2821
/**
 * List `config_keys` rows joined with their file path and (when linked)
 * the qualified name of the owning symbol, ordered by key then file path.
 *
 * @param options.key    Substring match on the key (SQL LIKE).
 * @param options.source Exact match on the source column.
 * @param options.limit  Maximum rows returned; defaults to 200.
 * @returns Matching rows, or an empty array when the v4 tables are absent.
 */
listConfigKeys(options: { key?: string; source?: string; limit?: number } = {}): ConfigKeyRow[] {
  if (!this.hasV4Tables) return [];

  const clauses: string[] = [];
  const params: Array<string | number | null> = [];
  if (options.key) {
    clauses.push('c.key LIKE ?');
    params.push(`%${options.key}%`);
  }
  if (options.source) {
    clauses.push('c.source = ?');
    params.push(options.source);
  }
  params.push(options.limit ?? 200);

  const whereSql = clauses.length ? 'WHERE ' + clauses.join(' AND ') : '';
  const found = this.db.prepare(`
    SELECT c.id, c.key, c.source, f.path AS filePath,
    c.symbol_id AS symbolId,
    s.qualified_name AS symbolName,
    c.line
    FROM config_keys c
    JOIN files f ON f.id = c.file_id
    LEFT JOIN symbols s ON s.id = c.symbol_id
    ${whereSql}
    ORDER BY c.key, f.path
    LIMIT ?
  `).all(...params) as Row[];

  return found.map(entry => ({
    id: toNum(entry.id),
    key: toStr(entry.key),
    source: toStr(entry.source),
    filePath: toStr(entry.filePath),
    symbolId: toNullNum(entry.symbolId),
    symbolName: toNullStr(entry.symbolName),
    line: toNum(entry.line),
  }));
}
2852
+
2853
/** Total number of `config_keys` rows; 0 when the v4 tables are absent. */
countConfigKeys(): number {
  if (!this.hasV4Tables) return 0;
  const counter = this.db.prepare('SELECT COUNT(*) AS c FROM config_keys');
  const result = counter.get() as Row;
  return toNum(result.c);
}
2858
+
2859
+ // ── File churn ──────────────────────────────────────────────────────────────
2860
+
2861
/**
 * Insert the churn record for a file, or overwrite every churn column of
 * the existing record when one already exists for `fileId`.
 * `collected_at` is stamped with the current time on each call.
 *
 * @param fileId        files.id the record belongs to (conflict key).
 * @param commitCount   Stored in `commit_count`.
 * @param lastCommitSha Stored in `last_commit_sha`.
 * @param lastCommitAt  Stored in `last_commit_at`.
 * @param topAuthor     Stored in `top_author`.
 * @param secondAuthor  Stored in `second_author`.
 */
upsertFileChurn(
  fileId: number, commitCount: number, lastCommitSha: string | null,
  lastCommitAt: number | null, topAuthor: string | null, secondAuthor: string | null,
): void {
  const collectedAt = Date.now();
  const upsert = this.db.prepare(`
    INSERT INTO file_churn (file_id, commit_count, last_commit_sha, last_commit_at, top_author, second_author, collected_at)
    VALUES (?, ?, ?, ?, ?, ?, ?)
    ON CONFLICT(file_id) DO UPDATE SET
    commit_count = excluded.commit_count,
    last_commit_sha = excluded.last_commit_sha,
    last_commit_at = excluded.last_commit_at,
    top_author = excluded.top_author,
    second_author = excluded.second_author,
    collected_at = excluded.collected_at
  `);
  upsert.run(fileId, commitCount, lastCommitSha, lastCommitAt, topAuthor, secondAuthor, collectedAt);
}
2877
+
2878
/**
 * Fetch the churn record of one file.
 *
 * @param filePath Compared against both `files.path` and `files.rel_path`.
 * @returns The churn record, or null when no row matches or the v4 tables
 *          are absent.
 */
getFileChurn(filePath: string): FileChurnRow | null {
  if (!this.hasV4Tables) return null;

  const lookup = this.db.prepare(`
    SELECT c.file_id AS fileId, f.path AS filePath,
    c.commit_count AS commitCount,
    c.last_commit_sha AS lastCommitSha,
    c.last_commit_at AS lastCommitAt,
    c.top_author AS topAuthor,
    c.second_author AS secondAuthor
    FROM file_churn c JOIN files f ON f.id = c.file_id
    WHERE f.path = ? OR f.rel_path = ?
  `);
  // The same value is bound twice: once per path column.
  const hit = lookup.get(filePath, filePath) as Row | undefined;
  if (hit === undefined || hit === null) return null;

  return {
    fileId: toNum(hit.fileId),
    filePath: toStr(hit.filePath),
    commitCount: toNum(hit.commitCount),
    lastCommitSha: toNullStr(hit.lastCommitSha),
    lastCommitAt: toNullNum(hit.lastCommitAt),
    topAuthor: toNullStr(hit.topAuthor),
    secondAuthor: toNullStr(hit.secondAuthor),
  };
}
2901
+
2902
/**
 * Churn records with the highest commit counts, most-churned first.
 *
 * @param limit Maximum rows returned; defaults to 20.
 * @returns Churn records, or an empty array when the v4 tables are absent.
 */
topChurnedFiles(limit = 20): FileChurnRow[] {
  if (!this.hasV4Tables) return [];

  const ranked = this.db.prepare(`
    SELECT c.file_id AS fileId, f.path AS filePath,
    c.commit_count AS commitCount,
    c.last_commit_sha AS lastCommitSha,
    c.last_commit_at AS lastCommitAt,
    c.top_author AS topAuthor,
    c.second_author AS secondAuthor
    FROM file_churn c JOIN files f ON f.id = c.file_id
    ORDER BY c.commit_count DESC
    LIMIT ?
  `).all(limit) as Row[];

  const churn: FileChurnRow[] = [];
  for (const entry of ranked) {
    churn.push({
      fileId: toNum(entry.fileId),
      filePath: toStr(entry.filePath),
      commitCount: toNum(entry.commitCount),
      lastCommitSha: toNullStr(entry.lastCommitSha),
      lastCommitAt: toNullNum(entry.lastCommitAt),
      topAuthor: toNullStr(entry.topAuthor),
      secondAuthor: toNullStr(entry.secondAuthor),
    });
  }
  return churn;
}
2925
+
2926
+ // ── Symbol history ──────────────────────────────────────────────────────────
2927
+
2928
/**
 * Record one commit in a symbol's history. Uses INSERT OR IGNORE, so a row
 * that conflicts with an existing one is silently skipped rather than
 * raising or overwriting.
 *
 * The thirteen arguments map one-to-one, in order, onto the
 * `symbol_history` columns named in the statement below.
 */
insertSymbolHistory(
  symbolId: number, symbolKey: string, commitSha: string,
  authorName: string | null, authorEmail: string | null,
  committedAt: number, message: string | null,
  linesAdded: number, linesRemoved: number,
  prNumber: number | null, prUrl: string | null,
  matchStrategy: string, confidence: number,
): void {
  const insert = this.db.prepare(`
    INSERT OR IGNORE INTO symbol_history
    (symbol_id, symbol_key, commit_sha, author_name, author_email, committed_at, message,
    lines_added, lines_removed, pr_number, pr_url, match_strategy, confidence)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
  `);
  insert.run(
    symbolId, symbolKey, commitSha,
    authorName, authorEmail,
    committedAt, message,
    linesAdded, linesRemoved,
    prNumber, prUrl,
    matchStrategy, confidence,
  );
}
2944
+
2945
/**
 * Commit history recorded for a symbol, newest commit first.
 *
 * @param symbolId      symbols.id whose history is requested.
 * @param options.limit Maximum rows; defaults to 50 and is raised to 1 when
 *                      a smaller value is passed.
 * @param options.since When given, only rows with `committed_at >= since`.
 * @returns History rows, or an empty array when the v4 tables are absent.
 */
getSymbolHistory(symbolId: number, options: { limit?: number; since?: number } = {}): SymbolHistoryRow[] {
  if (!this.hasV4Tables) return [];

  const { since } = options;
  const hasSince = since != null;
  const rowCap = Math.max(1, options.limit ?? 50);

  // Placeholder order: symbol id, optional lower time bound, row cap.
  const params: Array<string | number | null> = [symbolId];
  if (hasSince) params.push(since);
  params.push(rowCap);

  const sinceClause = hasSince ? 'AND committed_at >= ?' : '';
  const found = this.db.prepare(`
    SELECT id, symbol_id AS symbolId, symbol_key AS symbolKey, commit_sha AS commitSha,
    author_name AS authorName, author_email AS authorEmail,
    committed_at AS committedAt, message,
    lines_added AS linesAdded, lines_removed AS linesRemoved,
    pr_number AS prNumber, pr_url AS prUrl,
    match_strategy AS matchStrategy, confidence
    FROM symbol_history
    WHERE symbol_id = ? ${sinceClause}
    ORDER BY committed_at DESC
    LIMIT ?
  `).all(...params) as Row[];

  return found.map(entry => ({
    id: toNum(entry.id),
    symbolId: toNum(entry.symbolId),
    symbolKey: toStr(entry.symbolKey),
    commitSha: toStr(entry.commitSha),
    authorName: toNullStr(entry.authorName),
    authorEmail: toNullStr(entry.authorEmail),
    committedAt: toNum(entry.committedAt),
    message: toNullStr(entry.message),
    linesAdded: toNum(entry.linesAdded),
    linesRemoved: toNum(entry.linesRemoved),
    prNumber: toNullNum(entry.prNumber),
    prUrl: toNullStr(entry.prUrl),
    matchStrategy: toStr(entry.matchStrategy),
    // Converted with Number() rather than toNum, exactly as the column is read elsewhere in this method's contract.
    confidence: Number(entry.confidence),
  }));
}
2982
+
2983
/**
 * Total number of history rows stored for a symbol — for
 * "showing N of M commits" headers. Returns 0 when the v4 tables are absent.
 */
countSymbolHistory(symbolId: number): number {
  if (!this.hasV4Tables) return 0;
  const counter = this.db.prepare('SELECT COUNT(*) AS c FROM symbol_history WHERE symbol_id = ?');
  const result = counter.get(symbolId) as Row;
  return toNum(result.c);
}
2989
+
2990
/**
 * Read the single `git_index_state` row (id = 1).
 *
 * @returns The stored state, or null when the row does not exist or the v4
 *          tables are absent.
 */
getGitIndexState(): {
  repoRoot: string;
  lastHeadSha: string | null;
  lastProcessedAt: number;
  remoteUrl: string | null;
  algorithmVersion: number;
  lastHistoryHeadSha: string | null;
  lastHistoryAt: number | null;
} | null {
  if (!this.hasV4Tables) return null;

  const query = this.db.prepare(
    `SELECT repo_root AS repoRoot, last_head_sha AS lastHeadSha,
    last_processed_at AS lastProcessedAt, remote_url AS remoteUrl,
    algorithm_version AS algorithmVersion,
    last_history_head_sha AS lastHistoryHeadSha,
    last_history_at AS lastHistoryAt
    FROM git_index_state WHERE id = 1`
  );
  const state = query.get() as Row | undefined;
  if (!state) return null;

  // NULL in last_history_at is preserved as null instead of being coerced to a number.
  let lastHistoryAt: number | null = null;
  if (state.lastHistoryAt != null) {
    lastHistoryAt = toNum(state.lastHistoryAt);
  }

  return {
    repoRoot: toStr(state.repoRoot),
    lastHeadSha: toNullStr(state.lastHeadSha),
    lastProcessedAt: toNum(state.lastProcessedAt),
    remoteUrl: toNullStr(state.remoteUrl),
    algorithmVersion: toNum(state.algorithmVersion),
    lastHistoryHeadSha: toNullStr(state.lastHistoryHeadSha),
    lastHistoryAt,
  };
}
3019
+
3020
/**
 * Generic "the indexer has seen this HEAD" stamp — used by churn and any
 * other read-only git pass. Upserts the single `git_index_state` row
 * (id = 1) and sets `last_processed_at` to the current time. The
 * history-specific columns are not part of this statement; symbol-history
 * stamps them through setHistoryHeadSha() so the two passes can't mask
 * each other.
 *
 * @param repoRoot         Stored in `repo_root`.
 * @param lastHeadSha      Stored in `last_head_sha`.
 * @param remoteUrl        Stored in `remote_url`.
 * @param algorithmVersion Stored in `algorithm_version`; defaults to 1.
 */
setGitIndexState(
  repoRoot: string, lastHeadSha: string | null, remoteUrl: string | null, algorithmVersion = 1,
): void {
  const processedAt = Date.now();
  const upsert = this.db.prepare(`
    INSERT INTO git_index_state (id, repo_root, last_head_sha, last_processed_at, remote_url, algorithm_version)
    VALUES (1, ?, ?, ?, ?, ?)
    ON CONFLICT(id) DO UPDATE SET
    repo_root = excluded.repo_root,
    last_head_sha = excluded.last_head_sha,
    last_processed_at = excluded.last_processed_at,
    remote_url = excluded.remote_url,
    algorithm_version = excluded.algorithm_version
  `);
  upsert.run(repoRoot, lastHeadSha, processedAt, remoteUrl, algorithmVersion);
}
3040
+
3041
/**
 * Stamp the HEAD that symbol-history was last built against. Independent of
 * setGitIndexState() so running file-level churn never makes a subsequent
 * buildSymbolHistory() skip.
 *
 * Upsert semantics on the single `git_index_state` row (id = 1):
 *  - no row yet: a fresh row is inserted with algorithm_version 1;
 *  - row exists: only repo_root, remote_url and the two history columns are
 *    updated. A NULL `remoteUrl` keeps the previously stored remote_url.
 *
 * @param repoRoot           Stored in `repo_root`.
 * @param lastHistoryHeadSha Stored in `last_history_head_sha`.
 * @param remoteUrl          Stored in `remote_url` unless NULL on update.
 */
setHistoryHeadSha(repoRoot: string, lastHistoryHeadSha: string | null, remoteUrl: string | null): void {
  // Capture the clock once so last_processed_at (insert path) and
  // last_history_at always carry the same timestamp for this stamp.
  const now = Date.now();
  this.db.prepare(`
    INSERT INTO git_index_state
    (id, repo_root, last_processed_at, remote_url, algorithm_version,
    last_history_head_sha, last_history_at)
    VALUES (1, ?, ?, ?, 1, ?, ?)
    ON CONFLICT(id) DO UPDATE SET
    repo_root = excluded.repo_root,
    remote_url = COALESCE(excluded.remote_url, git_index_state.remote_url),
    last_history_head_sha = excluded.last_history_head_sha,
    last_history_at = excluded.last_history_at
  `).run(repoRoot, now, remoteUrl, lastHistoryHeadSha, now);
}
3062
+
3063
/**
 * All symbols whose `symbol_key` equals the given key, highest pagerank
 * first — used by `seer_history` to find the current id for a key that came
 * from the indexed graph.
 */
findSymbolsByKey(symbolKey: string): SymbolRow[] {
  const columns = symbolSelectCols(this.hasComplexityColumns, this.hasSymbolRoleColumn);
  const query = this.db.prepare(`
    SELECT ${columns}
    FROM symbols s JOIN files f ON f.id = s.file_id
    WHERE s.symbol_key = ?
    ORDER BY s.pagerank DESC
  `);
  const matches = query.all(symbolKey) as Row[];
  return matches.map(toSymbolRow);
}
3074
+
3075
/**
 * Every function / method / constructor / class symbol that has a
 * `symbol_key`, with its file paths and line range — used by the
 * symbol-history indexer to map historical line ranges to current ids.
 */
listSymbolsForHistoryIndex(): Array<{ id: number; fileId: number; filePath: string; relPath: string; lineStart: number; lineEnd: number; symbolKey: string }> {
  const candidates = this.db.prepare(`
    SELECT s.id, s.file_id AS fileId, f.path AS filePath, f.rel_path AS relPath,
    s.line_start AS lineStart, s.line_end AS lineEnd, s.symbol_key AS symbolKey
    FROM symbols s JOIN files f ON f.id = s.file_id
    WHERE s.symbol_key IS NOT NULL
    AND s.kind IN ('function','method','constructor','class')
  `).all() as Row[];

  return candidates.map(sym => ({
    id: toNum(sym.id),
    fileId: toNum(sym.fileId),
    filePath: toStr(sym.filePath),
    relPath: toStr(sym.relPath),
    lineStart: toNum(sym.lineStart),
    lineEnd: toNum(sym.lineEnd),
    symbolKey: toStr(sym.symbolKey),
  }));
}
3092
+
3093
+ // ── PageRank helpers ────────────────────────────────────────────────────────
3094
+
3095
/**
 * Resolved 'call' edges whose two endpoints are both rankable symbols
 * (`is_rankable = 1`), as `{ from, to }` symbol-id pairs.
 */
getAllEdges(): Array<{ from: number; to: number }> {
  // `from` / `to` are SQL keywords, hence the backtick-quoted aliases.
  const query = this.db.prepare(`
    SELECT e.from_id AS \`from\`, e.to_id AS \`to\`
    FROM edges e
    JOIN symbols sf ON sf.id = e.from_id AND sf.is_rankable = 1
    JOIN symbols st ON st.id = e.to_id AND st.is_rankable = 1
    WHERE e.to_id IS NOT NULL
    AND e.kind = 'call'
  `);
  const edgeRows = query.all() as Row[];

  const edges: Array<{ from: number; to: number }> = [];
  for (const edge of edgeRows) {
    edges.push({ from: toNum(edge.from), to: toNum(edge.to) });
  }
  return edges;
}
3107
+
3108
/** Ids of every symbol with `is_rankable = 1`. */
getAllSymbolIds(): number[] {
  const query = this.db.prepare('SELECT id FROM symbols WHERE is_rankable = 1');
  const rankable = query.all() as Row[];
  const ids: number[] = [];
  for (const sym of rankable) ids.push(toNum(sym.id));
  return ids;
}
3114
+
3115
/**
 * Persist a batch of pagerank scores in one transaction.
 *
 * Non-rankable symbols that still hold a non-zero pagerank are reset to 0
 * first, then every (id, rank) entry is written. Any failure rolls the
 * transaction back and is rethrown.
 *
 * @param ranks Map from symbols.id to its new pagerank value.
 */
updatePageRanks(ranks: Map<number, number>): void {
  const writeRank = this.db.prepare('UPDATE symbols SET pagerank = ? WHERE id = ?');
  this.db.exec('BEGIN');
  try {
    const resetStale = this.db.prepare(
      'UPDATE symbols SET pagerank = 0 WHERE is_rankable = 0 AND pagerank != 0',
    );
    resetStale.run();
    ranks.forEach((score, symbolId) => {
      writeRank.run(score, symbolId);
    });
    this.db.exec('COMMIT');
  } catch (err) {
    this.db.exec('ROLLBACK');
    throw err;
  }
}
3129
+
3130
+ // ── Graph traversal ─────────────────────────────────────────────────────────
3131
+
3132
/**
 * Bounded breadth-first search over the call graph. Returns one shortest
 * path from `fromId` to `toId` (by edge count), or null if none is found.
 *
 * The search expands at most `maxDepth` hops and gives up (returning null)
 * once more than `maxNodes` nodes have been dequeued — without those caps a
 * large or cyclic graph would explode.
 *
 * @param fromId   Symbol id the path starts at.
 * @param toId     Symbol id the path must reach.
 * @param maxDepth Maximum number of edges in the returned path.
 * @param maxNodes Maximum number of nodes dequeued before aborting.
 * @returns Path nodes from `fromId` to `toId` inclusive. When `fromId ===
 *          toId` the result is the single symbol, or null if it does not
 *          exist. A path node whose symbol row cannot be loaded is returned
 *          with empty name/kind/filePath.
 */
tracePath(fromId: number, toId: number, maxDepth = 6, maxNodes = 20_000): Array<{ id: number; name: string; qualifiedName: string | null; kind: string; filePath: string }> | null {
  if (fromId === toId) {
    const row = this.getSymbolById(fromId);
    return row ? [{ id: row.id, name: row.name, qualifiedName: row.qualifiedName, kind: row.kind, filePath: row.filePath }] : null;
  }
  const adjStmt = this.db.prepare("SELECT DISTINCT to_id FROM edges WHERE from_id = ? AND to_id IS NOT NULL AND kind = 'call'");
  // parent[x] = node x was first discovered from; -1 marks the BFS root.
  const parent = new Map<number, number>();
  parent.set(fromId, -1);
  const queue: Array<{ id: number; depth: number }> = [{ id: fromId, depth: 0 }];
  let visited = 0;
  // A head cursor replaces queue.shift(): shift() re-indexes the array on
  // every call, which made the traversal quadratic in the number of nodes.
  for (let head = 0; head < queue.length; head++) {
    const { id, depth } = queue[head]!;
    visited++;
    if (visited > maxNodes) return null;
    if (depth >= maxDepth) continue;
    const rows = adjStmt.all(id) as Row[];
    for (const r of rows) {
      const next = toNum(r.to_id);
      if (parent.has(next)) continue;
      parent.set(next, id);
      if (next === toId) {
        // Walk the parent chain back to the root, then flip it.
        const path: number[] = [];
        let cur: number = next;
        while (cur !== -1) { path.push(cur); cur = parent.get(cur)!; }
        path.reverse();
        return path.map(pid => {
          const s = this.getSymbolById(pid);
          return s ? { id: s.id, name: s.name, qualifiedName: s.qualifiedName, kind: s.kind, filePath: s.filePath }
            : { id: pid, name: '', qualifiedName: null, kind: '', filePath: '' };
        });
      }
      queue.push({ id: next, depth: depth + 1 });
    }
  }
  return null;
}
3175
+
3176
/**
 * Reverse BFS from a symbol — "everything that transitively calls X".
 *
 * Follows 'call' edges backwards for at most `maxDepth` hops. The node cap
 * is checked each time a node is dequeued, so the result may exceed
 * `maxNodes` by the callers discovered during the last expansion.
 *
 * @param toId     Symbol id whose callers are collected.
 * @param maxDepth Maximum number of hops walked backwards.
 * @param maxNodes Stop expanding once more than this many ids are known.
 * @returns Ids of the discovered callers, excluding `toId` itself.
 */
reverseReachable(toId: number, maxDepth = 4, maxNodes = 20_000): number[] {
  const stmt = this.db.prepare("SELECT DISTINCT from_id FROM edges WHERE to_id = ? AND kind = 'call'");
  const seen = new Set<number>([toId]);
  const queue: Array<{ id: number; depth: number }> = [{ id: toId, depth: 0 }];
  // Head cursor instead of queue.shift(): shift() re-indexes the array on
  // every call, turning the traversal quadratic for large frontiers.
  for (let head = 0; head < queue.length; head++) {
    const { id, depth } = queue[head]!;
    if (seen.size > maxNodes) break;
    if (depth >= maxDepth) continue;
    const rows = stmt.all(id) as Row[];
    for (const r of rows) {
      const next = toNum(r.from_id);
      if (seen.has(next)) continue;
      seen.add(next);
      queue.push({ id: next, depth: depth + 1 });
    }
  }
  seen.delete(toId);
  return Array.from(seen);
}
3196
+
3197
/**
 * Bounded reverse-reachable callers WITH depth, for risk/context callers.
 * Same termination semantics as reverseReachable() but also returns the
 * depth at which each id was first discovered (direct callers = 1).
 *
 * @param toId     Symbol id whose callers are collected.
 * @param maxDepth Maximum number of hops walked backwards.
 * @param maxNodes Stop expanding once more than this many ids are known
 *                 (checked per dequeued node, so a small overshoot is possible).
 * @returns One `{ id, depth }` entry per discovered caller, excluding `toId`.
 */
reverseReachableWithDepth(
  toId: number,
  maxDepth = 4,
  maxNodes = 20_000,
): Array<{ id: number; depth: number }> {
  const stmt = this.db.prepare(
    "SELECT DISTINCT from_id FROM edges WHERE to_id = ? AND kind = 'call'",
  );
  // id -> depth of first discovery; the start node sits at depth 0.
  const seen = new Map<number, number>([[toId, 0]]);
  const queue: Array<{ id: number; depth: number }> = [{ id: toId, depth: 0 }];
  // Head cursor instead of queue.shift(): shift() re-indexes the array on
  // every call, turning the traversal quadratic for large frontiers.
  for (let head = 0; head < queue.length; head++) {
    const { id, depth } = queue[head]!;
    if (seen.size > maxNodes) break;
    if (depth >= maxDepth) continue;
    const rows = stmt.all(id) as Row[];
    for (const r of rows) {
      const next = toNum(r.from_id);
      if (seen.has(next)) continue;
      seen.set(next, depth + 1);
      queue.push({ id: next, depth: depth + 1 });
    }
  }
  seen.delete(toId);
  return Array.from(seen.entries()).map(([id, depth]) => ({ id, depth }));
}
3227
+
3228
/**
 * Bounded forward-reachable callees with depth — for callee blast-radius
 * questions and behavioral indirect-coverage. Mirror of
 * reverseReachableWithDepth(): follows resolved 'call' edges forwards.
 *
 * @param fromId   Symbol id whose callees are collected.
 * @param maxDepth Maximum number of hops walked forwards.
 * @param maxNodes Stop expanding once more than this many ids are known
 *                 (checked per dequeued node, so a small overshoot is possible).
 * @returns One `{ id, depth }` entry per discovered callee (direct callees
 *          = 1), excluding `fromId`.
 */
forwardReachableWithDepth(
  fromId: number,
  maxDepth = 4,
  maxNodes = 20_000,
): Array<{ id: number; depth: number }> {
  const stmt = this.db.prepare(
    "SELECT DISTINCT to_id FROM edges WHERE from_id = ? AND to_id IS NOT NULL AND kind = 'call'",
  );
  // id -> depth of first discovery; the start node sits at depth 0.
  const seen = new Map<number, number>([[fromId, 0]]);
  const queue: Array<{ id: number; depth: number }> = [{ id: fromId, depth: 0 }];
  // Head cursor instead of queue.shift(): shift() re-indexes the array on
  // every call, turning the traversal quadratic for large frontiers.
  for (let head = 0; head < queue.length; head++) {
    const { id, depth } = queue[head]!;
    if (seen.size > maxNodes) break;
    if (depth >= maxDepth) continue;
    const rows = stmt.all(id) as Row[];
    for (const r of rows) {
      const next = toNum(r.to_id);
      if (seen.has(next)) continue;
      seen.set(next, depth + 1);
      queue.push({ id: next, depth: depth + 1 });
    }
  }
  seen.delete(fromId);
  return Array.from(seen.entries()).map(([id, depth]) => ({ id, depth }));
}
3258
+
3259
/**
 * Bounded BFS over the file-import graph. Used by
 * seer_trace_file_dependencies — returns each reachable file with the BFS
 * depth at which it was first seen.
 *
 * @param fileId   files.id the traversal starts from.
 * @param maxDepth Maximum number of import hops followed.
 * @param maxNodes Stop expanding once more than this many files are known
 *                 (checked per dequeued node, so a small overshoot is possible).
 * @returns One entry per reachable file, excluding `fileId`, in discovery
 *          order. `relPath` / `language` are empty strings when the file
 *          row could not be loaded.
 */
fileImportClosure(
  fileId: number,
  maxDepth = 4,
  maxNodes = 5_000,
): Array<{ id: number; depth: number; relPath: string; language: string }> {
  const stmt = this.db.prepare(
    'SELECT DISTINCT resolved_file_id FROM file_imports WHERE from_file_id = ? AND resolved_file_id IS NOT NULL',
  );
  // id -> depth of first discovery; the start file sits at depth 0.
  const seen = new Map<number, number>([[fileId, 0]]);
  const queue: Array<{ id: number; depth: number }> = [{ id: fileId, depth: 0 }];
  // Head cursor instead of queue.shift(): shift() re-indexes the array on
  // every call, turning the traversal quadratic for large frontiers.
  for (let head = 0; head < queue.length; head++) {
    const { id, depth } = queue[head]!;
    if (seen.size > maxNodes) break;
    if (depth >= maxDepth) continue;
    const rows = stmt.all(id) as Row[];
    for (const r of rows) {
      const next = toNum(r.resolved_file_id);
      if (seen.has(next)) continue;
      seen.set(next, depth + 1);
      queue.push({ id: next, depth: depth + 1 });
    }
  }
  seen.delete(fileId);
  if (seen.size === 0) return [];

  const ids = Array.from(seen.keys());
  const meta = new Map<number, { relPath: string; language: string }>();
  // Load file metadata in batches: one IN (...) list holding every id can
  // exceed SQLite's bound-parameter limit when the closure is large.
  const batchSize = 500;
  for (let start = 0; start < ids.length; start += batchSize) {
    const batch = ids.slice(start, start + batchSize);
    const placeholders = batch.map(() => '?').join(',');
    const rows = this.db.prepare(
      `SELECT id, rel_path AS relPath, language FROM files WHERE id IN (${placeholders})`,
    ).all(...batch) as Row[];
    for (const r of rows) {
      meta.set(toNum(r.id), { relPath: toStr(r.relPath), language: toStr(r.language) });
    }
  }

  return ids.map(id => {
    const m = meta.get(id);
    return {
      id,
      depth: seen.get(id)!,
      relPath: m?.relPath ?? '',
      language: m?.language ?? '',
    };
  });
}
3307
+
3308
+ // ── Track-E: file/module aggregate graph helpers ────────────────────────────
3309
+
3310
/**
 * Cross-file call edges aggregated per (caller file, callee file) pair.
 * Only resolved 'call' edges whose endpoints live in different files are
 * counted; `weight` is the number of such edges. Used by the Louvain
 * clusterer.
 */
fileCallEdgeWeights(): Array<{ from: number; to: number; weight: number }> {
  const query = this.db.prepare(`
    SELECT sf.file_id AS fromFile, st.file_id AS toFile, COUNT(*) AS w
    FROM edges e
    JOIN symbols sf ON sf.id = e.from_id
    JOIN symbols st ON st.id = e.to_id
    WHERE e.kind = 'call' AND e.to_id IS NOT NULL
    AND sf.file_id <> st.file_id
    GROUP BY sf.file_id, st.file_id
  `);
  const pairs = query.all() as Row[];

  const weights: Array<{ from: number; to: number; weight: number }> = [];
  for (const pair of pairs) {
    weights.push({
      from: toNum(pair.fromFile),
      to: toNum(pair.toFile),
      weight: toNum(pair.w),
    });
  }
  return weights;
}
3330
+
3331
/**
 * Resolved cross-file import edges aggregated per (importing file,
 * imported file) pair; self-imports are excluded and `weight` is the number
 * of import rows in the pair.
 */
fileImportEdgeWeights(): Array<{ from: number; to: number; weight: number }> {
  const query = this.db.prepare(`
    SELECT from_file_id AS fromFile, resolved_file_id AS toFile, COUNT(*) AS w
    FROM file_imports
    WHERE resolved_file_id IS NOT NULL
    AND from_file_id <> resolved_file_id
    GROUP BY from_file_id, resolved_file_id
  `);
  const pairs = query.all() as Row[];

  const weights: Array<{ from: number; to: number; weight: number }> = [];
  for (const pair of pairs) {
    weights.push({
      from: toNum(pair.fromFile),
      to: toNum(pair.toFile),
      weight: toNum(pair.w),
    });
  }
  return weights;
}
3346
+
3347
/**
 * Synthesized test → production edges, file-aggregated: resolved edges of
 * kind 'tests' whose endpoints live in different files, counted per
 * (from file, to file) pair.
 */
fileTestEdgeWeights(): Array<{ from: number; to: number; weight: number }> {
  const query = this.db.prepare(`
    SELECT sf.file_id AS fromFile, st.file_id AS toFile, COUNT(*) AS w
    FROM edges e
    JOIN symbols sf ON sf.id = e.from_id
    JOIN symbols st ON st.id = e.to_id
    WHERE e.kind = 'tests' AND e.to_id IS NOT NULL
    AND sf.file_id <> st.file_id
    GROUP BY sf.file_id, st.file_id
  `);
  const pairs = query.all() as Row[];

  const weights: Array<{ from: number; to: number; weight: number }> = [];
  for (const pair of pairs) {
    weights.push({
      from: toNum(pair.fromFile),
      to: toNum(pair.toFile),
      weight: toNum(pair.w),
    });
  }
  return weights;
}
3364
+
3365
/**
 * v8 Track-G — service-link file-aggregated edges. Each link contributes one
 * cross-file edge from the call-site file (service_calls.file_id) to the
 * handler symbol's file; links without a resolvable handler file, or whose
 * two files coincide, are dropped. Used by the module clusterer to surface
 * client→handler dependencies as architecturally important.
 *
 * @returns Aggregated edges, or an empty array when the query fails.
 */
fileServiceLinkEdgeWeights(): Array<{ from: number; to: number; weight: number }> {
  try {
    const query = this.db.prepare(`
      SELECT sc.file_id AS fromFile, hs.file_id AS toFile, COUNT(*) AS w
      FROM service_links sl
      JOIN service_calls sc ON sc.id = sl.call_id
      LEFT JOIN symbols hs ON hs.id = sl.handler_symbol_id
      WHERE hs.file_id IS NOT NULL
      AND sc.file_id <> hs.file_id
      GROUP BY sc.file_id, hs.file_id
    `);
    const pairs = query.all() as Row[];

    const weights: Array<{ from: number; to: number; weight: number }> = [];
    for (const pair of pairs) {
      weights.push({
        from: toNum(pair.fromFile),
        to: toNum(pair.toFile),
        weight: toNum(pair.w),
      });
    }
    return weights;
  } catch {
    return [];
  }
}
3389
+
3390
/** Id, relative path, language and role of every `files` row — feeds the clusterer. */
listFileSummaries(): Array<{ id: number; relPath: string; language: string; role: string }> {
  const query = this.db.prepare('SELECT id, rel_path AS relPath, language, role FROM files');
  const fileRows = query.all() as Row[];

  const summaries: Array<{ id: number; relPath: string; language: string; role: string }> = [];
  for (const file of fileRows) {
    summaries.push({
      id: toNum(file.id),
      relPath: toStr(file.relPath),
      language: toStr(file.language),
      role: toStr(file.role),
    });
  }
  return summaries;
}
3400
+
3401
+ // ── Track-E: modules persistence ────────────────────────────────────────────
3402
+
3403
/**
 * Replace the contents of modules / module_members / module_edges with the
 * provided clustering. Atomic — everything runs in one transaction, so a
 * partial write can't leave inconsistent membership; any failure rolls back
 * and is rethrown. A no-op when the module tables are absent.
 *
 * @param modules   Modules to store; each one's `fileIds` become its members.
 * @param edges     Inter-module edges whose `fromIndex` / `toIndex` refer to
 *                  positions in `modules`. Edges pointing outside that array
 *                  are skipped.
 * @param algorithm Label stored with every module row; defaults to 'louvain'.
 */
replaceModules(
  modules: Array<{
    label: string;
    sizeFiles: number;
    sizeSymbols: number;
    primaryLanguage: string | null;
    cohesion: number;
    centrality: number;
    fileIds: number[];
  }>,
  edges: Array<{ fromIndex: number; toIndex: number; kind: string; weight: number }>,
  algorithm = 'louvain',
): void {
  if (!this.hasModuleTables) return;

  this.db.exec('BEGIN');
  try {
    // Wipe the previous clustering: edges and members first, then modules.
    for (const table of ['module_edges', 'module_members', 'modules']) {
      this.db.exec(`DELETE FROM ${table}`);
    }

    const insModule = this.db.prepare(`
      INSERT INTO modules (label, size_files, size_symbols, primary_language, cohesion, centrality, computed_at, algorithm)
      VALUES (?, ?, ?, ?, ?, ?, ?, ?)
    `);
    const insMember = this.db.prepare(
      'INSERT INTO module_members (file_id, module_id) VALUES (?, ?)',
    );
    const insEdge = this.db.prepare(
      'INSERT OR REPLACE INTO module_edges (from_module_id, to_module_id, kind, weight) VALUES (?, ?, ?, ?)',
    );

    const computedAt = Date.now();
    // Position in `modules` -> database id of the inserted module row.
    const indexToId: number[] = modules.map(mod => {
      const inserted = insModule.run(
        mod.label, mod.sizeFiles, mod.sizeSymbols, mod.primaryLanguage,
        mod.cohesion, mod.centrality, computedAt, algorithm,
      );
      const moduleId = toNum(inserted.lastInsertRowid);
      mod.fileIds.forEach(fileId => {
        insMember.run(fileId, moduleId);
      });
      return moduleId;
    });

    edges.forEach(edge => {
      const fromModule = indexToId[edge.fromIndex];
      const toModule = indexToId[edge.toIndex];
      // Skip edges whose endpoints do not map to an inserted module.
      if (fromModule != null && toModule != null) {
        insEdge.run(fromModule, toModule, edge.kind, edge.weight);
      }
    });

    this.db.exec('COMMIT');
  } catch (err) {
    this.db.exec('ROLLBACK');
    throw err;
  }
}
3460
+
3461
/**
 * True when the module tables exist and `modules` holds at least one row.
 * Missing tables or a failing count query both report `false`.
 */
hasModulesData(): boolean {
  // countModules() runs the same COUNT(*) query and already maps
  // "tables missing" and "query failed" to 0.
  return this.countModules() > 0;
}
3468
+
3469
/**
 * Number of rows in `modules`. Returns 0 when the module tables are not
 * available or when the count query throws.
 */
countModules(): number {
  if (!this.hasModuleTables) return 0;
  let total = 0;
  try {
    const row = this.db.prepare('SELECT COUNT(*) AS c FROM modules').get() as Row;
    total = toNum(row.c);
  } catch {
    total = 0;
  }
  return total;
}
3475
+
3476
/**
 * Lists persisted modules.
 *
 * @param options.limit  Maximum rows to return (default 100).
 * @param options.sortBy Ordering: 'centrality' (default; centrality then
 *                       file count, both descending), 'size' (file count
 *                       then symbol count, descending) or 'label'
 *                       (ascending).
 * @returns Module rows, or an empty array when the module tables are
 *          missing or the query throws.
 */
listModules(options: { limit?: number; sortBy?: 'centrality' | 'size' | 'label' } = {}): Array<{
  id: number; label: string; sizeFiles: number; sizeSymbols: number;
  primaryLanguage: string | null; cohesion: number; centrality: number;
}> {
  if (!this.hasModuleTables) return [];
  const maxRows = options.limit ?? 100;
  // ORDER BY is chosen from a fixed set of literals, never from raw input.
  let orderClause: string;
  switch (options.sortBy ?? 'centrality') {
    case 'label':
      orderClause = 'label ASC';
      break;
    case 'size':
      orderClause = 'size_files DESC, size_symbols DESC';
      break;
    default:
      orderClause = 'centrality DESC, size_files DESC';
  }
  try {
    const stmt = this.db.prepare(`
      SELECT id, label, size_files AS sizeFiles, size_symbols AS sizeSymbols,
      primary_language AS primaryLanguage, cohesion, centrality
      FROM modules
      ORDER BY ${orderClause}
      LIMIT ?
    `);
    const found = stmt.all(maxRows) as Row[];
    return found.map(row => {
      return {
        id: toNum(row.id),
        label: toStr(row.label),
        sizeFiles: toNum(row.sizeFiles),
        sizeSymbols: toNum(row.sizeSymbols),
        primaryLanguage: toNullStr(row.primaryLanguage),
        cohesion: Number(row.cohesion),
        centrality: Number(row.centrality),
      };
    });
  } catch {
    return [];
  }
}
3506
+
3507
/**
 * Looks up a single module by primary key.
 *
 * @returns The module row, or `null` when the module tables are missing,
 *          no row has that id, or the query throws.
 */
getModuleById(id: number): {
  id: number; label: string; sizeFiles: number; sizeSymbols: number;
  primaryLanguage: string | null; cohesion: number; centrality: number;
} | null {
  if (!this.hasModuleTables) return null;
  try {
    const stmt = this.db.prepare(`
      SELECT id, label, size_files AS sizeFiles, size_symbols AS sizeSymbols,
      primary_language AS primaryLanguage, cohesion, centrality
      FROM modules WHERE id = ?
    `);
    const hit = stmt.get(id) as Row | undefined;
    return hit
      ? {
          id: toNum(hit.id),
          label: toStr(hit.label),
          sizeFiles: toNum(hit.sizeFiles),
          sizeSymbols: toNum(hit.sizeSymbols),
          primaryLanguage: toNullStr(hit.primaryLanguage),
          cohesion: Number(hit.cohesion),
          centrality: Number(hit.centrality),
        }
      : null;
  } catch {
    return null;
  }
}
3530
+
3531
/**
 * Looks up a module by exact label match. The original note says this
 * backs CLI/MCP lookups by name.
 *
 * @returns The first row the query yields for that label, or `null` when
 *          the module tables are missing, nothing matches, or the query
 *          throws.
 */
getModuleByLabel(label: string): {
  id: number; label: string; sizeFiles: number; sizeSymbols: number;
  primaryLanguage: string | null; cohesion: number; centrality: number;
} | null {
  if (!this.hasModuleTables) return null;
  try {
    const stmt = this.db.prepare(`
      SELECT id, label, size_files AS sizeFiles, size_symbols AS sizeSymbols,
      primary_language AS primaryLanguage, cohesion, centrality
      FROM modules WHERE label = ?
    `);
    const hit = stmt.get(label) as Row | undefined;
    return hit
      ? {
          id: toNum(hit.id),
          label: toStr(hit.label),
          sizeFiles: toNum(hit.sizeFiles),
          sizeSymbols: toNum(hit.sizeSymbols),
          primaryLanguage: toNullStr(hit.primaryLanguage),
          cohesion: Number(hit.cohesion),
          centrality: Number(hit.centrality),
        }
      : null;
  } catch {
    return null;
  }
}
3555
+
3556
/**
 * Files belonging to a module, ordered by relative path.
 *
 * @param moduleId Module whose members to list.
 * @param limit    Maximum rows to return (default 1000).
 * @returns Member files; empty when the module tables are missing, the
 *          module has no members / does not exist, or the query throws.
 */
listModuleMembers(moduleId: number, limit = 1000): Array<{
  fileId: number; path: string; relPath: string; language: string; role: string;
}> {
  if (!this.hasModuleTables) return [];
  try {
    const stmt = this.db.prepare(`
      SELECT f.id AS fileId, f.path, f.rel_path AS relPath, f.language, f.role
      FROM module_members mm
      JOIN files f ON f.id = mm.file_id
      WHERE mm.module_id = ?
      ORDER BY f.rel_path
      LIMIT ?
    `);
    const members = stmt.all(moduleId, limit) as Row[];
    return members.map(row => {
      return {
        fileId: toNum(row.fileId),
        path: toStr(row.path),
        relPath: toStr(row.relPath),
        language: toStr(row.language),
        role: toStr(row.role),
      };
    });
  } catch {
    return [];
  }
}
3579
+
3580
/**
 * Highest-PageRank rankable symbols defined in a module's member files.
 *
 * @param moduleId Module to inspect.
 * @param limit    Maximum symbols to return (default 20).
 * @returns Symbols ordered by `pagerank` descending; empty when the module
 *          tables are missing or the query throws.
 */
listModuleTopSymbols(moduleId: number, limit = 20): SymbolRow[] {
  if (!this.hasModuleTables) return [];
  try {
    // Column list adapts to whichever optional columns this DB has.
    const columns = symbolSelectCols(this.hasComplexityColumns, this.hasSymbolRoleColumn);
    const stmt = this.db.prepare(`
      SELECT ${columns}
      FROM symbols s
      JOIN files f ON f.id = s.file_id
      JOIN module_members mm ON mm.file_id = s.file_id
      WHERE mm.module_id = ? AND s.is_rankable = 1
      ORDER BY s.pagerank DESC
      LIMIT ?
    `);
    const found = stmt.all(moduleId, limit) as Row[];
    return found.map(row => toSymbolRow(row));
  } catch {
    return [];
  }
}
3596
+
3597
/**
 * Resolves the module a file belongs to.
 *
 * @returns `{ id, label }` of the module, or `null` when the module tables
 *          are missing, the file has no membership row, or the query throws.
 */
moduleForFile(fileId: number): { id: number; label: string } | null {
  if (!this.hasModuleTables) return null;
  try {
    const stmt = this.db.prepare(`
      SELECT m.id, m.label
      FROM module_members mm JOIN modules m ON m.id = mm.module_id
      WHERE mm.file_id = ?
    `);
    const owner = stmt.get(fileId) as Row | undefined;
    return owner ? { id: toNum(owner.id), label: toStr(owner.label) } : null;
  } catch {
    return null;
  }
}
3610
+
3611
/**
 * Cross-module dependency edges touching one module.
 *
 * @param moduleId          Module at the fixed end of the edge.
 * @param options.direction 'out' (default) lists modules this one depends
 *                          on (`from_module_id = moduleId`); 'in' lists
 *                          modules that depend on it
 *                          (`to_module_id = moduleId`).
 * @param options.limit     Maximum rows to return (default 100).
 * @returns One entry per `module_edges` row — edge kinds are not merged,
 *          so the same peer module can appear once per kind — ordered by
 *          weight descending. Empty when the module tables are missing or
 *          the query throws.
 */
moduleDependencies(
  moduleId: number,
  options: { direction?: 'in' | 'out'; limit?: number } = {},
): Array<{
  moduleId: number; label: string; kind: string; weight: number;
}> {
  if (!this.hasModuleTables) return [];
  const maxRows = options.limit ?? 100;
  // Both column names come from this fixed pair, never from raw input.
  const [ownColumn, peerColumn] = (options.direction ?? 'out') === 'out'
    ? (['from_module_id', 'to_module_id'] as const)
    : (['to_module_id', 'from_module_id'] as const);
  try {
    const stmt = this.db.prepare(`
      SELECT m.id AS moduleId, m.label, me.kind, me.weight
      FROM module_edges me JOIN modules m ON m.id = me.${peerColumn}
      WHERE me.${ownColumn} = ?
      ORDER BY me.weight DESC
      LIMIT ?
    `);
    const found = stmt.all(moduleId, maxRows) as Row[];
    return found.map(row => {
      return {
        moduleId: toNum(row.moduleId),
        label: toStr(row.label),
        kind: toStr(row.kind),
        weight: toNum(row.weight),
      };
    });
  } catch {
    return [];
  }
}
3644
+
3645
+ // ── Track-E: behavioral / risk helpers ──────────────────────────────────────
3646
+
3647
/**
 * Raw `tests` edges pointing at one specific symbol id.
 *
 * Matching is on `edges.to_id`, not on the target's name, so symbols that
 * merely share a short name (`Alpha.run` / `Beta.run`) do not pick up each
 * other's tests. The original note ties the correctness of this to
 * `synthesizeTestEdges()` preserving the resolved `to_id` of the source
 * call edge.
 *
 * @param symbolId Target symbol.
 * @param limit    Maximum edges to return (default 200).
 * @returns Caller-side details (id, names, kind, file, line span) plus the
 *          edge line, ordered by caller file path then edge line.
 *          `assertionCount` is always 0 here; per the original note it is
 *          computed later in JS because it needs file contents. Empty when
 *          the v4 tables are missing or the query throws.
 */
directTestEdgesForId(symbolId: number, limit = 200): Array<{
  callerId: number; callerName: string; callerQualifiedName: string | null;
  callerKind: string; callerFile: string; callerLineStart: number; callerLineEnd: number;
  edgeLine: number; assertionCount: number;
}> {
  if (!this.hasV4Tables) return [];
  try {
    const stmt = this.db.prepare(`
      SELECT
      s.id AS callerId,
      s.name AS callerName,
      s.qualified_name AS callerQualifiedName,
      s.kind AS callerKind,
      f.path AS callerFile,
      s.line_start AS callerLineStart,
      s.line_end AS callerLineEnd,
      e.line AS edgeLine
      FROM edges e
      JOIN symbols s ON s.id = e.from_id
      JOIN files f ON f.id = s.file_id
      WHERE e.to_id = ? AND e.kind = 'tests'
      ORDER BY f.path, e.line
      LIMIT ?
    `);
    const found = stmt.all(symbolId, limit) as Row[];
    return found.map(row => {
      return {
        callerId: toNum(row.callerId),
        callerName: toStr(row.callerName),
        callerQualifiedName: toNullStr(row.callerQualifiedName),
        callerKind: toStr(row.callerKind),
        callerFile: toStr(row.callerFile),
        callerLineStart: toNum(row.callerLineStart),
        callerLineEnd: toNum(row.callerLineEnd),
        edgeLine: toNum(row.edgeLine),
        // Placeholder — filled in by the caller once file contents are read.
        assertionCount: 0,
      };
    });
  } catch {
    return [];
  }
}
3697
+
3698
/**
 * Lists the routes whose resolved handler is the given symbol. The
 * original note says seer_risk uses this for its "route exposure" signal;
 * callers that need a count can take the array length.
 *
 * @returns `{ method, path, framework }` per matching `routes` row (rows
 *          are not de-duplicated); empty when the v4 tables are missing or
 *          the query throws.
 */
routesForHandler(symbolId: number): Array<{ method: string; path: string; framework: string }> {
  if (!this.hasV4Tables) return [];
  try {
    const stmt = this.db.prepare(`
      SELECT method, path, framework
      FROM routes WHERE handler_id = ?
    `);
    const routes: Array<{ method: string; path: string; framework: string }> = [];
    for (const row of stmt.all(symbolId) as Row[]) {
      routes.push({
        method: toStr(row.method),
        path: toStr(row.path),
        framework: toStr(row.framework),
      });
    }
    return routes;
  } catch {
    return [];
  }
}
3714
+
3715
/**
 * Config keys recorded against a symbol, de-duplicated on the full
 * (key, source, line) triple and ordered by line.
 *
 * @returns Matching entries; empty when the v4 tables are missing or the
 *          query throws.
 */
configKeysForSymbol(symbolId: number): Array<{ key: string; source: string; line: number }> {
  if (!this.hasV4Tables) return [];
  try {
    const stmt = this.db.prepare(`
      SELECT DISTINCT key, source, line
      FROM config_keys WHERE symbol_id = ?
      ORDER BY line
    `);
    const keys: Array<{ key: string; source: string; line: number }> = [];
    for (const row of stmt.all(symbolId) as Row[]) {
      keys.push({
        key: toStr(row.key),
        source: toStr(row.source),
        line: toNum(row.line),
      });
    }
    return keys;
  } catch {
    return [];
  }
}
3729
+
3730
/**
 * Maps the resolved `call` edges leaving a symbol to the module of each
 * callee's file. The original note says seer_risk uses this for its
 * "module-boundary crossing" signal.
 *
 * Unresolved calls (`to_id` NULL) and callees whose file has no module
 * membership never appear, because the inner joins drop them.
 *
 * @returns Distinct `{ calleeId, moduleId }` pairs; empty when the module
 *          tables are missing or the query throws.
 */
calleeModulesOf(symbolId: number): Array<{ calleeId: number; moduleId: number }> {
  if (!this.hasModuleTables) return [];
  try {
    const stmt = this.db.prepare(`
      SELECT DISTINCT e.to_id AS calleeId, mm.module_id AS moduleId
      FROM edges e
      JOIN symbols s ON s.id = e.to_id
      JOIN module_members mm ON mm.file_id = s.file_id
      WHERE e.from_id = ? AND e.kind = 'call' AND e.to_id IS NOT NULL
    `);
    const pairs: Array<{ calleeId: number; moduleId: number }> = [];
    for (const row of stmt.all(symbolId) as Row[]) {
      pairs.push({
        calleeId: toNum(row.calleeId),
        moduleId: toNum(row.moduleId),
      });
    }
    return pairs;
  } catch {
    return [];
  }
}
3750
+
3751
/**
 * Symbols in one file whose line span overlaps at least one of the given
 * line ranges. The original note says `detect_changes` uses this to
 * compute the blast radius of a diff.
 *
 * @param fileId     File to search.
 * @param lineRanges Inclusive `[start, end]` pairs. A symbol matches a
 *                   range when `line_start <= end` and `line_end >= start`.
 * @returns Matching symbols ordered by `line_start`; empty when no ranges
 *          are supplied. Query errors are not caught here.
 */
symbolsTouchingLines(fileId: number, lineRanges: Array<[number, number]>): SymbolRow[] {
  if (lineRanges.length === 0) return [];
  // Build one overlap predicate per range, with its two bind values
  // (range end first, then range start) appended in the same pass.
  const predicates: string[] = [];
  const params: Array<string | number | null> = [fileId];
  for (const [start, end] of lineRanges) {
    predicates.push('(s.line_start <= ? AND s.line_end >= ?)');
    params.push(end, start);
  }
  const columns = symbolSelectCols(this.hasComplexityColumns, this.hasSymbolRoleColumn);
  const stmt = this.db.prepare(`
    SELECT ${columns}
    FROM symbols s JOIN files f ON f.id = s.file_id
    WHERE s.file_id = ? AND (${predicates.join(' OR ')})
    ORDER BY s.line_start
  `);
  const found = stmt.all(...params) as Row[];
  return found.map(row => toSymbolRow(row));
}
3771
+
3772
+ // ── Track-F: SCIP imports tracking ──────────────────────────────────────────
3773
+
3774
/**
 * Records a SCIP import, keyed on (path, sha256).
 *
 * When a row for the same path and hash already exists, its metadata
 * (import time, tool, project root, counts) is refreshed in place and its
 * id is returned, so re-importing identical content does not add rows.
 * Otherwise a new row is inserted.
 *
 * @returns The `scip_imports` row id, or 0 when the v7 columns are not
 *          available. Database errors are not caught here.
 */
recordScipImport(
  scipPath: string, sha256: string, tool: string | null,
  projectRoot: string | null, symbolCount: number, refCount: number,
): number {
  if (!this.hasV7Columns) return 0;
  const prior = this.db.prepare(
    'SELECT id FROM scip_imports WHERE path = ? AND sha256 = ?',
  ).get(scipPath, sha256) as Row | undefined;

  if (!prior) {
    const inserted = this.db.prepare(
      'INSERT INTO scip_imports (path, sha256, tool, project_root, imported_at, symbol_count, ref_count) VALUES (?, ?, ?, ?, ?, ?, ?)',
    ).run(scipPath, sha256, tool, projectRoot, Date.now(), symbolCount, refCount);
    return toNum(inserted.lastInsertRowid);
  }

  const priorId = toNum(prior.id);
  this.db.prepare(
    'UPDATE scip_imports SET imported_at = ?, tool = ?, project_root = ?, symbol_count = ?, ref_count = ? WHERE id = ?',
  ).run(Date.now(), tool, projectRoot, symbolCount, refCount, priorId);
  return priorId;
}
3798
+
3799
/**
 * Reports whether a `scip_imports` row exists for this exact path and
 * content hash. The original note says callers use this to skip
 * re-parsing on no-op CI re-runs.
 *
 * @returns `false` when the v7 columns are not available or no row
 *          matches. Database errors are not caught here.
 */
hasScipImport(scipPath: string, sha256: string): boolean {
  if (!this.hasV7Columns) return false;
  const stmt = this.db.prepare(
    'SELECT 1 FROM scip_imports WHERE path = ? AND sha256 = ?',
  );
  const hit = stmt.get(scipPath, sha256) as Row | undefined;
  return hit !== undefined && hit !== null;
}
3810
+
3811
/**
 * All recorded SCIP imports, most recently imported first. The original
 * note says this backs `seer_scip_imports` and the bundle manifest.
 *
 * @returns Import rows; empty when the v7 columns are not available.
 *          Database errors are not caught here.
 */
listScipImports(): Array<{
  id: number; path: string; sha256: string; tool: string | null;
  projectRoot: string | null; importedAt: number;
  symbolCount: number; refCount: number;
}> {
  if (!this.hasV7Columns) return [];
  const stmt = this.db.prepare(`
    SELECT id, path, sha256, tool, project_root AS projectRoot,
    imported_at AS importedAt, symbol_count AS symbolCount, ref_count AS refCount
    FROM scip_imports ORDER BY imported_at DESC
  `);
  const found = stmt.all() as Row[];
  return found.map(row => {
    return {
      id: toNum(row.id),
      path: toStr(row.path),
      sha256: toStr(row.sha256),
      tool: toNullStr(row.tool),
      projectRoot: toNullStr(row.projectRoot),
      importedAt: toNum(row.importedAt),
      symbolCount: toNum(row.symbolCount),
      refCount: toNum(row.refCount),
    };
  });
}
3830
+
3831
/**
 * Adds a SCIP-sourced symbol, merging into an existing row when possible.
 *
 * Lookup: a row in the same file, of the same kind, whose qualified name
 * equals the SCIP qualified name (or whose short name equals the SCIP
 * name) and whose line span overlaps the SCIP definition.
 *
 * - Match with provenance 'tree-sitter': the row is relabelled
 *   'scip-merge' and stamped with `scipImportId`, so a later per-layer
 *   wipe can demote it again. No duplicate row is created.
 * - Match with any other provenance: left untouched, so it keeps the
 *   import id that first claimed it.
 * - No match: a fresh row with provenance 'scip' is inserted, carrying
 *   `scipImportId`, role 'definition', a NULL shape hash, and the
 *   signature truncated to 240 characters.
 *
 * Without the v7 columns this falls back to a plain `insertSymbol`.
 *
 * @param fileId       File the symbol lives in.
 * @param def          Symbol definition from the SCIP index.
 * @param scipImportId `scip_imports.id` of the contributing layer.
 * @returns The row id SCIP references should point at, and whether an
 *          existing row was reused (`merged`).
 */
insertOrMergeScipSymbol(
  fileId: number, def: SymbolDef, scipImportId: number,
): { id: number; merged: boolean } {
  if (!this.hasV7Columns) {
    return { id: this.insertSymbol(fileId, def), merged: false };
  }

  const qualified = def.qualifiedName ?? def.name;

  // Overlap test: existing.line_start <= def.lineEnd AND
  // existing.line_end >= def.lineStart.
  const overlap = this.db.prepare(`
    SELECT id, provenance FROM symbols
    WHERE file_id = ?
    AND (qualified_name = ? OR name = ?)
    AND kind = ?
    AND line_start <= ?
    AND line_end >= ?
  `).get(
    fileId, qualified, def.name, def.kind,
    def.lineEnd, def.lineStart,
  ) as Row | undefined;

  if (overlap) {
    const overlapId = toNum(overlap.id);
    // Only tree-sitter rows are claimed; rows already touched by a SCIP
    // layer keep their original owner.
    if (toStr(overlap.provenance) === 'tree-sitter') {
      this.db
        .prepare("UPDATE symbols SET provenance = 'scip-merge', scip_import_id = ? WHERE id = ?")
        .run(scipImportId, overlapId);
    }
    return { id: overlapId, merged: true };
  }

  // Nothing overlaps — store a brand-new SCIP-provenance symbol.
  const signature = def.signature ? def.signature.slice(0, 240) : null;
  const symbolKey = makeSymbolKey(def.kind, qualified);
  const inserted = this.db.prepare(`
    INSERT INTO symbols
    (name, qualified_name, kind, file_id, line_start, line_end, col_start, col_end,
    signature, is_rankable, loc, cyclomatic, cognitive, max_nesting, symbol_key, symbol_role, provenance, shape_hash, scip_import_id)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 'scip', NULL, ?)
  `).run(
    def.name, qualified, def.kind, fileId,
    def.lineStart, def.lineEnd,
    def.colStart, def.colEnd,
    signature,
    (isRankableKind(def.kind) ? 1 : 0),
    def.loc ?? null, def.cyclomatic ?? null,
    def.cognitive ?? null, def.maxNesting ?? null,
    symbolKey, 'definition',
    scipImportId,
  );
  return { id: toNum(inserted.lastInsertRowid), merged: false };
}
3902
+
3903
/**
 * Inserts one SCIP-sourced edge with its target already resolved: `to_id`
 * is written directly, with provenance 'scip'. The original note explains
 * that SCIP supplies precise targets, so the name-based fallback resolver
 * used for tree-sitter call edges is not needed.
 *
 * @param fromSymbolId Source symbol id.
 * @param toSymbolId   Resolved target symbol id.
 * @param toName       Target name stored alongside the id.
 * @param kind         Edge kind.
 * @param line         Line stored on the edge.
 * @param scipImportId Contributing SCIP layer, so per-layer wipes can find
 *                     this edge.
 *
 * Does nothing when the v7 columns are not available.
 */
insertScipEdge(
  fromSymbolId: number, toSymbolId: number, toName: string, kind: string,
  line: number, scipImportId: number,
): void {
  if (!this.hasV7Columns) return;
  const stmt = this.db.prepare(
    "INSERT INTO edges (from_id, to_name, to_id, kind, line, provenance, scip_import_id) VALUES (?, ?, ?, ?, ?, 'scip', ?)",
  );
  stmt.run(fromSymbolId, toName, toSymbolId, kind, line, scipImportId);
}
3920
+
3921
/**
 * Wipe SCIP-sourced rows so a fresh import can replace them. Tree-sitter
 * rows are never deleted; 'scip-merge' rows are demoted back to
 * 'tree-sitter' with their `scip_import_id` cleared.
 *
 * - `scipPath` omitted → every SCIP layer is wiped: all 'scip-merge'
 *   symbols are demoted, all 'scip' edges and symbols are deleted, and
 *   `scip_imports` is emptied.
 * - `scipPath` provided → only rows linked to the `scip_imports` ids
 *   recorded for that path are touched, then those import rows are
 *   removed. Sibling layers stay intact. If no import row exists for the
 *   path, nothing is changed.
 *
 * The whole operation runs inside one BEGIN/COMMIT pair.
 *
 * @param scipPath Optional path identifying the layer to clear.
 * @returns Number of deleted symbols plus deleted edges (demoted rows are
 *          not counted); 0 when the v7 columns are not available.
 * @throws Rethrows whatever error interrupted the wipe, after attempting a
 *         rollback.
 */
clearScipProvenance(scipPath?: string): number {
  if (!this.hasV7Columns) return 0;
  let edgeDeletes = 0, symDeletes = 0;
  this.db.exec('BEGIN');
  try {
    if (scipPath == null) {
      // Global wipe — every SCIP layer collapses.
      this.db.exec("UPDATE symbols SET provenance = 'tree-sitter', scip_import_id = NULL WHERE provenance = 'scip-merge'");
      const eRes = this.db.prepare("DELETE FROM edges WHERE provenance = 'scip'").run();
      edgeDeletes = toNum(eRes.changes);
      const sRes = this.db.prepare("DELETE FROM symbols WHERE provenance = 'scip'").run();
      symDeletes = toNum(sRes.changes);
      this.db.exec('DELETE FROM scip_imports');
    } else {
      // Per-layer wipe — look up the import id(s) for this path. No row
      // means "nothing to do" rather than an error, so callers can call
      // this unconditionally before re-ingesting.
      const rows = this.db.prepare(
        'SELECT id FROM scip_imports WHERE path = ?',
      ).all(scipPath) as Row[];
      const ids = rows.map(r => toNum(r.id));
      if (ids.length > 0) {
        // One placeholder per import id for the IN (...) lists below.
        const ph = ids.map(() => '?').join(',');
        this.db.prepare(
          `UPDATE symbols SET provenance = 'tree-sitter', scip_import_id = NULL
           WHERE provenance = 'scip-merge' AND scip_import_id IN (${ph})`,
        ).run(...ids);
        const eRes = this.db.prepare(
          `DELETE FROM edges WHERE provenance = 'scip' AND scip_import_id IN (${ph})`,
        ).run(...ids);
        edgeDeletes = toNum(eRes.changes);
        const sRes = this.db.prepare(
          `DELETE FROM symbols WHERE provenance = 'scip' AND scip_import_id IN (${ph})`,
        ).run(...ids);
        symDeletes = toNum(sRes.changes);
        this.db.prepare('DELETE FROM scip_imports WHERE path = ?').run(scipPath);
      }
    }
    this.db.exec('COMMIT');
  } catch (err) {
    // SQLite rolls back automatically on some errors (e.g. disk full, I/O
    // error); an explicit ROLLBACK then fails with "no transaction is
    // active". Guard it so the original, more informative error is the one
    // that reaches the caller.
    try {
      this.db.exec('ROLLBACK');
    } catch {
      /* transaction already gone — nothing left to undo */
    }
    throw err;
  }
  return symDeletes + edgeDeletes;
}
3980
+
3981
/**
 * Row counts per provenance value for symbols and for edges. The original
 * note says this backs `seer_health` / `seer_stats`.
 *
 * Both maps start with 'tree-sitter', 'scip' and 'scip-merge' at 0, so
 * those keys are always present; any other provenance value found in the
 * database is added as its own key. Without the v7 columns the zeroed maps
 * are returned as-is. A query failure is swallowed and whatever was
 * tallied up to that point is returned.
 */
getProvenanceCounts(): { symbols: Record<string, number>; edges: Record<string, number> } {
  const symbolCounts: Record<string, number> = { 'tree-sitter': 0, scip: 0, 'scip-merge': 0 };
  const edgeCounts: Record<string, number> = { 'tree-sitter': 0, scip: 0, 'scip-merge': 0 };
  const result = { symbols: symbolCounts, edges: edgeCounts };
  if (!this.hasV7Columns) return result;

  // Runs the GROUP BY for one table and writes the counts into `target`.
  const tally = (table: 'symbols' | 'edges', target: Record<string, number>): void => {
    const grouped = this.db
      .prepare(`SELECT provenance, COUNT(*) AS c FROM ${table} GROUP BY provenance`)
      .all() as Row[];
    for (const row of grouped) {
      target[toStr(row.provenance)] = toNum(row.c);
    }
  };

  try {
    tally('symbols', symbolCounts);
    tally('edges', edgeCounts);
  } catch { /* best-effort: keep whatever was counted */ }
  return result;
}
3998
+
3999
+ // ── Track-F: shape-hash (structural SimHash) ────────────────────────────────
4000
+
4001
/**
 * Stores (or clears) a symbol's `shape_hash`.
 *
 * @param symbolId Row to update.
 * @param hash     Hash to store; `null`/`undefined` writes NULL. A bigint
 *                 is passed through `toSignedI64` first — per the
 *                 original note, to fit the signed range of an INTEGER
 *                 column.
 *
 * Does nothing when the v7 columns are not available.
 */
setShapeHash(symbolId: number, hash: bigint | null): void {
  if (!this.hasV7Columns) return;
  let stored: ReturnType<typeof toSignedI64> | null = null;
  if (hash != null) {
    stored = toSignedI64(hash);
  }
  const stmt = this.db.prepare('UPDATE symbols SET shape_hash = ? WHERE id = ?');
  stmt.run(stored, symbolId);
}
4008
+
4009
/**
 * Symbols that carry a non-NULL `shape_hash`, ordered by id. The original
 * note says this is the candidate pool for duplicate detection and
 * returns only a minimal field set to keep the working set small.
 *
 * @param opts.minLoc       When set, keep only symbols with `loc >= minLoc`.
 * @param opts.includeTests Pass `false` to drop symbols from files whose
 *                          role is 'test'; any other value keeps them.
 * @param opts.limit        Maximum rows (default 50000).
 * @returns Matching symbols with `shapeHash` converted through
 *          `toUnsignedI64`; empty when the v7 columns are missing or the
 *          read throws.
 */
listSymbolsWithShapeHash(opts: {
  minLoc?: number; includeTests?: boolean; limit?: number;
} = {}): Array<{
  id: number; name: string; qualifiedName: string | null; kind: string;
  filePath: string; lineStart: number; lineEnd: number;
  loc: number | null; shapeHash: bigint;
}> {
  if (!this.hasV7Columns) return [];

  const filters: string[] = ['s.shape_hash IS NOT NULL'];
  const params: Array<string | number> = [];
  const { minLoc, includeTests } = opts;
  if (minLoc != null) {
    filters.push('s.loc >= ?');
    params.push(minLoc);
  }
  if (includeTests === false) {
    filters.push("f.role <> 'test'");
  }
  // LIMIT's bind value always comes last.
  params.push(opts.limit ?? 50000);

  const stmt = this.db.prepare(`
    SELECT s.id, s.name, s.qualified_name AS qualifiedName, s.kind,
    f.path AS filePath, s.line_start AS lineStart, s.line_end AS lineEnd,
    s.loc, s.shape_hash AS shapeHash
    FROM symbols s JOIN files f ON f.id = s.file_id
    WHERE ${filters.join(' AND ')}
    ORDER BY s.id
    LIMIT ?
  `);

  // Per the original note: shape_hash values routinely exceed the JS
  // safe-integer range, and without bigint reads node:sqlite throws while
  // materializing rows — which the catch below would turn into an empty
  // result. So the whole row is read as bigint and the small integer
  // columns are converted back with toNum/toNullNum.
  try { stmt.setReadBigInts(true); } catch { /* leave default read mode */ }

  try {
    const found = stmt.all(...params) as Row[];
    return found.map(row => {
      return {
        id: toNum(row.id),
        name: toStr(row.name),
        qualifiedName: toNullStr(row.qualifiedName),
        kind: toStr(row.kind),
        filePath: toStr(row.filePath),
        lineStart: toNum(row.lineStart),
        lineEnd: toNum(row.lineEnd),
        loc: toNullNum(row.loc),
        shapeHash: toUnsignedI64(row.shapeHash),
      };
    });
  } catch {
    return [];
  }
}
4062
+
4063
/**
 * Exposes the v7 capability flag so downstream features can gate on it.
 *
 * @returns The current value of `hasV7Columns`.
 */
hasV7(): boolean {
  const available = this.hasV7Columns;
  return available;
}
4065
+
4066
/**
 * Reports whether any function-like symbol still lacks a shape hash: kind
 * 'function', 'method' or 'constructor', role other than 'declaration',
 * `loc >= minLoc`, and `shape_hash` NULL.
 *
 * Per the original note, the indexer uses this to decide whether to run
 * buildShapeHashes() on a cached re-run: after a pre-v7 database migrates
 * to v7, unchanged files still have NULL hashes, and the usual
 * "graph changed" check would miss that backfill.
 *
 * NOTE(review): rows whose `symbol_role` is NULL do not satisfy
 * `symbol_role <> 'declaration'` in SQL, so they are not counted as
 * missing — confirm that column is always populated.
 *
 * @param minLoc Minimum `loc` for a symbol to be considered (default 4).
 * @returns `false` when the v7 columns are missing, nothing matches, or
 *          the query throws.
 */
hasMissingShapeHashes(minLoc = 4): boolean {
  if (!this.hasV7Columns) return false;
  try {
    const stmt = this.db.prepare(`
      SELECT 1 FROM symbols
      WHERE shape_hash IS NULL
      AND kind IN ('function','method','constructor')
      AND symbol_role <> 'declaration'
      AND loc >= ?
      LIMIT 1
    `);
    const hit = stmt.get(minLoc) as Row | undefined;
    return hit !== undefined && hit !== null;
  } catch {
    return false;
  }
}
4089
+
4090
+ // ── Stats ───────────────────────────────────────────────────────────────────
4091
+
4092
/**
 * Collects headline counts for the whole index.
 *
 * The core counts (files, symbols, call edges, resolved call edges,
 * files per language) are read directly and will throw on failure. Every
 * optional count — routes, external dependencies, config keys, symbol
 * history, modules, SCIP imports, shape-hashed symbols, service calls and
 * service links — is best-effort and reports 0 if its query throws or its
 * tables/columns are not available.
 *
 * Note that `edges` and `resolvedEdges` count only edges of kind 'call'.
 */
getStats(): StatsRow {
  // Runs a `SELECT COUNT(*) AS c ...` statement and returns the count.
  const countOf = (sql: string): number =>
    toNum((this.db.prepare(sql).get() as Row).c);
  // Evaluates an optional counter, mapping any failure to 0.
  const orZero = (read: () => number): number => {
    try {
      return read();
    } catch {
      return 0;
    }
  };

  const files = countOf('SELECT COUNT(*) AS c FROM files');
  const symbols = countOf('SELECT COUNT(*) AS c FROM symbols');
  const edges = countOf("SELECT COUNT(*) AS c FROM edges WHERE kind = 'call'");
  const resolvedEdges = countOf(
    "SELECT COUNT(*) AS c FROM edges WHERE to_id IS NOT NULL AND kind = 'call'",
  );

  const languages: Record<string, number> = {};
  const perLanguage = this.db.prepare(
    'SELECT language, COUNT(*) AS c FROM files GROUP BY language'
  ).all() as Row[];
  for (const entry of perLanguage) {
    languages[toStr(entry.language)] = toNum(entry.c);
  }

  const routes = orZero(() => this.countRoutes());
  const externalDependencies = orZero(() => this.countExternalDeps());
  const configKeys = orZero(() => this.countConfigKeys());
  const symbolHistory = orZero(() =>
    this.hasV4Tables ? countOf('SELECT COUNT(*) AS c FROM symbol_history') : 0,
  );
  const modules = orZero(() => this.countModules());

  // v7 extras — SCIP imports and shape_hash coverage.
  const scipImports = this.hasV7Columns
    ? orZero(() => countOf('SELECT COUNT(*) AS c FROM scip_imports'))
    : 0;
  const shapeHashed = this.hasV7Columns
    ? orZero(() => countOf('SELECT COUNT(*) AS c FROM symbols WHERE shape_hash IS NOT NULL'))
    : 0;

  // v8 Track G — service-link counts.
  const serviceCalls = orZero(() => this.countServiceCalls());
  const serviceLinks = orZero(() => this.countServiceLinks());

  return {
    files, symbols, edges, resolvedEdges, languages,
    roles: this.getRoleCounts(),
    routes,
    externalDependencies,
    configKeys,
    symbolHistory,
    modules,
    scipImports,
    shapeHashed,
    provenance: this.getProvenanceCounts(),
    serviceCalls,
    serviceLinks,
  };
}
4150
+
4151
/**
 * Escape hatch returning the underlying database handle. The original
 * note names the history indexer as the intended caller.
 */
rawDb(): DatabaseSync {
  const handle = this.db;
  return handle;
}
4153
+
4154
/** Opens a transaction by issuing `BEGIN` on the underlying database. */
begin(): void {
  this.db.exec('BEGIN');
}
4155
/** Commits the open transaction by issuing `COMMIT`. */
commit(): void {
  this.db.exec('COMMIT');
}
4156
/** Abandons the open transaction by issuing `ROLLBACK`. */
rollback(): void {
  this.db.exec('ROLLBACK');
}
4157
+
4158
/** Closes the underlying database handle. */
close(): void {
  const handle = this.db;
  handle.close();
}
4161
+ }
4162
+
4163
/**
 * Builds the SELECT column list for symbol queries. Expects the query to
 * alias `symbols` as `s` and `files` as `f`.
 *
 * @param hasComplexity Append the complexity columns (loc, cyclomatic,
 *                      cognitive, max_nesting AS maxNesting).
 * @param hasSymbolRole Append `symbol_role AS symbolRole`.
 * @returns A comma-separated column list, base columns first.
 */
function symbolSelectCols(hasComplexity: boolean, hasSymbolRole: boolean): string {
  const base =
    `s.id, s.name, s.qualified_name AS qualifiedName, s.kind, s.file_id AS fileId,
    f.path AS filePath, s.line_start AS lineStart,
    s.line_end AS lineEnd, s.signature, s.pagerank`;
  const complexity = hasComplexity
    ? `, s.loc, s.cyclomatic, s.cognitive, s.max_nesting AS maxNesting`
    : '';
  const role = hasSymbolRole ? `, s.symbol_role AS symbolRole` : '';
  return base + complexity + role;
}
4172
+
4173
/**
 * Converts one raw database row into a `SymbolRow`.
 *
 * The complexity fields go through `toNullNum` and `symbolRole` is
 * explicitly mapped to `null` when absent, so rows from queries that did
 * not select those optional columns still convert.
 */
function toSymbolRow(r: Row): SymbolRow {
  // A missing or NULL role becomes null; anything else is trusted to be
  // one of the three known role strings.
  let symbolRole: 'definition' | 'declaration' | 'type_ref' | null = null;
  if (r.symbolRole != null) {
    symbolRole = toStr(r.symbolRole) as 'definition' | 'declaration' | 'type_ref';
  }
  const converted: SymbolRow = {
    id: toNum(r.id),
    name: toStr(r.name),
    qualifiedName: toNullStr(r.qualifiedName),
    kind: toStr(r.kind),
    fileId: toNum(r.fileId),
    filePath: toStr(r.filePath),
    lineStart: toNum(r.lineStart),
    lineEnd: toNum(r.lineEnd),
    signature: toNullStr(r.signature),
    pagerank: toNum(r.pagerank),
    loc: toNullNum(r.loc),
    cyclomatic: toNullNum(r.cyclomatic),
    cognitive: toNullNum(r.cognitive),
    maxNesting: toNullNum(r.maxNesting),
    symbolRole,
  };
  return converted;
}
4192
+
4193
/**
 * Produce the stable key under which a symbol's history is tracked.
 *
 * The key is simply `kind:qualified_name`. It is deliberately coarse:
 * parameter shape is left out, so extractors that disagree about a signature
 * still map to the same key and history is kept rather than split.
 *
 * Future: include parameter arity or a signature hash to tell overloads apart.
 *
 * @param kind Symbol kind.
 * @param qualifiedName Qualified name of the symbol.
 * @returns The two parts joined by a single colon.
 */
export function makeSymbolKey(kind: string, qualifiedName: string): string {
  return kind + ':' + qualifiedName;
}
4204
+
4205
/**
 * Turn a free-text query into an FTS5 MATCH expression.
 *
 * Steps:
 *  - pass the input through `splitIdentifierTokens` (defined elsewhere in this
 *    file; presumably where case-folding and identifier-punctuation splitting
 *    happen — confirm there), then split the result on whitespace;
 *  - keep only tokens that start with an ASCII letter or digit;
 *  - strip `"`, `'` and `*` so a token cannot break out of its quoted form;
 *  - emit each token as a quoted prefix term (`"tok"*`) and OR them together.
 *
 * @param input Raw user query.
 * @returns The MATCH expression, or `null` when the input is empty or yields
 *   no usable token (the caller falls back to LIKE).
 */
export function ftsQuery(input: string): string | null {
  if (!input) return null;

  const clauses: string[] = [];
  for (const raw of splitIdentifierTokens(input).split(/\s+/)) {
    if (raw.length === 0 || !/^[a-z0-9]/i.test(raw)) continue;

    const cleaned = raw.replace(/["'*]/g, '');
    if (cleaned.length === 0) continue;

    clauses.push(`"${cleaned}"*`);
  }

  return clauses.length === 0 ? null : clauses.join(' OR ');
}
4223
+
4224
+ // ── Import path resolution ───────────────────────────────────────────────────
4225
+
4226
/**
 * Rewrite every backslash in a path as a forward slash, so Windows-style and
 * POSIX-style spellings of the same path compare equal.
 *
 * @param p Path in either separator style.
 * @returns The path with `/` as its only separator; otherwise unchanged.
 */
function normalizePath(p: string): string {
  return p.split('\\').join('/');
}
4229
+
4230
/**
 * File extensions tried, in this order, when resolving a relative
 * TypeScript/JavaScript import to an indexed file.
 */
const TS_JS_EXTS: string[] = [
  '.ts',
  '.tsx',
  '.js',
  '.jsx',
  '.mjs',
  '.cjs',
];
4231
+
4232
/**
 * Resolve an import specifier to the id of the indexed file it refers to,
 * dispatching on the importing file's language.
 *
 * @param fromPath Path of the file that contains the import.
 * @param language Language of the importing file; only `'typescript'`,
 *   `'javascript'` and `'python'` are handled.
 * @param importName Import specifier as written in the source.
 * @param fileByPath Lookup from file path to file id.
 * @returns The matching file id, or `null` when the language is unsupported
 *   or the language-specific resolver finds no match.
 */
function resolveImportToFileId(
  fromPath: string,
  language: string,
  importName: string,
  fileByPath: Map<string, number>,
): number | null {
  switch (language) {
    case 'typescript':
    case 'javascript':
      return resolveJsImport(fromPath, importName, fileByPath);
    case 'python':
      return resolvePythonImport(fromPath, importName, fileByPath);
    default:
      return null;
  }
}
4246
+
4247
/**
 * JavaScript output extensions mapped to the TypeScript source extensions
 * they may stand for. TypeScript projects that use Node-style ESM resolution
 * write `import './foo.js'` even though the file on disk is `foo.ts`.
 */
const JS_TO_TS_SOURCE_EXTS = new Map<string, readonly string[]>([
  ['.js', ['.ts', '.tsx']],
  ['.jsx', ['.tsx']],
  ['.mjs', ['.mts']],
  ['.cjs', ['.cts']],
]);

/**
 * Resolve a relative TypeScript/JavaScript import specifier to a file id.
 *
 * Only relative specifiers are handled: `./x`, `../x`, and the bare
 * directory forms `.` and `..`. Anything else (package names, absolute
 * paths) returns `null`.
 *
 * Candidates are tried in this order; the first one present in `fileByPath`
 * wins:
 *  1. the target exactly as written, when it already ends in one of
 *     `TS_JS_EXTS`;
 *  2. the target with each of `TS_JS_EXTS` appended;
 *  3. `index` + each of `TS_JS_EXTS` inside the target directory;
 *  4. the target with a JavaScript extension swapped for its TypeScript
 *     source extension (`./foo.js` -> `foo.ts`). This runs last, so it only
 *     adds matches for imports that previously resolved to nothing.
 *
 * For `.` and `..` only step 3 applies, because those specifiers always name
 * a directory.
 *
 * NOTE(review): `path.resolve` returns an absolute path, so the keys of
 * `fileByPath` are presumably absolute as well — confirm against the caller.
 *
 * @param fromPath Path of the importing file.
 * @param importName Import specifier as written in the source.
 * @param fileByPath Lookup from forward-slash file path to file id.
 * @returns The id of the resolved file, or `null` when nothing matches.
 */
function resolveJsImport(
  fromPath: string,
  importName: string,
  fileByPath: Map<string, number>,
): number | null {
  const isBareDirectory = importName === '.' || importName === '..';
  if (!isBareDirectory && !importName.startsWith('./') && !importName.startsWith('../')) {
    return null;
  }

  const fromDir = path.dirname(fromPath);
  const target = path.resolve(fromDir, importName);
  const ext = isBareDirectory ? '' : path.extname(target);

  const lookup = (candidate: string): number | undefined =>
    fileByPath.get(normalizePath(candidate));

  if (!isBareDirectory) {
    // 1. Specifier already carries a known extension.
    if (ext && TS_JS_EXTS.includes(ext)) {
      const id = lookup(target);
      if (id !== undefined) return id;
    }

    // 2. Extension-less specifier: try each known extension.
    for (const e of TS_JS_EXTS) {
      const id = lookup(target + e);
      if (id !== undefined) return id;
    }
  }

  // 3. Specifier names a directory: look for its index file.
  for (const e of TS_JS_EXTS) {
    const id = lookup(path.join(target, 'index' + e));
    if (id !== undefined) return id;
  }

  // 4. `./foo.js` written in TypeScript source but backed by `foo.ts`.
  const sourceExts = JS_TO_TS_SOURCE_EXTS.get(ext);
  if (sourceExts !== undefined) {
    const stem = target.slice(0, target.length - ext.length);
    for (const e of sourceExts) {
      const id = lookup(stem + e);
      if (id !== undefined) return id;
    }
  }

  return null;
}
4275
+
4276
/**
 * Resolve a relative Python import (`.mod`, `..pkg.mod`, ...) to a file id.
 *
 * The leading dots select the base directory: one dot is the importing file's
 * own directory, and each further dot climbs one directory. The remaining
 * dotted name is turned into a path below that directory and matched first as
 * a module file (`<path>.py`), then as a package (`<path>/__init__.py`).
 *
 * @param fromPath Path of the importing file.
 * @param importName Import name as written, including its leading dots.
 * @param fileByPath Lookup from forward-slash file path to file id.
 * @returns The id of the resolved file, or `null` when the import is not
 *   relative, consists only of dots, or matches no known file.
 */
function resolvePythonImport(
  fromPath: string,
  importName: string,
  fileByPath: Map<string, number>,
): number | null {
  if (!importName.startsWith('.')) return null;

  // Index of the first non-dot character equals the number of leading dots;
  // -1 means the name is dots only, i.e. there is no module part to resolve.
  const dotCount = importName.search(/[^.]/);
  if (dotCount === -1) return null;

  const dottedName = importName.slice(dotCount);

  let packageDir = path.dirname(fromPath);
  for (let hops = dotCount - 1; hops > 0; hops--) {
    packageDir = path.dirname(packageDir);
  }

  const stem = path.join(packageDir, ...dottedName.split('.'));
  const candidates = [stem + '.py', path.join(stem, '__init__.py')];

  for (const candidate of candidates) {
    const id = fileByPath.get(normalizePath(candidate));
    if (id !== undefined) return id;
  }

  return null;
}