@rkarim08/sia 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (355) hide show
  1. package/.claude-plugin/marketplace.json +35 -0
  2. package/.claude-plugin/plugin.json +27 -0
  3. package/.mcp.json +13 -0
  4. package/CLAUDE.md +226 -0
  5. package/LICENSE +202 -0
  6. package/PLUGIN_README.md +253 -0
  7. package/README.md +1013 -0
  8. package/agents/sia-changelog-writer.md +89 -0
  9. package/agents/sia-code-reviewer.md +86 -0
  10. package/agents/sia-conflict-resolver.md +100 -0
  11. package/agents/sia-convention-enforcer.md +69 -0
  12. package/agents/sia-debug.md +106 -0
  13. package/agents/sia-decision-reviewer.md +101 -0
  14. package/agents/sia-dependency-tracker.md +80 -0
  15. package/agents/sia-explain.md +126 -0
  16. package/agents/sia-feature.md +116 -0
  17. package/agents/sia-knowledge-capture.md +117 -0
  18. package/agents/sia-lead-architecture-advisor.md +93 -0
  19. package/agents/sia-lead-team-health.md +107 -0
  20. package/agents/sia-migration.md +100 -0
  21. package/agents/sia-onboarding.md +115 -0
  22. package/agents/sia-orientation.md +99 -0
  23. package/agents/sia-pm-briefing.md +106 -0
  24. package/agents/sia-pm-risk-advisor.md +82 -0
  25. package/agents/sia-qa-analyst.md +116 -0
  26. package/agents/sia-qa-regression-map.md +94 -0
  27. package/agents/sia-refactor.md +115 -0
  28. package/agents/sia-regression.md +112 -0
  29. package/agents/sia-security-audit.md +125 -0
  30. package/agents/sia-test-advisor.md +91 -0
  31. package/hooks/hooks.json +98 -0
  32. package/migrations/bridge/001_initial.sql +34 -0
  33. package/migrations/episodic/001_initial.sql +35 -0
  34. package/migrations/meta/001_initial.sql +68 -0
  35. package/migrations/semantic/001_initial.sql +292 -0
  36. package/migrations/semantic/002_ontology.sql +89 -0
  37. package/migrations/semantic/003_freshness.sql +63 -0
  38. package/migrations/semantic/004_v5_unified_schema.sql +194 -0
  39. package/migrations/semantic/005_backfill_event_kinds.sql +8 -0
  40. package/migrations/semantic/006_tree_sitter.sql +6 -0
  41. package/migrations/semantic/007_branch_snapshots.sql +22 -0
  42. package/package.json +110 -0
  43. package/scripts/branch-switch.sh +13 -0
  44. package/scripts/build-wasm-grammars.sh +81 -0
  45. package/scripts/post-compact.sh +8 -0
  46. package/scripts/post-tool-use.sh +10 -0
  47. package/scripts/pre-compact.sh +8 -0
  48. package/scripts/session-end.sh +8 -0
  49. package/scripts/session-start.sh +8 -0
  50. package/scripts/start-mcp.ts +45 -0
  51. package/scripts/stop-hook.sh +8 -0
  52. package/scripts/user-prompt-submit.sh +8 -0
  53. package/scripts/viz-server.ts +152 -0
  54. package/skills/sia-brainstorm/SKILL.md +156 -0
  55. package/skills/sia-brainstorm/scripts/frame-template.html +214 -0
  56. package/skills/sia-brainstorm/scripts/helper.js +95 -0
  57. package/skills/sia-brainstorm/scripts/server.cjs +338 -0
  58. package/skills/sia-brainstorm/scripts/start-server.sh +153 -0
  59. package/skills/sia-brainstorm/scripts/stop-server.sh +55 -0
  60. package/skills/sia-brainstorm/spec-document-reviewer-prompt.md +49 -0
  61. package/skills/sia-brainstorm/visual-companion.md +286 -0
  62. package/skills/sia-capture/SKILL.md +64 -0
  63. package/skills/sia-compare/SKILL.md +33 -0
  64. package/skills/sia-conflicts/SKILL.md +38 -0
  65. package/skills/sia-debug-workflow/SKILL.md +120 -0
  66. package/skills/sia-debug-workflow/root-cause-tracing.md +70 -0
  67. package/skills/sia-debug-workflow/scripts/find-polluter.sh +64 -0
  68. package/skills/sia-debug-workflow/temporal-investigation.md +72 -0
  69. package/skills/sia-digest/SKILL.md +23 -0
  70. package/skills/sia-dispatch/SKILL.md +69 -0
  71. package/skills/sia-dispatch/agent-task-template.md +99 -0
  72. package/skills/sia-doctor/SKILL.md +39 -0
  73. package/skills/sia-execute/SKILL.md +70 -0
  74. package/skills/sia-execute-plan/SKILL.md +85 -0
  75. package/skills/sia-export-import/SKILL.md +49 -0
  76. package/skills/sia-export-knowledge/SKILL.md +46 -0
  77. package/skills/sia-finish/SKILL.md +100 -0
  78. package/skills/sia-finish/pr-summary-template.md +54 -0
  79. package/skills/sia-freshness/SKILL.md +38 -0
  80. package/skills/sia-history/SKILL.md +42 -0
  81. package/skills/sia-impact/SKILL.md +70 -0
  82. package/skills/sia-index/SKILL.md +54 -0
  83. package/skills/sia-install/SKILL.md +39 -0
  84. package/skills/sia-lead-compliance/SKILL.md +16 -0
  85. package/skills/sia-lead-drift-report/SKILL.md +16 -0
  86. package/skills/sia-lead-knowledge-map/SKILL.md +16 -0
  87. package/skills/sia-learn/SKILL.md +58 -0
  88. package/skills/sia-plan/SKILL.md +68 -0
  89. package/skills/sia-plan/plan-reviewer-prompt.md +63 -0
  90. package/skills/sia-playbooks/SKILL.md +29 -0
  91. package/skills/sia-playbooks/reference-feature.md +100 -0
  92. package/skills/sia-playbooks/reference-flagging.md +50 -0
  93. package/skills/sia-playbooks/reference-orientation.md +92 -0
  94. package/skills/sia-playbooks/reference-regression.md +115 -0
  95. package/skills/sia-playbooks/reference-review.md +64 -0
  96. package/skills/sia-playbooks/reference-tools.md +239 -0
  97. package/skills/sia-pm-decision-log/SKILL.md +28 -0
  98. package/skills/sia-pm-risk-dashboard/SKILL.md +24 -0
  99. package/skills/sia-pm-sprint-summary/SKILL.md +27 -0
  100. package/skills/sia-prune/SKILL.md +45 -0
  101. package/skills/sia-qa-coverage/SKILL.md +28 -0
  102. package/skills/sia-qa-flaky/SKILL.md +20 -0
  103. package/skills/sia-qa-report/SKILL.md +26 -0
  104. package/skills/sia-reindex/SKILL.md +30 -0
  105. package/skills/sia-review-respond/SKILL.md +88 -0
  106. package/skills/sia-review-respond/pushback-patterns.md +90 -0
  107. package/skills/sia-search/SKILL.md +47 -0
  108. package/skills/sia-setup/SKILL.md +82 -0
  109. package/skills/sia-setup/setup-checklist.md +97 -0
  110. package/skills/sia-stats/SKILL.md +36 -0
  111. package/skills/sia-status/SKILL.md +44 -0
  112. package/skills/sia-sync/SKILL.md +46 -0
  113. package/skills/sia-team/SKILL.md +64 -0
  114. package/skills/sia-test/SKILL.md +92 -0
  115. package/skills/sia-test/testing-anti-patterns.md +104 -0
  116. package/skills/sia-tour/SKILL.md +29 -0
  117. package/skills/sia-upgrade/SKILL.md +43 -0
  118. package/skills/sia-verify/SKILL.md +81 -0
  119. package/skills/sia-visualize/SKILL.md +28 -0
  120. package/skills/sia-visualize-live/SKILL.md +55 -0
  121. package/skills/sia-visualize-live/scripts/graph-template.html +389 -0
  122. package/skills/sia-visualize-live/scripts/start-visualizer.sh +161 -0
  123. package/skills/sia-visualize-live/scripts/stop-visualizer.sh +55 -0
  124. package/skills/sia-visualize-live/scripts/visualizer-server.cjs +264 -0
  125. package/skills/sia-workspace/SKILL.md +57 -0
  126. package/src/agent/claude-md-template-flagging.md +219 -0
  127. package/src/agent/claude-md-template.md +213 -0
  128. package/src/agent/modules/sia-feature.md +100 -0
  129. package/src/agent/modules/sia-flagging.md +50 -0
  130. package/src/agent/modules/sia-orientation.md +92 -0
  131. package/src/agent/modules/sia-regression.md +115 -0
  132. package/src/agent/modules/sia-review.md +64 -0
  133. package/src/agent/modules/sia-tools.md +239 -0
  134. package/src/ast/extractors/c-include.ts +189 -0
  135. package/src/ast/extractors/csharp-project.ts +260 -0
  136. package/src/ast/extractors/prisma-schema.ts +44 -0
  137. package/src/ast/extractors/project-manifest.ts +111 -0
  138. package/src/ast/extractors/sql-schema.ts +67 -0
  139. package/src/ast/extractors/tier-a.ts +423 -0
  140. package/src/ast/extractors/tier-b.ts +289 -0
  141. package/src/ast/extractors/tier-dispatch.ts +247 -0
  142. package/src/ast/index-worker.ts +108 -0
  143. package/src/ast/indexer.ts +484 -0
  144. package/src/ast/languages.ts +408 -0
  145. package/src/ast/pagerank-builder.ts +125 -0
  146. package/src/ast/path-utils.ts +137 -0
  147. package/src/ast/tree-sitter/backends/native.ts +57 -0
  148. package/src/ast/tree-sitter/backends/wasm.ts +39 -0
  149. package/src/ast/tree-sitter/call-walker.ts +44 -0
  150. package/src/ast/tree-sitter/edit-computer.ts +55 -0
  151. package/src/ast/tree-sitter/query-runner.ts +46 -0
  152. package/src/ast/tree-sitter/service.ts +174 -0
  153. package/src/ast/tree-sitter/tree-cache.ts +39 -0
  154. package/src/ast/tree-sitter/types.ts +79 -0
  155. package/src/ast/watcher.ts +322 -0
  156. package/src/capture/chunker.ts +169 -0
  157. package/src/capture/consolidate.ts +127 -0
  158. package/src/capture/edge-inferrer.ts +161 -0
  159. package/src/capture/embedder.ts +166 -0
  160. package/src/capture/embedding-cache.ts +73 -0
  161. package/src/capture/flag-processor.ts +64 -0
  162. package/src/capture/hook.ts +67 -0
  163. package/src/capture/pipeline.ts +450 -0
  164. package/src/capture/prompts/consolidate.ts +25 -0
  165. package/src/capture/prompts/edge-infer.ts +29 -0
  166. package/src/capture/prompts/extract-flagged.ts +36 -0
  167. package/src/capture/prompts/extract.ts +42 -0
  168. package/src/capture/tokenizer.ts +147 -0
  169. package/src/capture/track-a-ast.ts +93 -0
  170. package/src/capture/track-b-llm.ts +149 -0
  171. package/src/capture/types.ts +64 -0
  172. package/src/cli/commands/community.ts +137 -0
  173. package/src/cli/commands/compare.ts +123 -0
  174. package/src/cli/commands/conflicts.ts +41 -0
  175. package/src/cli/commands/digest.ts +197 -0
  176. package/src/cli/commands/disable-flagging.ts +34 -0
  177. package/src/cli/commands/doctor.ts +240 -0
  178. package/src/cli/commands/download-model.ts +161 -0
  179. package/src/cli/commands/enable-flagging.ts +34 -0
  180. package/src/cli/commands/export-knowledge.ts +208 -0
  181. package/src/cli/commands/export.ts +85 -0
  182. package/src/cli/commands/freshness.ts +164 -0
  183. package/src/cli/commands/graph.ts +51 -0
  184. package/src/cli/commands/history.ts +139 -0
  185. package/src/cli/commands/import.ts +335 -0
  186. package/src/cli/commands/install.ts +156 -0
  187. package/src/cli/commands/lead-report.ts +241 -0
  188. package/src/cli/commands/learn.ts +321 -0
  189. package/src/cli/commands/pm-report.ts +413 -0
  190. package/src/cli/commands/prune.ts +75 -0
  191. package/src/cli/commands/qa-report.ts +278 -0
  192. package/src/cli/commands/reindex.ts +104 -0
  193. package/src/cli/commands/rollback.ts +70 -0
  194. package/src/cli/commands/search.ts +103 -0
  195. package/src/cli/commands/server.ts +91 -0
  196. package/src/cli/commands/share.ts +33 -0
  197. package/src/cli/commands/stats.ts +79 -0
  198. package/src/cli/commands/status.ts +176 -0
  199. package/src/cli/commands/sync.ts +96 -0
  200. package/src/cli/commands/team.ts +118 -0
  201. package/src/cli/commands/tour.ts +157 -0
  202. package/src/cli/commands/visualize-live.ts +162 -0
  203. package/src/cli/commands/workspace.ts +117 -0
  204. package/src/cli/index.ts +424 -0
  205. package/src/cli/learn-progress.ts +87 -0
  206. package/src/community/detection-bridge.ts +344 -0
  207. package/src/community/leiden.ts +462 -0
  208. package/src/community/raptor.ts +210 -0
  209. package/src/community/scheduler.ts +74 -0
  210. package/src/community/summarize.ts +115 -0
  211. package/src/decay/archiver.ts +73 -0
  212. package/src/decay/bridge-orphan-cleanup.ts +212 -0
  213. package/src/decay/consolidation-sweep.ts +112 -0
  214. package/src/decay/decay.ts +116 -0
  215. package/src/decay/deep-validator.ts +62 -0
  216. package/src/decay/episodic-promoter.ts +132 -0
  217. package/src/decay/maintenance-scheduler.ts +326 -0
  218. package/src/decay/scheduler.ts +6 -0
  219. package/src/decay/session-sweeper.ts +79 -0
  220. package/src/decay/types.ts +17 -0
  221. package/src/freshness/confidence-decay.ts +122 -0
  222. package/src/freshness/cuckoo-filter.ts +176 -0
  223. package/src/freshness/deep-validation.ts +345 -0
  224. package/src/freshness/dirty-tracker.ts +237 -0
  225. package/src/freshness/file-watcher-layer.ts +119 -0
  226. package/src/freshness/firewall.ts +64 -0
  227. package/src/freshness/git-reconcile-layer.ts +161 -0
  228. package/src/freshness/inverted-index.ts +158 -0
  229. package/src/freshness/stale-read-layer.ts +222 -0
  230. package/src/graph/audit.ts +69 -0
  231. package/src/graph/bridge-db.ts +141 -0
  232. package/src/graph/communities.ts +195 -0
  233. package/src/graph/db-interface.ts +259 -0
  234. package/src/graph/edges.ts +163 -0
  235. package/src/graph/entities.ts +327 -0
  236. package/src/graph/episodic-db.ts +113 -0
  237. package/src/graph/flags.ts +31 -0
  238. package/src/graph/meta-db.ts +200 -0
  239. package/src/graph/semantic-db.ts +101 -0
  240. package/src/graph/session-resume.ts +56 -0
  241. package/src/graph/snapshots.ts +342 -0
  242. package/src/graph/staging.ts +151 -0
  243. package/src/graph/types.ts +128 -0
  244. package/src/hooks/adapters/claude-code.ts +21 -0
  245. package/src/hooks/adapters/cline.ts +43 -0
  246. package/src/hooks/adapters/cursor.ts +65 -0
  247. package/src/hooks/adapters/generic.ts +12 -0
  248. package/src/hooks/agent-detect.ts +34 -0
  249. package/src/hooks/claude-md-directives.ts +32 -0
  250. package/src/hooks/event-router.ts +182 -0
  251. package/src/hooks/extractors/pattern-detector.ts +111 -0
  252. package/src/hooks/handlers/post-compact.ts +30 -0
  253. package/src/hooks/handlers/post-tool-use.ts +403 -0
  254. package/src/hooks/handlers/pre-compact.ts +100 -0
  255. package/src/hooks/handlers/session-end.ts +47 -0
  256. package/src/hooks/handlers/session-start.ts +154 -0
  257. package/src/hooks/handlers/stop.ts +128 -0
  258. package/src/hooks/handlers/user-prompt-submit.ts +68 -0
  259. package/src/hooks/plugin-branch-switch.ts +68 -0
  260. package/src/hooks/plugin-common.ts +47 -0
  261. package/src/hooks/plugin-post-compact.ts +28 -0
  262. package/src/hooks/plugin-post-tool-use.ts +38 -0
  263. package/src/hooks/plugin-pre-compact.ts +37 -0
  264. package/src/hooks/plugin-session-end.ts +37 -0
  265. package/src/hooks/plugin-session-start.ts +75 -0
  266. package/src/hooks/plugin-stop.ts +61 -0
  267. package/src/hooks/plugin-user-prompt-submit.ts +47 -0
  268. package/src/hooks/types.ts +43 -0
  269. package/src/knowledge/discovery.ts +238 -0
  270. package/src/knowledge/external-refs.ts +98 -0
  271. package/src/knowledge/freshness.ts +221 -0
  272. package/src/knowledge/ingest.ts +330 -0
  273. package/src/knowledge/markdown-export.ts +229 -0
  274. package/src/knowledge/markdown-import.ts +359 -0
  275. package/src/knowledge/patterns.ts +74 -0
  276. package/src/knowledge/templates.ts +307 -0
  277. package/src/llm/ai-sdk-adapter.ts +46 -0
  278. package/src/llm/config.ts +88 -0
  279. package/src/llm/cost-tracker.ts +110 -0
  280. package/src/llm/prompts/extraction.ts +55 -0
  281. package/src/llm/prompts/summarization.ts +36 -0
  282. package/src/llm/prompts/validation.ts +37 -0
  283. package/src/llm/provider-registry.ts +68 -0
  284. package/src/llm/reliability.ts +179 -0
  285. package/src/llm/schemas.ts +52 -0
  286. package/src/mcp/freshness-annotator.ts +69 -0
  287. package/src/mcp/server.ts +949 -0
  288. package/src/mcp/tools/sia-ast-query.ts +225 -0
  289. package/src/mcp/tools/sia-at-time.ts +151 -0
  290. package/src/mcp/tools/sia-backlinks.ts +87 -0
  291. package/src/mcp/tools/sia-batch-execute.ts +169 -0
  292. package/src/mcp/tools/sia-by-file.ts +89 -0
  293. package/src/mcp/tools/sia-community.ts +113 -0
  294. package/src/mcp/tools/sia-doctor.ts +73 -0
  295. package/src/mcp/tools/sia-execute-file.ts +122 -0
  296. package/src/mcp/tools/sia-execute.ts +104 -0
  297. package/src/mcp/tools/sia-expand.ts +158 -0
  298. package/src/mcp/tools/sia-fetch-and-index.ts +241 -0
  299. package/src/mcp/tools/sia-flag.ts +65 -0
  300. package/src/mcp/tools/sia-index.ts +111 -0
  301. package/src/mcp/tools/sia-note.ts +134 -0
  302. package/src/mcp/tools/sia-search.ts +105 -0
  303. package/src/mcp/tools/sia-stats.ts +63 -0
  304. package/src/mcp/tools/sia-sync-status.ts +44 -0
  305. package/src/mcp/tools/sia-upgrade.ts +247 -0
  306. package/src/mcp/truncate.ts +231 -0
  307. package/src/native/bridge.ts +167 -0
  308. package/src/native/fallback-ast-diff.ts +144 -0
  309. package/src/native/fallback-graph.ts +325 -0
  310. package/src/ontology/constraints.ts +56 -0
  311. package/src/ontology/errors.ts +8 -0
  312. package/src/ontology/middleware.ts +266 -0
  313. package/src/retrieval/bm25-search.ts +151 -0
  314. package/src/retrieval/context-assembly.ts +76 -0
  315. package/src/retrieval/graph-traversal.ts +168 -0
  316. package/src/retrieval/pagerank.ts +40 -0
  317. package/src/retrieval/query-classifier.ts +106 -0
  318. package/src/retrieval/reranker.ts +156 -0
  319. package/src/retrieval/search.ts +236 -0
  320. package/src/retrieval/throttle.ts +102 -0
  321. package/src/retrieval/vector-search.ts +203 -0
  322. package/src/retrieval/workspace-search.ts +130 -0
  323. package/src/sandbox/context-mode.ts +285 -0
  324. package/src/sandbox/credential-pass.ts +55 -0
  325. package/src/sandbox/executor.ts +235 -0
  326. package/src/security/pattern-detector.ts +127 -0
  327. package/src/security/rule-of-two.ts +50 -0
  328. package/src/security/sanitize.ts +46 -0
  329. package/src/security/semantic-consistency.ts +93 -0
  330. package/src/security/staging-promoter.ts +154 -0
  331. package/src/shared/config.ts +302 -0
  332. package/src/shared/diagnostics.ts +210 -0
  333. package/src/shared/errors.ts +48 -0
  334. package/src/shared/git-utils.ts +143 -0
  335. package/src/shared/llm-client.ts +120 -0
  336. package/src/shared/logger.ts +99 -0
  337. package/src/shared/types.ts +79 -0
  338. package/src/sync/client.ts +43 -0
  339. package/src/sync/conflict.ts +106 -0
  340. package/src/sync/dedup.ts +183 -0
  341. package/src/sync/hlc.ts +117 -0
  342. package/src/sync/keychain.ts +144 -0
  343. package/src/sync/pull.ts +232 -0
  344. package/src/sync/push.ts +131 -0
  345. package/src/types/chokidar.d.ts +23 -0
  346. package/src/visualization/graph-renderer.ts +312 -0
  347. package/src/visualization/subgraph-extract.ts +208 -0
  348. package/src/visualization/views/community-clusters.ts +246 -0
  349. package/src/visualization/views/dependency-map.ts +189 -0
  350. package/src/visualization/views/graph-explorer.ts +364 -0
  351. package/src/visualization/views/timeline.ts +247 -0
  352. package/src/workspace/api-contracts.ts +226 -0
  353. package/src/workspace/cross-repo.ts +61 -0
  354. package/src/workspace/detector.ts +190 -0
  355. package/src/workspace/manifest.ts +141 -0
@@ -0,0 +1,203 @@
1
+ // Module: vector-search — ONNX embedder + cosine similarity (VSS fallback)
2
+
3
+ import type { Embedder } from "@/capture/embedder";
4
+ import type { SiaDb } from "@/graph/db-interface";
5
+
6
/** One hit produced by a vector search: which entity matched and how strongly. */
export interface VectorResult {
	/** ID of the matched entity. */
	entityId: string;
	/** Similarity score; results are ranked with higher scores first. */
	score: number;
}

/** Optional knobs accepted by vectorSearch. */
export interface VectorSearchOpts {
	/** Maximum number of results to return (vectorSearch falls back to 15). */
	limit?: number;
	/** When true, entities whose trust tier is 4 are excluded. */
	paranoid?: boolean;
	/** When set, only entities with exactly this package path are returned. */
	packagePath?: string;
}

/** Results scoring below this similarity are dropped. */
const SIMILARITY_THRESHOLD = 0.3;

/** Upper bound on the number of candidate rows scanned by the brute-force fallback. */
const BRUTE_FORCE_LIMIT = 1000;
24
+
25
/**
 * Compute the cosine similarity of two embedding vectors.
 *
 * Degenerate inputs yield 0 rather than NaN, so callers can compare the
 * result against a threshold and sort by it safely:
 *   - vectors of different length (e.g. embeddings from different models),
 *   - empty vectors,
 *   - vectors with zero magnitude,
 *   - vectors containing non-finite components.
 *
 * @param a First vector.
 * @param b Second vector; must have the same length as `a` to be comparable.
 * @returns A value in [-1, 1] (up to rounding), or 0 for degenerate inputs.
 */
function cosineSim(a: Float32Array, b: Float32Array): number {
	const length = a.length;
	// Comparing vectors of different dimensionality is meaningless; reading
	// past the end of the shorter one would also inject NaN into the sums.
	if (length === 0 || b.length !== length) return 0;

	let dot = 0;
	let normA = 0;
	let normB = 0;

	for (let i = 0; i < length; i++) {
		const x = a[i];
		const y = b[i];
		dot += x * y;
		normA += x * x;
		normB += y * y;
	}

	const denom = Math.sqrt(normA) * Math.sqrt(normB);
	if (denom === 0 || !Number.isFinite(denom)) return 0;

	const similarity = dot / denom;
	// NaN/Infinity components in either vector end up here.
	return Number.isFinite(similarity) ? similarity : 0;
}
46
+
47
/**
 * Find entities whose stored embedding is similar to a text query.
 *
 * The query is embedded with the supplied embedder; if no embedding comes
 * back the search yields an empty list. The sqlite-vss path is attempted
 * first, and only when it reports itself unavailable (null) does the
 * brute-force cosine scan run.
 *
 * @param db       Database handle to search.
 * @param query    Text to embed and search for.
 * @param embedder Embedder used to turn `query` into a vector.
 * @param opts     Optional limit (default 15), paranoid and packagePath filters.
 * @returns Matching entities with their scores.
 */
export async function vectorSearch(
	db: SiaDb,
	query: string,
	embedder: Embedder,
	opts?: VectorSearchOpts,
): Promise<VectorResult[]> {
	const maxResults = opts?.limit ?? 15;

	const queryVector = await embedder.embed(query);
	if (!queryVector) {
		return [];
	}

	// null means "VSS could not be used", which is distinct from "no matches".
	const fromVss = tryVssSearch(db, queryVector, maxResults, opts);
	return fromVss === null
		? bruteForceCosineSearch(db, queryVector, maxResults, opts)
		: fromVss;
}
77
+
78
/**
 * Attempt a search through the sqlite-vss `vss_search` table function on the
 * `graph_nodes_vss` virtual table.
 *
 * Twice `limit` candidates are requested so that some can be discarded by the
 * score threshold and by the validity / paranoid / packagePath filters.
 * Each candidate's distance is turned into a score with `1 / (1 + distance)`.
 *
 * @param db             Database handle; must expose a raw SQLite connection.
 * @param queryEmbedding Embedded query vector.
 * @param limit          Maximum number of results to return.
 * @param opts           Optional paranoid / packagePath filters.
 * @returns Results sorted by score descending, or null when VSS cannot be
 *          used (no raw handle, a statement failed, or VSS produced no rows),
 *          signalling the caller to fall back to the brute-force scan.
 */
function tryVssSearch(
	db: SiaDb,
	queryEmbedding: Float32Array,
	limit: number,
	opts?: VectorSearchOpts,
): VectorResult[] | null {
	const raw = db.rawSqlite();
	if (!raw) return null;

	try {
		// vss_search takes the query vector as a JSON array.
		const embeddingJson = JSON.stringify(Array.from(queryEmbedding));

		const vssRows = raw
			.prepare(
				`SELECT rowid, distance
				 FROM vss_search(graph_nodes_vss, ?, ?)`,
			)
			.all(embeddingJson, limit * 2) as Array<{ rowid: number; distance: number }>;

		if (!vssRows || vssRows.length === 0) return null;

		// Prepared once and reused for every candidate instead of being
		// re-compiled on each loop iteration.
		const entityLookup = raw.prepare(
			`SELECT id, trust_tier, package_path
			 FROM graph_nodes
			 WHERE rowid = ?
			   AND t_valid_until IS NULL
			   AND archived_at IS NULL`,
		);

		const results: VectorResult[] = [];
		for (const vssRow of vssRows) {
			// Convert distance to a similarity score (the original notes VSS returns L2 distance).
			const score = 1 / (1 + vssRow.distance);
			if (score < SIMILARITY_THRESHOLD) continue;

			// Map the VSS rowid back to a live (not invalidated, not archived) entity.
			const entity = entityLookup.get(vssRow.rowid) as
				| { id: string; trust_tier: number; package_path: string | null }
				| undefined;

			if (!entity) continue;
			if (opts?.paranoid && entity.trust_tier === 4) continue;
			if (opts?.packagePath && entity.package_path !== opts.packagePath) continue;

			results.push({ entityId: entity.id, score });
		}

		results.sort((a, b) => b.score - a.score);
		return results.slice(0, limit);
	} catch {
		// VSS extension not loaded or a table doesn't exist — let the caller fall back.
		return null;
	}
}
139
+
140
/**
 * Brute-force cosine similarity scan.
 *
 * Loads up to BRUTE_FORCE_LIMIT live entities (not invalidated, not archived)
 * that have a non-NULL embedding, scores each against the query embedding and
 * returns the best `limit` matches at or above SIMILARITY_THRESHOLD.
 *
 * Rows whose embedding cannot be compared are skipped rather than allowed to
 * abort the search or pollute the ranking:
 *   - values that are neither a Uint8Array (Buffer included) nor an ArrayBuffer,
 *   - blobs whose byte length is zero or not a whole number of 32-bit floats,
 *   - embeddings whose dimension differs from the query embedding,
 *   - scores that are not a number.
 *
 * @param db             Database handle used to run the candidate query.
 * @param queryEmbedding Embedded query vector.
 * @param limit          Maximum number of results to return.
 * @param opts           Optional paranoid (exclude trust tier 4) and packagePath filters.
 * @returns Results sorted by score descending.
 */
async function bruteForceCosineSearch(
	db: SiaDb,
	queryEmbedding: Float32Array,
	limit: number,
	opts?: VectorSearchOpts,
): Promise<VectorResult[]> {
	const clauses: string[] = [
		"embedding IS NOT NULL",
		"t_valid_until IS NULL",
		"archived_at IS NULL",
	];
	const params: unknown[] = [];

	if (opts?.paranoid) {
		clauses.push("trust_tier != 4");
	}
	if (opts?.packagePath) {
		clauses.push("package_path = ?");
		params.push(opts.packagePath);
	}

	// The LIMIT placeholder is last in the statement, so its value goes last.
	params.push(BRUTE_FORCE_LIMIT);

	const sql = `SELECT id, embedding FROM graph_nodes WHERE ${clauses.join(" AND ")} LIMIT ?`;
	const { rows } = await db.execute(sql, params);

	const bytesPerFloat = Float32Array.BYTES_PER_ELEMENT;
	const results: VectorResult[] = [];

	for (const row of rows) {
		const blob: unknown = row.embedding;

		// Normalise the stored value to a byte view. Buffer is a Uint8Array
		// subclass, so a single instanceof check covers both.
		let bytes: Uint8Array;
		if (blob instanceof Uint8Array) {
			bytes = blob;
		} else if (blob instanceof ArrayBuffer) {
			bytes = new Uint8Array(blob);
		} else {
			// NULL, string, or any other unexpected type — skip.
			continue;
		}

		// A blob that is empty or not a whole number of floats is corrupt.
		if (bytes.byteLength === 0 || bytes.byteLength % bytesPerFloat !== 0) continue;

		// The Float32Array constructor throws a RangeError when the byte offset
		// is not a multiple of 4. Views handed out by a driver are not
		// guaranteed to be aligned, so copy the bytes in that case.
		// `new Uint8Array(bytes)` always copies; Buffer#slice would not.
		const storedEmbedding =
			bytes.byteOffset % bytesPerFloat === 0
				? new Float32Array(bytes.buffer, bytes.byteOffset, bytes.byteLength / bytesPerFloat)
				: new Float32Array(new Uint8Array(bytes).buffer);

		// Embeddings of a different dimension cannot be compared with the query.
		if (storedEmbedding.length !== queryEmbedding.length) continue;

		const score = cosineSim(queryEmbedding, storedEmbedding);
		// Written as a negated >= so that a NaN score is rejected as well.
		if (!(score >= SIMILARITY_THRESHOLD)) continue;

		results.push({ entityId: row.id as string, score });
	}

	results.sort((a, b) => b.score - a.score);
	return results.slice(0, limit);
}
@@ -0,0 +1,130 @@
1
+ // Module: workspace-search — Workspace-scoped search via ATTACH
2
+
3
+ import type { SiaDb } from "@/graph/db-interface";
4
+ import type { SiaSearchResult } from "@/mcp/tools/sia-search";
5
+ import { SIA_HOME } from "@/shared/config";
6
+ import { getPeerRepos } from "@/workspace/cross-repo";
7
+
8
/** Inputs accepted by workspaceSearch. */
export interface WorkspaceSearchOpts {
	/** Primary repo database; peer databases are ATTACHed to this connection. */
	primaryDb: SiaDb;
	/** Meta database passed to getPeerRepos to resolve the workspace's peers. */
	metaDb: SiaDb;
	/** Bridge database handle. NOTE(review): workspaceSearch does not read it — confirm it is still needed. */
	bridgeDb: SiaDb;
	/** Workspace whose peer repos should be searched. */
	workspaceId: string;
	/** ID of the primary repo, passed to getPeerRepos. */
	primaryRepoId: string;
	/** Search text. NOTE(review): workspaceSearch does not read it; rows are ranked by importance only — confirm intent. */
	query: string;
	/** Sia home directory override; workspaceSearch falls back to SIA_HOME. */
	siaHome?: string;
	/** Maximum number of entities returned (workspaceSearch falls back to 15). */
	limit?: number;
	/** When true, rows with trust tier 4 are excluded. */
	paranoid?: boolean;
	/** When non-empty, only rows whose `type` is in this list are returned. */
	node_types?: string[];
	/** When set, only rows with exactly this package path are returned. */
	package_path?: string;
}

/** Outcome of a workspace search. */
export interface WorkspaceSearchResult {
	/** Merged entities from the primary repo and its peers. */
	entities: SiaSearchResult[];
	/** Names (or repo IDs) of peers that were not searched. */
	missingRepos: string[];
}

/** Max peer repos to ATTACH (SQLite limit of 10 - main - bridge = 8). */
const MAX_PEERS = 8;
31
+
32
/**
 * Perform a workspace-scoped search across the primary repo database and its
 * peer repo databases.
 *
 * Peers are processed one at a time (ATTACH, query, DETACH, next) on the
 * primary connection. Every database contributes at most `limit` rows ordered
 * by importance; the merged rows are re-sorted by importance and capped at
 * `limit` again.
 *
 * Peers that cannot be attached or queried are reported in `missingRepos`
 * instead of raising. Peers beyond MAX_PEERS are not queried and are reported
 * in `missingRepos` as well. DETACH is best-effort cleanup: a DETACH failure
 * never marks a peer as missing once its rows have been merged.
 *
 * No WAL pragma is set on attached databases.
 *
 * NOTE(review): `opts.query` and `opts.bridgeDb` are not read here; rows are
 * selected purely by the filters and ranked by importance.
 *
 * @param opts Databases, workspace identity and filters; see WorkspaceSearchOpts.
 * @returns The merged entities plus the peers that were not searched.
 */
export async function workspaceSearch(opts: WorkspaceSearchOpts): Promise<WorkspaceSearchResult> {
	const siaHome = opts.siaHome ?? SIA_HOME;
	const limit = opts.limit ?? 15;
	const missingRepos: string[] = [];

	const allPeers = await getPeerRepos(opts.metaDb, opts.workspaceId, opts.primaryRepoId, siaHome);

	// Only the first MAX_PEERS peers are queried; the rest are reported as not searched.
	const peers = allPeers.slice(0, MAX_PEERS);
	for (const skipped of allPeers.slice(MAX_PEERS)) {
		missingRepos.push(skipped.name ?? skipped.repoId);
	}

	// Build the shared WHERE clause and its parameters together so that the
	// placeholder order and the parameter order cannot drift apart.
	const clauses: string[] = ["t_valid_until IS NULL", "archived_at IS NULL"];
	const filterParams: unknown[] = [];
	if (opts.paranoid) clauses.push("trust_tier != 4");
	if (opts.node_types && opts.node_types.length > 0) {
		const placeholders = opts.node_types.map(() => "?").join(", ");
		clauses.push(`type IN (${placeholders})`);
		filterParams.push(...opts.node_types);
	}
	if (opts.package_path) {
		clauses.push("package_path = ?");
		filterParams.push(opts.package_path);
	}
	const whereClause = clauses.join(" AND ");
	const queryParams = [...filterParams, limit];

	// Query the primary repo.
	const allEntities: SiaSearchResult[] = [];
	const primarySql = `SELECT * FROM graph_nodes WHERE ${whereClause} ORDER BY importance DESC LIMIT ?`;
	const primaryResult = await opts.primaryDb.execute(primarySql, queryParams);
	for (const row of primaryResult.rows) {
		allEntities.push(mapRow(row, null));
	}

	// Query each peer through a temporary ATTACH under the fixed alias peer_db.
	const peerSql = `SELECT * FROM peer_db.graph_nodes WHERE ${whereClause} ORDER BY importance DESC LIMIT ?`;
	for (const peer of peers) {
		try {
			await opts.primaryDb.execute("ATTACH DATABASE ? AS peer_db", [peer.graphDbPath]);
			const peerResult = await opts.primaryDb.execute(peerSql, queryParams);
			for (const row of peerResult.rows) {
				allEntities.push(mapRow(row, peer.name));
			}
		} catch {
			// ATTACH or the query failed — report the peer rather than failing the search.
			missingRepos.push(peer.name ?? peer.repoId);
		} finally {
			// Always try to release the alias so the next peer can reuse it.
			// This also clears an alias left behind by an earlier failure.
			try {
				await opts.primaryDb.execute("DETACH DATABASE peer_db", []);
			} catch {
				/* already detached or never attached */
			}
		}
	}

	// Sort all by importance DESC, take top `limit`.
	allEntities.sort((a, b) => b.importance - a.importance);
	const capped = allEntities.slice(0, limit);

	return { entities: capped, missingRepos };
}
113
+
114
/**
 * Convert a raw `graph_nodes` row into a search result.
 *
 * Missing text columns fall back to "" (summary, content) or "[]" (tags,
 * file_paths); missing conflict_group_id and t_valid_from become null.
 * The remaining columns are passed through as-is.
 *
 * @param row            Row as returned by the database layer.
 * @param sourceRepoName Name of the peer repo the row came from, or null for the primary repo.
 */
function mapRow(row: Record<string, unknown>, sourceRepoName: string | null): SiaSearchResult {
	const {
		id,
		type,
		name,
		summary,
		content,
		trust_tier,
		confidence,
		importance,
		tags,
		file_paths,
		conflict_group_id,
		t_valid_from,
	} = row;

	return {
		id: id as string,
		type: type as string,
		name: name as string,
		summary: (summary as string) ?? "",
		content: (content as string) ?? "",
		trust_tier: trust_tier as number,
		confidence: confidence as number,
		importance: importance as number,
		tags: (tags as string) ?? "[]",
		file_paths: (file_paths as string) ?? "[]",
		conflict_group_id: (conflict_group_id as string | null) ?? null,
		t_valid_from: (t_valid_from as number | null) ?? null,
		source_repo_name: sourceRepoName,
	};
}
@@ -0,0 +1,285 @@
1
+ // Module: context-mode — Large output chunking with intent-based retrieval using strategy pattern
2
+
3
+ import { randomUUID } from "node:crypto";
4
+ import type { Embedder } from "@/capture/embedder";
5
+ import type { SiaDb } from "@/graph/db-interface";
6
+
7
+ // ---------------------------------------------------------------------------
8
+ // Public interfaces
9
+ // ---------------------------------------------------------------------------
10
+
11
/**
 * A piece of content produced by a {@link ChunkStrategy} before it has been
 * embedded or stored.
 */
export interface RawChunk {
	/** The chunk's text. */
	text: string;
	/**
	 * Optional strategy-specific details. The chunkers in this module set
	 * `heading` (heading-based splitting) or `startLine` / `endLine`
	 * (line-based splitting).
	 */
	metadata?: Record<string, unknown>;
}
15
+
16
/**
 * A chunk after it has been embedded and written to the graph database.
 */
export interface StoredChunk {
	/** Identifier of this stored-chunk record (distinct from `nodeId`). */
	id: string;
	/** ContentChunk entity ID in graph. */
	nodeId: string;
	/** The chunk's text, identical to the originating `RawChunk.text`. */
	text: string;
	/** Embedding vector of `text`; empty when the embedder produced no vector. */
	embedding: number[];
}
22
+
23
/**
 * Pluggable algorithm that decides how large content is split into chunks.
 */
export interface ChunkStrategy {
	/** Human-readable identifier of the strategy. */
	name: string;

	/**
	 * Split `content` into chunks.
	 *
	 * @param content - The full text to split.
	 * @returns The chunks, in document order.
	 */
	chunk(content: string): RawChunk[];

	/**
	 * Optional hook invoked once per chunk after it has been stored, giving the
	 * strategy a chance to write additional data for that chunk.
	 *
	 * @param chunk - The chunk that was just stored.
	 * @param db - The database the chunk was written to.
	 */
	extraEdges?(chunk: StoredChunk, db: SiaDb): Promise<void>;
}
28
+
29
/**
 * Outcome of {@link applyContextMode}.
 */
export interface ContextModeResult {
	/** `false` when the content was returned untouched, `true` when it was chunked and indexed. */
	applied: boolean;
	/** The text to hand back to the caller: the raw content, or the selected chunks. */
	chunks: string[];
	/** Number of chunks written to the graph database (0 when not applied). */
	totalIndexed: number;
	/** Characters saved: input length minus the combined length of the returned chunks. */
	contextSavings: number;
}
35
+
36
+ // ---------------------------------------------------------------------------
37
+ // headingChunker strategy — splits markdown by heading lines outside code blocks
38
+ // ---------------------------------------------------------------------------
39
+
40
/**
 * Splits markdown into one chunk per heading section.
 *
 * A new chunk starts at every ATX heading line (`#` to `######` followed by a
 * space) that is outside a fenced code block. Text before the first heading
 * becomes a chunk whose `heading` metadata is `""`.
 *
 * Fence tracking follows the CommonMark rules that matter for splitting:
 * - a fence is three or more backticks or tildes (leading whitespace allowed);
 * - a fence is closed only by a fence of the same character that is at least
 *   as long as the opener and carries no info string, so a four-backtick
 *   block may contain three-backtick lines without ending early;
 * - a backtick run followed by text containing another backtick is inline
 *   code, not an opening fence.
 *
 * Chunks that contain only whitespace are dropped. If nothing remains, the
 * whole content is returned as a single chunk.
 */
export const headingChunker: ChunkStrategy = {
	name: "headingChunker",

	chunk(content: string): RawChunk[] {
		// `[\s\S]*` rather than `.*` so a trailing "\r" (CRLF input) still matches.
		const fencePattern = /^\s*(`{3,}|~{3,})([\s\S]*)$/;
		const headingPattern = /^#{1,6} /;

		const chunks: RawChunk[] = [];
		// Marker of the fence that is currently open, or null when outside code.
		let openFence: string | null = null;
		let currentHeading = "";
		let currentLines: string[] = [];

		const flush = () => {
			// Skip sections with no visible text (e.g. blank lines before the first heading).
			if (currentLines.some((entry) => entry.trim().length > 0)) {
				chunks.push({
					text: currentLines.join("\n"),
					metadata: { heading: currentHeading },
				});
			}
		};

		for (const line of content.split("\n")) {
			const fence = fencePattern.exec(line);
			if (fence) {
				const marker = fence[1] ?? "";
				const rest = fence[2] ?? "";

				if (openFence === null) {
					// Opening fence; a backtick fence may not have backticks in its info string.
					if (!marker.startsWith("`") || !rest.includes("`")) {
						openFence = marker;
					}
				} else if (
					marker.charAt(0) === openFence.charAt(0) &&
					marker.length >= openFence.length &&
					rest.trim() === ""
				) {
					openFence = null;
				}

				// Fence lines (and fence look-alikes) always belong to the current section.
				currentLines.push(line);
				continue;
			}

			// Only split on headings outside code blocks
			if (openFence === null && headingPattern.test(line)) {
				flush();
				currentHeading = line;
				currentLines = [line];
			} else {
				currentLines.push(line);
			}
		}

		flush();

		// Nothing but whitespace (or empty input): return the content as a single chunk
		if (chunks.length === 0) {
			return [{ text: content, metadata: { heading: "" } }];
		}

		return chunks;
	},
};
87
+
88
+ // ---------------------------------------------------------------------------
89
+ // lineChunker strategy — groups newline-delimited lines into ~512-token (~2048 char) paragraphs
90
+ // ---------------------------------------------------------------------------
91
+
92
/** Maximum chunk length in characters (~512 tokens at roughly 4 chars per token). */
const LINE_CHUNK_SIZE = 2048;

/**
 * Groups consecutive lines into chunks of at most `LINE_CHUNK_SIZE` characters.
 *
 * Lines are accumulated (joined with `\n`) until adding the next line would
 * exceed the limit, at which point the accumulated text is emitted and a new
 * chunk starts. A single line that is itself longer than the limit is
 * hard-split into limit-sized pieces so that no chunk is ever larger than
 * `LINE_CHUNK_SIZE`; without this, one very long line (minified output,
 * truncated single-line JSON) would come back as one unbounded chunk.
 *
 * Each chunk's metadata holds the zero-based `startLine` / `endLine` of the
 * source lines it was built from. Blank lines at the very start of a chunk
 * are not included in its text, and `startLine` points at the first line that
 * is. Empty content yields no chunks.
 */
export const lineChunker: ChunkStrategy = {
	name: "lineChunker",

	chunk(content: string): RawChunk[] {
		const lines = content.split("\n");
		const chunks: RawChunk[] = [];
		let current = "";
		let currentStart = 0;

		// Emit the pending text (if any) as a chunk ending at `endLine`, then reset.
		const flush = (endLine: number) => {
			if (current.length > 0) {
				chunks.push({
					text: current,
					metadata: { startLine: currentStart, endLine },
				});
			}
			current = "";
		};

		for (let i = 0; i < lines.length; i++) {
			const line = lines[i] ?? "";

			if (line.length > LINE_CHUNK_SIZE) {
				// Oversized line: close the pending chunk, then slice the line itself.
				flush(i - 1);
				let offset = 0;
				while (offset < line.length) {
					let end = Math.min(offset + LINE_CHUNK_SIZE, line.length);
					if (end < line.length) {
						// Never cut between the two halves of a surrogate pair.
						const lastUnit = line.charCodeAt(end - 1);
						if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) end -= 1;
					}
					chunks.push({
						text: line.slice(offset, end),
						metadata: { startLine: i, endLine: i },
					});
					offset = end;
				}
				continue;
			}

			if (current.length === 0) {
				// Starting a fresh chunk: remember where its text really begins.
				current = line;
				currentStart = i;
				continue;
			}

			const candidate = `${current}\n${line}`;
			if (candidate.length > LINE_CHUNK_SIZE) {
				flush(i - 1);
				current = line;
				currentStart = i;
			} else {
				current = candidate;
			}
		}

		flush(lines.length - 1);

		return chunks;
	},
};
129
+
130
+ // ---------------------------------------------------------------------------
131
+ // contentTypeChunker strategy — detects content type and delegates to the right chunker
132
+ // ---------------------------------------------------------------------------
133
+
134
/**
 * Picks a chunker based on what the content looks like:
 *
 * 1. Content whose first non-whitespace character is `{` or `[` and that
 *    parses as JSON is pretty-printed (2-space indent) and split by
 *    `lineChunker`.
 * 2. Content containing at least one markdown heading line goes to
 *    `headingChunker`.
 * 3. Everything else — including JSON-looking text that fails to parse —
 *    goes to `lineChunker` unchanged.
 */
export const contentTypeChunker: ChunkStrategy = {
	name: "contentTypeChunker",

	chunk(content: string): RawChunk[] {
		const firstChar = content.trimStart().charAt(0);
		const looksLikeJson = firstChar === "{" || firstChar === "[";

		if (looksLikeJson) {
			try {
				// Re-serialise so the JSON is spread over many short lines.
				const pretty = JSON.stringify(JSON.parse(content), null, 2);
				return lineChunker.chunk(pretty);
			} catch {
				// Not valid JSON (possibly truncated by output cap) — fall through to heading/line detection
			}
		}

		const hasMarkdownHeading = /^#{1,6} /m.test(content);
		return hasMarkdownHeading ? headingChunker.chunk(content) : lineChunker.chunk(content);
	},
};
160
+
161
+ // ---------------------------------------------------------------------------
162
+ // Cosine similarity between two number arrays
163
+ // ---------------------------------------------------------------------------
164
+
165
/**
 * Cosine similarity of two vectors.
 *
 * When the vectors differ in length only the leading elements they have in
 * common are compared. Returns 0 if either vector is empty or if either
 * compared portion has zero magnitude.
 *
 * @param a - First vector.
 * @param b - Second vector.
 * @returns The similarity, or 0 for the degenerate cases above.
 */
function cosineSimilarity(a: number[], b: number[]): number {
	if (!(a.length !== 0 && b.length !== 0)) {
		return 0;
	}

	const shared = a.length < b.length ? a.length : b.length;
	let dotProduct = 0;
	let sumSquaresA = 0;
	let sumSquaresB = 0;

	let idx = 0;
	while (idx < shared) {
		const x = a[idx];
		const y = b[idx];
		dotProduct += x * y;
		sumSquaresA += x * x;
		sumSquaresB += y * y;
		idx += 1;
	}

	const magnitude = Math.sqrt(sumSquaresA) * Math.sqrt(sumSquaresB);
	if (magnitude === 0) {
		return 0;
	}
	return dotProduct / magnitude;
}
179
+
180
+ // ---------------------------------------------------------------------------
181
+ // applyContextMode — main entry point
182
+ // ---------------------------------------------------------------------------
183
+
184
/**
 * Apply context mode to a large content string.
 *
 * If `content.length <= config.threshold` OR `intent` is undefined, the raw
 * content is returned unchanged (`applied: false`) and nothing is written.
 * Otherwise the content is chunked via `strategy`, every chunk is embedded and
 * inserted into `graph_nodes` as a `ContentChunk` entity, the intent is
 * embedded, and the `config.topK` chunks most similar to the intent (cosine
 * similarity) are returned.
 *
 * If the intent cannot be embedded, the first `config.topK` chunks in
 * document order are returned instead. Chunks whose own embedding is
 * unavailable score 0.
 *
 * @param content - The full output to reduce.
 * @param intent - What the caller is looking for; `undefined` disables context mode.
 * @param strategy - Chunking strategy; its optional `extraEdges` hook runs after each chunk is stored.
 * @param db - Graph database the chunks are written to.
 * @param embedder - Embedder used for both the chunks and the intent.
 * @param sessionId - Used to build chunk names (`chunk-<sessionId>-<index>`).
 * @param config - `threshold` (minimum content length to activate) and `topK` (chunks to return).
 * @returns The selected chunks plus indexing statistics. `contextSavings` is
 *   never negative, on either return path.
 */
export async function applyContextMode(
	content: string,
	intent: string | undefined,
	strategy: ChunkStrategy,
	db: SiaDb,
	embedder: Embedder,
	sessionId: string,
	config: { threshold: number; topK: number },
): Promise<ContextModeResult> {
	// Short-circuit: below threshold or no intent
	if (content.length <= config.threshold || intent === undefined) {
		return {
			applied: false,
			chunks: [content],
			totalIndexed: 0,
			contextSavings: 0,
		};
	}

	const nowStr = String(Date.now());

	// 1. Chunk content via strategy
	const rawChunks = strategy.chunk(content);

	// 2. Embed each chunk and store as ContentChunk entity in the graph
	const storedChunks: StoredChunk[] = [];

	for (let i = 0; i < rawChunks.length; i++) {
		const raw = rawChunks[i];
		const nodeId = randomUUID();
		const chunkName = `chunk-${sessionId}-${i}`;

		// Embed the chunk text; an unavailable embedding is kept as an empty vector
		const rawEmb = await embedder.embed(raw.text);
		const embedding: number[] = rawEmb ? Array.from(rawEmb) : [];

		// Store entity in graph DB (table is 'graph_nodes' after v5 migration)
		await db.execute(
			`INSERT INTO graph_nodes (id, type, name, summary, content, trust_tier, confidence, base_confidence, importance, base_importance, access_count, edge_count, tags, file_paths, t_created, t_valid_from, created_by, created_at, last_accessed)
			 VALUES (?, 'ContentChunk', ?, ?, ?, 3, 0.8, 0.8, 0.5, 0.5, 0, 0, '[]', '[]', ?, ?, 'sia-context-mode', ?, ?)`,
			[nodeId, chunkName, raw.text.slice(0, 100), raw.text, nowStr, nowStr, nowStr, nowStr],
		);

		const stored: StoredChunk = {
			id: randomUUID(),
			text: raw.text,
			embedding,
			nodeId,
		};

		storedChunks.push(stored);

		// Call extraEdges if defined
		if (strategy.extraEdges) {
			await strategy.extraEdges(stored, db);
		}
	}

	// 3. Embed the intent
	const intentEmbRaw = await embedder.embed(intent);

	let topChunks: string[];
	if (!intentEmbRaw) {
		// Embedder failed — cannot rank by intent, fall back to document order
		topChunks = storedChunks.slice(0, config.topK).map((s) => s.text);
	} else {
		const intentEmbedding: number[] = Array.from(intentEmbRaw);

		// 4. Cosine similarity between intent embedding and each stored chunk embedding
		const scored = storedChunks.map((chunk) => ({
			chunk,
			score: cosineSimilarity(intentEmbedding, chunk.embedding),
		}));

		// Sort by similarity descending, take top-K
		scored.sort((a, b) => b.score - a.score);
		topChunks = scored.slice(0, config.topK).map((s) => s.chunk.text);
	}

	// Chunk text can be longer than the input (e.g. pretty-printed JSON), so
	// clamp the savings at zero on every path.
	const returnedLength = topChunks.reduce((sum, c) => sum + c.length, 0);

	return {
		applied: true,
		chunks: topChunks,
		totalIndexed: storedChunks.length,
		contextSavings: Math.max(0, content.length - returnedLength),
	};
}
@@ -0,0 +1,55 @@
1
+ // Module: credential-pass — Build allowlisted env for sandbox subprocesses
2
+
3
/** Environment variable names that always pass through (exact, case-sensitive match). */
const EXACT_ALLOWLIST = [
	"PATH",
	"HOME",
	"USER",
	"SHELL",
	"LANG",
	"TERM",
	"KUBECONFIG",
	"GH_TOKEN",
	"GITHUB_TOKEN",
	"NODE_PATH",
	"BUN_INSTALL",
] as const;

/** Name prefixes — any env var whose name starts with one of these passes through. */
const PREFIX_ALLOWLIST = [
	"AWS_",
	"GOOGLE_",
	"GCLOUD_",
	"CLOUDSDK_",
	"DOCKER_",
	"GITHUB_",
] as const;

/** Exported for test assertions. */
export const ENV_ALLOWLIST = { exact: EXACT_ALLOWLIST, prefixes: PREFIX_ALLOWLIST };

/**
 * Decide whether an environment variable may be forwarded to a sandbox.
 *
 * @param key - The variable name to check.
 * @returns `true` if `key` is in the exact list or starts with an allowed prefix.
 */
function isAllowlisted(key: string): boolean {
	const exactNames: readonly string[] = EXACT_ALLOWLIST;
	if (exactNames.includes(key)) {
		return true;
	}

	for (const prefix of PREFIX_ALLOWLIST) {
		if (key.startsWith(prefix)) {
			return true;
		}
	}
	return false;
}
28
+
29
/**
 * Assemble the environment handed to a sandbox subprocess.
 *
 * Starts from an empty object, copies every allowlisted variable that is set
 * in `process.env`, then applies `overrides` on top so caller-supplied values
 * replace inherited ones. Overrides whose name is not allowlisted are dropped
 * and a warning naming the variable (never its value) is printed.
 *
 * @param overrides - Optional extra variables; only allowlisted names are kept.
 * @returns A new env object containing allowlisted variables only.
 */
export function buildSandboxEnv(overrides?: Record<string, string>): Record<string, string> {
	const sandboxEnv: Record<string, string> = {};

	// Inherit from the parent process, skipping unset or non-allowlisted names.
	for (const [name, inherited] of Object.entries(process.env)) {
		if (inherited === undefined || !isAllowlisted(name)) {
			continue;
		}
		sandboxEnv[name] = inherited;
	}

	if (!overrides) {
		return sandboxEnv;
	}

	// Overrides go last so they win over inherited values.
	for (const [name, supplied] of Object.entries(overrides)) {
		if (!isAllowlisted(name)) {
			console.warn(`[sia-sandbox] env override "${name}" dropped: not in allowlist`);
			continue;
		}
		sandboxEnv[name] = supplied;
	}

	return sandboxEnv;
}