mdcontext 0.0.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (337) hide show
  1. package/.changeset/README.md +28 -0
  2. package/.changeset/config.json +11 -0
  3. package/.claude/settings.local.json +25 -0
  4. package/.github/workflows/ci.yml +83 -0
  5. package/.github/workflows/claude-code-review.yml +44 -0
  6. package/.github/workflows/claude.yml +85 -0
  7. package/.github/workflows/release.yml +113 -0
  8. package/.tldrignore +112 -0
  9. package/BACKLOG.md +338 -0
  10. package/CONTRIBUTING.md +186 -0
  11. package/NOTES/NOTES +44 -0
  12. package/README.md +434 -11
  13. package/biome.json +36 -0
  14. package/cspell.config.yaml +14 -0
  15. package/dist/chunk-23UPXDNL.js +3044 -0
  16. package/dist/chunk-2W7MO2DL.js +1366 -0
  17. package/dist/chunk-3NUAZGMA.js +1689 -0
  18. package/dist/chunk-7TOWB2XB.js +366 -0
  19. package/dist/chunk-7XOTOADQ.js +3065 -0
  20. package/dist/chunk-AH2PDM2K.js +3042 -0
  21. package/dist/chunk-BNXWSZ63.js +3742 -0
  22. package/dist/chunk-BTL5DJVU.js +3222 -0
  23. package/dist/chunk-HDHYG7E4.js +104 -0
  24. package/dist/chunk-HLR4KZBP.js +3234 -0
  25. package/dist/chunk-IP3FRFEB.js +1045 -0
  26. package/dist/chunk-KHU56VDO.js +3042 -0
  27. package/dist/chunk-KRYIFLQR.js +88 -0
  28. package/dist/chunk-LBSDNLEM.js +287 -0
  29. package/dist/chunk-MNTQ7HCP.js +2643 -0
  30. package/dist/chunk-MUJELQQ6.js +1387 -0
  31. package/dist/chunk-MXJGMSLV.js +2199 -0
  32. package/dist/chunk-N6QJGC3Z.js +2636 -0
  33. package/dist/chunk-OBELGBPM.js +1713 -0
  34. package/dist/chunk-OT7R5XTA.js +3192 -0
  35. package/dist/chunk-P7X4RA2T.js +106 -0
  36. package/dist/chunk-PIDUQNC2.js +3185 -0
  37. package/dist/chunk-POGCDIH4.js +3187 -0
  38. package/dist/chunk-PSIEOQGZ.js +3043 -0
  39. package/dist/chunk-PVRT3IHA.js +3238 -0
  40. package/dist/chunk-QNN4TT23.js +1430 -0
  41. package/dist/chunk-RE3R45RJ.js +3042 -0
  42. package/dist/chunk-S7E6TFX6.js +803 -0
  43. package/dist/chunk-SG6GLU4U.js +1378 -0
  44. package/dist/chunk-SJCDV2ST.js +274 -0
  45. package/dist/chunk-SYE5XLF3.js +104 -0
  46. package/dist/chunk-T5VLYBZD.js +103 -0
  47. package/dist/chunk-TOQB7VWU.js +3238 -0
  48. package/dist/chunk-VFNMZ4ZQ.js +3228 -0
  49. package/dist/chunk-VVTGZNBT.js +1629 -0
  50. package/dist/chunk-W7Q4RFEV.js +104 -0
  51. package/dist/chunk-XTYYVRLO.js +3190 -0
  52. package/dist/chunk-Y6MDYVJD.js +3063 -0
  53. package/dist/cli/main.d.ts +1 -0
  54. package/dist/cli/main.js +5458 -0
  55. package/dist/index.d.ts +653 -0
  56. package/dist/index.js +79 -0
  57. package/dist/mcp/server.d.ts +1 -0
  58. package/dist/mcp/server.js +472 -0
  59. package/dist/schema-BAWSG7KY.js +22 -0
  60. package/dist/schema-E3QUPL26.js +20 -0
  61. package/dist/schema-EHL7WUT6.js +20 -0
  62. package/docs/019-USAGE.md +625 -0
  63. package/docs/020-current-implementation.md +364 -0
  64. package/docs/021-DOGFOODING-FINDINGS.md +175 -0
  65. package/docs/BACKLOG.md +80 -0
  66. package/docs/CONFIG.md +1123 -0
  67. package/docs/DESIGN.md +439 -0
  68. package/docs/ERRORS.md +383 -0
  69. package/docs/PROJECT.md +88 -0
  70. package/docs/ROADMAP.md +407 -0
  71. package/docs/summarization.md +320 -0
  72. package/docs/test-links.md +9 -0
  73. package/justfile +40 -0
  74. package/package.json +74 -9
  75. package/pnpm-workspace.yaml +5 -0
  76. package/research/INDEX.md +315 -0
  77. package/research/code-review/README.md +90 -0
  78. package/research/code-review/cli-error-handling-review.md +979 -0
  79. package/research/code-review/code-review-validation-report.md +464 -0
  80. package/research/code-review/main-ts-review.md +1128 -0
  81. package/research/config-analysis/01-current-implementation.md +470 -0
  82. package/research/config-analysis/02-strategy-recommendation.md +428 -0
  83. package/research/config-analysis/03-task-candidates.md +715 -0
  84. package/research/config-analysis/033-research-configuration-management.md +828 -0
  85. package/research/config-analysis/034-research-effect-cli-config.md +1504 -0
  86. package/research/config-analysis/04-consolidated-task-candidates.md +277 -0
  87. package/research/config-docs/SUMMARY.md +357 -0
  88. package/research/config-docs/TEST-RESULTS.md +776 -0
  89. package/research/config-docs/TODO.md +542 -0
  90. package/research/config-docs/analysis.md +744 -0
  91. package/research/config-docs/fix-validation.md +502 -0
  92. package/research/config-docs/help-audit.md +264 -0
  93. package/research/config-docs/help-system-analysis.md +890 -0
  94. package/research/dogfood/consolidated-tool-evaluation.md +373 -0
  95. package/research/dogfood/strategy-a/a-synthesis.md +184 -0
  96. package/research/dogfood/strategy-a/a1-docs.md +226 -0
  97. package/research/dogfood/strategy-a/a2-amorphic.md +156 -0
  98. package/research/dogfood/strategy-a/a3-llm.md +164 -0
  99. package/research/dogfood/strategy-b/b-synthesis.md +228 -0
  100. package/research/dogfood/strategy-b/b1-architecture.md +207 -0
  101. package/research/dogfood/strategy-b/b2-gaps.md +258 -0
  102. package/research/dogfood/strategy-b/b3-workflows.md +250 -0
  103. package/research/dogfood/strategy-c/c-synthesis.md +451 -0
  104. package/research/dogfood/strategy-c/c1-explorer.md +192 -0
  105. package/research/dogfood/strategy-c/c2-diver-memory.md +145 -0
  106. package/research/dogfood/strategy-c/c3-diver-control.md +148 -0
  107. package/research/dogfood/strategy-c/c4-diver-failure.md +151 -0
  108. package/research/dogfood/strategy-c/c5-diver-execution.md +221 -0
  109. package/research/dogfood/strategy-c/c6-diver-org.md +221 -0
  110. package/research/effect-cli-error-handling.md +845 -0
  111. package/research/effect-errors-as-values.md +943 -0
  112. package/research/errors-task-analysis/00-consolidated-tasks.md +207 -0
  113. package/research/errors-task-analysis/cli-commands-analysis.md +909 -0
  114. package/research/errors-task-analysis/embeddings-analysis.md +709 -0
  115. package/research/errors-task-analysis/index-search-analysis.md +812 -0
  116. package/research/frontmatter/COMMENTS-ARE-SKIPPED.md +149 -0
  117. package/research/frontmatter/LLM-CODE-NAVIGATION.md +276 -0
  118. package/research/issue-review.md +603 -0
  119. package/research/llm-summarization/agent-cli-tools-2026.md +1082 -0
  120. package/research/llm-summarization/alternative-providers-2026.md +1428 -0
  121. package/research/llm-summarization/anthropic-2026.md +367 -0
  122. package/research/llm-summarization/claude-cli-integration.md +1706 -0
  123. package/research/llm-summarization/cli-integration-patterns.md +3155 -0
  124. package/research/llm-summarization/openai-2026.md +473 -0
  125. package/research/llm-summarization/openai-compatible-providers-2026.md +1022 -0
  126. package/research/llm-summarization/opencode-cli-integration.md +1552 -0
  127. package/research/llm-summarization/prompt-engineering-2026.md +1426 -0
  128. package/research/llm-summarization/prototype-results.md +56 -0
  129. package/research/llm-summarization/provider-switching-patterns-2026.md +2153 -0
  130. package/research/llm-summarization/typescript-llm-libraries-2026.md +2436 -0
  131. package/research/mdcontext-error-analysis.md +521 -0
  132. package/research/mdcontext-pudding/00-EXECUTIVE-SUMMARY.md +282 -0
  133. package/research/mdcontext-pudding/01-index-embed.md +956 -0
  134. package/research/mdcontext-pudding/02-search-COMMANDS.md +142 -0
  135. package/research/mdcontext-pudding/02-search-SUMMARY.md +146 -0
  136. package/research/mdcontext-pudding/02-search.md +970 -0
  137. package/research/mdcontext-pudding/03-context.md +779 -0
  138. package/research/mdcontext-pudding/04-navigation-and-analytics.md +803 -0
  139. package/research/mdcontext-pudding/04-tree.md +704 -0
  140. package/research/mdcontext-pudding/05-config.md +1038 -0
  141. package/research/mdcontext-pudding/06-links-summary.txt +87 -0
  142. package/research/mdcontext-pudding/06-links.md +679 -0
  143. package/research/mdcontext-pudding/07-stats.md +693 -0
  144. package/research/mdcontext-pudding/BUG-FIX-PLAN.md +388 -0
  145. package/research/mdcontext-pudding/P0-BUG-VALIDATION.md +167 -0
  146. package/research/mdcontext-pudding/README.md +168 -0
  147. package/research/mdcontext-pudding/TESTING-SUMMARY.md +128 -0
  148. package/research/npm_publish/011-npm-workflow-research-agent2.md +792 -0
  149. package/research/npm_publish/012-npm-workflow-research-agent1.md +530 -0
  150. package/research/npm_publish/013-npm-workflow-research-agent3.md +722 -0
  151. package/research/npm_publish/014-npm-workflow-synthesis.md +556 -0
  152. package/research/npm_publish/031-npm-workflow-task-analysis.md +134 -0
  153. package/research/research-quality-review.md +834 -0
  154. package/research/semantic-search/002-research-embedding-models.md +490 -0
  155. package/research/semantic-search/003-research-rag-alternatives.md +523 -0
  156. package/research/semantic-search/004-research-vector-search.md +841 -0
  157. package/research/semantic-search/032-research-semantic-search.md +427 -0
  158. package/research/semantic-search/embedding-text-analysis.md +156 -0
  159. package/research/semantic-search/multi-word-failure-reproduction.md +171 -0
  160. package/research/semantic-search/query-processing-analysis.md +207 -0
  161. package/research/semantic-search/root-cause-and-solution.md +114 -0
  162. package/research/semantic-search/threshold-validation-report.md +69 -0
  163. package/research/semantic-search/vector-search-analysis.md +63 -0
  164. package/research/task-management-2026/00-synthesis-recommendations.md +295 -0
  165. package/research/task-management-2026/01-ai-workflow-tools.md +416 -0
  166. package/research/task-management-2026/02-agent-framework-patterns.md +476 -0
  167. package/research/task-management-2026/03-lightweight-file-based.md +567 -0
  168. package/research/task-management-2026/04-established-tools-ai-features.md +541 -0
  169. package/research/task-management-2026/linear/01-core-features-workflow.md +771 -0
  170. package/research/task-management-2026/linear/02-api-integrations.md +930 -0
  171. package/research/task-management-2026/linear/03-ai-features.md +368 -0
  172. package/research/task-management-2026/linear/04-pricing-setup.md +205 -0
  173. package/research/task-management-2026/linear/05-usage-patterns-best-practices.md +605 -0
  174. package/research/test-path-issues.md +276 -0
  175. package/review/ALP-76/1-error-type-design.md +962 -0
  176. package/review/ALP-76/2-error-handling-patterns.md +906 -0
  177. package/review/ALP-76/3-error-presentation.md +624 -0
  178. package/review/ALP-76/4-test-coverage.md +625 -0
  179. package/review/ALP-76/5-migration-completeness.md +440 -0
  180. package/review/ALP-76/6-effect-best-practices.md +755 -0
  181. package/scripts/apply-branch-protection.sh +47 -0
  182. package/scripts/branch-protection-templates.json +79 -0
  183. package/scripts/prototype-summarization.ts +346 -0
  184. package/scripts/rebuild-hnswlib.js +58 -0
  185. package/scripts/setup-branch-protection.sh +64 -0
  186. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/active-provider.json +7 -0
  187. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.json +541 -0
  188. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.meta.json +5 -0
  189. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/config.json +8 -0
  190. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
  191. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
  192. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/documents.json +60 -0
  193. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/links.json +13 -0
  194. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/sections.json +1197 -0
  195. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/configuration-management.md +99 -0
  196. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/distributed-systems.md +92 -0
  197. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/error-handling.md +78 -0
  198. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/failure-automation.md +55 -0
  199. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/job-context.md +69 -0
  200. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/process-orchestration.md +99 -0
  201. package/src/cli/argv-preprocessor.test.ts +210 -0
  202. package/src/cli/argv-preprocessor.ts +202 -0
  203. package/src/cli/cli.test.ts +627 -0
  204. package/src/cli/commands/backlinks.ts +54 -0
  205. package/src/cli/commands/config-cmd.ts +642 -0
  206. package/src/cli/commands/context.ts +285 -0
  207. package/src/cli/commands/duplicates.ts +122 -0
  208. package/src/cli/commands/embeddings.ts +529 -0
  209. package/src/cli/commands/index-cmd.ts +480 -0
  210. package/src/cli/commands/index.ts +16 -0
  211. package/src/cli/commands/links.ts +52 -0
  212. package/src/cli/commands/search.ts +1281 -0
  213. package/src/cli/commands/stats.ts +149 -0
  214. package/src/cli/commands/tree.ts +128 -0
  215. package/src/cli/config-layer.ts +176 -0
  216. package/src/cli/error-handler.test.ts +235 -0
  217. package/src/cli/error-handler.ts +655 -0
  218. package/src/cli/flag-schemas.ts +341 -0
  219. package/src/cli/help.ts +588 -0
  220. package/src/cli/index.ts +9 -0
  221. package/src/cli/main.ts +435 -0
  222. package/src/cli/options.ts +41 -0
  223. package/src/cli/shared-error-handling.ts +199 -0
  224. package/src/cli/typo-suggester.test.ts +105 -0
  225. package/src/cli/typo-suggester.ts +130 -0
  226. package/src/cli/utils.ts +259 -0
  227. package/src/config/file-provider.test.ts +320 -0
  228. package/src/config/file-provider.ts +273 -0
  229. package/src/config/index.ts +72 -0
  230. package/src/config/integration.test.ts +667 -0
  231. package/src/config/precedence.test.ts +277 -0
  232. package/src/config/precedence.ts +451 -0
  233. package/src/config/schema.test.ts +414 -0
  234. package/src/config/schema.ts +603 -0
  235. package/src/config/service.test.ts +320 -0
  236. package/src/config/service.ts +243 -0
  237. package/src/config/testing.test.ts +264 -0
  238. package/src/config/testing.ts +110 -0
  239. package/src/core/index.ts +1 -0
  240. package/src/core/types.ts +113 -0
  241. package/src/duplicates/detector.test.ts +183 -0
  242. package/src/duplicates/detector.ts +414 -0
  243. package/src/duplicates/index.ts +18 -0
  244. package/src/embeddings/embedding-namespace.test.ts +300 -0
  245. package/src/embeddings/embedding-namespace.ts +947 -0
  246. package/src/embeddings/heading-boost.test.ts +222 -0
  247. package/src/embeddings/hnsw-build-options.test.ts +198 -0
  248. package/src/embeddings/hyde.test.ts +272 -0
  249. package/src/embeddings/hyde.ts +264 -0
  250. package/src/embeddings/index.ts +10 -0
  251. package/src/embeddings/openai-provider.ts +414 -0
  252. package/src/embeddings/pricing.json +22 -0
  253. package/src/embeddings/provider-constants.ts +204 -0
  254. package/src/embeddings/provider-errors.test.ts +967 -0
  255. package/src/embeddings/provider-errors.ts +565 -0
  256. package/src/embeddings/provider-factory.test.ts +240 -0
  257. package/src/embeddings/provider-factory.ts +225 -0
  258. package/src/embeddings/provider-integration.test.ts +788 -0
  259. package/src/embeddings/query-preprocessing.test.ts +187 -0
  260. package/src/embeddings/semantic-search-threshold.test.ts +508 -0
  261. package/src/embeddings/semantic-search.ts +1270 -0
  262. package/src/embeddings/types.ts +359 -0
  263. package/src/embeddings/vector-store.ts +708 -0
  264. package/src/embeddings/voyage-provider.ts +313 -0
  265. package/src/errors/errors.test.ts +845 -0
  266. package/src/errors/index.ts +533 -0
  267. package/src/index/ignore-patterns.test.ts +354 -0
  268. package/src/index/ignore-patterns.ts +305 -0
  269. package/src/index/index.ts +4 -0
  270. package/src/index/indexer.ts +684 -0
  271. package/src/index/storage.ts +260 -0
  272. package/src/index/types.ts +147 -0
  273. package/src/index/watcher.ts +189 -0
  274. package/src/index.ts +30 -0
  275. package/src/integration/search-keyword.test.ts +678 -0
  276. package/src/mcp/server.ts +612 -0
  277. package/src/parser/index.ts +1 -0
  278. package/src/parser/parser.test.ts +291 -0
  279. package/src/parser/parser.ts +394 -0
  280. package/src/parser/section-filter.test.ts +277 -0
  281. package/src/parser/section-filter.ts +392 -0
  282. package/src/search/__tests__/hybrid-search.test.ts +650 -0
  283. package/src/search/bm25-store.ts +366 -0
  284. package/src/search/cross-encoder.test.ts +253 -0
  285. package/src/search/cross-encoder.ts +406 -0
  286. package/src/search/fuzzy-search.test.ts +419 -0
  287. package/src/search/fuzzy-search.ts +273 -0
  288. package/src/search/hybrid-search.ts +448 -0
  289. package/src/search/path-matcher.test.ts +276 -0
  290. package/src/search/path-matcher.ts +33 -0
  291. package/src/search/query-parser.test.ts +260 -0
  292. package/src/search/query-parser.ts +319 -0
  293. package/src/search/searcher.test.ts +280 -0
  294. package/src/search/searcher.ts +724 -0
  295. package/src/search/wink-bm25.d.ts +30 -0
  296. package/src/summarization/cli-providers/claude.ts +202 -0
  297. package/src/summarization/cli-providers/detection.test.ts +273 -0
  298. package/src/summarization/cli-providers/detection.ts +118 -0
  299. package/src/summarization/cli-providers/index.ts +8 -0
  300. package/src/summarization/cost.test.ts +139 -0
  301. package/src/summarization/cost.ts +102 -0
  302. package/src/summarization/error-handler.test.ts +127 -0
  303. package/src/summarization/error-handler.ts +111 -0
  304. package/src/summarization/index.ts +102 -0
  305. package/src/summarization/pipeline.test.ts +498 -0
  306. package/src/summarization/pipeline.ts +231 -0
  307. package/src/summarization/prompts.test.ts +269 -0
  308. package/src/summarization/prompts.ts +133 -0
  309. package/src/summarization/provider-factory.test.ts +396 -0
  310. package/src/summarization/provider-factory.ts +178 -0
  311. package/src/summarization/types.ts +184 -0
  312. package/src/summarize/budget-bugs.test.ts +620 -0
  313. package/src/summarize/formatters.ts +419 -0
  314. package/src/summarize/index.ts +20 -0
  315. package/src/summarize/summarizer.test.ts +275 -0
  316. package/src/summarize/summarizer.ts +597 -0
  317. package/src/summarize/verify-bugs.test.ts +238 -0
  318. package/src/types/huggingface-transformers.d.ts +66 -0
  319. package/src/utils/index.ts +1 -0
  320. package/src/utils/tokens.test.ts +142 -0
  321. package/src/utils/tokens.ts +186 -0
  322. package/tests/fixtures/cli/.mdcontext/active-provider.json +7 -0
  323. package/tests/fixtures/cli/.mdcontext/config.json +8 -0
  324. package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
  325. package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
  326. package/tests/fixtures/cli/.mdcontext/indexes/documents.json +33 -0
  327. package/tests/fixtures/cli/.mdcontext/indexes/links.json +12 -0
  328. package/tests/fixtures/cli/.mdcontext/indexes/sections.json +247 -0
  329. package/tests/fixtures/cli/README.md +9 -0
  330. package/tests/fixtures/cli/api-reference.md +11 -0
  331. package/tests/fixtures/cli/getting-started.md +11 -0
  332. package/tests/integration/embed-index.test.ts +712 -0
  333. package/tests/integration/search-context.test.ts +469 -0
  334. package/tests/integration/search-semantic.test.ts +522 -0
  335. package/tsconfig.json +26 -0
  336. package/vitest.config.ts +16 -0
  337. package/vitest.setup.ts +12 -0
@@ -0,0 +1,708 @@
1
+ /**
2
+ * Vector store using hnswlib-node
3
+ *
4
+ * Supports both legacy (flat) and namespaced storage layouts:
5
+ * - Legacy: .mdcontext/vectors.bin, .mdcontext/vectors.meta.bin
6
+ * - Namespaced: .mdcontext/embeddings/{namespace}/vectors.bin, vectors.meta.bin
7
+ *
8
+ * New indexes are written using namespaced storage. Existing legacy indexes
9
+ * continue to be loaded from their original flat locations; this module does
10
+ * not perform automatic migration between layouts.
11
+ */
12
+
13
+ import * as fs from 'node:fs/promises'
14
+ import * as path from 'node:path'
15
+ import * as msgpack from '@msgpack/msgpack'
16
+ import { Effect } from 'effect'
17
+ import HierarchicalNSW from 'hnswlib-node'
18
+ import { DimensionMismatchError, VectorStoreError } from '../errors/index.js'
19
+ import { INDEX_DIR } from '../index/types.js'
20
+ import {
21
+ generateNamespace,
22
+ getNamespaceDir,
23
+ getMetaPath as getNamespacedMetaPath,
24
+ getVectorPath as getNamespacedVectorPath,
25
+ } from './embedding-namespace.js'
26
+ import type { VectorEntry, VectorIndex } from './types.js'
27
+
28
+ // ============================================================================
29
+ // Constants
30
+ // ============================================================================
31
+
32
/** File name of the serialized HNSW index when the legacy (flat) layout is used. */
const VECTOR_INDEX_FILE = 'vectors.bin'

/** File name of the metadata file when the legacy (flat) layout is used. */
const VECTOR_META_FILE = 'vectors.meta.bin'

/** Format version written into the `version` field of the saved metadata. */
const INDEX_VERSION = 1
35
+
36
+ // ============================================================================
37
+ // Vector Store
38
+ // ============================================================================
39
+
40
/**
 * Per-query tuning knobs accepted by the vector store's search methods.
 */
export interface VectorSearchOptions {
  /**
   * HNSW `efSearch` value: larger values trade speed for recall.
   * When omitted, the store does not touch the index's current setting.
   * NOTE(review): the documented default of 100 is applied outside this
   * file — confirm against the configuration defaults.
   */
  readonly efSearch?: number | undefined
}
44
+
45
/**
 * Contract for a persistent store of embedding vectors.
 *
 * Every fallible operation returns an `Effect`; failures are reported through
 * the Effect error channel rather than thrown.
 */
export interface VectorStore {
  /** Root directory under which the store's files are located. */
  readonly rootPath: string

  /** Length of every vector held by this store. */
  readonly dimensions: number

  /**
   * Insert the given entries into the index.
   * Fails with `VectorStoreError` if the insertion cannot be completed.
   */
  add(entries: VectorEntry[]): Effect.Effect<void, VectorStoreError>

  /**
   * Find the nearest stored vectors to `vector`.
   *
   * @param vector - Query embedding
   * @param limit - Maximum number of neighbours to request
   * @param threshold - Minimum similarity a neighbour must reach to be returned
   * @param options - Optional per-query tuning (e.g. `efSearch`)
   */
  search(
    vector: number[],
    limit: number,
    threshold?: number,
    options?: VectorSearchOptions,
  ): Effect.Effect<VectorSearchResult[], VectorStoreError>

  /**
   * Same query as `search`, but also reports how many neighbours were
   * rejected by the threshold and the best similarity among them, so callers
   * can explain an empty result set to the user.
   */
  searchWithStats(
    vector: number[],
    limit: number,
    threshold?: number,
    options?: VectorSearchOptions,
  ): Effect.Effect<VectorSearchResultWithStats, VectorStoreError>

  /** Persist the index and its metadata to disk. */
  save(): Effect.Effect<void, VectorStoreError>

  /**
   * Load the vector store from disk.
   *
   * Succeeds with a `VectorStoreLoadResult` carrying the loaded status and
   * any warnings. Fails with `DimensionMismatchError` when the stored
   * dimensions don't match the current provider, or with `VectorStoreError`
   * for other failures.
   */
  load(): Effect.Effect<
    VectorStoreLoadResult,
    VectorStoreError | DimensionMismatchError
  >

  /** Return a snapshot of the store's statistics. */
  getStats(): VectorStoreStats
}
78
+
79
/**
 * One hit returned by a vector search: the identifying fields of the matched
 * entry plus its similarity to the query.
 */
export interface VectorSearchResult {
  /** Identifier of the matched vector entry. */
  readonly id: string
  /** Identifier of the section the entry belongs to. */
  readonly sectionId: string
  /** Path of the document the entry belongs to. */
  readonly documentPath: string
  /** Heading recorded on the matched entry. */
  readonly heading: string
  /** Similarity to the query, computed as `1 - distance` from the index. */
  readonly similarity: number
}
86
+
87
/**
 * Search outcome enriched with information about the neighbours that were
 * rejected by the similarity threshold. Lets callers give feedback when no
 * result passes the threshold.
 */
export interface VectorSearchResultWithStats {
  /** Hits whose similarity met the threshold. */
  readonly results: VectorSearchResult[]
  /** How many neighbours were found but fell below the threshold. */
  readonly belowThresholdCount: number
  /** Best similarity among the rejected neighbours, or `null` if none were rejected. */
  readonly belowThresholdHighest: number | null
}
98
+
99
/**
 * Summary figures reported by `VectorStore.getStats()`.
 */
export interface VectorStoreStats {
  /** Number of entries (presumably stored vectors — verify against `getStats`). */
  readonly count: number
  /** Vector length used by the store. */
  readonly dimensions: number
  /** Name of the embedding provider associated with the store. */
  readonly provider: string
  /** Model identifier for the provider, when one is recorded. */
  readonly providerModel?: string | undefined
  /** Accumulated embedding cost (units not established in this file). */
  readonly totalCost: number
  /** Accumulated token count. */
  readonly totalTokens: number
}
107
+
108
/**
 * Outcome of `VectorStore.load()`, including any warning about a mismatch
 * between the configured and the stored index parameters.
 */
export interface VectorStoreLoadResult {
  /** `true` when the index was loaded successfully. */
  readonly loaded: boolean
  /** Present when the configured HNSW parameters differ from the stored ones. */
  readonly hnswMismatch?: HnswMismatchWarning | undefined
}
117
+
118
/**
 * Describes a disagreement between the HNSW parameters in the current
 * configuration and those the stored index was built with.
 */
export interface HnswMismatchWarning {
  /** Parameters taken from the current configuration. */
  readonly configParams: { m: number; efConstruction: number }
  /** Parameters recorded in the stored index. */
  readonly indexParams: { m: number; efConstruction: number }
}
128
+
129
+ // ============================================================================
130
+ // Implementation
131
+ // ============================================================================
132
+
133
+ class HnswVectorStore implements VectorStore {
134
+ readonly rootPath: string
135
+ readonly dimensions: number
136
+
137
+ private index: HierarchicalNSW.HierarchicalNSW | null = null
138
+ private entries: Map<number, VectorEntry> = new Map()
139
+ private idToIndex: Map<string, number> = new Map()
140
+ private nextIndex = 0
141
+ private provider = 'unknown'
142
+ private providerModel: string | undefined = undefined
143
+ private providerBaseURL: string | undefined = undefined
144
+ private totalCost = 0
145
+ private totalTokens = 0
146
+
147
+ // HNSW build parameters
148
+ private readonly hnswM: number
149
+ private readonly hnswEfConstruction: number
150
+
151
+ // Namespace support - when set, uses namespaced storage paths
152
+ private namespace: string | undefined = undefined
153
+
154
/**
 * Create an empty store; the HNSW index itself is built lazily.
 *
 * @param rootPath - Project root; stored as an absolute path
 * @param dimensions - Length of the vectors this store will hold
 * @param hnswOptions - Optional build parameters; `m` falls back to 16 and
 *   `efConstruction` to 200 when not supplied
 */
constructor(
  rootPath: string,
  dimensions: number,
  hnswOptions?: HnswBuildOptions,
) {
  const absoluteRoot = path.resolve(rootPath)
  this.rootPath = absoluteRoot
  this.dimensions = dimensions

  const requestedM = hnswOptions?.m
  const requestedEfConstruction = hnswOptions?.efConstruction
  this.hnswM = requestedM ?? 16
  this.hnswEfConstruction = requestedEfConstruction ?? 200
}
164
+
165
/**
 * Select the namespace used by this store.
 * Once a non-empty namespace is set, the index directory, vector file and
 * metadata file paths are all resolved inside that namespace.
 *
 * @param namespace - Namespace identifier to use for storage paths
 */
setNamespace(namespace: string): void {
  this.namespace = namespace
}
172
+
173
/**
 * Report the namespace currently in use.
 *
 * @returns The namespace, or `undefined` when none has been set
 */
getNamespace(): string | undefined {
  return this.namespace
}
179
+
180
/**
 * Resolve the directory that holds the index files.
 *
 * @returns The namespace directory when a non-empty namespace is set,
 *   otherwise the legacy `INDEX_DIR` under the root path
 */
private getIndexDir(): string {
  const ns = this.namespace
  return ns
    ? getNamespaceDir(this.rootPath, ns)
    : path.join(this.rootPath, INDEX_DIR)
}
190
+
191
/**
 * Resolve the path of the serialized HNSW index file.
 *
 * @returns The namespaced vector path when a non-empty namespace is set,
 *   otherwise the legacy flat location under `INDEX_DIR`
 */
private getVectorPath(): string {
  const ns = this.namespace
  return ns
    ? getNamespacedVectorPath(this.rootPath, ns)
    : path.join(this.rootPath, INDEX_DIR, VECTOR_INDEX_FILE)
}
200
+
201
/**
 * Resolve the path of the metadata file that accompanies the index.
 *
 * @returns The namespaced metadata path when a non-empty namespace is set,
 *   otherwise the legacy flat location under `INDEX_DIR`
 */
private getMetaPath(): string {
  const ns = this.namespace
  return ns
    ? getNamespacedMetaPath(this.rootPath, ns)
    : path.join(this.rootPath, INDEX_DIR, VECTOR_META_FILE)
}
210
+
211
/**
 * Return the HNSW index, creating and initializing it on first use.
 *
 * The index uses the cosine space with this store's dimensions and is
 * initialized with room for 10000 items; `add` grows it when needed.
 *
 * The new index is cached on `this.index` only after `initIndex` has
 * succeeded. Caching it earlier would leave an uninitialized index behind if
 * `initIndex` threw, and later calls (including `save`) would treat it as
 * usable.
 *
 * @returns The initialized index
 */
private ensureIndex(): HierarchicalNSW.HierarchicalNSW {
  if (this.index) {
    return this.index
  }

  const fresh = new HierarchicalNSW.HierarchicalNSW('cosine', this.dimensions)
  // initIndex(maxElements, M, efConstruction, randomSeed)
  fresh.initIndex(10000, this.hnswM, this.hnswEfConstruction, 100)

  this.index = fresh
  return fresh
}
223
+
224
/**
 * Insert entries into the index, skipping any whose `id` is already stored.
 *
 * Each new entry receives the next numeric label. The label counter is
 * advanced only after the point has been added and recorded, so a failed
 * insert does not burn a label. Entries added before a failure remain in
 * the store.
 *
 * @param entries - Entries to add; `embedding` is passed to the index as-is
 * @returns An Effect that fails with `VectorStoreError` (operation `add`)
 *   if index creation, resizing or insertion throws
 */
add(entries: VectorEntry[]): Effect.Effect<void, VectorStoreError> {
  return Effect.try({
    try: () => {
      const index = this.ensureIndex()

      for (const entry of entries) {
        // Skip if already exists
        if (this.idToIndex.has(entry.id)) {
          continue
        }

        const label = this.nextIndex

        // Grow by doubling, but always far enough to hold this label;
        // doubling alone cannot recover from a zero or lagging capacity.
        const capacity = index.getMaxElements()
        if (label >= capacity) {
          index.resizeIndex(Math.max(capacity * 2, label + 1))
        }

        index.addPoint(entry.embedding as number[], label)
        this.entries.set(label, entry)
        this.idToIndex.set(entry.id, label)

        // Commit the label only once the point is stored.
        this.nextIndex = label + 1
      }
    },
    catch: (e) =>
      new VectorStoreError({
        operation: 'add',
        message: e instanceof Error ? e.message : String(e),
        cause: e,
      }),
  })
}
255
+
256
/**
 * Find the stored vectors nearest to `vector`.
 *
 * Runs the same query as `searchWithStats` and returns only its `results`
 * list, i.e. the neighbours whose similarity is at least `threshold`.
 *
 * @param vector - Query embedding
 * @param limit - Maximum number of neighbours to request from the index
 * @param threshold - Minimum similarity required (defaults to 0)
 * @param options - Optional per-query tuning such as `efSearch`
 * @returns An Effect yielding the matching results, or failing with
 *   `VectorStoreError` (operation `search`)
 */
search(
  vector: number[],
  limit: number,
  threshold = 0,
  options?: VectorSearchOptions,
): Effect.Effect<VectorSearchResult[], VectorStoreError> {
  return Effect.map(
    this.searchWithStats(vector, limit, threshold, options),
    (outcome) => outcome.results,
  )
}
317
+
318
/**
 * Nearest-neighbour query that also reports what the threshold filtered out.
 *
 * Asks the index for at most `min(limit, stored entries)` neighbours,
 * converts each cosine distance to a similarity (`1 - distance`) and splits
 * the neighbours into accepted results and below-threshold rejects.
 * Neighbours with no matching stored entry are ignored. An empty or missing
 * index yields an empty outcome.
 *
 * @param vector - Query embedding
 * @param limit - Maximum number of neighbours to request from the index
 * @param threshold - Minimum similarity required (defaults to 0)
 * @param options - Optional per-query tuning; `efSearch` is applied to the
 *   index before searching when provided
 * @returns An Effect yielding results plus reject statistics, or failing
 *   with `VectorStoreError` (operation `search`)
 */
searchWithStats(
  vector: number[],
  limit: number,
  threshold = 0,
  options?: VectorSearchOptions,
): Effect.Effect<VectorSearchResultWithStats, VectorStoreError> {
  return Effect.try({
    try: (): VectorSearchResultWithStats => {
      const hnsw = this.index
      if (!hnsw || this.entries.size === 0) {
        return {
          results: [],
          belowThresholdCount: 0,
          belowThresholdHighest: null,
        }
      }

      // Apply the caller's recall/speed setting, if one was given.
      const efSearch = options?.efSearch
      if (efSearch !== undefined) {
        hnsw.setEf(efSearch)
      }

      const neighbourCount = Math.min(limit, this.entries.size)
      const { neighbors, distances } = hnsw.searchKnn(vector, neighbourCount)

      const accepted: VectorSearchResult[] = []
      let rejectedCount = 0
      let bestRejected: number | null = null

      for (const [position, label] of neighbors.entries()) {
        const distance = distances[position]
        if (label === undefined || distance === undefined) {
          continue
        }

        // hnswlib's cosine space reports 1 - cosine_similarity.
        const similarity = 1 - distance

        const entry = this.entries.get(label)
        if (!entry) {
          continue
        }

        if (similarity >= threshold) {
          accepted.push({
            id: entry.id,
            sectionId: entry.sectionId,
            documentPath: entry.documentPath,
            heading: entry.heading,
            similarity,
          })
        } else if (similarity < threshold) {
          // Rejected by the threshold: count it and remember the best score.
          rejectedCount += 1
          bestRejected =
            bestRejected === null
              ? similarity
              : Math.max(bestRejected, similarity)
        } else {
          // Unordered comparison (NaN): the original treats it as accepted.
          accepted.push({
            id: entry.id,
            sectionId: entry.sectionId,
            documentPath: entry.documentPath,
            heading: entry.heading,
            similarity,
          })
        }
      }

      return {
        results: accepted,
        belowThresholdCount: rejectedCount,
        belowThresholdHighest: bestRejected,
      }
    },
    catch: (e) => {
      const message = e instanceof Error ? e.message : String(e)
      return new VectorStoreError({
        operation: 'search',
        message,
        cause: e,
      })
    },
  })
}
397
+
398
/**
 * Persist the store to disk: the hnswlib index first, then the
 * MessagePack-encoded metadata (entries, provider info, cost counters and
 * HNSW build parameters).
 *
 * Does nothing when no index has been created yet.
 *
 * @returns An effect that completes once both files are written, or fails
 *   with a `save` VectorStoreError naming the step that failed.
 */
save(): Effect.Effect<void, VectorStoreError> {
  return Effect.gen(
    function* (this: HnswVectorStore) {
      // Capture the index once so every later step uses the instance that
      // passed this guard, without relying on a non-null assertion.
      const index = this.index
      if (!index) {
        return
      }

      // Builds the error mapper for one save step; `context` is the
      // human-readable prefix of the resulting message.
      const saveError = (context: string) => (e: unknown) =>
        new VectorStoreError({
          operation: 'save',
          message: `${context}: ${e instanceof Error ? e.message : String(e)}`,
          cause: e,
        })

      const indexDir = this.getIndexDir()
      yield* Effect.tryPromise({
        try: () => fs.mkdir(indexDir, { recursive: true }),
        catch: saveError('Failed to create directory'),
      })

      // Save the hnswlib index
      yield* Effect.tryPromise({
        try: () => index.writeIndex(this.getVectorPath()),
        catch: saveError('Failed to write index'),
      })

      // Read the clock once so both timestamps describe the same instant.
      // NOTE(review): createdAt is regenerated on every save, so the original
      // creation time is not preserved across load/save cycles — confirm
      // whether that is intended.
      const timestamp = new Date().toISOString()

      // Save metadata
      const meta: VectorIndex = {
        version: INDEX_VERSION,
        provider: this.provider,
        providerModel: this.providerModel,
        providerBaseURL: this.providerBaseURL,
        dimensions: this.dimensions,
        // Map keys are numeric labels; object keys must be strings.
        entries: Object.fromEntries(
          Array.from(this.entries.entries()).map(([idx, entry]) => [
            idx.toString(),
            entry,
          ]),
        ),
        totalCost: this.totalCost,
        totalTokens: this.totalTokens,
        createdAt: timestamp,
        updatedAt: timestamp,
        // Store HNSW build parameters for validation on load
        hnswParams: {
          m: this.hnswM,
          efConstruction: this.hnswEfConstruction,
        },
      }

      yield* Effect.tryPromise({
        try: async () => {
          // Rough size estimate (15000 bytes per entry); warn past 100 MB.
          const estimatedSize = this.entries.size * 15000
          if (estimatedSize > 100_000_000) {
            console.warn(
              `Large metadata detected: ~${(estimatedSize / 1e6).toFixed(0)}MB. ` +
                `Consider indexing subdirectories separately.`,
            )
          }

          // Encode with MessagePack and write
          const encoded = msgpack.encode(meta)
          await fs.writeFile(this.getMetaPath(), encoded)
        },
        catch: saveError('Failed to write metadata'),
      })
    }.bind(this),
  )
}
476
+
477
/**
 * Load a previously saved store from disk.
 *
 * Metadata is read from the binary (MessagePack) file when possible and
 * falls back to the legacy JSON file, which is then migrated to the binary
 * format on a best-effort basis.
 *
 * @returns `{ loaded: false }` when the index or its metadata does not
 *   exist; otherwise `{ loaded: true }`, with `hnswMismatch` set when the
 *   stored HNSW build parameters differ from this store's configuration.
 *   Fails with DimensionMismatchError when the stored dimensions differ
 *   from this store's, and with a `load` VectorStoreError when metadata or
 *   the index cannot be read.
 */
load(): Effect.Effect<
  VectorStoreLoadResult,
  VectorStoreError | DimensionMismatchError
> {
  return Effect.gen(
    function* (this: HnswVectorStore) {
      const vectorPath = this.getVectorPath()
      const metaPath = this.getMetaPath()
      // Legacy JSON metadata path. Only a trailing ".bin" is swapped, so a
      // ".bin" occurring earlier in the path (e.g. in a directory name) is
      // never rewritten.
      const jsonPath = metaPath.replace(/\.bin$/, '.json')

      // True when an error looks like Node's "file does not exist" error.
      const isNotFound = (e: unknown): boolean =>
        typeof e === 'object' &&
        e !== null &&
        (e as { code?: unknown }).code === 'ENOENT'

      // Check if files exist - catch file not found gracefully
      // For metadata, check both binary (.bin) and JSON (.json) for migration
      const filesExist = yield* Effect.tryPromise({
        try: async () => {
          await fs.access(vectorPath)
          // Check if either binary or JSON metadata exists
          try {
            await fs.access(metaPath)
          } catch {
            await fs.access(jsonPath)
          }
          return true
        },
        catch: () =>
          new VectorStoreError({
            operation: 'load',
            message: 'Files not found',
          }),
      }).pipe(
        Effect.catchTag('VectorStoreError', () => Effect.succeed(false)),
      )

      if (!filesExist) {
        return { loaded: false }
      }

      // Load metadata - try binary first, fall back to JSON for migration
      const loadedMeta = yield* Effect.tryPromise({
        try: async () => {
          // Try binary format first (new). Remember why it failed so a
          // corrupt file is not later misreported as a missing one.
          let binaryError: unknown
          try {
            const buffer = await fs.readFile(metaPath)
            return msgpack.decode(buffer) as VectorIndex
          } catch (e) {
            binaryError = e
          }

          // Fall back to JSON for migration (old)
          let json: string
          try {
            json = await fs.readFile(jsonPath, 'utf-8')
          } catch (jsonError) {
            // No usable fallback: surface the most informative failure.
            if (!isNotFound(binaryError)) throw binaryError
            if (!isNotFound(jsonError)) throw jsonError
            throw new Error('Metadata file not found')
          }

          // A parse failure propagates with its real message.
          const meta = JSON.parse(json) as VectorIndex

          // Auto-migrate to binary format. Skipped if both paths are the
          // same file, since the unlink below would delete the metadata.
          if (jsonPath !== metaPath) {
            try {
              const encoded = msgpack.encode(meta)
              await fs.writeFile(metaPath, encoded)

              // Remove old JSON file (ignore errors if already deleted by another process)
              await fs.unlink(jsonPath).catch(() => {})
            } catch {
              // Migration failed, but we have the data - continue
            }
          }

          return meta
        },
        catch: (e) =>
          new VectorStoreError({
            operation: 'load',
            message: `Failed to read metadata: ${e instanceof Error ? e.message : String(e)}`,
            cause: e,
          }),
      })

      // Apply legacy index migration: default to 'openai' if provider is missing
      const meta: VectorIndex = {
        ...loadedMeta,
        provider: loadedMeta.provider || 'openai',
      }

      // Verify dimensions match - fail with clear error if mismatch
      if (meta.dimensions !== this.dimensions) {
        return yield* Effect.fail(
          new DimensionMismatchError({
            corpusDimensions: meta.dimensions,
            providerDimensions: this.dimensions,
            corpusProvider: meta.providerModel
              ? `${meta.provider}:${meta.providerModel}`
              : meta.provider,
            path: this.rootPath,
          }),
        )
      }

      // Read the hnswlib index into a local instance first, so a failed read
      // does not replace the current index with an unloaded one.
      const index = new HierarchicalNSW.HierarchicalNSW(
        'cosine',
        this.dimensions,
      )
      yield* Effect.tryPromise({
        try: () => index.readIndex(vectorPath),
        catch: (e) =>
          new VectorStoreError({
            operation: 'load',
            message: `Failed to read index: ${e instanceof Error ? e.message : String(e)}`,
            cause: e,
          }),
      })
      this.index = index

      // Restore entries
      this.entries.clear()
      this.idToIndex.clear()
      this.nextIndex = 0

      for (const [idxStr, entry] of Object.entries(meta.entries)) {
        const idx = parseInt(idxStr, 10)
        this.entries.set(idx, entry)
        this.idToIndex.set(entry.id, idx)
        // Keep nextIndex one past the largest restored label.
        this.nextIndex = Math.max(this.nextIndex, idx + 1)
      }

      this.provider = meta.provider
      this.providerModel = meta.providerModel
      this.providerBaseURL = meta.providerBaseURL
      // Presumably metadata written by older versions can lack the cost
      // counters (TODO confirm); default to 0 so addCost never produces NaN.
      this.totalCost = meta.totalCost ?? 0
      this.totalTokens = meta.totalTokens ?? 0

      // Check for HNSW parameter mismatch
      let hnswMismatch: HnswMismatchWarning | undefined
      if (meta.hnswParams) {
        const indexM = meta.hnswParams.m
        const indexEf = meta.hnswParams.efConstruction
        if (indexM !== this.hnswM || indexEf !== this.hnswEfConstruction) {
          hnswMismatch = {
            configParams: {
              m: this.hnswM,
              efConstruction: this.hnswEfConstruction,
            },
            indexParams: { m: indexM, efConstruction: indexEf },
          }
        }
      }

      return { loaded: true, hnswMismatch }
    }.bind(this),
  )
}
628
+
629
/**
 * Snapshot of the store's current state.
 *
 * @returns The entry count, the embedding dimensions, the provider name and
 *   model, and the accumulated cost and token counters.
 */
getStats(): VectorStoreStats {
  const { dimensions, provider, providerModel, totalCost, totalTokens } = this
  const count = this.entries.size
  return {
    count,
    dimensions,
    provider,
    providerModel,
    totalCost,
    totalTokens,
  }
}
639
+
640
/**
 * Record which embedding provider this store belongs to.
 *
 * @param name - Provider name.
 * @param model - Optional model name; stored as-is, `undefined` when omitted.
 * @param baseURL - Optional provider base URL; stored as-is, `undefined`
 *   when omitted.
 */
setProvider(name: string, model?: string, baseURL?: string): void {
  ;[this.provider, this.providerModel, this.providerBaseURL] = [
    name,
    model,
    baseURL,
  ]
}
645
+
646
/**
 * Add to the running cost and token counters.
 *
 * @param cost - Amount added to the accumulated cost.
 * @param tokens - Amount added to the accumulated token count.
 */
addCost(cost: number, tokens: number): void {
  this.totalCost = this.totalCost + cost
  this.totalTokens = this.totalTokens + tokens
}
650
+ }
651
+
652
+ // ============================================================================
653
+ // Factory
654
+ // ============================================================================
655
+
656
/**
 * Parameters used when constructing an HNSW index.
 *
 * Both influence index quality and build time; changing either one means
 * the index has to be rebuilt.
 */
export interface HnswBuildOptions {
  /**
   * Maximum number of connections per node (default: 16).
   * Larger values improve recall and produce a larger index.
   */
  readonly m?: number | undefined
  /**
   * Search width used while building the index (default: 200).
   * Larger values improve quality and slow down builds.
   */
  readonly efConstruction?: number | undefined
}
666
+
667
/**
 * Build an HNSW-backed vector store rooted at `rootPath`.
 *
 * @param rootPath - Root directory containing the index
 * @param dimensions - Number of dimensions of the embeddings to be stored
 * @param hnswOptions - Optional HNSW build parameters
 * @returns A freshly constructed VectorStore
 */
export const createVectorStore = (
  rootPath: string,
  dimensions: number,
  hnswOptions?: HnswBuildOptions,
): VectorStore => {
  const store = new HnswVectorStore(rootPath, dimensions, hnswOptions)
  return store
}
680
+
681
/**
 * Build a vector store whose files live under a provider/model-specific
 * namespace.
 *
 * Namespaced storage layout:
 *   .mdcontext/embeddings/{provider}_{model}_{dimensions}/vectors.bin
 *
 * @param rootPath - Root directory containing the index
 * @param provider - Provider name (e.g., "openai", "voyage")
 * @param model - Model name (e.g., "text-embedding-3-small")
 * @param dimensions - Embedding dimensions
 * @param hnswOptions - Optional HNSW build parameters
 * @returns A new VectorStore with its namespace already applied
 */
export const createNamespacedVectorStore = (
  rootPath: string,
  provider: string,
  model: string,
  dimensions: number,
  hnswOptions?: HnswBuildOptions,
): VectorStore => {
  // Derive the namespace before constructing the store.
  const storeNamespace = generateNamespace(provider, model, dimensions)
  const namespacedStore = new HnswVectorStore(
    rootPath,
    dimensions,
    hnswOptions,
  )
  namespacedStore.setNamespace(storeNamespace)
  return namespacedStore
}
706
+
707
+ // Export the class for type access
708
+ export { HnswVectorStore }