mdcontext 0.0.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (337)
  1. package/.changeset/README.md +28 -0
  2. package/.changeset/config.json +11 -0
  3. package/.claude/settings.local.json +25 -0
  4. package/.github/workflows/ci.yml +83 -0
  5. package/.github/workflows/claude-code-review.yml +44 -0
  6. package/.github/workflows/claude.yml +85 -0
  7. package/.github/workflows/release.yml +113 -0
  8. package/.tldrignore +112 -0
  9. package/BACKLOG.md +338 -0
  10. package/CONTRIBUTING.md +186 -0
  11. package/NOTES/NOTES +44 -0
  12. package/README.md +434 -11
  13. package/biome.json +36 -0
  14. package/cspell.config.yaml +14 -0
  15. package/dist/chunk-23UPXDNL.js +3044 -0
  16. package/dist/chunk-2W7MO2DL.js +1366 -0
  17. package/dist/chunk-3NUAZGMA.js +1689 -0
  18. package/dist/chunk-7TOWB2XB.js +366 -0
  19. package/dist/chunk-7XOTOADQ.js +3065 -0
  20. package/dist/chunk-AH2PDM2K.js +3042 -0
  21. package/dist/chunk-BNXWSZ63.js +3742 -0
  22. package/dist/chunk-BTL5DJVU.js +3222 -0
  23. package/dist/chunk-HDHYG7E4.js +104 -0
  24. package/dist/chunk-HLR4KZBP.js +3234 -0
  25. package/dist/chunk-IP3FRFEB.js +1045 -0
  26. package/dist/chunk-KHU56VDO.js +3042 -0
  27. package/dist/chunk-KRYIFLQR.js +88 -0
  28. package/dist/chunk-LBSDNLEM.js +287 -0
  29. package/dist/chunk-MNTQ7HCP.js +2643 -0
  30. package/dist/chunk-MUJELQQ6.js +1387 -0
  31. package/dist/chunk-MXJGMSLV.js +2199 -0
  32. package/dist/chunk-N6QJGC3Z.js +2636 -0
  33. package/dist/chunk-OBELGBPM.js +1713 -0
  34. package/dist/chunk-OT7R5XTA.js +3192 -0
  35. package/dist/chunk-P7X4RA2T.js +106 -0
  36. package/dist/chunk-PIDUQNC2.js +3185 -0
  37. package/dist/chunk-POGCDIH4.js +3187 -0
  38. package/dist/chunk-PSIEOQGZ.js +3043 -0
  39. package/dist/chunk-PVRT3IHA.js +3238 -0
  40. package/dist/chunk-QNN4TT23.js +1430 -0
  41. package/dist/chunk-RE3R45RJ.js +3042 -0
  42. package/dist/chunk-S7E6TFX6.js +803 -0
  43. package/dist/chunk-SG6GLU4U.js +1378 -0
  44. package/dist/chunk-SJCDV2ST.js +274 -0
  45. package/dist/chunk-SYE5XLF3.js +104 -0
  46. package/dist/chunk-T5VLYBZD.js +103 -0
  47. package/dist/chunk-TOQB7VWU.js +3238 -0
  48. package/dist/chunk-VFNMZ4ZQ.js +3228 -0
  49. package/dist/chunk-VVTGZNBT.js +1629 -0
  50. package/dist/chunk-W7Q4RFEV.js +104 -0
  51. package/dist/chunk-XTYYVRLO.js +3190 -0
  52. package/dist/chunk-Y6MDYVJD.js +3063 -0
  53. package/dist/cli/main.d.ts +1 -0
  54. package/dist/cli/main.js +5458 -0
  55. package/dist/index.d.ts +653 -0
  56. package/dist/index.js +79 -0
  57. package/dist/mcp/server.d.ts +1 -0
  58. package/dist/mcp/server.js +472 -0
  59. package/dist/schema-BAWSG7KY.js +22 -0
  60. package/dist/schema-E3QUPL26.js +20 -0
  61. package/dist/schema-EHL7WUT6.js +20 -0
  62. package/docs/019-USAGE.md +625 -0
  63. package/docs/020-current-implementation.md +364 -0
  64. package/docs/021-DOGFOODING-FINDINGS.md +175 -0
  65. package/docs/BACKLOG.md +80 -0
  66. package/docs/CONFIG.md +1123 -0
  67. package/docs/DESIGN.md +439 -0
  68. package/docs/ERRORS.md +383 -0
  69. package/docs/PROJECT.md +88 -0
  70. package/docs/ROADMAP.md +407 -0
  71. package/docs/summarization.md +320 -0
  72. package/docs/test-links.md +9 -0
  73. package/justfile +40 -0
  74. package/package.json +74 -9
  75. package/pnpm-workspace.yaml +5 -0
  76. package/research/INDEX.md +315 -0
  77. package/research/code-review/README.md +90 -0
  78. package/research/code-review/cli-error-handling-review.md +979 -0
  79. package/research/code-review/code-review-validation-report.md +464 -0
  80. package/research/code-review/main-ts-review.md +1128 -0
  81. package/research/config-analysis/01-current-implementation.md +470 -0
  82. package/research/config-analysis/02-strategy-recommendation.md +428 -0
  83. package/research/config-analysis/03-task-candidates.md +715 -0
  84. package/research/config-analysis/033-research-configuration-management.md +828 -0
  85. package/research/config-analysis/034-research-effect-cli-config.md +1504 -0
  86. package/research/config-analysis/04-consolidated-task-candidates.md +277 -0
  87. package/research/config-docs/SUMMARY.md +357 -0
  88. package/research/config-docs/TEST-RESULTS.md +776 -0
  89. package/research/config-docs/TODO.md +542 -0
  90. package/research/config-docs/analysis.md +744 -0
  91. package/research/config-docs/fix-validation.md +502 -0
  92. package/research/config-docs/help-audit.md +264 -0
  93. package/research/config-docs/help-system-analysis.md +890 -0
  94. package/research/dogfood/consolidated-tool-evaluation.md +373 -0
  95. package/research/dogfood/strategy-a/a-synthesis.md +184 -0
  96. package/research/dogfood/strategy-a/a1-docs.md +226 -0
  97. package/research/dogfood/strategy-a/a2-amorphic.md +156 -0
  98. package/research/dogfood/strategy-a/a3-llm.md +164 -0
  99. package/research/dogfood/strategy-b/b-synthesis.md +228 -0
  100. package/research/dogfood/strategy-b/b1-architecture.md +207 -0
  101. package/research/dogfood/strategy-b/b2-gaps.md +258 -0
  102. package/research/dogfood/strategy-b/b3-workflows.md +250 -0
  103. package/research/dogfood/strategy-c/c-synthesis.md +451 -0
  104. package/research/dogfood/strategy-c/c1-explorer.md +192 -0
  105. package/research/dogfood/strategy-c/c2-diver-memory.md +145 -0
  106. package/research/dogfood/strategy-c/c3-diver-control.md +148 -0
  107. package/research/dogfood/strategy-c/c4-diver-failure.md +151 -0
  108. package/research/dogfood/strategy-c/c5-diver-execution.md +221 -0
  109. package/research/dogfood/strategy-c/c6-diver-org.md +221 -0
  110. package/research/effect-cli-error-handling.md +845 -0
  111. package/research/effect-errors-as-values.md +943 -0
  112. package/research/errors-task-analysis/00-consolidated-tasks.md +207 -0
  113. package/research/errors-task-analysis/cli-commands-analysis.md +909 -0
  114. package/research/errors-task-analysis/embeddings-analysis.md +709 -0
  115. package/research/errors-task-analysis/index-search-analysis.md +812 -0
  116. package/research/frontmatter/COMMENTS-ARE-SKIPPED.md +149 -0
  117. package/research/frontmatter/LLM-CODE-NAVIGATION.md +276 -0
  118. package/research/issue-review.md +603 -0
  119. package/research/llm-summarization/agent-cli-tools-2026.md +1082 -0
  120. package/research/llm-summarization/alternative-providers-2026.md +1428 -0
  121. package/research/llm-summarization/anthropic-2026.md +367 -0
  122. package/research/llm-summarization/claude-cli-integration.md +1706 -0
  123. package/research/llm-summarization/cli-integration-patterns.md +3155 -0
  124. package/research/llm-summarization/openai-2026.md +473 -0
  125. package/research/llm-summarization/openai-compatible-providers-2026.md +1022 -0
  126. package/research/llm-summarization/opencode-cli-integration.md +1552 -0
  127. package/research/llm-summarization/prompt-engineering-2026.md +1426 -0
  128. package/research/llm-summarization/prototype-results.md +56 -0
  129. package/research/llm-summarization/provider-switching-patterns-2026.md +2153 -0
  130. package/research/llm-summarization/typescript-llm-libraries-2026.md +2436 -0
  131. package/research/mdcontext-error-analysis.md +521 -0
  132. package/research/mdcontext-pudding/00-EXECUTIVE-SUMMARY.md +282 -0
  133. package/research/mdcontext-pudding/01-index-embed.md +956 -0
  134. package/research/mdcontext-pudding/02-search-COMMANDS.md +142 -0
  135. package/research/mdcontext-pudding/02-search-SUMMARY.md +146 -0
  136. package/research/mdcontext-pudding/02-search.md +970 -0
  137. package/research/mdcontext-pudding/03-context.md +779 -0
  138. package/research/mdcontext-pudding/04-navigation-and-analytics.md +803 -0
  139. package/research/mdcontext-pudding/04-tree.md +704 -0
  140. package/research/mdcontext-pudding/05-config.md +1038 -0
  141. package/research/mdcontext-pudding/06-links-summary.txt +87 -0
  142. package/research/mdcontext-pudding/06-links.md +679 -0
  143. package/research/mdcontext-pudding/07-stats.md +693 -0
  144. package/research/mdcontext-pudding/BUG-FIX-PLAN.md +388 -0
  145. package/research/mdcontext-pudding/P0-BUG-VALIDATION.md +167 -0
  146. package/research/mdcontext-pudding/README.md +168 -0
  147. package/research/mdcontext-pudding/TESTING-SUMMARY.md +128 -0
  148. package/research/npm_publish/011-npm-workflow-research-agent2.md +792 -0
  149. package/research/npm_publish/012-npm-workflow-research-agent1.md +530 -0
  150. package/research/npm_publish/013-npm-workflow-research-agent3.md +722 -0
  151. package/research/npm_publish/014-npm-workflow-synthesis.md +556 -0
  152. package/research/npm_publish/031-npm-workflow-task-analysis.md +134 -0
  153. package/research/research-quality-review.md +834 -0
  154. package/research/semantic-search/002-research-embedding-models.md +490 -0
  155. package/research/semantic-search/003-research-rag-alternatives.md +523 -0
  156. package/research/semantic-search/004-research-vector-search.md +841 -0
  157. package/research/semantic-search/032-research-semantic-search.md +427 -0
  158. package/research/semantic-search/embedding-text-analysis.md +156 -0
  159. package/research/semantic-search/multi-word-failure-reproduction.md +171 -0
  160. package/research/semantic-search/query-processing-analysis.md +207 -0
  161. package/research/semantic-search/root-cause-and-solution.md +114 -0
  162. package/research/semantic-search/threshold-validation-report.md +69 -0
  163. package/research/semantic-search/vector-search-analysis.md +63 -0
  164. package/research/task-management-2026/00-synthesis-recommendations.md +295 -0
  165. package/research/task-management-2026/01-ai-workflow-tools.md +416 -0
  166. package/research/task-management-2026/02-agent-framework-patterns.md +476 -0
  167. package/research/task-management-2026/03-lightweight-file-based.md +567 -0
  168. package/research/task-management-2026/04-established-tools-ai-features.md +541 -0
  169. package/research/task-management-2026/linear/01-core-features-workflow.md +771 -0
  170. package/research/task-management-2026/linear/02-api-integrations.md +930 -0
  171. package/research/task-management-2026/linear/03-ai-features.md +368 -0
  172. package/research/task-management-2026/linear/04-pricing-setup.md +205 -0
  173. package/research/task-management-2026/linear/05-usage-patterns-best-practices.md +605 -0
  174. package/research/test-path-issues.md +276 -0
  175. package/review/ALP-76/1-error-type-design.md +962 -0
  176. package/review/ALP-76/2-error-handling-patterns.md +906 -0
  177. package/review/ALP-76/3-error-presentation.md +624 -0
  178. package/review/ALP-76/4-test-coverage.md +625 -0
  179. package/review/ALP-76/5-migration-completeness.md +440 -0
  180. package/review/ALP-76/6-effect-best-practices.md +755 -0
  181. package/scripts/apply-branch-protection.sh +47 -0
  182. package/scripts/branch-protection-templates.json +79 -0
  183. package/scripts/prototype-summarization.ts +346 -0
  184. package/scripts/rebuild-hnswlib.js +58 -0
  185. package/scripts/setup-branch-protection.sh +64 -0
  186. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/active-provider.json +7 -0
  187. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.json +541 -0
  188. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.meta.json +5 -0
  189. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/config.json +8 -0
  190. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
  191. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
  192. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/documents.json +60 -0
  193. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/links.json +13 -0
  194. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/sections.json +1197 -0
  195. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/configuration-management.md +99 -0
  196. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/distributed-systems.md +92 -0
  197. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/error-handling.md +78 -0
  198. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/failure-automation.md +55 -0
  199. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/job-context.md +69 -0
  200. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/process-orchestration.md +99 -0
  201. package/src/cli/argv-preprocessor.test.ts +210 -0
  202. package/src/cli/argv-preprocessor.ts +202 -0
  203. package/src/cli/cli.test.ts +627 -0
  204. package/src/cli/commands/backlinks.ts +54 -0
  205. package/src/cli/commands/config-cmd.ts +642 -0
  206. package/src/cli/commands/context.ts +285 -0
  207. package/src/cli/commands/duplicates.ts +122 -0
  208. package/src/cli/commands/embeddings.ts +529 -0
  209. package/src/cli/commands/index-cmd.ts +480 -0
  210. package/src/cli/commands/index.ts +16 -0
  211. package/src/cli/commands/links.ts +52 -0
  212. package/src/cli/commands/search.ts +1281 -0
  213. package/src/cli/commands/stats.ts +149 -0
  214. package/src/cli/commands/tree.ts +128 -0
  215. package/src/cli/config-layer.ts +176 -0
  216. package/src/cli/error-handler.test.ts +235 -0
  217. package/src/cli/error-handler.ts +655 -0
  218. package/src/cli/flag-schemas.ts +341 -0
  219. package/src/cli/help.ts +588 -0
  220. package/src/cli/index.ts +9 -0
  221. package/src/cli/main.ts +435 -0
  222. package/src/cli/options.ts +41 -0
  223. package/src/cli/shared-error-handling.ts +199 -0
  224. package/src/cli/typo-suggester.test.ts +105 -0
  225. package/src/cli/typo-suggester.ts +130 -0
  226. package/src/cli/utils.ts +259 -0
  227. package/src/config/file-provider.test.ts +320 -0
  228. package/src/config/file-provider.ts +273 -0
  229. package/src/config/index.ts +72 -0
  230. package/src/config/integration.test.ts +667 -0
  231. package/src/config/precedence.test.ts +277 -0
  232. package/src/config/precedence.ts +451 -0
  233. package/src/config/schema.test.ts +414 -0
  234. package/src/config/schema.ts +603 -0
  235. package/src/config/service.test.ts +320 -0
  236. package/src/config/service.ts +243 -0
  237. package/src/config/testing.test.ts +264 -0
  238. package/src/config/testing.ts +110 -0
  239. package/src/core/index.ts +1 -0
  240. package/src/core/types.ts +113 -0
  241. package/src/duplicates/detector.test.ts +183 -0
  242. package/src/duplicates/detector.ts +414 -0
  243. package/src/duplicates/index.ts +18 -0
  244. package/src/embeddings/embedding-namespace.test.ts +300 -0
  245. package/src/embeddings/embedding-namespace.ts +947 -0
  246. package/src/embeddings/heading-boost.test.ts +222 -0
  247. package/src/embeddings/hnsw-build-options.test.ts +198 -0
  248. package/src/embeddings/hyde.test.ts +272 -0
  249. package/src/embeddings/hyde.ts +264 -0
  250. package/src/embeddings/index.ts +10 -0
  251. package/src/embeddings/openai-provider.ts +414 -0
  252. package/src/embeddings/pricing.json +22 -0
  253. package/src/embeddings/provider-constants.ts +204 -0
  254. package/src/embeddings/provider-errors.test.ts +967 -0
  255. package/src/embeddings/provider-errors.ts +565 -0
  256. package/src/embeddings/provider-factory.test.ts +240 -0
  257. package/src/embeddings/provider-factory.ts +225 -0
  258. package/src/embeddings/provider-integration.test.ts +788 -0
  259. package/src/embeddings/query-preprocessing.test.ts +187 -0
  260. package/src/embeddings/semantic-search-threshold.test.ts +508 -0
  261. package/src/embeddings/semantic-search.ts +1270 -0
  262. package/src/embeddings/types.ts +359 -0
  263. package/src/embeddings/vector-store.ts +708 -0
  264. package/src/embeddings/voyage-provider.ts +313 -0
  265. package/src/errors/errors.test.ts +845 -0
  266. package/src/errors/index.ts +533 -0
  267. package/src/index/ignore-patterns.test.ts +354 -0
  268. package/src/index/ignore-patterns.ts +305 -0
  269. package/src/index/index.ts +4 -0
  270. package/src/index/indexer.ts +684 -0
  271. package/src/index/storage.ts +260 -0
  272. package/src/index/types.ts +147 -0
  273. package/src/index/watcher.ts +189 -0
  274. package/src/index.ts +30 -0
  275. package/src/integration/search-keyword.test.ts +678 -0
  276. package/src/mcp/server.ts +612 -0
  277. package/src/parser/index.ts +1 -0
  278. package/src/parser/parser.test.ts +291 -0
  279. package/src/parser/parser.ts +394 -0
  280. package/src/parser/section-filter.test.ts +277 -0
  281. package/src/parser/section-filter.ts +392 -0
  282. package/src/search/__tests__/hybrid-search.test.ts +650 -0
  283. package/src/search/bm25-store.ts +366 -0
  284. package/src/search/cross-encoder.test.ts +253 -0
  285. package/src/search/cross-encoder.ts +406 -0
  286. package/src/search/fuzzy-search.test.ts +419 -0
  287. package/src/search/fuzzy-search.ts +273 -0
  288. package/src/search/hybrid-search.ts +448 -0
  289. package/src/search/path-matcher.test.ts +276 -0
  290. package/src/search/path-matcher.ts +33 -0
  291. package/src/search/query-parser.test.ts +260 -0
  292. package/src/search/query-parser.ts +319 -0
  293. package/src/search/searcher.test.ts +280 -0
  294. package/src/search/searcher.ts +724 -0
  295. package/src/search/wink-bm25.d.ts +30 -0
  296. package/src/summarization/cli-providers/claude.ts +202 -0
  297. package/src/summarization/cli-providers/detection.test.ts +273 -0
  298. package/src/summarization/cli-providers/detection.ts +118 -0
  299. package/src/summarization/cli-providers/index.ts +8 -0
  300. package/src/summarization/cost.test.ts +139 -0
  301. package/src/summarization/cost.ts +102 -0
  302. package/src/summarization/error-handler.test.ts +127 -0
  303. package/src/summarization/error-handler.ts +111 -0
  304. package/src/summarization/index.ts +102 -0
  305. package/src/summarization/pipeline.test.ts +498 -0
  306. package/src/summarization/pipeline.ts +231 -0
  307. package/src/summarization/prompts.test.ts +269 -0
  308. package/src/summarization/prompts.ts +133 -0
  309. package/src/summarization/provider-factory.test.ts +396 -0
  310. package/src/summarization/provider-factory.ts +178 -0
  311. package/src/summarization/types.ts +184 -0
  312. package/src/summarize/budget-bugs.test.ts +620 -0
  313. package/src/summarize/formatters.ts +419 -0
  314. package/src/summarize/index.ts +20 -0
  315. package/src/summarize/summarizer.test.ts +275 -0
  316. package/src/summarize/summarizer.ts +597 -0
  317. package/src/summarize/verify-bugs.test.ts +238 -0
  318. package/src/types/huggingface-transformers.d.ts +66 -0
  319. package/src/utils/index.ts +1 -0
  320. package/src/utils/tokens.test.ts +142 -0
  321. package/src/utils/tokens.ts +186 -0
  322. package/tests/fixtures/cli/.mdcontext/active-provider.json +7 -0
  323. package/tests/fixtures/cli/.mdcontext/config.json +8 -0
  324. package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
  325. package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
  326. package/tests/fixtures/cli/.mdcontext/indexes/documents.json +33 -0
  327. package/tests/fixtures/cli/.mdcontext/indexes/links.json +12 -0
  328. package/tests/fixtures/cli/.mdcontext/indexes/sections.json +247 -0
  329. package/tests/fixtures/cli/README.md +9 -0
  330. package/tests/fixtures/cli/api-reference.md +11 -0
  331. package/tests/fixtures/cli/getting-started.md +11 -0
  332. package/tests/integration/embed-index.test.ts +712 -0
  333. package/tests/integration/search-context.test.ts +469 -0
  334. package/tests/integration/search-semantic.test.ts +522 -0
  335. package/tsconfig.json +26 -0
  336. package/vitest.config.ts +16 -0
  337. package/vitest.setup.ts +12 -0
@@ -0,0 +1,366 @@
1
+ /**
2
+ * BM25 Index Store for keyword search
3
+ *
4
+ * Uses wink-bm25-text-search for efficient keyword matching.
5
+ * Index is persisted to .mdcontext/bm25.json for fast startup.
6
+ */
7
+
8
+ import * as fs from 'node:fs/promises'
9
+ import * as path from 'node:path'
10
+ import { Effect } from 'effect'
11
+ import bm25 from 'wink-bm25-text-search'
12
+ import { FileReadError, FileWriteError } from '../errors/index.js'
13
+ import { INDEX_DIR } from '../index/types.js'
14
+
15
+ // ============================================================================
16
+ // Types
17
+ // ============================================================================
18
+
19
/**
 * A single section handed to the BM25 store for indexing.
 */
export interface BM25Document {
  /** Identifier of the document entry (not read by the store in this file). */
  readonly id: string
  /** Identifier of the section; echoed back in search results. */
  readonly sectionId: string
  /** Path of the document the section belongs to; echoed back in search results. */
  readonly documentPath: string
  /** Section heading; indexed as the `heading` field. */
  readonly heading: string
  /** Section body text; indexed as the `content` field. */
  readonly content: string
}
26
+
27
/**
 * One hit returned by a BM25 keyword search.
 */
export interface BM25SearchResult {
  /** Identifier of the matching section. */
  readonly sectionId: string
  /** Path of the document containing the matching section. */
  readonly documentPath: string
  /** Heading of the matching section. */
  readonly heading: string
  /** Relevance score reported by the BM25 engine. */
  readonly score: number
  /** 1-based position of this hit in the result list. */
  readonly rank: number
}
34
+
35
/**
 * Summary information about the current state of a BM25 index.
 */
export interface BM25Stats {
  /** Number of documents that have been added to the index. */
  readonly count: number
  /** ISO-8601 timestamp of the most recent change to the index. */
  readonly lastUpdated: string
}
39
+
40
/**
 * Shape of the metadata file that is persisted next to the BM25 index.
 */
interface BM25Metadata {
  /** Format version of the persisted index. */
  readonly version: number
  /** Number of documents in the persisted index. */
  readonly count: number
  /** ISO-8601 timestamp of the last change made before the index was saved. */
  readonly lastUpdated: string
}
45
+
46
+ // ============================================================================
47
+ // Text Preparation
48
+ // ============================================================================
49
+
50
/**
 * Tokenizer shared by indexing and querying.
 *
 * Lowercases the text, splits it on every run of characters that are not
 * letters, combining marks, digits or underscores, and drops tokens that are
 * two characters long or shorter.
 *
 * The separator is Unicode-aware: an ASCII-only `\W+` split would cut words at
 * every accented or non-Latin character (e.g. "café" -> "caf"), which makes
 * such text effectively unsearchable. For pure ASCII input the output is the
 * same as a `\W+` split.
 *
 * NOTE(review): an index persisted with an ASCII-only tokenizer should be
 * rebuilt if it contains non-ASCII text, so stored terms and query terms
 * agree.
 *
 * @param text - Raw text (heading, content or query)
 * @returns Lowercased tokens longer than two characters, in input order
 */
const tokenize = (text: string): string[] =>
  text
    .toLowerCase()
    .split(/[^\p{L}\p{M}\p{N}_]+/u)
    .filter((token) => token.length > 2)
59
+
60
+ // ============================================================================
61
+ // BM25 Store
62
+ // ============================================================================
63
+
64
/**
 * Keyword-search index backed by BM25 scoring.
 *
 * Typical lifecycle: `add` documents, `consolidate`, then `search`; `save` and
 * `load` move the index to and from disk.
 */
export interface BM25Store {
  /**
   * Index a batch of documents.
   *
   * @param docs - Sections to add to the index
   */
  add(docs: readonly BM25Document[]): Effect.Effect<void, never>

  /**
   * Finalize the index. Required after adding documents and before searching.
   */
  consolidate(): Effect.Effect<void, never>

  /**
   * Run a keyword query against the index.
   *
   * @param query - Search text
   * @param limit - Maximum number of hits to return
   * @returns Matching sections with their score and rank
   */
  search(
    query: string,
    limit?: number,
  ): Effect.Effect<readonly BM25SearchResult[], never>

  /**
   * Persist the index to disk.
   */
  save(): Effect.Effect<void, FileWriteError>

  /**
   * Restore the index from disk.
   *
   * @returns `true` when an index was loaded, `false` when none exists
   */
  load(): Effect.Effect<boolean, FileReadError>

  /**
   * Report the document count and last-update timestamp.
   */
  getStats(): BM25Stats

  /**
   * Tell whether the index is currently consolidated.
   */
  isConsolidated(): boolean

  /**
   * Drop all indexed data and start from an empty index.
   */
  clear(): void
}
109
+
110
/**
 * Create a BM25 store for keyword search.
 *
 * The store wraps a wink-bm25-text-search engine in which headings weigh twice
 * as much as content. Alongside the engine it keeps a map from the numeric id
 * given to each added document to that document's section info, so that engine
 * hits can be translated back into sections.
 *
 * The index is persisted as two files under `<rootPath>/<INDEX_DIR>`:
 * `bm25.json` (engine export + section map) and `bm25.meta.json` (metadata).
 *
 * @param rootPath - Root directory that contains (or will contain) the index
 * @returns A store bound to that directory; it starts out empty
 */
export const createBM25Store = (rootPath: string): BM25Store => {
  const resolvedRoot = path.resolve(rootPath)
  const indexPath = path.join(resolvedRoot, INDEX_DIR, 'bm25.json')
  const metadataPath = path.join(resolvedRoot, INDEX_DIR, 'bm25.meta.json')

  type SectionInfo = {
    sectionId: string
    documentPath: string
    heading: string
  }

  // Store mapping from internal index to section info
  const sectionMap: Map<number, SectionInfo> = new Map()
  let documentCount = 0
  let consolidated = false
  let lastUpdated = new Date().toISOString()

  // Build an empty engine with the field weights and tokenizer used everywhere
  // in this store (shared by initial construction and `clear`).
  const createEngine = () => {
    const fresh = bm25()
    fresh.defineConfig({
      fldWeights: {
        heading: 2,
        content: 1,
      },
    })
    fresh.definePrepTasks([tokenize])
    return fresh
  }

  let engine = createEngine()

  const describeError = (e: unknown): string =>
    e instanceof Error ? e.message : String(e)

  return {
    add(docs: readonly BM25Document[]): Effect.Effect<void, never> {
      return Effect.sync(() => {
        for (const doc of docs) {
          // The running count doubles as the engine-side document id.
          const idx = documentCount++
          sectionMap.set(idx, {
            sectionId: doc.sectionId,
            documentPath: doc.documentPath,
            heading: doc.heading,
          })
          engine.addDoc(
            {
              heading: doc.heading,
              content: doc.content,
            },
            idx,
          )
        }
        // New documents invalidate any previous consolidation.
        consolidated = false
        lastUpdated = new Date().toISOString()
      })
    },

    consolidate(): Effect.Effect<void, never> {
      return Effect.sync(() => {
        // NOTE(review): the underlying engine may reject consolidation of very
        // small collections by throwing; that would surface here as a defect.
        // Confirm against the wink-bm25-text-search version in use.
        if (!consolidated && documentCount > 0) {
          engine.consolidate()
          consolidated = true
        }
      })
    },

    search(
      query: string,
      limit = 10,
    ): Effect.Effect<readonly BM25SearchResult[], never> {
      return Effect.sync(() => {
        if (!consolidated || documentCount === 0) {
          return []
        }

        // NOTE(review): the engine may hand ids back as strings (e.g. when it
        // collects hits in a plain object keyed by id) — confirm. Coercing to
        // a number is a no-op for numeric ids and keeps the lookup below
        // working either way.
        const hits = engine.search(query, limit) as [number | string, number][]

        return hits.map(([rawId, score], position) => {
          const info = sectionMap.get(Number(rawId))
          return {
            sectionId: info?.sectionId ?? '',
            documentPath: info?.documentPath ?? '',
            heading: info?.heading ?? '',
            score,
            rank: position + 1,
          }
        })
      })
    },

    save(): Effect.Effect<void, FileWriteError> {
      return Effect.gen(function* () {
        const metadata: BM25Metadata = {
          version: 1,
          count: documentCount,
          lastUpdated,
        }

        // Tracks which file is being written so the error names the right one.
        let targetPath = indexPath

        yield* Effect.tryPromise({
          try: async () => {
            // Export + serialization happen inside `try` so that a failure
            // there is reported as FileWriteError as well.
            const payload = JSON.stringify({
              engine: engine.exportJSON(),
              // Map is not JSON-serializable; persist it as [key, value] pairs.
              sectionMap: Array.from(sectionMap.entries()),
            })

            // Make sure the index directory exists before writing into it.
            await fs.mkdir(path.dirname(indexPath), { recursive: true })
            await fs.writeFile(indexPath, payload, 'utf-8')

            targetPath = metadataPath
            await fs.writeFile(
              metadataPath,
              JSON.stringify(metadata, null, 2),
              'utf-8',
            )
          },
          catch: (e) =>
            new FileWriteError({
              path: targetPath,
              message: `Failed to save BM25 index: ${describeError(e)}`,
            }),
        })
      })
    },

    load(): Effect.Effect<boolean, FileReadError> {
      return Effect.gen(function* () {
        // A missing index file is not an error: report "nothing to load".
        const exists = yield* Effect.promise(async () => {
          try {
            await fs.access(indexPath)
            return true
          } catch {
            return false
          }
        })

        if (!exists) {
          return false
        }

        // Tracks which file is being processed so the error names the right one.
        let sourcePath = indexPath

        // Reading, parsing and engine restoration all run inside `try` so that
        // a truncated or corrupted index yields a FileReadError instead of an
        // untyped defect, and so that no store state is touched on failure.
        const restored = yield* Effect.tryPromise({
          try: async () => {
            const dataStr = await fs.readFile(indexPath, 'utf-8')
            const data = JSON.parse(dataStr) as {
              engine: string
              sectionMap: [number, SectionInfo][]
            }
            if (data === null || !Array.isArray(data.sectionMap)) {
              throw new Error('unexpected index file structure')
            }

            // Restore into a fresh engine; prep tasks are defined after the
            // import because the imported model carries the configuration.
            const restoredEngine = bm25()
            restoredEngine.importJSON(data.engine)
            restoredEngine.definePrepTasks([tokenize])

            sourcePath = metadataPath
            const metaStr = await fs.readFile(metadataPath, 'utf-8')
            const metadata = JSON.parse(metaStr) as BM25Metadata
            if (
              metadata === null ||
              typeof metadata.count !== 'number' ||
              typeof metadata.lastUpdated !== 'string'
            ) {
              throw new Error('unexpected metadata file structure')
            }

            return { restoredEngine, entries: data.sectionMap, metadata }
          },
          catch: (e) =>
            new FileReadError({
              path: sourcePath,
              message: `Failed to load BM25 index: ${describeError(e)}`,
            }),
        })

        // Everything succeeded: commit the restored state in one go.
        engine = restored.restoredEngine

        sectionMap.clear()
        for (const [idx, info] of restored.entries) {
          sectionMap.set(idx, info)
        }

        documentCount = restored.metadata.count
        lastUpdated = restored.metadata.lastUpdated
        consolidated = true

        return true
      })
    },

    getStats(): BM25Stats {
      return {
        count: documentCount,
        lastUpdated,
      }
    },

    isConsolidated(): boolean {
      return consolidated
    },

    clear(): void {
      engine = createEngine()
      sectionMap.clear()
      documentCount = 0
      consolidated = false
      lastUpdated = new Date().toISOString()
    },
  }
}
319
+
320
+ // ============================================================================
321
+ // BM25 Search Function
322
+ // ============================================================================
323
+
324
/**
 * One-shot BM25 keyword search: opens the persisted index under `rootPath`
 * and queries it.
 *
 * A fresh store is created and loaded from disk each time the returned effect
 * runs. When no index file exists the result is an empty list.
 *
 * @param rootPath - Root directory containing BM25 index
 * @param query - Search query text
 * @param limit - Maximum results (default: 10)
 * @returns Matching sections ranked by BM25 score
 */
export const bm25Search = (
  rootPath: string,
  query: string,
  limit = 10,
): Effect.Effect<readonly BM25SearchResult[], FileReadError> =>
  // `suspend` defers store creation until the effect is actually run.
  Effect.suspend(() => {
    const store = createBM25Store(rootPath)

    return Effect.flatMap(store.load(), (loaded) =>
      loaded
        ? store.search(query, limit)
        : Effect.succeed<readonly BM25SearchResult[]>([]),
    )
  })
347
+
348
+ // ============================================================================
349
+ // Check BM25 Index Exists
350
+ // ============================================================================
351
+
352
/**
 * Report whether a persisted BM25 index file (`bm25.json`) is accessible
 * under the given root directory.
 *
 * @param rootPath - Root directory to inspect
 * @returns `true` when the index file can be accessed, `false` otherwise
 */
export const bm25IndexExists = (rootPath: string): Effect.Effect<boolean> =>
  Effect.promise(async () => {
    const indexFile = path.join(path.resolve(rootPath), INDEX_DIR, 'bm25.json')

    // Any access failure (missing file, permissions, ...) counts as "absent".
    return fs.access(indexFile).then(
      () => true,
      () => false,
    )
  })
@@ -0,0 +1,253 @@
1
+ /**
2
+ * Tests for cross-encoder re-ranking module
3
+ */
4
+
5
+ import { Effect, Exit } from 'effect'
6
+ import { afterEach, describe, expect, it, vi } from 'vitest'
7
+ import {
8
+ getReranker,
9
+ isRerankerAvailable,
10
+ type RerankedResult,
11
+ RerankerError,
12
+ type RerankerOptions,
13
+ rerankResults,
14
+ unloadReranker,
15
+ } from './cross-encoder.js'
16
+
17
describe('cross-encoder', () => {
  // Drop the shared reranker instance and undo any spies so that every test
  // starts from a clean slate.
  afterEach(() => {
    unloadReranker()
    vi.restoreAllMocks()
  })

  /** Build `count` fake search hits shaped like `{ id, content }`. */
  const makeResults = (count: number) =>
    Array.from({ length: count }, (_, i) => ({
      id: i,
      content: `content ${i}`,
    }))

  /** Text extractor handed to `rerankResults` for the fake hits. */
  const getContent = (item: { content: string }) => item.content

  describe('RerankerError', () => {
    it('should create error with DependencyMissing reason', () => {
      const error = new RerankerError(
        'DependencyMissing',
        'Package not installed',
      )
      expect(error.reason).toBe('DependencyMissing')
      expect(error.message).toBe('Package not installed')
      expect(error._tag).toBe('RerankerError')
      expect(error.name).toBe('RerankerError')
    })

    it('should create error with ModelLoadFailed reason', () => {
      const error = new RerankerError('ModelLoadFailed', 'Failed to load model')
      expect(error.reason).toBe('ModelLoadFailed')
      expect(error.message).toBe('Failed to load model')
    })

    it('should create error with InferenceFailed reason', () => {
      const cause = new Error('GPU OOM')
      const error = new RerankerError(
        'InferenceFailed',
        'Inference failed',
        cause,
      )
      expect(error.reason).toBe('InferenceFailed')
      expect(error.cause).toBe(cause)
    })
  })

  describe('getReranker', () => {
    it('should return a singleton reranker instance', () => {
      const reranker1 = getReranker()
      const reranker2 = getReranker()
      expect(reranker1).toBe(reranker2)
    })

    it('should return reranker with expected interface', () => {
      const reranker = getReranker()
      expect(typeof reranker.rerank).toBe('function')
      expect(typeof reranker.isReady).toBe('function')
      expect(typeof reranker.unload).toBe('function')
    })

    it('should report not ready before loading', () => {
      const reranker = getReranker()
      expect(reranker.isReady()).toBe(false)
    })

    it('should accept custom cache directory', () => {
      unloadReranker()
      const reranker = getReranker('/custom/cache/dir')
      expect(reranker).toBeDefined()
    })
  })

  describe('unloadReranker', () => {
    it('should reset singleton', () => {
      const reranker1 = getReranker()
      unloadReranker()
      const reranker2 = getReranker()
      expect(reranker1).not.toBe(reranker2)
    })

    it('should be safe to call multiple times', () => {
      // Make the "no error" expectation explicit instead of relying on the
      // test body merely not throwing.
      expect(() => {
        unloadReranker()
        unloadReranker()
        unloadReranker()
      }).not.toThrow()
    })
  })

  describe('rerankResults', () => {
    it('should return empty array for empty input', async () => {
      const result = await Effect.runPromise(
        rerankResults('test query', [], (item: unknown) => String(item)),
      )
      expect(result).toEqual([])
    })

    it('should handle DependencyMissing error gracefully', async () => {
      const results = [
        { id: 1, content: 'test content' },
        { id: 2, content: 'another content' },
      ]

      const exit = await Effect.runPromiseExit(
        rerankResults('test query', results, getContent),
      )

      // Two outcomes are acceptable, depending on the environment:
      //   1. the optional dependency is unavailable -> a typed RerankerError
      //   2. the dependency is installed            -> a successful result
      // A defect (unexpected throw) is NOT acceptable, so the failure branch
      // checks that the cause is a typed failure carrying a RerankerError.
      if (Exit.isFailure(exit)) {
        const cause = exit.cause
        expect(cause._tag).toBe('Fail')
        if (cause._tag === 'Fail') {
          expect(cause.error).toBeInstanceOf(RerankerError)
        }
      } else {
        expect(Array.isArray(exit.value)).toBe(true)
      }
    })

    // NOTE: the four tests below only check that an effect can be built with
    // the given options. They do not run the model, so the slicing performed
    // by `topK` / `returnTopN` is not verified here.

    it('should respect topK option', () => {
      const options: RerankerOptions = { topK: 5, returnTopN: 3 }

      const effect = rerankResults(
        'test query',
        makeResults(30),
        getContent,
        options,
      )

      expect(Effect.isEffect(effect)).toBe(true)
    })

    it('should respect returnTopN option', () => {
      const options: RerankerOptions = { topK: 10, returnTopN: 5 }

      const effect = rerankResults(
        'test query',
        makeResults(10),
        getContent,
        options,
      )

      expect(Effect.isEffect(effect)).toBe(true)
    })

    it('should use default topK of 20', () => {
      // No options are passed, so the implementation's defaults apply.
      const effect = rerankResults('test query', makeResults(25), getContent)

      expect(Effect.isEffect(effect)).toBe(true)
    })

    it('should use default returnTopN of 10', () => {
      const effect = rerankResults('test query', makeResults(15), getContent)

      expect(Effect.isEffect(effect)).toBe(true)
    })
  })

  describe('isRerankerAvailable', () => {
    it('should return boolean effect', async () => {
      const result = await Effect.runPromise(isRerankerAvailable())
      expect(typeof result).toBe('boolean')
    })

    it('should not throw on missing dependency', async () => {
      // The effect must resolve (never reject) whether or not the optional
      // package is installed.
      const result = await Effect.runPromise(isRerankerAvailable())
      expect([true, false]).toContain(result)
    })
  })

  describe('RerankedResult type', () => {
    it('should have expected structure', () => {
      const result: RerankedResult<{ id: number }> = {
        item: { id: 1 },
        rerankerScore: 0.95,
        originalRank: 1,
      }

      expect(result.item.id).toBe(1)
      expect(result.rerankerScore).toBe(0.95)
      expect(result.originalRank).toBe(1)
    })
  })

  describe('Reranker interface', () => {
    // Each case checks that the named member exists and is callable.
    const cases = [
      ['should define rerank method signature', 'rerank'],
      ['should define isReady method', 'isReady'],
      ['should define unload method', 'unload'],
    ] as const

    for (const [title, method] of cases) {
      it(title, () => {
        const reranker = getReranker()
        expect(typeof reranker[method]).toBe('function')
      })
    }
  })
})