mdcontext 0.0.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (337) hide show
  1. package/.changeset/README.md +28 -0
  2. package/.changeset/config.json +11 -0
  3. package/.claude/settings.local.json +25 -0
  4. package/.github/workflows/ci.yml +83 -0
  5. package/.github/workflows/claude-code-review.yml +44 -0
  6. package/.github/workflows/claude.yml +85 -0
  7. package/.github/workflows/release.yml +113 -0
  8. package/.tldrignore +112 -0
  9. package/BACKLOG.md +338 -0
  10. package/CONTRIBUTING.md +186 -0
  11. package/NOTES/NOTES +44 -0
  12. package/README.md +434 -11
  13. package/biome.json +36 -0
  14. package/cspell.config.yaml +14 -0
  15. package/dist/chunk-23UPXDNL.js +3044 -0
  16. package/dist/chunk-2W7MO2DL.js +1366 -0
  17. package/dist/chunk-3NUAZGMA.js +1689 -0
  18. package/dist/chunk-7TOWB2XB.js +366 -0
  19. package/dist/chunk-7XOTOADQ.js +3065 -0
  20. package/dist/chunk-AH2PDM2K.js +3042 -0
  21. package/dist/chunk-BNXWSZ63.js +3742 -0
  22. package/dist/chunk-BTL5DJVU.js +3222 -0
  23. package/dist/chunk-HDHYG7E4.js +104 -0
  24. package/dist/chunk-HLR4KZBP.js +3234 -0
  25. package/dist/chunk-IP3FRFEB.js +1045 -0
  26. package/dist/chunk-KHU56VDO.js +3042 -0
  27. package/dist/chunk-KRYIFLQR.js +88 -0
  28. package/dist/chunk-LBSDNLEM.js +287 -0
  29. package/dist/chunk-MNTQ7HCP.js +2643 -0
  30. package/dist/chunk-MUJELQQ6.js +1387 -0
  31. package/dist/chunk-MXJGMSLV.js +2199 -0
  32. package/dist/chunk-N6QJGC3Z.js +2636 -0
  33. package/dist/chunk-OBELGBPM.js +1713 -0
  34. package/dist/chunk-OT7R5XTA.js +3192 -0
  35. package/dist/chunk-P7X4RA2T.js +106 -0
  36. package/dist/chunk-PIDUQNC2.js +3185 -0
  37. package/dist/chunk-POGCDIH4.js +3187 -0
  38. package/dist/chunk-PSIEOQGZ.js +3043 -0
  39. package/dist/chunk-PVRT3IHA.js +3238 -0
  40. package/dist/chunk-QNN4TT23.js +1430 -0
  41. package/dist/chunk-RE3R45RJ.js +3042 -0
  42. package/dist/chunk-S7E6TFX6.js +803 -0
  43. package/dist/chunk-SG6GLU4U.js +1378 -0
  44. package/dist/chunk-SJCDV2ST.js +274 -0
  45. package/dist/chunk-SYE5XLF3.js +104 -0
  46. package/dist/chunk-T5VLYBZD.js +103 -0
  47. package/dist/chunk-TOQB7VWU.js +3238 -0
  48. package/dist/chunk-VFNMZ4ZQ.js +3228 -0
  49. package/dist/chunk-VVTGZNBT.js +1629 -0
  50. package/dist/chunk-W7Q4RFEV.js +104 -0
  51. package/dist/chunk-XTYYVRLO.js +3190 -0
  52. package/dist/chunk-Y6MDYVJD.js +3063 -0
  53. package/dist/cli/main.d.ts +1 -0
  54. package/dist/cli/main.js +5458 -0
  55. package/dist/index.d.ts +653 -0
  56. package/dist/index.js +79 -0
  57. package/dist/mcp/server.d.ts +1 -0
  58. package/dist/mcp/server.js +472 -0
  59. package/dist/schema-BAWSG7KY.js +22 -0
  60. package/dist/schema-E3QUPL26.js +20 -0
  61. package/dist/schema-EHL7WUT6.js +20 -0
  62. package/docs/019-USAGE.md +625 -0
  63. package/docs/020-current-implementation.md +364 -0
  64. package/docs/021-DOGFOODING-FINDINGS.md +175 -0
  65. package/docs/BACKLOG.md +80 -0
  66. package/docs/CONFIG.md +1123 -0
  67. package/docs/DESIGN.md +439 -0
  68. package/docs/ERRORS.md +383 -0
  69. package/docs/PROJECT.md +88 -0
  70. package/docs/ROADMAP.md +407 -0
  71. package/docs/summarization.md +320 -0
  72. package/docs/test-links.md +9 -0
  73. package/justfile +40 -0
  74. package/package.json +74 -9
  75. package/pnpm-workspace.yaml +5 -0
  76. package/research/INDEX.md +315 -0
  77. package/research/code-review/README.md +90 -0
  78. package/research/code-review/cli-error-handling-review.md +979 -0
  79. package/research/code-review/code-review-validation-report.md +464 -0
  80. package/research/code-review/main-ts-review.md +1128 -0
  81. package/research/config-analysis/01-current-implementation.md +470 -0
  82. package/research/config-analysis/02-strategy-recommendation.md +428 -0
  83. package/research/config-analysis/03-task-candidates.md +715 -0
  84. package/research/config-analysis/033-research-configuration-management.md +828 -0
  85. package/research/config-analysis/034-research-effect-cli-config.md +1504 -0
  86. package/research/config-analysis/04-consolidated-task-candidates.md +277 -0
  87. package/research/config-docs/SUMMARY.md +357 -0
  88. package/research/config-docs/TEST-RESULTS.md +776 -0
  89. package/research/config-docs/TODO.md +542 -0
  90. package/research/config-docs/analysis.md +744 -0
  91. package/research/config-docs/fix-validation.md +502 -0
  92. package/research/config-docs/help-audit.md +264 -0
  93. package/research/config-docs/help-system-analysis.md +890 -0
  94. package/research/dogfood/consolidated-tool-evaluation.md +373 -0
  95. package/research/dogfood/strategy-a/a-synthesis.md +184 -0
  96. package/research/dogfood/strategy-a/a1-docs.md +226 -0
  97. package/research/dogfood/strategy-a/a2-amorphic.md +156 -0
  98. package/research/dogfood/strategy-a/a3-llm.md +164 -0
  99. package/research/dogfood/strategy-b/b-synthesis.md +228 -0
  100. package/research/dogfood/strategy-b/b1-architecture.md +207 -0
  101. package/research/dogfood/strategy-b/b2-gaps.md +258 -0
  102. package/research/dogfood/strategy-b/b3-workflows.md +250 -0
  103. package/research/dogfood/strategy-c/c-synthesis.md +451 -0
  104. package/research/dogfood/strategy-c/c1-explorer.md +192 -0
  105. package/research/dogfood/strategy-c/c2-diver-memory.md +145 -0
  106. package/research/dogfood/strategy-c/c3-diver-control.md +148 -0
  107. package/research/dogfood/strategy-c/c4-diver-failure.md +151 -0
  108. package/research/dogfood/strategy-c/c5-diver-execution.md +221 -0
  109. package/research/dogfood/strategy-c/c6-diver-org.md +221 -0
  110. package/research/effect-cli-error-handling.md +845 -0
  111. package/research/effect-errors-as-values.md +943 -0
  112. package/research/errors-task-analysis/00-consolidated-tasks.md +207 -0
  113. package/research/errors-task-analysis/cli-commands-analysis.md +909 -0
  114. package/research/errors-task-analysis/embeddings-analysis.md +709 -0
  115. package/research/errors-task-analysis/index-search-analysis.md +812 -0
  116. package/research/frontmatter/COMMENTS-ARE-SKIPPED.md +149 -0
  117. package/research/frontmatter/LLM-CODE-NAVIGATION.md +276 -0
  118. package/research/issue-review.md +603 -0
  119. package/research/llm-summarization/agent-cli-tools-2026.md +1082 -0
  120. package/research/llm-summarization/alternative-providers-2026.md +1428 -0
  121. package/research/llm-summarization/anthropic-2026.md +367 -0
  122. package/research/llm-summarization/claude-cli-integration.md +1706 -0
  123. package/research/llm-summarization/cli-integration-patterns.md +3155 -0
  124. package/research/llm-summarization/openai-2026.md +473 -0
  125. package/research/llm-summarization/openai-compatible-providers-2026.md +1022 -0
  126. package/research/llm-summarization/opencode-cli-integration.md +1552 -0
  127. package/research/llm-summarization/prompt-engineering-2026.md +1426 -0
  128. package/research/llm-summarization/prototype-results.md +56 -0
  129. package/research/llm-summarization/provider-switching-patterns-2026.md +2153 -0
  130. package/research/llm-summarization/typescript-llm-libraries-2026.md +2436 -0
  131. package/research/mdcontext-error-analysis.md +521 -0
  132. package/research/mdcontext-pudding/00-EXECUTIVE-SUMMARY.md +282 -0
  133. package/research/mdcontext-pudding/01-index-embed.md +956 -0
  134. package/research/mdcontext-pudding/02-search-COMMANDS.md +142 -0
  135. package/research/mdcontext-pudding/02-search-SUMMARY.md +146 -0
  136. package/research/mdcontext-pudding/02-search.md +970 -0
  137. package/research/mdcontext-pudding/03-context.md +779 -0
  138. package/research/mdcontext-pudding/04-navigation-and-analytics.md +803 -0
  139. package/research/mdcontext-pudding/04-tree.md +704 -0
  140. package/research/mdcontext-pudding/05-config.md +1038 -0
  141. package/research/mdcontext-pudding/06-links-summary.txt +87 -0
  142. package/research/mdcontext-pudding/06-links.md +679 -0
  143. package/research/mdcontext-pudding/07-stats.md +693 -0
  144. package/research/mdcontext-pudding/BUG-FIX-PLAN.md +388 -0
  145. package/research/mdcontext-pudding/P0-BUG-VALIDATION.md +167 -0
  146. package/research/mdcontext-pudding/README.md +168 -0
  147. package/research/mdcontext-pudding/TESTING-SUMMARY.md +128 -0
  148. package/research/npm_publish/011-npm-workflow-research-agent2.md +792 -0
  149. package/research/npm_publish/012-npm-workflow-research-agent1.md +530 -0
  150. package/research/npm_publish/013-npm-workflow-research-agent3.md +722 -0
  151. package/research/npm_publish/014-npm-workflow-synthesis.md +556 -0
  152. package/research/npm_publish/031-npm-workflow-task-analysis.md +134 -0
  153. package/research/research-quality-review.md +834 -0
  154. package/research/semantic-search/002-research-embedding-models.md +490 -0
  155. package/research/semantic-search/003-research-rag-alternatives.md +523 -0
  156. package/research/semantic-search/004-research-vector-search.md +841 -0
  157. package/research/semantic-search/032-research-semantic-search.md +427 -0
  158. package/research/semantic-search/embedding-text-analysis.md +156 -0
  159. package/research/semantic-search/multi-word-failure-reproduction.md +171 -0
  160. package/research/semantic-search/query-processing-analysis.md +207 -0
  161. package/research/semantic-search/root-cause-and-solution.md +114 -0
  162. package/research/semantic-search/threshold-validation-report.md +69 -0
  163. package/research/semantic-search/vector-search-analysis.md +63 -0
  164. package/research/task-management-2026/00-synthesis-recommendations.md +295 -0
  165. package/research/task-management-2026/01-ai-workflow-tools.md +416 -0
  166. package/research/task-management-2026/02-agent-framework-patterns.md +476 -0
  167. package/research/task-management-2026/03-lightweight-file-based.md +567 -0
  168. package/research/task-management-2026/04-established-tools-ai-features.md +541 -0
  169. package/research/task-management-2026/linear/01-core-features-workflow.md +771 -0
  170. package/research/task-management-2026/linear/02-api-integrations.md +930 -0
  171. package/research/task-management-2026/linear/03-ai-features.md +368 -0
  172. package/research/task-management-2026/linear/04-pricing-setup.md +205 -0
  173. package/research/task-management-2026/linear/05-usage-patterns-best-practices.md +605 -0
  174. package/research/test-path-issues.md +276 -0
  175. package/review/ALP-76/1-error-type-design.md +962 -0
  176. package/review/ALP-76/2-error-handling-patterns.md +906 -0
  177. package/review/ALP-76/3-error-presentation.md +624 -0
  178. package/review/ALP-76/4-test-coverage.md +625 -0
  179. package/review/ALP-76/5-migration-completeness.md +440 -0
  180. package/review/ALP-76/6-effect-best-practices.md +755 -0
  181. package/scripts/apply-branch-protection.sh +47 -0
  182. package/scripts/branch-protection-templates.json +79 -0
  183. package/scripts/prototype-summarization.ts +346 -0
  184. package/scripts/rebuild-hnswlib.js +58 -0
  185. package/scripts/setup-branch-protection.sh +64 -0
  186. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/active-provider.json +7 -0
  187. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.json +541 -0
  188. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.meta.json +5 -0
  189. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/config.json +8 -0
  190. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
  191. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
  192. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/documents.json +60 -0
  193. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/links.json +13 -0
  194. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/sections.json +1197 -0
  195. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/configuration-management.md +99 -0
  196. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/distributed-systems.md +92 -0
  197. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/error-handling.md +78 -0
  198. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/failure-automation.md +55 -0
  199. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/job-context.md +69 -0
  200. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/process-orchestration.md +99 -0
  201. package/src/cli/argv-preprocessor.test.ts +210 -0
  202. package/src/cli/argv-preprocessor.ts +202 -0
  203. package/src/cli/cli.test.ts +627 -0
  204. package/src/cli/commands/backlinks.ts +54 -0
  205. package/src/cli/commands/config-cmd.ts +642 -0
  206. package/src/cli/commands/context.ts +285 -0
  207. package/src/cli/commands/duplicates.ts +122 -0
  208. package/src/cli/commands/embeddings.ts +529 -0
  209. package/src/cli/commands/index-cmd.ts +480 -0
  210. package/src/cli/commands/index.ts +16 -0
  211. package/src/cli/commands/links.ts +52 -0
  212. package/src/cli/commands/search.ts +1281 -0
  213. package/src/cli/commands/stats.ts +149 -0
  214. package/src/cli/commands/tree.ts +128 -0
  215. package/src/cli/config-layer.ts +176 -0
  216. package/src/cli/error-handler.test.ts +235 -0
  217. package/src/cli/error-handler.ts +655 -0
  218. package/src/cli/flag-schemas.ts +341 -0
  219. package/src/cli/help.ts +588 -0
  220. package/src/cli/index.ts +9 -0
  221. package/src/cli/main.ts +435 -0
  222. package/src/cli/options.ts +41 -0
  223. package/src/cli/shared-error-handling.ts +199 -0
  224. package/src/cli/typo-suggester.test.ts +105 -0
  225. package/src/cli/typo-suggester.ts +130 -0
  226. package/src/cli/utils.ts +259 -0
  227. package/src/config/file-provider.test.ts +320 -0
  228. package/src/config/file-provider.ts +273 -0
  229. package/src/config/index.ts +72 -0
  230. package/src/config/integration.test.ts +667 -0
  231. package/src/config/precedence.test.ts +277 -0
  232. package/src/config/precedence.ts +451 -0
  233. package/src/config/schema.test.ts +414 -0
  234. package/src/config/schema.ts +603 -0
  235. package/src/config/service.test.ts +320 -0
  236. package/src/config/service.ts +243 -0
  237. package/src/config/testing.test.ts +264 -0
  238. package/src/config/testing.ts +110 -0
  239. package/src/core/index.ts +1 -0
  240. package/src/core/types.ts +113 -0
  241. package/src/duplicates/detector.test.ts +183 -0
  242. package/src/duplicates/detector.ts +414 -0
  243. package/src/duplicates/index.ts +18 -0
  244. package/src/embeddings/embedding-namespace.test.ts +300 -0
  245. package/src/embeddings/embedding-namespace.ts +947 -0
  246. package/src/embeddings/heading-boost.test.ts +222 -0
  247. package/src/embeddings/hnsw-build-options.test.ts +198 -0
  248. package/src/embeddings/hyde.test.ts +272 -0
  249. package/src/embeddings/hyde.ts +264 -0
  250. package/src/embeddings/index.ts +10 -0
  251. package/src/embeddings/openai-provider.ts +414 -0
  252. package/src/embeddings/pricing.json +22 -0
  253. package/src/embeddings/provider-constants.ts +204 -0
  254. package/src/embeddings/provider-errors.test.ts +967 -0
  255. package/src/embeddings/provider-errors.ts +565 -0
  256. package/src/embeddings/provider-factory.test.ts +240 -0
  257. package/src/embeddings/provider-factory.ts +225 -0
  258. package/src/embeddings/provider-integration.test.ts +788 -0
  259. package/src/embeddings/query-preprocessing.test.ts +187 -0
  260. package/src/embeddings/semantic-search-threshold.test.ts +508 -0
  261. package/src/embeddings/semantic-search.ts +1270 -0
  262. package/src/embeddings/types.ts +359 -0
  263. package/src/embeddings/vector-store.ts +708 -0
  264. package/src/embeddings/voyage-provider.ts +313 -0
  265. package/src/errors/errors.test.ts +845 -0
  266. package/src/errors/index.ts +533 -0
  267. package/src/index/ignore-patterns.test.ts +354 -0
  268. package/src/index/ignore-patterns.ts +305 -0
  269. package/src/index/index.ts +4 -0
  270. package/src/index/indexer.ts +684 -0
  271. package/src/index/storage.ts +260 -0
  272. package/src/index/types.ts +147 -0
  273. package/src/index/watcher.ts +189 -0
  274. package/src/index.ts +30 -0
  275. package/src/integration/search-keyword.test.ts +678 -0
  276. package/src/mcp/server.ts +612 -0
  277. package/src/parser/index.ts +1 -0
  278. package/src/parser/parser.test.ts +291 -0
  279. package/src/parser/parser.ts +394 -0
  280. package/src/parser/section-filter.test.ts +277 -0
  281. package/src/parser/section-filter.ts +392 -0
  282. package/src/search/__tests__/hybrid-search.test.ts +650 -0
  283. package/src/search/bm25-store.ts +366 -0
  284. package/src/search/cross-encoder.test.ts +253 -0
  285. package/src/search/cross-encoder.ts +406 -0
  286. package/src/search/fuzzy-search.test.ts +419 -0
  287. package/src/search/fuzzy-search.ts +273 -0
  288. package/src/search/hybrid-search.ts +448 -0
  289. package/src/search/path-matcher.test.ts +276 -0
  290. package/src/search/path-matcher.ts +33 -0
  291. package/src/search/query-parser.test.ts +260 -0
  292. package/src/search/query-parser.ts +319 -0
  293. package/src/search/searcher.test.ts +280 -0
  294. package/src/search/searcher.ts +724 -0
  295. package/src/search/wink-bm25.d.ts +30 -0
  296. package/src/summarization/cli-providers/claude.ts +202 -0
  297. package/src/summarization/cli-providers/detection.test.ts +273 -0
  298. package/src/summarization/cli-providers/detection.ts +118 -0
  299. package/src/summarization/cli-providers/index.ts +8 -0
  300. package/src/summarization/cost.test.ts +139 -0
  301. package/src/summarization/cost.ts +102 -0
  302. package/src/summarization/error-handler.test.ts +127 -0
  303. package/src/summarization/error-handler.ts +111 -0
  304. package/src/summarization/index.ts +102 -0
  305. package/src/summarization/pipeline.test.ts +498 -0
  306. package/src/summarization/pipeline.ts +231 -0
  307. package/src/summarization/prompts.test.ts +269 -0
  308. package/src/summarization/prompts.ts +133 -0
  309. package/src/summarization/provider-factory.test.ts +396 -0
  310. package/src/summarization/provider-factory.ts +178 -0
  311. package/src/summarization/types.ts +184 -0
  312. package/src/summarize/budget-bugs.test.ts +620 -0
  313. package/src/summarize/formatters.ts +419 -0
  314. package/src/summarize/index.ts +20 -0
  315. package/src/summarize/summarizer.test.ts +275 -0
  316. package/src/summarize/summarizer.ts +597 -0
  317. package/src/summarize/verify-bugs.test.ts +238 -0
  318. package/src/types/huggingface-transformers.d.ts +66 -0
  319. package/src/utils/index.ts +1 -0
  320. package/src/utils/tokens.test.ts +142 -0
  321. package/src/utils/tokens.ts +186 -0
  322. package/tests/fixtures/cli/.mdcontext/active-provider.json +7 -0
  323. package/tests/fixtures/cli/.mdcontext/config.json +8 -0
  324. package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
  325. package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
  326. package/tests/fixtures/cli/.mdcontext/indexes/documents.json +33 -0
  327. package/tests/fixtures/cli/.mdcontext/indexes/links.json +12 -0
  328. package/tests/fixtures/cli/.mdcontext/indexes/sections.json +247 -0
  329. package/tests/fixtures/cli/README.md +9 -0
  330. package/tests/fixtures/cli/api-reference.md +11 -0
  331. package/tests/fixtures/cli/getting-started.md +11 -0
  332. package/tests/integration/embed-index.test.ts +712 -0
  333. package/tests/integration/search-context.test.ts +469 -0
  334. package/tests/integration/search-semantic.test.ts +522 -0
  335. package/tsconfig.json +26 -0
  336. package/vitest.config.ts +16 -0
  337. package/vitest.setup.ts +12 -0
@@ -0,0 +1,650 @@
1
+ /**
2
+ * Hybrid Search Integration Tests
3
+ *
4
+ * Tests the full hybrid search stack combining BM25 keyword search with
5
+ * semantic vector search using Reciprocal Rank Fusion (RRF).
6
+ *
7
+ * Test corpus: src/__tests__/fixtures/semantic-search/multi-word-corpus
8
+ * - 6 documents covering system configuration and error handling topics
9
+ * - Pre-built embeddings (512 dimensions, text-embedding-3-small)
10
+ * - BM25 index for keyword search
11
+ */
12
+
13
+ import * as fs from 'node:fs/promises'
14
+ import * as path from 'node:path'
15
+ import { Effect } from 'effect'
16
+ import { afterAll, beforeAll, describe, expect, it } from 'vitest'
17
+ import { createStorage, loadSectionIndex } from '../../index/storage.js'
18
+ import { createBM25Store } from '../bm25-store.js'
19
+ import {
20
+ detectSearchModes,
21
+ hybridSearch,
22
+ type SearchMode,
23
+ } from '../hybrid-search.js'
24
+
25
+ const TEST_CORPUS_PATH = path.join(
26
+ __dirname,
27
+ '../../__tests__/fixtures/semantic-search/multi-word-corpus',
28
+ )
29
+
30
+ describe('Hybrid Search Integration', () => {
31
+ beforeAll(async () => { // one-time setup: make sure a BM25 index is available for the fixture corpus
32
+ const store = createBM25Store(TEST_CORPUS_PATH)
33
+ const loaded = await Effect.runPromise(store.load())
34
+
35
+ if (!loaded) { // load() gave a falsy result, so the index is rebuilt from the section index below
36
+ const storage = createStorage(TEST_CORPUS_PATH)
37
+ const sectionIndex = await Effect.runPromise(loadSectionIndex(storage))
38
+
39
+ if (sectionIndex) { // NOTE(review): when no section index is found the rebuild is skipped silently
40
+ const docs = await Promise.all( // one document per section, file reads run concurrently
41
+ Object.values(sectionIndex.sections).map(async (section) => {
42
+ const filePath = path.join(TEST_CORPUS_PATH, section.documentPath)
43
+ let content = ''
44
+
45
+ try {
46
+ const fileContent = await fs.readFile(filePath, 'utf-8')
47
+ const lines = fileContent.split('\n')
48
+ content = lines
49
+ .slice(section.startLine - 1, section.endLine) // presumably startLine/endLine are 1-based and inclusive - TODO confirm
50
+ .join('\n')
51
+ } catch (_e) { // NOTE(review): any read failure is swallowed and the section is indexed with empty content
52
+ content = ''
53
+ }
54
+
55
+ return {
56
+ id: section.id, // id and sectionId both carry the section id
57
+ sectionId: section.id,
58
+ documentPath: section.documentPath,
59
+ heading: section.heading,
60
+ content,
61
+ }
62
+ }),
63
+ )
64
+
65
+ await Effect.runPromise(
66
+ Effect.gen(function* () { // runs add, consolidate, save in sequence on the same store
67
+ yield* store.add(docs)
68
+ yield* store.consolidate()
69
+ yield* store.save() // NOTE(review): presumably persists the index under the fixture directory - confirm
70
+ }),
71
+ )
72
+ }
73
+ }
74
+ })
75
+
76
+ afterAll(async () => { // teardown hook with an empty body
77
+ // No explicit teardown is performed here: anything the beforeAll hook saved is left in place
78
+ })
79
+
80
+ describe('Index Detection', () => { // checks what detectSearchModes reports for the fixture corpus
80
+ it('should detect both BM25 and embeddings indexes', async () => {
81
+ const modes = await Effect.runPromise(detectSearchModes(TEST_CORPUS_PATH))
82
+
83
+ expect(modes.hasBM25).toBe(true)
84
+ expect(modes.hasEmbeddings).toBe(true)
85
+ expect(modes.recommendedMode).toBe('hybrid')
86
+ })
87
+
88
+ it('should recommend hybrid mode when both indexes available', async () => { // NOTE(review): repeats the recommendedMode assertion of the previous test
89
+ const modes = await Effect.runPromise(detectSearchModes(TEST_CORPUS_PATH))
90
+
91
+ expect(modes.recommendedMode).toBe('hybrid')
92
+ })
93
+ })
95
+
96
+ describe('Hybrid Search Results', () => { // hybrid-mode searches: result presence and per-result source attribution
96
+ it('should combine semantic and keyword results', async () => {
97
+ const query = 'error handling configuration'
98
+ const result = await Effect.runPromise(
99
+ hybridSearch(TEST_CORPUS_PATH, query, {
100
+ limit: 10,
101
+ threshold: 0.2,
102
+ mode: 'hybrid',
103
+ }),
104
+ )
105
+
106
+ expect(result.results).toBeDefined()
107
+ expect(Array.isArray(result.results)).toBe(true)
108
+ expect(result.results.length).toBeGreaterThan(0)
109
+
110
+ expect(result.stats.mode).toBe('hybrid')
111
+ expect(result.stats.semanticResults).toBeGreaterThanOrEqual(0) // zero semantic hits is accepted here
112
+ expect(result.stats.keywordResults).toBeGreaterThan(0) // at least one keyword hit is required
113
+ })
114
+
115
+ it('should include both exact matches and semantic matches', async () => {
116
+ const query = 'configuration management'
117
+ const result = await Effect.runPromise(
118
+ hybridSearch(TEST_CORPUS_PATH, query, {
119
+ limit: 20,
120
+ threshold: 0.15,
121
+ mode: 'hybrid',
122
+ }),
123
+ )
124
+
125
+ const hasExactMatches = result.results.some((r) => // true when any result lists keyword as a source
126
+ r.sources.includes('keyword'),
127
+ )
128
+ const hasSemanticMatches = result.results.some((r) => // true when any result lists semantic as a source
129
+ r.sources.includes('semantic'),
130
+ )
131
+
132
+ expect(hasExactMatches).toBe(true)
133
+ if (result.stats.embeddingsAvailable) { // semantic matches are only required when the stats report embeddings as available
134
+ expect(hasSemanticMatches).toBe(true)
135
+ }
136
+ })
137
+
138
+ it('should indicate which sources contributed to each result', async () => {
139
+ const result = await Effect.runPromise(
140
+ hybridSearch(TEST_CORPUS_PATH, 'error handling', {
141
+ limit: 10,
142
+ threshold: 0.3,
143
+ mode: 'hybrid',
144
+ }),
145
+ )
146
+
147
+ for (const r of result.results) { // NOTE(review): passes vacuously if the search returns no results
148
+ expect(r.sources).toBeDefined()
149
+ expect(Array.isArray(r.sources)).toBe(true)
150
+ expect(r.sources.length).toBeGreaterThan(0)
151
+
152
+ for (const source of r.sources) {
153
+ expect(['semantic', 'keyword']).toContain(source) // only these two source labels are accepted
154
+ }
155
+ }
156
+ })
157
+ })
159
+
160
+ describe('RRF Scoring', () => { // ordering and score fields of hybrid-mode results
160
+ it('should rank results by combined RRF score', async () => {
161
+ const result = await Effect.runPromise(
162
+ hybridSearch(TEST_CORPUS_PATH, 'error handling', {
163
+ limit: 10,
164
+ threshold: 0.2,
165
+ mode: 'hybrid',
166
+ }),
167
+ )
168
+
169
+ expect(result.results.length).toBeGreaterThanOrEqual(1)
170
+
171
+ for (let i = 1; i < result.results.length; i++) { // compares each result with its predecessor: scores must be non-increasing
172
+ const prevScore = result.results[i - 1]!.score
173
+ const currentScore = result.results[i]!.score
174
+ expect(prevScore).toBeGreaterThanOrEqual(currentScore)
175
+ }
176
+ })
177
+
178
+ it('should give higher scores to results found by both methods', async () => {
179
+ const result = await Effect.runPromise(
180
+ hybridSearch(TEST_CORPUS_PATH, 'error handling', {
181
+ limit: 15,
182
+ threshold: 0.2,
183
+ mode: 'hybrid',
184
+ }),
185
+ )
186
+
187
+ const bothSources = result.results.filter((r) => r.sources.length === 2) // results with two source entries
188
+ const singleSource = result.results.filter((r) => r.sources.length === 1) // results with one source entry
189
+
190
+ if (bothSources.length > 0 && singleSource.length > 0) { // NOTE(review): the assertion is skipped entirely when either group is empty
191
+ const avgBoth =
192
+ bothSources.reduce((sum, r) => sum + r.score, 0) / bothSources.length
193
+ const avgSingle =
194
+ singleSource.reduce((sum, r) => sum + r.score, 0) /
195
+ singleSource.length
196
+
197
+ expect(avgBoth).toBeGreaterThan(avgSingle * 0.8) // NOTE(review): the 0.8 factor lets this pass even when the two-source average is lower than the single-source average
198
+ }
199
+ })
200
+
201
+ it('should include individual scores when available', async () => {
202
+ const result = await Effect.runPromise(
203
+ hybridSearch(TEST_CORPUS_PATH, 'configuration', {
204
+ limit: 10,
205
+ threshold: 0.2,
206
+ mode: 'hybrid',
207
+ }),
208
+ )
209
+
210
+ for (const r of result.results) { // NOTE(review): passes vacuously if the search returns no results
211
+ if (r.sources.includes('semantic')) { // semantic hits must carry a similarity within 0 and 1 inclusive
212
+ expect(r.similarity).toBeDefined()
213
+ expect(r.similarity).toBeGreaterThanOrEqual(0)
214
+ expect(r.similarity).toBeLessThanOrEqual(1)
215
+ }
216
+
217
+ if (r.sources.includes('keyword')) { // keyword hits must carry a strictly positive bm25Score
218
+ expect(r.bm25Score).toBeDefined()
219
+ expect(r.bm25Score).toBeGreaterThan(0)
220
+ }
221
+ }
222
+ })
223
+ })
225
+
226
+ describe('Result Format', () => { // shape checks on individual results and on the stats object
226
+ it('should return results with correct structure', async () => {
227
+ const result = await Effect.runPromise(
228
+ hybridSearch(TEST_CORPUS_PATH, 'error', {
229
+ limit: 5,
230
+ threshold: 0.3,
231
+ mode: 'hybrid',
232
+ }),
233
+ )
234
+
235
+ for (const r of result.results) { // NOTE(review): passes vacuously if the search returns no results
236
+ expect(r.sectionId).toBeDefined()
237
+ expect(typeof r.sectionId).toBe('string')
238
+
239
+ expect(r.documentPath).toBeDefined()
240
+ expect(typeof r.documentPath).toBe('string')
241
+
242
+ expect(r.heading).toBeDefined()
243
+ expect(typeof r.heading).toBe('string')
244
+
245
+ expect(r.score).toBeDefined()
246
+ expect(typeof r.score).toBe('number')
247
+ expect(r.score).toBeGreaterThan(0) // a score of exactly zero is rejected
248
+
249
+ expect(r.sources).toBeDefined()
250
+ expect(Array.isArray(r.sources)).toBe(true)
251
+ }
252
+ })
253
+
254
+ it('should return stats with search metadata', async () => {
255
+ const result = await Effect.runPromise(
256
+ hybridSearch(TEST_CORPUS_PATH, 'error', {
257
+ limit: 10,
258
+ threshold: 0.2,
259
+ mode: 'hybrid',
260
+ }),
261
+ )
262
+
263
+ expect(result.stats).toBeDefined()
264
+ expect(result.stats.modeReason).toBeDefined()
265
+ expect(result.stats.semanticResults).toBeGreaterThanOrEqual(0) // the three counters only need to be non-negative
266
+ expect(result.stats.keywordResults).toBeGreaterThanOrEqual(0)
267
+ expect(result.stats.combinedResults).toBeGreaterThanOrEqual(0)
268
+ expect(result.stats.bm25Available).toBe(true)
269
+ expect(typeof result.stats.embeddingsAvailable).toBe('boolean') // either value is accepted, only the type is checked
270
+ })
271
+
272
+ it('should track total available results', async () => {
273
+ const result = await Effect.runPromise(
274
+ hybridSearch(TEST_CORPUS_PATH, 'error', {
275
+ limit: 3,
276
+ threshold: 0.2,
277
+ mode: 'hybrid',
278
+ }),
279
+ )
280
+
281
+ if (result.stats.totalAvailable !== undefined) { // NOTE(review): nothing is asserted when totalAvailable is undefined
282
+ expect(result.stats.totalAvailable).toBeGreaterThanOrEqual(
283
+ result.results.length,
284
+ )
285
+ }
286
+ })
287
+ })
289
+
290
+ describe('Search Modes', () => { // explicit mode option: hybrid, semantic, keyword (no threshold passed in these calls)
290
+ it('should support explicit hybrid mode', async () => {
291
+ const result = await Effect.runPromise(
292
+ hybridSearch(TEST_CORPUS_PATH, 'configuration', {
293
+ limit: 10,
294
+ mode: 'hybrid',
295
+ }),
296
+ )
297
+
298
+ expect(result.stats.mode).toBe('hybrid')
299
+ expect(result.stats.modeReason).toContain('--mode') // the reason text is expected to mention the --mode flag
300
+ })
301
+
302
+ it('should support semantic-only mode', async () => {
303
+ const result = await Effect.runPromise(
304
+ hybridSearch(TEST_CORPUS_PATH, 'error handling', {
305
+ limit: 10,
306
+ mode: 'semantic',
307
+ }),
308
+ )
309
+
310
+ expect(result.stats.mode).toBe('semantic')
311
+ expect(result.results.every((r) => r.sources.includes('semantic'))).toBe( // NOTE(review): every() is true for an empty list, so no results also passes
312
+ true,
313
+ )
314
+ expect(result.results.every((r) => !r.sources.includes('keyword'))).toBe(
315
+ true,
316
+ )
317
+ })
318
+
319
+ it('should support keyword-only mode', async () => {
320
+ const result = await Effect.runPromise(
321
+ hybridSearch(TEST_CORPUS_PATH, 'configuration', {
322
+ limit: 10,
323
+ mode: 'keyword',
324
+ }),
325
+ )
326
+
327
+ expect(result.stats.mode).toBe('keyword')
328
+ expect(result.results.every((r) => r.sources.includes('keyword'))).toBe( // NOTE(review): every() is true for an empty list, so no results also passes
329
+ true,
330
+ )
331
+ expect(result.results.every((r) => !r.sources.includes('semantic'))).toBe(
332
+ true,
333
+ )
334
+ })
335
+ })
337
+
338
+ describe('Query Variations', () => { // hybrid-mode searches with different query shapes against the fixture corpus
338
+ it('should handle single-word queries', async () => {
339
+ const result = await Effect.runPromise(
340
+ hybridSearch(TEST_CORPUS_PATH, 'error', {
341
+ limit: 10,
342
+ threshold: 0.2,
343
+ mode: 'hybrid',
344
+ }),
345
+ )
346
+
347
+ expect(result.results.length).toBeGreaterThan(0)
348
+ expect(result.stats.keywordResults).toBeGreaterThan(0)
349
+ })
350
+
351
+ it('should handle multi-word queries', async () => {
352
+ const result = await Effect.runPromise(
353
+ hybridSearch(TEST_CORPUS_PATH, 'error handling configuration', {
354
+ limit: 10,
355
+ threshold: 0.3,
356
+ mode: 'hybrid',
357
+ }),
358
+ )
359
+
360
+ expect(result.results.length).toBeGreaterThan(0) // only result presence is checked, not which source produced it
361
+ })
362
+
363
+ it('should handle phrase queries', async () => {
364
+ const result = await Effect.runPromise(
365
+ hybridSearch(TEST_CORPUS_PATH, 'distributed systems', { // NOTE(review): the query is passed as plain words, without quote characters
366
+ limit: 10,
367
+ threshold: 0.3,
368
+ mode: 'hybrid',
369
+ }),
370
+ )
371
+
372
+ expect(result.results.length).toBeGreaterThan(0)
373
+ })
374
+
375
+ it('should handle technical terms', async () => {
376
+ const result = await Effect.runPromise(
377
+ hybridSearch(TEST_CORPUS_PATH, 'automation', {
378
+ limit: 10,
379
+ threshold: 0.3,
380
+ mode: 'hybrid',
381
+ }),
382
+ )
383
+
384
+ expect(result.results.length).toBeGreaterThan(0)
385
+ })
386
+
387
+ it('should return empty results for unrelated queries', async () => {
388
+ const result = await Effect.runPromise(
389
+ hybridSearch(TEST_CORPUS_PATH, 'quantum physics blockchain', {
390
+ limit: 10,
391
+ threshold: 0.7, // highest threshold value used in this describe block
392
+ mode: 'hybrid',
393
+ }),
394
+ )
395
+
396
+ expect(result.results.length).toEqual(0) // expects no result from either source for this query
397
+ })
398
+ })
400
+
401
// Checks that each search option (limit, threshold, RRF weights, RRF k,
// path pattern, quality) is accepted and, where observable, honoured.
describe('Search Parameters', () => {
  it('should respect limit parameter', async () => {
    const maxHits = 3
    const search = hybridSearch(TEST_CORPUS_PATH, 'configuration', {
      limit: maxHits,
      threshold: 0.2,
      mode: 'hybrid',
    })
    const { results } = await Effect.runPromise(search)

    expect(results.length).toBeLessThanOrEqual(maxHits)
  })

  it('should respect threshold parameter', async () => {
    const minSimilarity = 0.7
    const search = hybridSearch(TEST_CORPUS_PATH, 'configuration', {
      limit: 20,
      threshold: minSimilarity,
      mode: 'hybrid',
    })
    const { results } = await Effect.runPromise(search)

    for (const hit of results) {
      // Hits without a similarity value are not subject to the threshold check.
      if (hit.similarity === undefined) continue
      expect(hit.similarity).toBeGreaterThanOrEqual(minSimilarity)
    }
  })

  it('should support custom RRF weights', async () => {
    const search = hybridSearch(TEST_CORPUS_PATH, 'error handling', {
      limit: 10,
      threshold: 0.3,
      mode: 'hybrid',
      bm25Weight: 2.0,
      semanticWeight: 1.0,
    })
    const { results, stats } = await Effect.runPromise(search)

    expect(results.length).toBeGreaterThan(0)
    expect(stats.mode).toBe('hybrid')
  })

  it('should support custom RRF k constant', async () => {
    const search = hybridSearch(TEST_CORPUS_PATH, 'configuration', {
      limit: 10,
      threshold: 0.3,
      mode: 'hybrid',
      rrfK: 30,
    })
    const { results } = await Effect.runPromise(search)

    expect(results.length).toBeGreaterThan(0)
  })

  it('should support path pattern filtering', async () => {
    const search = hybridSearch(TEST_CORPUS_PATH, 'error', {
      limit: 10,
      threshold: 0.3,
      mode: 'hybrid',
      pathPattern: 'error-*.md',
    })
    const { results } = await Effect.runPromise(search)

    // Every surviving hit must come from a document matching the pattern.
    for (const hit of results) {
      expect(hit.documentPath).toMatch(/error-.*\.md/)
    }
  })

  it('should support quality modes', async () => {
    const qualityModes = ['fast', 'balanced', 'thorough'] as const

    // Modes are exercised one after another, not concurrently.
    for (const quality of qualityModes) {
      const search = hybridSearch(TEST_CORPUS_PATH, 'configuration', {
        limit: 10,
        threshold: 0.3,
        mode: 'hybrid',
        quality,
      })
      const { results } = await Effect.runPromise(search)

      expect(results.length).toBeGreaterThan(0)
    }
  })
})
496
+
497
// Boundary inputs: empty and one-character queries, a limit of 1, a very high
// threshold, and a very large limit.
describe('Edge Cases', () => {
  // Runs a hybrid-mode search over the test corpus with the given settings.
  const runHybrid = (settings: {
    query: string
    limit: number
    threshold: number
  }) =>
    Effect.runPromise(
      hybridSearch(TEST_CORPUS_PATH, settings.query, {
        limit: settings.limit,
        threshold: settings.threshold,
        mode: 'hybrid',
      }),
    )

  // Asserts only that a result list came back; its contents are not inspected.
  const expectResultList = (results: unknown) => {
    expect(results).toBeDefined()
    expect(Array.isArray(results)).toBe(true)
  }

  it('should handle empty query', async () => {
    const outcome = await runHybrid({ query: '', limit: 10, threshold: 0.3 })

    expectResultList(outcome.results)
  })

  it('should handle very short queries', async () => {
    const outcome = await runHybrid({ query: 'a', limit: 10, threshold: 0.3 })

    expectResultList(outcome.results)
  })

  it('should handle limit of 1', async () => {
    const outcome = await runHybrid({
      query: 'configuration',
      limit: 1,
      threshold: 0.3,
    })

    expect(outcome.results.length).toBeLessThanOrEqual(1)
  })

  it('should handle high threshold with no results', async () => {
    const outcome = await runHybrid({
      query: 'configuration',
      limit: 10,
      threshold: 0.99,
    })

    expectResultList(outcome.results)
  })

  it('should handle large limit values', async () => {
    const outcome = await runHybrid({
      query: 'configuration',
      limit: 1000,
      threshold: 0.2,
    })

    expectResultList(outcome.results)
  })
})
562
+
563
// Repeated searches must agree with each other, and results must be ordered
// by non-increasing score.
describe('Result Consistency', () => {
  it('should return deterministic results for same query', async () => {
    const query = 'error handling'
    const options = {
      limit: 10,
      threshold: 0.3,
      mode: 'hybrid' as SearchMode,
    }
    const runOnce = () =>
      Effect.runPromise(hybridSearch(TEST_CORPUS_PATH, query, options))

    // The two runs are awaited one after the other, not in parallel.
    const first = await runOnce()
    const second = await runOnce()

    expect(first.results.length).toBe(second.results.length)

    // Compare hits position by position: same section, score equal to 5 decimals.
    for (const [position, hit] of first.results.entries()) {
      const counterpart = second.results[position]
      expect(hit?.sectionId).toBe(counterpart?.sectionId)
      expect(hit?.score).toBeCloseTo(counterpart?.score ?? 0, 5)
    }
  })

  it('should maintain score ordering across searches', async () => {
    const search = hybridSearch(TEST_CORPUS_PATH, 'configuration', {
      limit: 10,
      threshold: 0.2,
      mode: 'hybrid',
    })
    const outcome = await Effect.runPromise(search)

    const scores = outcome.results.map((hit) => hit.score)

    // Pair each score with its predecessor: `scores[offset]` precedes `next`.
    for (const [offset, next] of scores.slice(1).entries()) {
      expect(scores[offset]).toBeGreaterThanOrEqual(next)
    }
  })
})
605
+
606
// Coarse performance guards: a single search must finish within a time budget,
// and several searches started together must all resolve with a result list.
describe('Performance', () => {
  it('should complete search within reasonable time', async () => {
    // performance.now() is a monotonic clock, so the measured duration cannot
    // be distorted by wall-clock adjustments the way a Date.now() delta can.
    const startedAt = performance.now()

    await Effect.runPromise(
      hybridSearch(TEST_CORPUS_PATH, 'error handling configuration', {
        limit: 10,
        threshold: 0.3,
        mode: 'hybrid',
      }),
    )

    const elapsedMs = performance.now() - startedAt

    // Budget: 5000 ms for one hybrid search.
    expect(elapsedMs).toBeLessThan(5000)
  })

  it('should handle multiple concurrent searches', async () => {
    const queries = [
      'configuration',
      'error handling',
      'distributed systems',
      'automation',
    ]

    // Start every search before awaiting any of them so they overlap.
    const pending = queries.map((query) =>
      Effect.runPromise(
        hybridSearch(TEST_CORPUS_PATH, query, {
          limit: 5,
          threshold: 0.3,
          mode: 'hybrid',
        }),
      ),
    )

    const outcomes = await Promise.all(pending)

    expect(outcomes).toHaveLength(queries.length)
    for (const outcome of outcomes) {
      expect(outcome.results).toBeDefined()
      expect(Array.isArray(outcome.results)).toBe(true)
    }
  })
})
650
+ })