mdcontext 0.0.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (337) hide show
  1. package/.changeset/README.md +28 -0
  2. package/.changeset/config.json +11 -0
  3. package/.claude/settings.local.json +25 -0
  4. package/.github/workflows/ci.yml +83 -0
  5. package/.github/workflows/claude-code-review.yml +44 -0
  6. package/.github/workflows/claude.yml +85 -0
  7. package/.github/workflows/release.yml +113 -0
  8. package/.tldrignore +112 -0
  9. package/BACKLOG.md +338 -0
  10. package/CONTRIBUTING.md +186 -0
  11. package/NOTES/NOTES +44 -0
  12. package/README.md +434 -11
  13. package/biome.json +36 -0
  14. package/cspell.config.yaml +14 -0
  15. package/dist/chunk-23UPXDNL.js +3044 -0
  16. package/dist/chunk-2W7MO2DL.js +1366 -0
  17. package/dist/chunk-3NUAZGMA.js +1689 -0
  18. package/dist/chunk-7TOWB2XB.js +366 -0
  19. package/dist/chunk-7XOTOADQ.js +3065 -0
  20. package/dist/chunk-AH2PDM2K.js +3042 -0
  21. package/dist/chunk-BNXWSZ63.js +3742 -0
  22. package/dist/chunk-BTL5DJVU.js +3222 -0
  23. package/dist/chunk-HDHYG7E4.js +104 -0
  24. package/dist/chunk-HLR4KZBP.js +3234 -0
  25. package/dist/chunk-IP3FRFEB.js +1045 -0
  26. package/dist/chunk-KHU56VDO.js +3042 -0
  27. package/dist/chunk-KRYIFLQR.js +88 -0
  28. package/dist/chunk-LBSDNLEM.js +287 -0
  29. package/dist/chunk-MNTQ7HCP.js +2643 -0
  30. package/dist/chunk-MUJELQQ6.js +1387 -0
  31. package/dist/chunk-MXJGMSLV.js +2199 -0
  32. package/dist/chunk-N6QJGC3Z.js +2636 -0
  33. package/dist/chunk-OBELGBPM.js +1713 -0
  34. package/dist/chunk-OT7R5XTA.js +3192 -0
  35. package/dist/chunk-P7X4RA2T.js +106 -0
  36. package/dist/chunk-PIDUQNC2.js +3185 -0
  37. package/dist/chunk-POGCDIH4.js +3187 -0
  38. package/dist/chunk-PSIEOQGZ.js +3043 -0
  39. package/dist/chunk-PVRT3IHA.js +3238 -0
  40. package/dist/chunk-QNN4TT23.js +1430 -0
  41. package/dist/chunk-RE3R45RJ.js +3042 -0
  42. package/dist/chunk-S7E6TFX6.js +803 -0
  43. package/dist/chunk-SG6GLU4U.js +1378 -0
  44. package/dist/chunk-SJCDV2ST.js +274 -0
  45. package/dist/chunk-SYE5XLF3.js +104 -0
  46. package/dist/chunk-T5VLYBZD.js +103 -0
  47. package/dist/chunk-TOQB7VWU.js +3238 -0
  48. package/dist/chunk-VFNMZ4ZQ.js +3228 -0
  49. package/dist/chunk-VVTGZNBT.js +1629 -0
  50. package/dist/chunk-W7Q4RFEV.js +104 -0
  51. package/dist/chunk-XTYYVRLO.js +3190 -0
  52. package/dist/chunk-Y6MDYVJD.js +3063 -0
  53. package/dist/cli/main.d.ts +1 -0
  54. package/dist/cli/main.js +5458 -0
  55. package/dist/index.d.ts +653 -0
  56. package/dist/index.js +79 -0
  57. package/dist/mcp/server.d.ts +1 -0
  58. package/dist/mcp/server.js +472 -0
  59. package/dist/schema-BAWSG7KY.js +22 -0
  60. package/dist/schema-E3QUPL26.js +20 -0
  61. package/dist/schema-EHL7WUT6.js +20 -0
  62. package/docs/019-USAGE.md +625 -0
  63. package/docs/020-current-implementation.md +364 -0
  64. package/docs/021-DOGFOODING-FINDINGS.md +175 -0
  65. package/docs/BACKLOG.md +80 -0
  66. package/docs/CONFIG.md +1123 -0
  67. package/docs/DESIGN.md +439 -0
  68. package/docs/ERRORS.md +383 -0
  69. package/docs/PROJECT.md +88 -0
  70. package/docs/ROADMAP.md +407 -0
  71. package/docs/summarization.md +320 -0
  72. package/docs/test-links.md +9 -0
  73. package/justfile +40 -0
  74. package/package.json +74 -9
  75. package/pnpm-workspace.yaml +5 -0
  76. package/research/INDEX.md +315 -0
  77. package/research/code-review/README.md +90 -0
  78. package/research/code-review/cli-error-handling-review.md +979 -0
  79. package/research/code-review/code-review-validation-report.md +464 -0
  80. package/research/code-review/main-ts-review.md +1128 -0
  81. package/research/config-analysis/01-current-implementation.md +470 -0
  82. package/research/config-analysis/02-strategy-recommendation.md +428 -0
  83. package/research/config-analysis/03-task-candidates.md +715 -0
  84. package/research/config-analysis/033-research-configuration-management.md +828 -0
  85. package/research/config-analysis/034-research-effect-cli-config.md +1504 -0
  86. package/research/config-analysis/04-consolidated-task-candidates.md +277 -0
  87. package/research/config-docs/SUMMARY.md +357 -0
  88. package/research/config-docs/TEST-RESULTS.md +776 -0
  89. package/research/config-docs/TODO.md +542 -0
  90. package/research/config-docs/analysis.md +744 -0
  91. package/research/config-docs/fix-validation.md +502 -0
  92. package/research/config-docs/help-audit.md +264 -0
  93. package/research/config-docs/help-system-analysis.md +890 -0
  94. package/research/dogfood/consolidated-tool-evaluation.md +373 -0
  95. package/research/dogfood/strategy-a/a-synthesis.md +184 -0
  96. package/research/dogfood/strategy-a/a1-docs.md +226 -0
  97. package/research/dogfood/strategy-a/a2-amorphic.md +156 -0
  98. package/research/dogfood/strategy-a/a3-llm.md +164 -0
  99. package/research/dogfood/strategy-b/b-synthesis.md +228 -0
  100. package/research/dogfood/strategy-b/b1-architecture.md +207 -0
  101. package/research/dogfood/strategy-b/b2-gaps.md +258 -0
  102. package/research/dogfood/strategy-b/b3-workflows.md +250 -0
  103. package/research/dogfood/strategy-c/c-synthesis.md +451 -0
  104. package/research/dogfood/strategy-c/c1-explorer.md +192 -0
  105. package/research/dogfood/strategy-c/c2-diver-memory.md +145 -0
  106. package/research/dogfood/strategy-c/c3-diver-control.md +148 -0
  107. package/research/dogfood/strategy-c/c4-diver-failure.md +151 -0
  108. package/research/dogfood/strategy-c/c5-diver-execution.md +221 -0
  109. package/research/dogfood/strategy-c/c6-diver-org.md +221 -0
  110. package/research/effect-cli-error-handling.md +845 -0
  111. package/research/effect-errors-as-values.md +943 -0
  112. package/research/errors-task-analysis/00-consolidated-tasks.md +207 -0
  113. package/research/errors-task-analysis/cli-commands-analysis.md +909 -0
  114. package/research/errors-task-analysis/embeddings-analysis.md +709 -0
  115. package/research/errors-task-analysis/index-search-analysis.md +812 -0
  116. package/research/frontmatter/COMMENTS-ARE-SKIPPED.md +149 -0
  117. package/research/frontmatter/LLM-CODE-NAVIGATION.md +276 -0
  118. package/research/issue-review.md +603 -0
  119. package/research/llm-summarization/agent-cli-tools-2026.md +1082 -0
  120. package/research/llm-summarization/alternative-providers-2026.md +1428 -0
  121. package/research/llm-summarization/anthropic-2026.md +367 -0
  122. package/research/llm-summarization/claude-cli-integration.md +1706 -0
  123. package/research/llm-summarization/cli-integration-patterns.md +3155 -0
  124. package/research/llm-summarization/openai-2026.md +473 -0
  125. package/research/llm-summarization/openai-compatible-providers-2026.md +1022 -0
  126. package/research/llm-summarization/opencode-cli-integration.md +1552 -0
  127. package/research/llm-summarization/prompt-engineering-2026.md +1426 -0
  128. package/research/llm-summarization/prototype-results.md +56 -0
  129. package/research/llm-summarization/provider-switching-patterns-2026.md +2153 -0
  130. package/research/llm-summarization/typescript-llm-libraries-2026.md +2436 -0
  131. package/research/mdcontext-error-analysis.md +521 -0
  132. package/research/mdcontext-pudding/00-EXECUTIVE-SUMMARY.md +282 -0
  133. package/research/mdcontext-pudding/01-index-embed.md +956 -0
  134. package/research/mdcontext-pudding/02-search-COMMANDS.md +142 -0
  135. package/research/mdcontext-pudding/02-search-SUMMARY.md +146 -0
  136. package/research/mdcontext-pudding/02-search.md +970 -0
  137. package/research/mdcontext-pudding/03-context.md +779 -0
  138. package/research/mdcontext-pudding/04-navigation-and-analytics.md +803 -0
  139. package/research/mdcontext-pudding/04-tree.md +704 -0
  140. package/research/mdcontext-pudding/05-config.md +1038 -0
  141. package/research/mdcontext-pudding/06-links-summary.txt +87 -0
  142. package/research/mdcontext-pudding/06-links.md +679 -0
  143. package/research/mdcontext-pudding/07-stats.md +693 -0
  144. package/research/mdcontext-pudding/BUG-FIX-PLAN.md +388 -0
  145. package/research/mdcontext-pudding/P0-BUG-VALIDATION.md +167 -0
  146. package/research/mdcontext-pudding/README.md +168 -0
  147. package/research/mdcontext-pudding/TESTING-SUMMARY.md +128 -0
  148. package/research/npm_publish/011-npm-workflow-research-agent2.md +792 -0
  149. package/research/npm_publish/012-npm-workflow-research-agent1.md +530 -0
  150. package/research/npm_publish/013-npm-workflow-research-agent3.md +722 -0
  151. package/research/npm_publish/014-npm-workflow-synthesis.md +556 -0
  152. package/research/npm_publish/031-npm-workflow-task-analysis.md +134 -0
  153. package/research/research-quality-review.md +834 -0
  154. package/research/semantic-search/002-research-embedding-models.md +490 -0
  155. package/research/semantic-search/003-research-rag-alternatives.md +523 -0
  156. package/research/semantic-search/004-research-vector-search.md +841 -0
  157. package/research/semantic-search/032-research-semantic-search.md +427 -0
  158. package/research/semantic-search/embedding-text-analysis.md +156 -0
  159. package/research/semantic-search/multi-word-failure-reproduction.md +171 -0
  160. package/research/semantic-search/query-processing-analysis.md +207 -0
  161. package/research/semantic-search/root-cause-and-solution.md +114 -0
  162. package/research/semantic-search/threshold-validation-report.md +69 -0
  163. package/research/semantic-search/vector-search-analysis.md +63 -0
  164. package/research/task-management-2026/00-synthesis-recommendations.md +295 -0
  165. package/research/task-management-2026/01-ai-workflow-tools.md +416 -0
  166. package/research/task-management-2026/02-agent-framework-patterns.md +476 -0
  167. package/research/task-management-2026/03-lightweight-file-based.md +567 -0
  168. package/research/task-management-2026/04-established-tools-ai-features.md +541 -0
  169. package/research/task-management-2026/linear/01-core-features-workflow.md +771 -0
  170. package/research/task-management-2026/linear/02-api-integrations.md +930 -0
  171. package/research/task-management-2026/linear/03-ai-features.md +368 -0
  172. package/research/task-management-2026/linear/04-pricing-setup.md +205 -0
  173. package/research/task-management-2026/linear/05-usage-patterns-best-practices.md +605 -0
  174. package/research/test-path-issues.md +276 -0
  175. package/review/ALP-76/1-error-type-design.md +962 -0
  176. package/review/ALP-76/2-error-handling-patterns.md +906 -0
  177. package/review/ALP-76/3-error-presentation.md +624 -0
  178. package/review/ALP-76/4-test-coverage.md +625 -0
  179. package/review/ALP-76/5-migration-completeness.md +440 -0
  180. package/review/ALP-76/6-effect-best-practices.md +755 -0
  181. package/scripts/apply-branch-protection.sh +47 -0
  182. package/scripts/branch-protection-templates.json +79 -0
  183. package/scripts/prototype-summarization.ts +346 -0
  184. package/scripts/rebuild-hnswlib.js +58 -0
  185. package/scripts/setup-branch-protection.sh +64 -0
  186. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/active-provider.json +7 -0
  187. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.json +541 -0
  188. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.meta.json +5 -0
  189. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/config.json +8 -0
  190. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
  191. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
  192. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/documents.json +60 -0
  193. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/links.json +13 -0
  194. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/sections.json +1197 -0
  195. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/configuration-management.md +99 -0
  196. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/distributed-systems.md +92 -0
  197. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/error-handling.md +78 -0
  198. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/failure-automation.md +55 -0
  199. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/job-context.md +69 -0
  200. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/process-orchestration.md +99 -0
  201. package/src/cli/argv-preprocessor.test.ts +210 -0
  202. package/src/cli/argv-preprocessor.ts +202 -0
  203. package/src/cli/cli.test.ts +627 -0
  204. package/src/cli/commands/backlinks.ts +54 -0
  205. package/src/cli/commands/config-cmd.ts +642 -0
  206. package/src/cli/commands/context.ts +285 -0
  207. package/src/cli/commands/duplicates.ts +122 -0
  208. package/src/cli/commands/embeddings.ts +529 -0
  209. package/src/cli/commands/index-cmd.ts +480 -0
  210. package/src/cli/commands/index.ts +16 -0
  211. package/src/cli/commands/links.ts +52 -0
  212. package/src/cli/commands/search.ts +1281 -0
  213. package/src/cli/commands/stats.ts +149 -0
  214. package/src/cli/commands/tree.ts +128 -0
  215. package/src/cli/config-layer.ts +176 -0
  216. package/src/cli/error-handler.test.ts +235 -0
  217. package/src/cli/error-handler.ts +655 -0
  218. package/src/cli/flag-schemas.ts +341 -0
  219. package/src/cli/help.ts +588 -0
  220. package/src/cli/index.ts +9 -0
  221. package/src/cli/main.ts +435 -0
  222. package/src/cli/options.ts +41 -0
  223. package/src/cli/shared-error-handling.ts +199 -0
  224. package/src/cli/typo-suggester.test.ts +105 -0
  225. package/src/cli/typo-suggester.ts +130 -0
  226. package/src/cli/utils.ts +259 -0
  227. package/src/config/file-provider.test.ts +320 -0
  228. package/src/config/file-provider.ts +273 -0
  229. package/src/config/index.ts +72 -0
  230. package/src/config/integration.test.ts +667 -0
  231. package/src/config/precedence.test.ts +277 -0
  232. package/src/config/precedence.ts +451 -0
  233. package/src/config/schema.test.ts +414 -0
  234. package/src/config/schema.ts +603 -0
  235. package/src/config/service.test.ts +320 -0
  236. package/src/config/service.ts +243 -0
  237. package/src/config/testing.test.ts +264 -0
  238. package/src/config/testing.ts +110 -0
  239. package/src/core/index.ts +1 -0
  240. package/src/core/types.ts +113 -0
  241. package/src/duplicates/detector.test.ts +183 -0
  242. package/src/duplicates/detector.ts +414 -0
  243. package/src/duplicates/index.ts +18 -0
  244. package/src/embeddings/embedding-namespace.test.ts +300 -0
  245. package/src/embeddings/embedding-namespace.ts +947 -0
  246. package/src/embeddings/heading-boost.test.ts +222 -0
  247. package/src/embeddings/hnsw-build-options.test.ts +198 -0
  248. package/src/embeddings/hyde.test.ts +272 -0
  249. package/src/embeddings/hyde.ts +264 -0
  250. package/src/embeddings/index.ts +10 -0
  251. package/src/embeddings/openai-provider.ts +414 -0
  252. package/src/embeddings/pricing.json +22 -0
  253. package/src/embeddings/provider-constants.ts +204 -0
  254. package/src/embeddings/provider-errors.test.ts +967 -0
  255. package/src/embeddings/provider-errors.ts +565 -0
  256. package/src/embeddings/provider-factory.test.ts +240 -0
  257. package/src/embeddings/provider-factory.ts +225 -0
  258. package/src/embeddings/provider-integration.test.ts +788 -0
  259. package/src/embeddings/query-preprocessing.test.ts +187 -0
  260. package/src/embeddings/semantic-search-threshold.test.ts +508 -0
  261. package/src/embeddings/semantic-search.ts +1270 -0
  262. package/src/embeddings/types.ts +359 -0
  263. package/src/embeddings/vector-store.ts +708 -0
  264. package/src/embeddings/voyage-provider.ts +313 -0
  265. package/src/errors/errors.test.ts +845 -0
  266. package/src/errors/index.ts +533 -0
  267. package/src/index/ignore-patterns.test.ts +354 -0
  268. package/src/index/ignore-patterns.ts +305 -0
  269. package/src/index/index.ts +4 -0
  270. package/src/index/indexer.ts +684 -0
  271. package/src/index/storage.ts +260 -0
  272. package/src/index/types.ts +147 -0
  273. package/src/index/watcher.ts +189 -0
  274. package/src/index.ts +30 -0
  275. package/src/integration/search-keyword.test.ts +678 -0
  276. package/src/mcp/server.ts +612 -0
  277. package/src/parser/index.ts +1 -0
  278. package/src/parser/parser.test.ts +291 -0
  279. package/src/parser/parser.ts +394 -0
  280. package/src/parser/section-filter.test.ts +277 -0
  281. package/src/parser/section-filter.ts +392 -0
  282. package/src/search/__tests__/hybrid-search.test.ts +650 -0
  283. package/src/search/bm25-store.ts +366 -0
  284. package/src/search/cross-encoder.test.ts +253 -0
  285. package/src/search/cross-encoder.ts +406 -0
  286. package/src/search/fuzzy-search.test.ts +419 -0
  287. package/src/search/fuzzy-search.ts +273 -0
  288. package/src/search/hybrid-search.ts +448 -0
  289. package/src/search/path-matcher.test.ts +276 -0
  290. package/src/search/path-matcher.ts +33 -0
  291. package/src/search/query-parser.test.ts +260 -0
  292. package/src/search/query-parser.ts +319 -0
  293. package/src/search/searcher.test.ts +280 -0
  294. package/src/search/searcher.ts +724 -0
  295. package/src/search/wink-bm25.d.ts +30 -0
  296. package/src/summarization/cli-providers/claude.ts +202 -0
  297. package/src/summarization/cli-providers/detection.test.ts +273 -0
  298. package/src/summarization/cli-providers/detection.ts +118 -0
  299. package/src/summarization/cli-providers/index.ts +8 -0
  300. package/src/summarization/cost.test.ts +139 -0
  301. package/src/summarization/cost.ts +102 -0
  302. package/src/summarization/error-handler.test.ts +127 -0
  303. package/src/summarization/error-handler.ts +111 -0
  304. package/src/summarization/index.ts +102 -0
  305. package/src/summarization/pipeline.test.ts +498 -0
  306. package/src/summarization/pipeline.ts +231 -0
  307. package/src/summarization/prompts.test.ts +269 -0
  308. package/src/summarization/prompts.ts +133 -0
  309. package/src/summarization/provider-factory.test.ts +396 -0
  310. package/src/summarization/provider-factory.ts +178 -0
  311. package/src/summarization/types.ts +184 -0
  312. package/src/summarize/budget-bugs.test.ts +620 -0
  313. package/src/summarize/formatters.ts +419 -0
  314. package/src/summarize/index.ts +20 -0
  315. package/src/summarize/summarizer.test.ts +275 -0
  316. package/src/summarize/summarizer.ts +597 -0
  317. package/src/summarize/verify-bugs.test.ts +238 -0
  318. package/src/types/huggingface-transformers.d.ts +66 -0
  319. package/src/utils/index.ts +1 -0
  320. package/src/utils/tokens.test.ts +142 -0
  321. package/src/utils/tokens.ts +186 -0
  322. package/tests/fixtures/cli/.mdcontext/active-provider.json +7 -0
  323. package/tests/fixtures/cli/.mdcontext/config.json +8 -0
  324. package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
  325. package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
  326. package/tests/fixtures/cli/.mdcontext/indexes/documents.json +33 -0
  327. package/tests/fixtures/cli/.mdcontext/indexes/links.json +12 -0
  328. package/tests/fixtures/cli/.mdcontext/indexes/sections.json +247 -0
  329. package/tests/fixtures/cli/README.md +9 -0
  330. package/tests/fixtures/cli/api-reference.md +11 -0
  331. package/tests/fixtures/cli/getting-started.md +11 -0
  332. package/tests/integration/embed-index.test.ts +712 -0
  333. package/tests/integration/search-context.test.ts +469 -0
  334. package/tests/integration/search-semantic.test.ts +522 -0
  335. package/tsconfig.json +26 -0
  336. package/vitest.config.ts +16 -0
  337. package/vitest.setup.ts +12 -0
@@ -0,0 +1,712 @@
1
+ /**
2
+ * Embed + Index Integration Tests
3
+ *
4
+ * Tests the full indexing and embedding pipeline on small and large corpora.
5
+ * Verifies binary format usage, MessagePack handling, metadata creation,
6
+ * and proper index loading after creation.
7
+ */
8
+
9
+ import * as fs from 'node:fs'
10
+ import * as os from 'node:os'
11
+ import * as path from 'node:path'
12
+ import { Effect } from 'effect'
13
+ import { afterEach, beforeEach, describe, expect, it } from 'vitest'
14
+ import {
15
+ createVectorStore,
16
+ type HnswVectorStore,
17
+ } from '../../src/embeddings/vector-store.js'
18
+ import { buildIndex } from '../../src/index/indexer.js'
19
+ import {
20
+ createStorage,
21
+ loadDocumentIndex,
22
+ loadSectionIndex,
23
+ } from '../../src/index/storage.js'
24
+
25
+ // ============================================================================
26
+ // Test Setup
27
+ // ============================================================================
28
+
29
+ describe('Embed + Index Integration Tests', () => {
// Scratch directory for the running test; a new one is created before each test.
let tempDir: string
// Original values of the environment variables this suite overrides.
const savedEnv: Record<string, string | undefined> = {}

/**
 * Create a fresh temp directory and install a placeholder OpenAI API key,
 * remembering whatever value was set before.
 */
beforeEach(() => {
  tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'mdcontext-embed-int-'))

  // Save and mock API key for tests
  savedEnv.OPENAI_API_KEY = process.env.OPENAI_API_KEY
  process.env.OPENAI_API_KEY = 'sk-test-mock-key-for-testing'
})

/**
 * Remove the temp directory and put the saved environment variables back.
 */
afterEach(() => {
  try {
    fs.rmSync(tempDir, { recursive: true, force: true })
  } finally {
    // Restore env vars even when directory cleanup throws, so the mocked
    // key cannot leak into tests that run afterwards.
    for (const [key, value] of Object.entries(savedEnv)) {
      if (value === undefined) {
        // The variable was not set originally: remove it again.
        delete process.env[key]
      } else {
        process.env[key] = value
      }
    }
  }
})
53
+
54
+ // ==========================================================================
55
+ // Helper Functions
56
+ // ==========================================================================
57
+
/**
 * Write `content` to `filePath` as UTF-8, creating any missing parent
 * directories first. An existing file at that path is overwritten.
 */
const createMarkdownFile = (filePath: string, content: string): void => {
  fs.mkdirSync(path.dirname(filePath), { recursive: true })
  fs.writeFileSync(filePath, content, { encoding: 'utf-8' })
}
66
+
/**
 * Create the small test corpus under `baseDir`: 20 markdown files in each of
 * `docs/`, `guides/` and `api/` (60 files in total).
 */
const createSmallCorpus = (baseDir: string): void => {
  const filesPerGroup = 20

  // One entry per directory: folder name, file-name stem and content template.
  const groups: Array<{
    folder: string
    stem: string
    render: (n: number) => string
  }> = [
    {
      folder: 'docs',
      stem: 'file',
      render: (n) =>
        `# Document ${n}\n\nThis is a test document with some content.\n\n## Section 1\n\nContent for section 1.\n\n## Section 2\n\nContent for section 2.`,
    },
    {
      folder: 'guides',
      stem: 'guide',
      render: (n) =>
        `# Guide ${n}\n\nThis is a guide document.\n\n## Getting Started\n\nHow to get started.\n\n## Advanced Topics\n\nAdvanced usage patterns.`,
    },
    {
      folder: 'api',
      stem: 'api',
      render: (n) =>
        `# API Reference ${n}\n\nAPI documentation.\n\n## Methods\n\nAvailable methods.\n\n## Examples\n\nUsage examples.`,
    },
  ]

  for (const { folder, stem, render } of groups) {
    for (let n = 1; n <= filesPerGroup; n++) {
      createMarkdownFile(path.join(baseDir, folder, `${stem}${n}.md`), render(n))
    }
  }
}
93
+
/**
 * Create the large test corpus under `baseDir`: 10 directories
 * (`section1` … `section10`) holding 105 markdown files each, 1050 in total.
 */
const createLargeCorpus = (baseDir: string): void => {
  const sectionCount = 10
  const docsPerSection = 105

  for (let sectionNo = 1; sectionNo <= sectionCount; sectionNo++) {
    const sectionDir = path.join(baseDir, `section${sectionNo}`)

    for (let docNo = 1; docNo <= docsPerSection; docNo++) {
      // "<section>-<doc>" label used in the title and the overview text.
      const label = `${sectionNo}-${docNo}`
      createMarkdownFile(
        path.join(sectionDir, `doc${docNo}.md`),
        `# Document ${label}\n\n## Overview\n\nContent for document ${label}.\n\n## Details\n\nDetailed information about this topic.\n\n## Examples\n\nCode examples and usage patterns.`,
      )
    }
  }
}
108
+
/**
 * Return the size of `filePath` in bytes, or 0 when the file cannot be
 * stat'ed (for example because it does not exist).
 */
const getFileSize = (filePath: string): number => {
  let bytes = 0
  try {
    const { size } = fs.statSync(filePath)
    bytes = size
  } catch {
    // Any stat failure is reported as an empty file.
  }
  return bytes
}
119
+
/**
 * Report whether `filePath` is accessible: true when `fs.accessSync`
 * succeeds, false when it throws for any reason.
 */
const fileExists = (filePath: string): boolean => {
  let found = true
  try {
    fs.accessSync(filePath)
  } catch {
    found = false
  }
  return found
}
131
+
132
+ // ==========================================================================
133
+ // Small Corpus Tests (50-100 docs)
134
+ // ==========================================================================
135
+
describe('Small Corpus Tests', () => {
  // Generate the 60-file corpus in the temp directory and index it once.
  const indexSmallCorpus = () => {
    createSmallCorpus(tempDir)
    return Effect.runPromise(buildIndex(tempDir))
  }

  it('indexes small corpus without errors', async () => {
    const { totalDocuments, totalSections, errors } = await indexSmallCorpus()

    expect(totalDocuments).toBeGreaterThanOrEqual(60)
    expect(totalSections).toBeGreaterThan(0)
    expect(errors).toHaveLength(0)
  })

  it('creates .mdcontext directory structure', async () => {
    await indexSmallCorpus()

    const root = path.join(tempDir, '.mdcontext')
    const inIndexes = (name: string): string =>
      path.join(root, 'indexes', name)

    expect(fileExists(root)).toBe(true)
    expect(fileExists(path.join(root, 'config.json'))).toBe(true)
    expect(fileExists(inIndexes('documents.json'))).toBe(true)
    expect(fileExists(inIndexes('sections.json'))).toBe(true)
    expect(fileExists(inIndexes('links.json'))).toBe(true)
  })

  it('verifies binary format is used for vector metadata', async () => {
    // Build the document index first
    await indexSmallCorpus()

    // Store and save one placeholder vector so the metadata file gets written
    const store = createVectorStore(tempDir, 512)
    const placeholder = {
      id: 'test-1',
      sectionId: 'sec-1',
      documentPath: 'test.md',
      heading: 'Test',
      embedding: Array(512).fill(0.1),
    }
    await Effect.runPromise(store.add([placeholder]))
    await Effect.runPromise(store.save())

    // Metadata must be written as .bin, and no .json variant may appear
    const storeDir = path.join(tempDir, '.mdcontext')
    expect(fileExists(path.join(storeDir, 'vectors.meta.bin'))).toBe(true)
    expect(fileExists(path.join(storeDir, 'vectors.meta.json'))).toBe(false)
  })

  it('loads index successfully after creation', async () => {
    await indexSmallCorpus()

    // Read back what the indexer just wrote
    const storage = createStorage(tempDir)
    const documents = await Effect.runPromise(loadDocumentIndex(storage))
    const sections = await Effect.runPromise(loadSectionIndex(storage))

    expect(documents).not.toBeNull()
    expect(sections).not.toBeNull()
    expect(Object.keys(documents!.documents).length).toBeGreaterThanOrEqual(60)
    expect(Object.keys(sections!.sections).length).toBeGreaterThan(0)
  })

  it('incremental index skips unchanged files', async () => {
    // First pass indexes everything
    const { documentsIndexed: indexedOnFirstRun } = await indexSmallCorpus()

    // Second pass over the untouched corpus
    const rerun = await Effect.runPromise(buildIndex(tempDir))

    expect(rerun.documentsIndexed).toBe(0)
    expect(rerun.skipped.unchanged).toBe(indexedOnFirstRun)
  })

  it('force flag rebuilds entire index', async () => {
    // First pass establishes the document count
    const { totalDocuments } = await indexSmallCorpus()

    // Forced pass must re-index every document
    const forced = await Effect.runPromise(buildIndex(tempDir, { force: true }))

    expect(forced.documentsIndexed).toBe(totalDocuments)
    expect(forced.skipped.unchanged).toBe(0)
  })
})
236
+
237
+ // ==========================================================================
238
+ // Large Corpus Tests (1000+ docs)
239
+ // ==========================================================================
240
+
describe('Large Corpus Tests', () => {
  // Upper bound asserted on the wall-clock time of one full index build.
  const indexTimeBudgetMs = 60_000
  // Timeout for the timing test. It must exceed the budget above (corpus
  // creation also runs inside the test), otherwise the runner aborts the
  // test before the duration assertion can report a slow build.
  const timingTestTimeoutMs = 120_000

  it('indexes large corpus without errors', async () => {
    createLargeCorpus(tempDir)

    const result = await Effect.runPromise(buildIndex(tempDir))

    expect(result.totalDocuments).toBeGreaterThanOrEqual(1000)
    expect(result.totalSections).toBeGreaterThan(3000)
    expect(result.errors).toHaveLength(0)
    expect(result.duration).toBeGreaterThan(0)
  })

  it('verifies MessagePack handles large metadata efficiently', async () => {
    createLargeCorpus(tempDir)

    await Effect.runPromise(buildIndex(tempDir))

    const metaPath = path.join(tempDir, '.mdcontext', 'vectors.meta.bin')

    // Store and save a single placeholder vector so the metadata file is
    // written (the vector store itself holds only this one entry)
    const vectorStore = createVectorStore(tempDir, 512)
    await Effect.runPromise(
      vectorStore.add([
        {
          id: 'test-1',
          sectionId: 'sec-1',
          documentPath: 'test.md',
          heading: 'Test',
          embedding: Array(512).fill(0.1),
        },
      ]),
    )
    await Effect.runPromise(vectorStore.save())

    // Binary file should exist
    expect(fileExists(metaPath)).toBe(true)

    // File should be non-empty
    const size = getFileSize(metaPath)
    expect(size).toBeGreaterThan(0)
  })

  it('checks file sizes are reasonable for large corpus', async () => {
    createLargeCorpus(tempDir)

    await Effect.runPromise(buildIndex(tempDir))

    const indexesDir = path.join(tempDir, '.mdcontext', 'indexes')

    // Check document index size
    const docSize = getFileSize(path.join(indexesDir, 'documents.json'))
    expect(docSize).toBeGreaterThan(0)
    expect(docSize).toBeLessThan(50_000_000) // < 50MB reasonable for 1000+ docs

    // Check section index size
    const sectionSize = getFileSize(path.join(indexesDir, 'sections.json'))
    expect(sectionSize).toBeGreaterThan(0)
    expect(sectionSize).toBeLessThan(100_000_000) // < 100MB reasonable
  })

  it('large corpus can be loaded after indexing', async () => {
    createLargeCorpus(tempDir)

    await Effect.runPromise(buildIndex(tempDir))

    const storage = createStorage(tempDir)
    const docIndex = await Effect.runPromise(loadDocumentIndex(storage))
    const sectionIndex = await Effect.runPromise(loadSectionIndex(storage))

    expect(docIndex).not.toBeNull()
    expect(sectionIndex).not.toBeNull()
    expect(Object.keys(docIndex!.documents).length).toBeGreaterThanOrEqual(
      1000,
    )
    expect(Object.keys(sectionIndex!.sections).length).toBeGreaterThanOrEqual(
      3000,
    )
  })

  it(
    'processes large corpus in reasonable time',
    async () => {
      createLargeCorpus(tempDir)

      // Time only the index build, not the corpus generation above
      const startTime = Date.now()
      const result = await Effect.runPromise(buildIndex(tempDir))
      const duration = Date.now() - startTime

      // Should complete within reasonable time (adjust based on CI performance)
      expect(duration).toBeLessThan(indexTimeBudgetMs) // < 60 seconds
      expect(result.duration).toBeGreaterThan(0)
    },
    timingTestTimeoutMs,
  )
}, 120000)
342
+
343
+ // ==========================================================================
344
+ // Metadata and Binary Format Tests
345
+ // ==========================================================================
346
+
347
+ describe('Metadata and Binary Format Tests', () => { // Checks which metadata file save() writes, that it loads back, and the large-metadata warning path.
348
+ it('verifies vectors.meta.bin is created not vectors.meta.json', async () => { // After save(), only the .bin metadata file should exist.
349
+ createSmallCorpus(tempDir)
350
+
351
+ await Effect.runPromise(buildIndex(tempDir))
352
+
353
+ const vectorStore = createVectorStore(tempDir, 512)
354
+ await Effect.runPromise(
355
+ vectorStore.add([
356
+ {
357
+ id: 'test-1',
358
+ sectionId: 'sec-1',
359
+ documentPath: 'test.md',
360
+ heading: 'Test',
361
+ embedding: Array(512).fill(0.1), // length matches the 512 passed to createVectorStore
362
+ },
363
+ ]),
364
+ )
365
+ await Effect.runPromise(vectorStore.save())
366
+
367
+ const binPath = path.join(tempDir, '.mdcontext', 'vectors.meta.bin')
368
+ const jsonPath = path.join(tempDir, '.mdcontext', 'vectors.meta.json')
369
+
370
+ expect(fileExists(binPath)).toBe(true)
371
+ expect(fileExists(jsonPath)).toBe(false)
372
+ })
373
+
374
+ it('binary metadata can be loaded after saving', async () => { // Saves with one store instance and loads with a second one on the same directory.
375
+ createSmallCorpus(tempDir)
376
+
377
+ await Effect.runPromise(buildIndex(tempDir))
378
+
379
+ // Save vector store with data (a single 512-element entry)
380
+ const vectorStore1 = createVectorStore(tempDir, 512)
381
+ await Effect.runPromise(
382
+ vectorStore1.add([
383
+ {
384
+ id: 'test-1',
385
+ sectionId: 'sec-1',
386
+ documentPath: 'test.md',
387
+ heading: 'Test',
388
+ embedding: Array(512).fill(0.1),
389
+ },
390
+ ]),
391
+ )
392
+ await Effect.runPromise(vectorStore1.save())
393
+
394
+ // Load vector store through a second instance created with the same directory and dimensions
395
+ const vectorStore2 = createVectorStore(tempDir, 512)
396
+ const loadResult = await Effect.runPromise(vectorStore2.load())
397
+
398
+ expect(loadResult.loaded).toBe(true)
399
+ })
400
+
401
+ it('handles metadata size warnings for large corpora', async () => { // Captures console.warn during save() and looks for a 'Large metadata' message.
402
+ createLargeCorpus(tempDir)
403
+
404
+ await Effect.runPromise(buildIndex(tempDir))
405
+
406
+ const vectorStore = createVectorStore(tempDir, 512)
407
+ await Effect.runPromise(
408
+ vectorStore.add([
409
+ {
410
+ id: 'test-1',
411
+ sectionId: 'sec-1',
412
+ documentPath: 'test.md',
413
+ heading: 'Test',
414
+ embedding: Array(512).fill(0.1),
415
+ },
416
+ ]),
417
+ )
418
+
419
+ // Capture console.warn calls by swapping in a stub; the original is restored in the finally block
420
+ const originalWarn = console.warn
421
+ const warnings: string[] = []
422
+ console.warn = (msg: string) => warnings.push(msg) // records only the first argument of each call
423
+
424
+ try {
425
+ await Effect.runPromise(vectorStore.save())
426
+
427
+ // For very large corpora (>100MB), a warning should appear
428
+ // NOTE(review): the assertion below runs only when the saved file exceeds 100_000_000; only one entry is added above, so the branch may never execute and the test would then assert nothing — confirm
429
+ const metaPath = path.join(tempDir, '.mdcontext', 'vectors.meta.bin')
430
+ const size = getFileSize(metaPath)
431
+
432
+ if (size > 100_000_000) {
433
+ expect(warnings.some((w) => w.includes('Large metadata'))).toBe(true)
434
+ }
435
+ } finally {
436
+ console.warn = originalWarn // always restore, even if save() or the assertion throws
437
+ }
438
+ })
439
+ })
440
+
441
+ // ==========================================================================
442
+ // Vector Store Loading Tests
443
+ // ==========================================================================
444
+
445
+ describe('Vector Store Loading Tests', () => { // Exercises load(): success, dimension mismatch, missing files, provider metadata, HNSW parameter mismatch.
446
+ it('vector store loads successfully after index creation', async () => { // Save one entry, then load and expect no HNSW mismatch report.
447
+ createSmallCorpus(tempDir)
448
+
449
+ await Effect.runPromise(buildIndex(tempDir))
450
+
451
+ const vectorStore = createVectorStore(tempDir, 512)
452
+ await Effect.runPromise(
453
+ vectorStore.add([
454
+ {
455
+ id: 'test-1',
456
+ sectionId: 'sec-1',
457
+ documentPath: 'test.md',
458
+ heading: 'Test',
459
+ embedding: Array(512).fill(0.1),
460
+ },
461
+ ]),
462
+ )
463
+ await Effect.runPromise(vectorStore.save())
464
+
465
+ const loadResult = await Effect.runPromise(vectorStore.load()) // NOTE(review): load() is called on the same instance that just saved, not on a fresh store
466
+
467
+ expect(loadResult.loaded).toBe(true)
468
+ expect(loadResult.hnswMismatch).toBeUndefined()
469
+ })
470
+
471
+ it('detects dimension mismatch on load', async () => { // Saving at 512 and loading at 768 must reject.
472
+ createSmallCorpus(tempDir)
473
+
474
+ await Effect.runPromise(buildIndex(tempDir))
475
+
476
+ // Save with 512 dimensions
477
+ const vectorStore1 = createVectorStore(tempDir, 512)
478
+ await Effect.runPromise(
479
+ vectorStore1.add([
480
+ {
481
+ id: 'test-1',
482
+ sectionId: 'sec-1',
483
+ documentPath: 'test.md',
484
+ heading: 'Test',
485
+ embedding: Array(512).fill(0.1),
486
+ },
487
+ ]),
488
+ )
489
+ await Effect.runPromise(vectorStore1.save())
490
+
491
+ // Try to load with different dimensions (768 instead of the 512 used when saving)
492
+ const vectorStore2 = createVectorStore(tempDir, 768)
493
+
494
+ await expect(
495
+ Effect.runPromise(vectorStore2.load()),
496
+ ).rejects.toThrowError() // any rejection passes; no specific error type or message is checked
497
+ })
498
+
499
+ it('returns false loaded status when files do not exist', async () => { // Nothing is indexed or saved here before load(); assumes tempDir starts without vector files — setup is outside this view.
500
+ const vectorStore = createVectorStore(tempDir, 512)
501
+ const loadResult = await Effect.runPromise(vectorStore.load())
502
+
503
+ expect(loadResult.loaded).toBe(false)
504
+ })
505
+
506
+ it('preserves provider metadata across save/load', async () => { // Provider name, model and dimensions set before save() must be reported by a second store after load().
507
+ createSmallCorpus(tempDir)
508
+
509
+ await Effect.runPromise(buildIndex(tempDir))
510
+
511
+ // Save with provider metadata
512
+ const vectorStore1 = createVectorStore(tempDir, 512) as HnswVectorStore // cast applied before calling setProvider on the next line
513
+ vectorStore1.setProvider('openai', 'text-embedding-3-small', undefined)
514
+ await Effect.runPromise(
515
+ vectorStore1.add([
516
+ {
517
+ id: 'test-1',
518
+ sectionId: 'sec-1',
519
+ documentPath: 'test.md',
520
+ heading: 'Test',
521
+ embedding: Array(512).fill(0.1),
522
+ },
523
+ ]),
524
+ )
525
+ await Effect.runPromise(vectorStore1.save())
526
+
527
+ // Load and verify metadata preserved (read back through getStats() on a second instance)
528
+ const vectorStore2 = createVectorStore(tempDir, 512)
529
+ await Effect.runPromise(vectorStore2.load())
530
+ const stats = vectorStore2.getStats()
531
+
532
+ expect(stats.provider).toBe('openai')
533
+ expect(stats.providerModel).toBe('text-embedding-3-small')
534
+ expect(stats.dimensions).toBe(512)
535
+ })
536
+
537
+ it('handles HNSW parameter mismatch detection', async () => { // Differing HNSW params must still load, with the mismatch reported on the result.
538
+ createSmallCorpus(tempDir)
539
+
540
+ await Effect.runPromise(buildIndex(tempDir))
541
+
542
+ // Save with specific HNSW params
543
+ const vectorStore1 = createVectorStore(tempDir, 512, {
544
+ m: 16,
545
+ efConstruction: 200,
546
+ })
547
+ await Effect.runPromise(
548
+ vectorStore1.add([
549
+ {
550
+ id: 'test-1',
551
+ sectionId: 'sec-1',
552
+ documentPath: 'test.md',
553
+ heading: 'Test',
554
+ embedding: Array(512).fill(0.1),
555
+ },
556
+ ]),
557
+ )
558
+ await Effect.runPromise(vectorStore1.save())
559
+
560
+ // Load with different HNSW params
561
+ const vectorStore2 = createVectorStore(tempDir, 512, {
562
+ m: 24,
563
+ efConstruction: 256,
564
+ })
565
+ const loadResult = await Effect.runPromise(vectorStore2.load())
566
+
567
+ expect(loadResult.loaded).toBe(true) // a parameter mismatch is expected to be reported, not to fail the load
568
+ expect(loadResult.hnswMismatch).toBeDefined()
569
+ expect(loadResult.hnswMismatch?.configParams.m).toBe(24) // configParams: the value passed to the loading store
570
+ expect(loadResult.hnswMismatch?.indexParams.m).toBe(16) // indexParams: the value used by the store that saved
571
+ })
572
+ })
573
+
574
+ // ==========================================================================
575
+ // Edge Cases and Error Handling
576
+ // ==========================================================================
577
+
578
+ describe('Edge Cases and Error Handling', () => { // buildIndex on unusual inputs: empty, hidden-only, excluded and ignore-file cases.
579
+ it('handles empty corpus gracefully', async () => { // A tree with no markdown files should yield zero documents, zero sections and no errors.
580
+ // Create directory but no files
581
+ fs.mkdirSync(path.join(tempDir, 'empty'), { recursive: true })
582
+
583
+ const result = await Effect.runPromise(buildIndex(tempDir))
584
+
585
+ expect(result.totalDocuments).toBe(0)
586
+ expect(result.totalSections).toBe(0)
587
+ expect(result.errors).toHaveLength(0)
588
+ })
589
+
590
+ it('handles corpus with only hidden files', async () => { // A markdown file under a dot-directory must be counted as skipped, not indexed.
591
+ // Create only hidden files (the file lives under the '.hidden' directory)
592
+ createMarkdownFile(
593
+ path.join(tempDir, '.hidden', 'file.md'),
594
+ '# Hidden\n\nHidden file.',
595
+ )
596
+
597
+ const result = await Effect.runPromise(buildIndex(tempDir))
598
+
599
+ expect(result.totalDocuments).toBe(0)
600
+ expect(result.skipped.hidden).toBeGreaterThan(0)
601
+ })
602
+
603
+ it('handles corpus with excluded patterns', async () => { // Passing an exclude glob should lower the document count and raise skipped.excluded.
604
+ createSmallCorpus(tempDir)
605
+
606
+ const result = await Effect.runPromise(
607
+ buildIndex(tempDir, { exclude: ['docs/**'] }),
608
+ )
609
+
610
+ // Should skip docs directory; 60 is the small-corpus minimum another test in this file asserts with toBeGreaterThanOrEqual(60)
611
+ expect(result.totalDocuments).toBeLessThan(60)
612
+ expect(result.skipped.excluded).toBeGreaterThan(0)
613
+ })
614
+
615
+ it('handles files with parsing errors', async () => { // Indexes a directory containing a single file named invalid.md.
616
+ // NOTE(review): labelled invalid, but the content written below is an ordinary Markdown heading plus a paragraph — confirm this is the intended fixture
617
+ createMarkdownFile(
618
+ path.join(tempDir, 'invalid.md'),
619
+ '# Test\n\nInvalid content',
620
+ )
621
+
622
+ const result = await Effect.runPromise(buildIndex(tempDir))
623
+
624
+ // Should still complete successfully. NOTE(review): >= 0 holds for any non-negative count, so this only shows that buildIndex resolved
625
+ expect(result.totalDocuments).toBeGreaterThanOrEqual(0)
626
+ })
627
+
628
+ it('handles .gitignore patterns correctly', async () => { // A .gitignore in the corpus root should cause at least one file to be counted as excluded.
629
+ createSmallCorpus(tempDir)
630
+
631
+ // Create .gitignore with two patterns: docs/ and *.tmp
632
+ fs.writeFileSync(
633
+ path.join(tempDir, '.gitignore'),
634
+ 'docs/\n*.tmp\n',
635
+ 'utf-8',
636
+ )
637
+
638
+ const result = await Effect.runPromise(buildIndex(tempDir))
639
+
640
+ // Should respect .gitignore (only the excluded counter is checked, not which files were skipped)
641
+ expect(result.skipped.excluded).toBeGreaterThan(0)
642
+ })
643
+
644
+ it('handles .mdcontextignore patterns correctly', async () => { // Same check as the .gitignore test, using a .mdcontextignore file.
645
+ createSmallCorpus(tempDir)
646
+
647
+ // Create .mdcontextignore with a single pattern: guides/
648
+ fs.writeFileSync(
649
+ path.join(tempDir, '.mdcontextignore'),
650
+ 'guides/\n',
651
+ 'utf-8',
652
+ )
653
+
654
+ const result = await Effect.runPromise(buildIndex(tempDir))
655
+
656
+ // Should respect .mdcontextignore (only the excluded counter is checked)
657
+ expect(result.skipped.excluded).toBeGreaterThan(0)
658
+ })
659
+ })
660
+
661
+ // ==========================================================================
662
+ // Performance and Scalability Tests
663
+ // ==========================================================================
664
+
665
+ describe('Performance and Scalability', () => { // Per-document timing, section-to-document ratio, and repeated forced rebuilds.
666
+ it('indexes scale linearly with corpus size', async () => { // Compares the reported time per document between the small and the large corpus.
667
+ // Small corpus baseline
668
+ createSmallCorpus(tempDir)
669
+ const smallResult = await Effect.runPromise(buildIndex(tempDir))
670
+ const smallTimePerDoc = smallResult.duration / smallResult.totalDocuments // NOTE(review): not finite if totalDocuments is 0
671
+
672
+ // Clean and create larger corpus
673
+ fs.rmSync(tempDir, { recursive: true, force: true })
674
+ tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'mdcontext-embed-int-')) // reassigns the shared tempDir declared outside this view; presumably the suite's cleanup then removes the new directory — confirm
675
+ createLargeCorpus(tempDir)
676
+
677
+ const largeResult = await Effect.runPromise(buildIndex(tempDir))
678
+ const largeTimePerDoc = largeResult.duration / largeResult.totalDocuments
679
+
680
+ // Time per document should be roughly similar (within 3x). NOTE(review): both sides are wall-clock measurements, so this may be sensitive to machine load
681
+ expect(largeTimePerDoc).toBeLessThan(smallTimePerDoc * 3)
682
+ })
683
+
684
+ it('section index grows proportionally to documents', async () => { // The sections-per-document ratio for the small corpus must fall strictly between 2 and 10.
685
+ createSmallCorpus(tempDir)
686
+
687
+ const result = await Effect.runPromise(buildIndex(tempDir))
688
+
689
+ // Each document has ~3 sections, ratio should be reasonable
690
+ const ratio = result.totalSections / result.totalDocuments
691
+ expect(ratio).toBeGreaterThan(2)
692
+ expect(ratio).toBeLessThan(10)
693
+ })
694
+
695
+ it('handles repeated index/rebuild cycles', async () => { // Five forced rebuilds in a row must each report at least 60 documents.
696
+ createSmallCorpus(tempDir)
697
+
698
+ // Run multiple index cycles, sequentially, each with force: true
699
+ for (let i = 0; i < 5; i++) {
700
+ const result = await Effect.runPromise(
701
+ buildIndex(tempDir, { force: true }),
702
+ )
703
+ expect(result.totalDocuments).toBeGreaterThanOrEqual(60)
704
+ }
705
+
706
+ // Final verification: the document index can still be loaded after the last rebuild
707
+ const storage = createStorage(tempDir)
708
+ const docIndex = await Effect.runPromise(loadDocumentIndex(storage))
709
+ expect(docIndex).not.toBeNull()
710
+ })
711
+ })
712
+ })