mdcontext 0.0.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (337) hide show
  1. package/.changeset/README.md +28 -0
  2. package/.changeset/config.json +11 -0
  3. package/.claude/settings.local.json +25 -0
  4. package/.github/workflows/ci.yml +83 -0
  5. package/.github/workflows/claude-code-review.yml +44 -0
  6. package/.github/workflows/claude.yml +85 -0
  7. package/.github/workflows/release.yml +113 -0
  8. package/.tldrignore +112 -0
  9. package/BACKLOG.md +338 -0
  10. package/CONTRIBUTING.md +186 -0
  11. package/NOTES/NOTES +44 -0
  12. package/README.md +434 -11
  13. package/biome.json +36 -0
  14. package/cspell.config.yaml +14 -0
  15. package/dist/chunk-23UPXDNL.js +3044 -0
  16. package/dist/chunk-2W7MO2DL.js +1366 -0
  17. package/dist/chunk-3NUAZGMA.js +1689 -0
  18. package/dist/chunk-7TOWB2XB.js +366 -0
  19. package/dist/chunk-7XOTOADQ.js +3065 -0
  20. package/dist/chunk-AH2PDM2K.js +3042 -0
  21. package/dist/chunk-BNXWSZ63.js +3742 -0
  22. package/dist/chunk-BTL5DJVU.js +3222 -0
  23. package/dist/chunk-HDHYG7E4.js +104 -0
  24. package/dist/chunk-HLR4KZBP.js +3234 -0
  25. package/dist/chunk-IP3FRFEB.js +1045 -0
  26. package/dist/chunk-KHU56VDO.js +3042 -0
  27. package/dist/chunk-KRYIFLQR.js +88 -0
  28. package/dist/chunk-LBSDNLEM.js +287 -0
  29. package/dist/chunk-MNTQ7HCP.js +2643 -0
  30. package/dist/chunk-MUJELQQ6.js +1387 -0
  31. package/dist/chunk-MXJGMSLV.js +2199 -0
  32. package/dist/chunk-N6QJGC3Z.js +2636 -0
  33. package/dist/chunk-OBELGBPM.js +1713 -0
  34. package/dist/chunk-OT7R5XTA.js +3192 -0
  35. package/dist/chunk-P7X4RA2T.js +106 -0
  36. package/dist/chunk-PIDUQNC2.js +3185 -0
  37. package/dist/chunk-POGCDIH4.js +3187 -0
  38. package/dist/chunk-PSIEOQGZ.js +3043 -0
  39. package/dist/chunk-PVRT3IHA.js +3238 -0
  40. package/dist/chunk-QNN4TT23.js +1430 -0
  41. package/dist/chunk-RE3R45RJ.js +3042 -0
  42. package/dist/chunk-S7E6TFX6.js +803 -0
  43. package/dist/chunk-SG6GLU4U.js +1378 -0
  44. package/dist/chunk-SJCDV2ST.js +274 -0
  45. package/dist/chunk-SYE5XLF3.js +104 -0
  46. package/dist/chunk-T5VLYBZD.js +103 -0
  47. package/dist/chunk-TOQB7VWU.js +3238 -0
  48. package/dist/chunk-VFNMZ4ZQ.js +3228 -0
  49. package/dist/chunk-VVTGZNBT.js +1629 -0
  50. package/dist/chunk-W7Q4RFEV.js +104 -0
  51. package/dist/chunk-XTYYVRLO.js +3190 -0
  52. package/dist/chunk-Y6MDYVJD.js +3063 -0
  53. package/dist/cli/main.d.ts +1 -0
  54. package/dist/cli/main.js +5458 -0
  55. package/dist/index.d.ts +653 -0
  56. package/dist/index.js +79 -0
  57. package/dist/mcp/server.d.ts +1 -0
  58. package/dist/mcp/server.js +472 -0
  59. package/dist/schema-BAWSG7KY.js +22 -0
  60. package/dist/schema-E3QUPL26.js +20 -0
  61. package/dist/schema-EHL7WUT6.js +20 -0
  62. package/docs/019-USAGE.md +625 -0
  63. package/docs/020-current-implementation.md +364 -0
  64. package/docs/021-DOGFOODING-FINDINGS.md +175 -0
  65. package/docs/BACKLOG.md +80 -0
  66. package/docs/CONFIG.md +1123 -0
  67. package/docs/DESIGN.md +439 -0
  68. package/docs/ERRORS.md +383 -0
  69. package/docs/PROJECT.md +88 -0
  70. package/docs/ROADMAP.md +407 -0
  71. package/docs/summarization.md +320 -0
  72. package/docs/test-links.md +9 -0
  73. package/justfile +40 -0
  74. package/package.json +74 -9
  75. package/pnpm-workspace.yaml +5 -0
  76. package/research/INDEX.md +315 -0
  77. package/research/code-review/README.md +90 -0
  78. package/research/code-review/cli-error-handling-review.md +979 -0
  79. package/research/code-review/code-review-validation-report.md +464 -0
  80. package/research/code-review/main-ts-review.md +1128 -0
  81. package/research/config-analysis/01-current-implementation.md +470 -0
  82. package/research/config-analysis/02-strategy-recommendation.md +428 -0
  83. package/research/config-analysis/03-task-candidates.md +715 -0
  84. package/research/config-analysis/033-research-configuration-management.md +828 -0
  85. package/research/config-analysis/034-research-effect-cli-config.md +1504 -0
  86. package/research/config-analysis/04-consolidated-task-candidates.md +277 -0
  87. package/research/config-docs/SUMMARY.md +357 -0
  88. package/research/config-docs/TEST-RESULTS.md +776 -0
  89. package/research/config-docs/TODO.md +542 -0
  90. package/research/config-docs/analysis.md +744 -0
  91. package/research/config-docs/fix-validation.md +502 -0
  92. package/research/config-docs/help-audit.md +264 -0
  93. package/research/config-docs/help-system-analysis.md +890 -0
  94. package/research/dogfood/consolidated-tool-evaluation.md +373 -0
  95. package/research/dogfood/strategy-a/a-synthesis.md +184 -0
  96. package/research/dogfood/strategy-a/a1-docs.md +226 -0
  97. package/research/dogfood/strategy-a/a2-amorphic.md +156 -0
  98. package/research/dogfood/strategy-a/a3-llm.md +164 -0
  99. package/research/dogfood/strategy-b/b-synthesis.md +228 -0
  100. package/research/dogfood/strategy-b/b1-architecture.md +207 -0
  101. package/research/dogfood/strategy-b/b2-gaps.md +258 -0
  102. package/research/dogfood/strategy-b/b3-workflows.md +250 -0
  103. package/research/dogfood/strategy-c/c-synthesis.md +451 -0
  104. package/research/dogfood/strategy-c/c1-explorer.md +192 -0
  105. package/research/dogfood/strategy-c/c2-diver-memory.md +145 -0
  106. package/research/dogfood/strategy-c/c3-diver-control.md +148 -0
  107. package/research/dogfood/strategy-c/c4-diver-failure.md +151 -0
  108. package/research/dogfood/strategy-c/c5-diver-execution.md +221 -0
  109. package/research/dogfood/strategy-c/c6-diver-org.md +221 -0
  110. package/research/effect-cli-error-handling.md +845 -0
  111. package/research/effect-errors-as-values.md +943 -0
  112. package/research/errors-task-analysis/00-consolidated-tasks.md +207 -0
  113. package/research/errors-task-analysis/cli-commands-analysis.md +909 -0
  114. package/research/errors-task-analysis/embeddings-analysis.md +709 -0
  115. package/research/errors-task-analysis/index-search-analysis.md +812 -0
  116. package/research/frontmatter/COMMENTS-ARE-SKIPPED.md +149 -0
  117. package/research/frontmatter/LLM-CODE-NAVIGATION.md +276 -0
  118. package/research/issue-review.md +603 -0
  119. package/research/llm-summarization/agent-cli-tools-2026.md +1082 -0
  120. package/research/llm-summarization/alternative-providers-2026.md +1428 -0
  121. package/research/llm-summarization/anthropic-2026.md +367 -0
  122. package/research/llm-summarization/claude-cli-integration.md +1706 -0
  123. package/research/llm-summarization/cli-integration-patterns.md +3155 -0
  124. package/research/llm-summarization/openai-2026.md +473 -0
  125. package/research/llm-summarization/openai-compatible-providers-2026.md +1022 -0
  126. package/research/llm-summarization/opencode-cli-integration.md +1552 -0
  127. package/research/llm-summarization/prompt-engineering-2026.md +1426 -0
  128. package/research/llm-summarization/prototype-results.md +56 -0
  129. package/research/llm-summarization/provider-switching-patterns-2026.md +2153 -0
  130. package/research/llm-summarization/typescript-llm-libraries-2026.md +2436 -0
  131. package/research/mdcontext-error-analysis.md +521 -0
  132. package/research/mdcontext-pudding/00-EXECUTIVE-SUMMARY.md +282 -0
  133. package/research/mdcontext-pudding/01-index-embed.md +956 -0
  134. package/research/mdcontext-pudding/02-search-COMMANDS.md +142 -0
  135. package/research/mdcontext-pudding/02-search-SUMMARY.md +146 -0
  136. package/research/mdcontext-pudding/02-search.md +970 -0
  137. package/research/mdcontext-pudding/03-context.md +779 -0
  138. package/research/mdcontext-pudding/04-navigation-and-analytics.md +803 -0
  139. package/research/mdcontext-pudding/04-tree.md +704 -0
  140. package/research/mdcontext-pudding/05-config.md +1038 -0
  141. package/research/mdcontext-pudding/06-links-summary.txt +87 -0
  142. package/research/mdcontext-pudding/06-links.md +679 -0
  143. package/research/mdcontext-pudding/07-stats.md +693 -0
  144. package/research/mdcontext-pudding/BUG-FIX-PLAN.md +388 -0
  145. package/research/mdcontext-pudding/P0-BUG-VALIDATION.md +167 -0
  146. package/research/mdcontext-pudding/README.md +168 -0
  147. package/research/mdcontext-pudding/TESTING-SUMMARY.md +128 -0
  148. package/research/npm_publish/011-npm-workflow-research-agent2.md +792 -0
  149. package/research/npm_publish/012-npm-workflow-research-agent1.md +530 -0
  150. package/research/npm_publish/013-npm-workflow-research-agent3.md +722 -0
  151. package/research/npm_publish/014-npm-workflow-synthesis.md +556 -0
  152. package/research/npm_publish/031-npm-workflow-task-analysis.md +134 -0
  153. package/research/research-quality-review.md +834 -0
  154. package/research/semantic-search/002-research-embedding-models.md +490 -0
  155. package/research/semantic-search/003-research-rag-alternatives.md +523 -0
  156. package/research/semantic-search/004-research-vector-search.md +841 -0
  157. package/research/semantic-search/032-research-semantic-search.md +427 -0
  158. package/research/semantic-search/embedding-text-analysis.md +156 -0
  159. package/research/semantic-search/multi-word-failure-reproduction.md +171 -0
  160. package/research/semantic-search/query-processing-analysis.md +207 -0
  161. package/research/semantic-search/root-cause-and-solution.md +114 -0
  162. package/research/semantic-search/threshold-validation-report.md +69 -0
  163. package/research/semantic-search/vector-search-analysis.md +63 -0
  164. package/research/task-management-2026/00-synthesis-recommendations.md +295 -0
  165. package/research/task-management-2026/01-ai-workflow-tools.md +416 -0
  166. package/research/task-management-2026/02-agent-framework-patterns.md +476 -0
  167. package/research/task-management-2026/03-lightweight-file-based.md +567 -0
  168. package/research/task-management-2026/04-established-tools-ai-features.md +541 -0
  169. package/research/task-management-2026/linear/01-core-features-workflow.md +771 -0
  170. package/research/task-management-2026/linear/02-api-integrations.md +930 -0
  171. package/research/task-management-2026/linear/03-ai-features.md +368 -0
  172. package/research/task-management-2026/linear/04-pricing-setup.md +205 -0
  173. package/research/task-management-2026/linear/05-usage-patterns-best-practices.md +605 -0
  174. package/research/test-path-issues.md +276 -0
  175. package/review/ALP-76/1-error-type-design.md +962 -0
  176. package/review/ALP-76/2-error-handling-patterns.md +906 -0
  177. package/review/ALP-76/3-error-presentation.md +624 -0
  178. package/review/ALP-76/4-test-coverage.md +625 -0
  179. package/review/ALP-76/5-migration-completeness.md +440 -0
  180. package/review/ALP-76/6-effect-best-practices.md +755 -0
  181. package/scripts/apply-branch-protection.sh +47 -0
  182. package/scripts/branch-protection-templates.json +79 -0
  183. package/scripts/prototype-summarization.ts +346 -0
  184. package/scripts/rebuild-hnswlib.js +58 -0
  185. package/scripts/setup-branch-protection.sh +64 -0
  186. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/active-provider.json +7 -0
  187. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.json +541 -0
  188. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.meta.json +5 -0
  189. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/config.json +8 -0
  190. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
  191. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
  192. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/documents.json +60 -0
  193. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/links.json +13 -0
  194. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/sections.json +1197 -0
  195. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/configuration-management.md +99 -0
  196. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/distributed-systems.md +92 -0
  197. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/error-handling.md +78 -0
  198. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/failure-automation.md +55 -0
  199. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/job-context.md +69 -0
  200. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/process-orchestration.md +99 -0
  201. package/src/cli/argv-preprocessor.test.ts +210 -0
  202. package/src/cli/argv-preprocessor.ts +202 -0
  203. package/src/cli/cli.test.ts +627 -0
  204. package/src/cli/commands/backlinks.ts +54 -0
  205. package/src/cli/commands/config-cmd.ts +642 -0
  206. package/src/cli/commands/context.ts +285 -0
  207. package/src/cli/commands/duplicates.ts +122 -0
  208. package/src/cli/commands/embeddings.ts +529 -0
  209. package/src/cli/commands/index-cmd.ts +480 -0
  210. package/src/cli/commands/index.ts +16 -0
  211. package/src/cli/commands/links.ts +52 -0
  212. package/src/cli/commands/search.ts +1281 -0
  213. package/src/cli/commands/stats.ts +149 -0
  214. package/src/cli/commands/tree.ts +128 -0
  215. package/src/cli/config-layer.ts +176 -0
  216. package/src/cli/error-handler.test.ts +235 -0
  217. package/src/cli/error-handler.ts +655 -0
  218. package/src/cli/flag-schemas.ts +341 -0
  219. package/src/cli/help.ts +588 -0
  220. package/src/cli/index.ts +9 -0
  221. package/src/cli/main.ts +435 -0
  222. package/src/cli/options.ts +41 -0
  223. package/src/cli/shared-error-handling.ts +199 -0
  224. package/src/cli/typo-suggester.test.ts +105 -0
  225. package/src/cli/typo-suggester.ts +130 -0
  226. package/src/cli/utils.ts +259 -0
  227. package/src/config/file-provider.test.ts +320 -0
  228. package/src/config/file-provider.ts +273 -0
  229. package/src/config/index.ts +72 -0
  230. package/src/config/integration.test.ts +667 -0
  231. package/src/config/precedence.test.ts +277 -0
  232. package/src/config/precedence.ts +451 -0
  233. package/src/config/schema.test.ts +414 -0
  234. package/src/config/schema.ts +603 -0
  235. package/src/config/service.test.ts +320 -0
  236. package/src/config/service.ts +243 -0
  237. package/src/config/testing.test.ts +264 -0
  238. package/src/config/testing.ts +110 -0
  239. package/src/core/index.ts +1 -0
  240. package/src/core/types.ts +113 -0
  241. package/src/duplicates/detector.test.ts +183 -0
  242. package/src/duplicates/detector.ts +414 -0
  243. package/src/duplicates/index.ts +18 -0
  244. package/src/embeddings/embedding-namespace.test.ts +300 -0
  245. package/src/embeddings/embedding-namespace.ts +947 -0
  246. package/src/embeddings/heading-boost.test.ts +222 -0
  247. package/src/embeddings/hnsw-build-options.test.ts +198 -0
  248. package/src/embeddings/hyde.test.ts +272 -0
  249. package/src/embeddings/hyde.ts +264 -0
  250. package/src/embeddings/index.ts +10 -0
  251. package/src/embeddings/openai-provider.ts +414 -0
  252. package/src/embeddings/pricing.json +22 -0
  253. package/src/embeddings/provider-constants.ts +204 -0
  254. package/src/embeddings/provider-errors.test.ts +967 -0
  255. package/src/embeddings/provider-errors.ts +565 -0
  256. package/src/embeddings/provider-factory.test.ts +240 -0
  257. package/src/embeddings/provider-factory.ts +225 -0
  258. package/src/embeddings/provider-integration.test.ts +788 -0
  259. package/src/embeddings/query-preprocessing.test.ts +187 -0
  260. package/src/embeddings/semantic-search-threshold.test.ts +508 -0
  261. package/src/embeddings/semantic-search.ts +1270 -0
  262. package/src/embeddings/types.ts +359 -0
  263. package/src/embeddings/vector-store.ts +708 -0
  264. package/src/embeddings/voyage-provider.ts +313 -0
  265. package/src/errors/errors.test.ts +845 -0
  266. package/src/errors/index.ts +533 -0
  267. package/src/index/ignore-patterns.test.ts +354 -0
  268. package/src/index/ignore-patterns.ts +305 -0
  269. package/src/index/index.ts +4 -0
  270. package/src/index/indexer.ts +684 -0
  271. package/src/index/storage.ts +260 -0
  272. package/src/index/types.ts +147 -0
  273. package/src/index/watcher.ts +189 -0
  274. package/src/index.ts +30 -0
  275. package/src/integration/search-keyword.test.ts +678 -0
  276. package/src/mcp/server.ts +612 -0
  277. package/src/parser/index.ts +1 -0
  278. package/src/parser/parser.test.ts +291 -0
  279. package/src/parser/parser.ts +394 -0
  280. package/src/parser/section-filter.test.ts +277 -0
  281. package/src/parser/section-filter.ts +392 -0
  282. package/src/search/__tests__/hybrid-search.test.ts +650 -0
  283. package/src/search/bm25-store.ts +366 -0
  284. package/src/search/cross-encoder.test.ts +253 -0
  285. package/src/search/cross-encoder.ts +406 -0
  286. package/src/search/fuzzy-search.test.ts +419 -0
  287. package/src/search/fuzzy-search.ts +273 -0
  288. package/src/search/hybrid-search.ts +448 -0
  289. package/src/search/path-matcher.test.ts +276 -0
  290. package/src/search/path-matcher.ts +33 -0
  291. package/src/search/query-parser.test.ts +260 -0
  292. package/src/search/query-parser.ts +319 -0
  293. package/src/search/searcher.test.ts +280 -0
  294. package/src/search/searcher.ts +724 -0
  295. package/src/search/wink-bm25.d.ts +30 -0
  296. package/src/summarization/cli-providers/claude.ts +202 -0
  297. package/src/summarization/cli-providers/detection.test.ts +273 -0
  298. package/src/summarization/cli-providers/detection.ts +118 -0
  299. package/src/summarization/cli-providers/index.ts +8 -0
  300. package/src/summarization/cost.test.ts +139 -0
  301. package/src/summarization/cost.ts +102 -0
  302. package/src/summarization/error-handler.test.ts +127 -0
  303. package/src/summarization/error-handler.ts +111 -0
  304. package/src/summarization/index.ts +102 -0
  305. package/src/summarization/pipeline.test.ts +498 -0
  306. package/src/summarization/pipeline.ts +231 -0
  307. package/src/summarization/prompts.test.ts +269 -0
  308. package/src/summarization/prompts.ts +133 -0
  309. package/src/summarization/provider-factory.test.ts +396 -0
  310. package/src/summarization/provider-factory.ts +178 -0
  311. package/src/summarization/types.ts +184 -0
  312. package/src/summarize/budget-bugs.test.ts +620 -0
  313. package/src/summarize/formatters.ts +419 -0
  314. package/src/summarize/index.ts +20 -0
  315. package/src/summarize/summarizer.test.ts +275 -0
  316. package/src/summarize/summarizer.ts +597 -0
  317. package/src/summarize/verify-bugs.test.ts +238 -0
  318. package/src/types/huggingface-transformers.d.ts +66 -0
  319. package/src/utils/index.ts +1 -0
  320. package/src/utils/tokens.test.ts +142 -0
  321. package/src/utils/tokens.ts +186 -0
  322. package/tests/fixtures/cli/.mdcontext/active-provider.json +7 -0
  323. package/tests/fixtures/cli/.mdcontext/config.json +8 -0
  324. package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
  325. package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
  326. package/tests/fixtures/cli/.mdcontext/indexes/documents.json +33 -0
  327. package/tests/fixtures/cli/.mdcontext/indexes/links.json +12 -0
  328. package/tests/fixtures/cli/.mdcontext/indexes/sections.json +247 -0
  329. package/tests/fixtures/cli/README.md +9 -0
  330. package/tests/fixtures/cli/api-reference.md +11 -0
  331. package/tests/fixtures/cli/getting-started.md +11 -0
  332. package/tests/integration/embed-index.test.ts +712 -0
  333. package/tests/integration/search-context.test.ts +469 -0
  334. package/tests/integration/search-semantic.test.ts +522 -0
  335. package/tsconfig.json +26 -0
  336. package/vitest.config.ts +16 -0
  337. package/vitest.setup.ts +12 -0
@@ -0,0 +1,684 @@
1
+ /**
2
+ * Indexer service for building and updating indexes
3
+ */
4
+
5
+ import * as fs from 'node:fs/promises'
6
+ import * as path from 'node:path'
7
+ import { Effect } from 'effect'
8
+ import type { Ignore } from 'ignore'
9
+ import type { MdSection } from '../core/types.js'
10
+ import {
11
+ type DirectoryCreateError,
12
+ DirectoryWalkError,
13
+ type FileReadError,
14
+ type FileWriteError,
15
+ type IndexCorruptedError,
16
+ ParseError,
17
+ } from '../errors/index.js'
18
+ import { parse } from '../parser/parser.js'
19
+ import { createIgnoreFilter, shouldIgnore } from './ignore-patterns.js'
20
+ import {
21
+ computeHash,
22
+ createEmptyDocumentIndex,
23
+ createEmptyLinkIndex,
24
+ createEmptySectionIndex,
25
+ createStorage,
26
+ initializeIndex,
27
+ loadDocumentIndex,
28
+ loadLinkIndex,
29
+ loadSectionIndex,
30
+ saveDocumentIndex,
31
+ saveLinkIndex,
32
+ saveSectionIndex,
33
+ } from './storage.js'
34
+ import type {
35
+ DocumentEntry,
36
+ DocumentIndex,
37
+ FileProcessingError,
38
+ IndexResult,
39
+ SectionEntry,
40
+ SkipSummary,
41
+ } from './types.js'
42
+
43
+ // ============================================================================
44
+ // File Discovery
45
+ // ============================================================================
46
+
47
/**
 * Report whether a filename carries a Markdown extension.
 *
 * The comparison is a case-sensitive suffix match against `.md` and `.mdx`.
 *
 * @param filename - File name (or path) to inspect
 * @returns `true` when the name ends in `.md` or `.mdx`
 */
const isMarkdownFile = (filename: string): boolean =>
  ['.md', '.mdx'].some((extension) => filename.endsWith(extension))
49
+
50
/**
 * Outcome of a directory walk: the Markdown files that were discovered plus
 * tallies of the entries that were passed over.
 */
interface WalkResult {
  /** Paths of the Markdown files found (directory joined with entry name) */
  readonly files: string[]
  /**
   * Skip counters: `hidden` tallies dot-prefixed directories, `excluded`
   * tallies files and directories rejected by the ignore filter.
   */
  readonly skipped: { hidden: number; excluded: number }
}
60
+
61
/**
 * Recursively collect the Markdown files beneath a directory, honoring the
 * supplied ignore filter and keeping count of what was skipped.
 *
 * @param dir - Directory to read on this call
 * @param rootPath - Root path; entries are tested against the filter using
 *   their path relative to this root
 * @param filter - Ignore filter instance
 * @returns The discovered files together with hidden/excluded skip counts,
 *   including those accumulated from subdirectories
 */
const walkDirectory = async (
  dir: string,
  rootPath: string,
  filter: Ignore,
): Promise<WalkResult> => {
  const found: string[] = []
  const skipped = { hidden: 0, excluded: 0 }

  const dirents = await fs.readdir(dir, { withFileTypes: true })

  for (const dirent of dirents) {
    const absolute = path.join(dir, dirent.name)

    // Dot-prefixed entries are never indexed or descended into; only the
    // directories among them contribute to the hidden tally.
    if (dirent.name.startsWith('.')) {
      if (dirent.isDirectory()) {
        skipped.hidden += 1
      }
      continue
    }

    // Anything the ignore filter rejects, file or directory, is counted once.
    if (shouldIgnore(path.relative(rootPath, absolute), filter)) {
      skipped.excluded += 1
      continue
    }

    if (dirent.isDirectory()) {
      // Fold the subtree's files and counters into this level's totals.
      const nested = await walkDirectory(absolute, rootPath, filter)
      for (const file of nested.files) {
        found.push(file)
      }
      skipped.hidden += nested.skipped.hidden
      skipped.excluded += nested.skipped.excluded
      continue
    }

    if (dirent.isFile() && isMarkdownFile(dirent.name)) {
      found.push(absolute)
    }
  }

  return { files: found, skipped }
}
113
+
114
+ // ============================================================================
115
+ // Section Flattening
116
+ // ============================================================================
117
+
118
/**
 * Flatten a tree of parsed sections into a list of index entries.
 *
 * Sections are emitted in pre-order: each section is followed by its
 * descendants before the next sibling appears.
 *
 * @param sections - Top-level sections of the document
 * @param docId - Identifier stamped on every entry as `documentId`
 * @param docPath - Path stamped on every entry as `documentPath`
 * @returns One entry per section in the tree
 */
const flattenSections = (
  sections: readonly MdSection[],
  docId: string,
  docPath: string,
): SectionEntry[] => {
  const entries: SectionEntry[] = []

  // Explicit stack; items are pushed in reverse so that popping yields them
  // in their original order.
  const pending: MdSection[] = [...sections].reverse()

  for (;;) {
    const current = pending.pop()
    if (current === undefined) break

    const { metadata } = current
    entries.push({
      id: current.id,
      documentId: docId,
      documentPath: docPath,
      heading: current.heading,
      level: current.level,
      startLine: current.startLine,
      endLine: current.endLine,
      tokenCount: metadata.tokenCount,
      hasCode: metadata.hasCode,
      hasList: metadata.hasList,
      hasTable: metadata.hasTable,
    })

    for (const child of [...current.children].reverse()) {
      pending.push(child)
    }
  }

  return entries
}
151
+
152
+ // ============================================================================
153
+ // Link Resolution
154
+ // ============================================================================
155
+
156
/**
 * Resolve a link target found in a document to a path relative to the
 * index root.
 *
 * @param href - Raw link target as written in the document
 * @param fromPath - Path of the document containing the link; relative
 *   targets are resolved against its directory
 * @param rootPath - Root directory that resolved targets must stay inside
 * @returns `fromPath` unchanged for anchor-only links (`#...`); the
 *   root-relative path of the target when it lies inside `rootPath`;
 *   `null` for `http://`/`https://` URLs, for links with an empty path
 *   part, and for targets that resolve outside `rootPath`
 */
const resolveInternalLink = (
  href: string,
  fromPath: string,
  rootPath: string,
): string | null => {
  if (href.startsWith('#')) {
    // NOTE(review): this returns fromPath as given, whereas every other
    // branch returns a root-relative path. Confirm callers expect that.
    return fromPath
  }

  if (href.startsWith('http://') || href.startsWith('https://')) {
    return null
  }

  // Drop any fragment; only the file part takes part in resolution.
  const linkPath = href.split('#')[0] ?? ''
  if (!linkPath) return null

  const resolved = path.resolve(path.dirname(fromPath), linkPath)
  const relative = path.relative(rootPath, resolved)

  // Containment is decided on the relative path rather than with a string
  // prefix test: a prefix test would accept a sibling such as
  // `/repo/docs-old/x.md` for root `/repo/docs`. A target is outside the
  // root when reaching it requires climbing (`..`) or, on platforms with
  // multiple drives, when no relative path exists at all.
  const escapesRoot =
    relative === '..' ||
    relative.startsWith(`..${path.sep}`) ||
    path.isAbsolute(relative)

  return escapesRoot ? null : relative
}
181
+
182
+ // ============================================================================
183
+ // Index Building
184
+ // ============================================================================
185
+
186
/**
 * Progress notification emitted while files are being indexed.
 */
export interface IndexProgress {
  /** 1-based position of the file currently being processed */
  readonly current: number
  /** Total number of files discovered for this run */
  readonly total: number
  /** Path of the current file, relative to the index root */
  readonly filePath: string
}
191
+
192
/**
 * Options accepted when building an index. Every field is optional.
 */
export interface IndexOptions {
  /**
   * When truthy, start from an empty document index and process files even
   * if their recorded hash and mtime are unchanged
   */
  readonly force?: boolean | undefined
  /** Exclude patterns from the CLI/config (these override ignore files) */
  readonly exclude?: readonly string[] | undefined
  /** Honor `.gitignore` while discovering files (defaults to true) */
  readonly honorGitignore?: boolean | undefined
  /** Honor `.mdcontextignore` while discovering files (defaults to true) */
  readonly honorMdcontextignore?: boolean | undefined
  /** Invoked with a progress update for each file as indexing proceeds */
  readonly onProgress?: ((progress: IndexProgress) => void) | undefined
}
203
+
204
+ export const buildIndex = (
205
+ rootPath: string,
206
+ options: IndexOptions = {},
207
+ ): Effect.Effect<
208
+ IndexResult,
209
+ | DirectoryWalkError
210
+ | DirectoryCreateError
211
+ | FileReadError
212
+ | FileWriteError
213
+ | IndexCorruptedError
214
+ > =>
215
+ Effect.gen(function* () {
216
+ const startTime = Date.now()
217
+ const storage = createStorage(rootPath)
218
+ const errors: FileProcessingError[] = []
219
+
220
+ // Initialize storage
221
+ yield* initializeIndex(storage)
222
+
223
+ // Load existing indexes or create empty ones
224
+ const existingDocIndex = yield* loadDocumentIndex(storage)
225
+ const docIndex: DocumentIndex =
226
+ options.force || !existingDocIndex
227
+ ? createEmptyDocumentIndex(storage.rootPath)
228
+ : existingDocIndex
229
+
230
+ // Load existing section and link indexes to preserve data for unchanged files
231
+ const existingSectionIndex = yield* loadSectionIndex(storage)
232
+ const existingLinkIndex = yield* loadLinkIndex(storage)
233
+ const sectionIndex = existingSectionIndex ?? createEmptySectionIndex()
234
+ const linkIndex = existingLinkIndex ?? createEmptyLinkIndex()
235
+
236
+ // Build ignore filter with proper precedence:
237
+ // CLI/config patterns > .mdcontextignore > .gitignore > defaults
238
+ const ignoreResult = yield* createIgnoreFilter({
239
+ rootPath: storage.rootPath,
240
+ cliPatterns: options.exclude,
241
+ honorGitignore: options.honorGitignore ?? true,
242
+ honorMdcontextignore: options.honorMdcontextignore ?? true,
243
+ })
244
+
245
+ // Discover files using the ignore filter
246
+ const walkResult = yield* Effect.tryPromise({
247
+ try: () =>
248
+ walkDirectory(storage.rootPath, storage.rootPath, ignoreResult.filter),
249
+ catch: (e) =>
250
+ new DirectoryWalkError({
251
+ path: storage.rootPath,
252
+ message: `Failed to traverse directory: ${e instanceof Error ? e.message : String(e)}`,
253
+ cause: e,
254
+ }),
255
+ })
256
+
257
+ const { files, skipped: walkSkipped } = walkResult
258
+
259
+ // Process each file
260
+ let documentsIndexed = 0
261
+ let sectionsIndexed = 0
262
+ let linksIndexed = 0
263
+ let unchangedCount = 0
264
+
265
+ const mutableDocuments: Record<string, DocumentEntry> = {
266
+ ...docIndex.documents,
267
+ }
268
+ const mutableSections: Record<string, SectionEntry> = {
269
+ ...sectionIndex.sections,
270
+ }
271
+
272
+ const mutableByHeading: Record<string, string[]> = Object.assign(
273
+ Object.create(null),
274
+ Object.fromEntries(
275
+ Object.entries(sectionIndex.byHeading).map(([k, v]) => [k, [...v]]),
276
+ ),
277
+ )
278
+ const mutableByDocument: Record<string, string[]> = Object.assign(
279
+ Object.create(null),
280
+ Object.fromEntries(
281
+ Object.entries(sectionIndex.byDocument).map(([k, v]) => [k, [...v]]),
282
+ ),
283
+ )
284
+ const mutableForward: Record<string, string[]> = Object.assign(
285
+ Object.create(null),
286
+ Object.fromEntries(
287
+ Object.entries(linkIndex.forward).map(([k, v]) => [k, [...v]]),
288
+ ),
289
+ )
290
+ const mutableBackward: Record<string, string[]> = Object.assign(
291
+ Object.create(null),
292
+ Object.fromEntries(
293
+ Object.entries(linkIndex.backward).map(([k, v]) => [k, [...v]]),
294
+ ),
295
+ )
296
+ const brokenLinks: string[] = [...linkIndex.broken]
297
+ const totalFiles = files.length
298
+
299
+ for (let fileIndex = 0; fileIndex < files.length; fileIndex++) {
300
+ const filePath = files[fileIndex]!
301
+ const relativePath = path.relative(storage.rootPath, filePath)
302
+
303
+ // Report progress
304
+ if (options.onProgress) {
305
+ options.onProgress({
306
+ current: fileIndex + 1,
307
+ total: totalFiles,
308
+ filePath: relativePath,
309
+ })
310
+ }
311
+
312
+ // Process each file, collecting errors instead of failing
313
+ const processFile = Effect.gen(function* () {
314
+ // Read file content and stats
315
+ const [content, stats] = yield* Effect.promise(() =>
316
+ Promise.all([fs.readFile(filePath, 'utf-8'), fs.stat(filePath)]),
317
+ )
318
+
319
+ const hash = computeHash(content)
320
+ const existingEntry = mutableDocuments[relativePath]
321
+
322
+ // Skip if unchanged
323
+ if (
324
+ !options.force &&
325
+ existingEntry &&
326
+ existingEntry.hash === hash &&
327
+ existingEntry.mtime === stats.mtime.getTime()
328
+ ) {
329
+ unchangedCount++
330
+ return // File unchanged, skip processing
331
+ }
332
+
333
+ // Parse document
334
+ const doc = yield* parse(content, {
335
+ path: relativePath,
336
+ lastModified: stats.mtime,
337
+ }).pipe(
338
+ Effect.mapError(
339
+ (e) =>
340
+ new ParseError({
341
+ message: e.message,
342
+ path: relativePath,
343
+ ...(e.line !== undefined && { line: e.line }),
344
+ ...(e.column !== undefined && { column: e.column }),
345
+ }),
346
+ ),
347
+ )
348
+
349
+ // Clean up old sections for this document before adding new ones
350
+ if (existingEntry) {
351
+ const oldSectionIds = mutableByDocument[existingEntry.id] ?? []
352
+ for (const sectionId of oldSectionIds) {
353
+ const oldSection = mutableSections[sectionId]
354
+ if (oldSection) {
355
+ // Remove from byHeading
356
+ const headingKey = oldSection.heading.toLowerCase()
357
+ const headingList = mutableByHeading[headingKey]
358
+ if (headingList) {
359
+ const idx = headingList.indexOf(sectionId)
360
+ if (idx !== -1) headingList.splice(idx, 1)
361
+ }
362
+ }
363
+ delete mutableSections[sectionId]
364
+ }
365
+ delete mutableByDocument[existingEntry.id]
366
+
367
+ // Clean up old links
368
+ delete mutableForward[relativePath]
369
+ }
370
+
371
+ // Update document index
372
+ mutableDocuments[relativePath] = {
373
+ id: doc.id,
374
+ path: relativePath,
375
+ title: doc.title,
376
+ mtime: stats.mtime.getTime(),
377
+ hash,
378
+ tokenCount: doc.metadata.tokenCount,
379
+ sectionCount: doc.metadata.headingCount,
380
+ }
381
+
382
+ documentsIndexed++
383
+
384
+ // Update section index
385
+ const sections = flattenSections(doc.sections, doc.id, relativePath)
386
+ mutableByDocument[doc.id] = []
387
+
388
+ for (const section of sections) {
389
+ mutableSections[section.id] = section
390
+ mutableByDocument[doc.id]?.push(section.id)
391
+
392
+ // Index by heading
393
+ const headingKey = section.heading.toLowerCase()
394
+ if (!mutableByHeading[headingKey]) {
395
+ mutableByHeading[headingKey] = []
396
+ }
397
+ mutableByHeading[headingKey]?.push(section.id)
398
+
399
+ sectionsIndexed++
400
+ }
401
+
402
+ // Update link index
403
+ const internalLinks = doc.links.filter((l) => l.type === 'internal')
404
+ const outgoingLinks: string[] = []
405
+
406
+ for (const link of internalLinks) {
407
+ const target = resolveInternalLink(
408
+ link.href,
409
+ filePath,
410
+ storage.rootPath,
411
+ )
412
+
413
+ if (target) {
414
+ outgoingLinks.push(target)
415
+
416
+ // Add to backward links
417
+ if (!mutableBackward[target]) {
418
+ mutableBackward[target] = []
419
+ }
420
+ if (!mutableBackward[target]?.includes(relativePath)) {
421
+ mutableBackward[target]?.push(relativePath)
422
+ }
423
+
424
+ linksIndexed++
425
+ }
426
+ }
427
+
428
+ mutableForward[relativePath] = outgoingLinks
429
+ }).pipe(
430
+ // Note: catchAll is intentional for batch file processing.
431
+ // Individual file failures should be collected in errors array
432
+ // rather than stopping the entire index build operation.
433
+ Effect.catchAll((error) => {
434
+ // Extract message from typed errors or generic errors
435
+ const message =
436
+ 'message' in error && typeof error.message === 'string'
437
+ ? error.message
438
+ : String(error)
439
+ errors.push({
440
+ path: relativePath,
441
+ message,
442
+ })
443
+ return Effect.void
444
+ }),
445
+ )
446
+
447
+ yield* processFile
448
+ }
449
+
450
+ // Check for broken links
451
+ for (const [_from, targets] of Object.entries(mutableForward)) {
452
+ for (const target of targets) {
453
+ if (!mutableDocuments[target] && !brokenLinks.includes(target)) {
454
+ brokenLinks.push(target)
455
+ }
456
+ }
457
+ }
458
+
459
+ // Save indexes
460
+ yield* saveDocumentIndex(storage, {
461
+ version: docIndex.version,
462
+ rootPath: storage.rootPath,
463
+ documents: mutableDocuments,
464
+ })
465
+
466
+ yield* saveSectionIndex(storage, {
467
+ version: sectionIndex.version,
468
+ sections: mutableSections,
469
+ byHeading: mutableByHeading,
470
+ byDocument: mutableByDocument,
471
+ })
472
+
473
+ yield* saveLinkIndex(storage, {
474
+ version: linkIndex.version,
475
+ forward: mutableForward,
476
+ backward: mutableBackward,
477
+ broken: brokenLinks,
478
+ })
479
+
480
+ const duration = Date.now() - startTime
481
+
482
+ // Calculate totals for all links across all forward entries
483
+ const totalLinks = Object.values(mutableForward).reduce(
484
+ (sum, links) => sum + links.length,
485
+ 0,
486
+ )
487
+
488
+ // Build skip summary
489
+ const skipped: SkipSummary = {
490
+ unchanged: unchangedCount,
491
+ excluded: walkSkipped.excluded,
492
+ hidden: walkSkipped.hidden,
493
+ total: unchangedCount + walkSkipped.excluded + walkSkipped.hidden,
494
+ }
495
+
496
+ return {
497
+ documentsIndexed,
498
+ sectionsIndexed,
499
+ linksIndexed,
500
+ totalDocuments: Object.keys(mutableDocuments).length,
501
+ totalSections: Object.keys(mutableSections).length,
502
+ totalLinks,
503
+ duration,
504
+ errors,
505
+ skipped,
506
+ }
507
+ })
508
+
509
+ // ============================================================================
510
+ // Link Queries
511
+ // ============================================================================
512
+
513
/**
 * Look up the link targets recorded for a file in the persisted link index.
 *
 * `filePath` is made absolute with `path.resolve` (so a relative path is
 * resolved against the current working directory) and then expressed
 * relative to the storage root; that relative path is the lookup key into
 * the index's `forward` map.
 *
 * @param rootPath - Root directory passed to `createStorage`
 * @param filePath - File whose outgoing links are requested
 * @returns The recorded targets, or an empty array when no link index could
 *   be loaded or the file has no `forward` entry
 */
export const getOutgoingLinks = (
  rootPath: string,
  filePath: string,
): Effect.Effect<readonly string[], FileReadError | IndexCorruptedError> =>
  Effect.gen(function* () {
    const storage = createStorage(rootPath)
    const index = yield* loadLinkIndex(storage)
    if (!index) return []

    const absolutePath = path.resolve(filePath)
    const key = path.relative(storage.rootPath, absolutePath)
    const targets = index.forward[key]
    return targets ?? []
  })
528
+
529
/**
 * Look up which documents are recorded as linking to a file.
 *
 * The lookup key is `filePath` resolved to an absolute path and then taken
 * relative to the storage root; it indexes the `backward` map of the
 * persisted link index.
 *
 * @param rootPath - Root directory passed to `createStorage`
 * @param filePath - File whose incoming links are requested
 * @returns The recorded source paths, or an empty array when no link index
 *   could be loaded or the file has no `backward` entry
 */
export const getIncomingLinks = (
  rootPath: string,
  filePath: string,
): Effect.Effect<readonly string[], FileReadError | IndexCorruptedError> =>
  Effect.gen(function* () {
    const storage = createStorage(rootPath)
    const index = yield* loadLinkIndex(storage)

    if (index) {
      const key = path.relative(storage.rootPath, path.resolve(filePath))
      const sources = index.backward[key]
      return sources ?? []
    }

    // No link index available.
    return []
  })
544
+
545
/**
 * Return the `broken` list stored in the persisted link index.
 *
 * @param rootPath - Root directory passed to `createStorage`
 * @returns The stored broken-link targets, or an empty array when no link
 *   index could be loaded
 */
export const getBrokenLinks = (
  rootPath: string,
): Effect.Effect<readonly string[], FileReadError | IndexCorruptedError> =>
  Effect.gen(function* () {
    const index = yield* loadLinkIndex(createStorage(rootPath))
    return index ? index.broken : []
  })
558
+
559
+ // ============================================================================
560
+ // BM25 Index Building
561
+ // ============================================================================
562
+
563
+ import { type BM25Document, createBM25Store } from '../search/bm25-store.js'
564
+
565
/** Options accepted by `buildBM25Index`. */
export interface BuildBM25Options {
  /**
   * When not set, `buildBM25Index` returns early if an existing BM25 store
   * loads with a non-zero entry count.
   */
  readonly force?: boolean
  /**
   * Invoked after each document's sections have been added to the store.
   * `current` is the number of documents processed so far and `total` is the
   * number of documents that have sections to index.
   */
  readonly onProgress?: (progress: {
    current: number
    total: number
  }) => void
}
569
+
570
/** Summary returned by `buildBM25Index`. */
export interface BuildBM25Result {
  /** Number of sections handed to the BM25 store; 0 when the build is skipped. */
  readonly sectionsIndexed: number

  /** Elapsed time measured with `Date.now()`, i.e. in milliseconds. */
  readonly duration: number
}
574
+
575
/**
 * Build BM25 keyword index for all sections.
 *
 * Loads the persisted document and section indexes, re-reads each source
 * file from disk, slices out the lines of every section with at least 10
 * tokens, and adds them to the BM25 store, which is then consolidated and
 * saved.
 *
 * The build is skipped (`sectionsIndexed: 0`) when either index is missing,
 * or when `options.force` is not set and an existing BM25 store loads with a
 * non-zero entry count.
 *
 * A source file that cannot be read is skipped; it does not abort the build.
 *
 * @param rootPath - Root directory containing indexed markdown files
 * @param options - Build options (force rebuild, progress callback)
 * @returns Result with section count and timing
 */
export const buildBM25Index = (
  rootPath: string,
  options: BuildBM25Options = {},
): Effect.Effect<
  BuildBM25Result,
  FileReadError | IndexCorruptedError | FileWriteError
> =>
  Effect.gen(function* () {
    const startTime = Date.now()
    const storage = createStorage(rootPath)

    // Both indexes must exist; without them there is nothing to build from.
    const docIndex = yield* loadDocumentIndex(storage)
    const sectionIndex = yield* loadSectionIndex(storage)

    if (!docIndex || !sectionIndex) {
      return { sectionsIndexed: 0, duration: 0 }
    }

    // Create BM25 store
    const bm25Store = createBM25Store(storage.rootPath)

    // Unless forced, reuse an existing non-empty store instead of rebuilding.
    if (!options.force) {
      const loaded = yield* bm25Store.load()
      if (loaded) {
        const stats = bm25Store.getStats()
        if (stats.count > 0) {
          return { sectionsIndexed: 0, duration: Date.now() - startTime }
        }
      }
    }

    // Clear and rebuild
    bm25Store.clear()

    // Group sections by document so each file is read only once.
    const sectionsByDoc: Map<string, SectionEntry[]> = new Map()
    for (const section of Object.values(sectionIndex.sections)) {
      // Sections under 10 tokens are left out of the keyword index.
      if (section.tokenCount < 10) continue
      const existing = sectionsByDoc.get(section.documentPath)
      if (existing) {
        existing.push(section)
      } else {
        sectionsByDoc.set(section.documentPath, [section])
      }
    }

    const totalDocs = sectionsByDoc.size
    let processedDocs = 0
    let sectionsIndexed = 0

    // Process each document
    for (const [docPath, sections] of sectionsByDoc) {
      const filePath = path.join(storage.rootPath, docPath)

      // Effect.promise would turn a rejected read into a defect, which
      // catchAll cannot recover from. tryPromise routes the rejection into
      // the error channel so an unreadable file is skipped instead of
      // crashing the whole build.
      const fileContent = yield* Effect.tryPromise(() =>
        fs.readFile(filePath, 'utf-8'),
      ).pipe(Effect.catchAll(() => Effect.succeed(null)))

      if (fileContent === null) continue

      const lines = fileContent.split('\n')
      const docs: BM25Document[] = []

      for (const section of sections) {
        // slice(startLine - 1, endLine) treats startLine as 1-based and
        // endLine as inclusive.
        const content = lines
          .slice(section.startLine - 1, section.endLine)
          .join('\n')

        docs.push({
          id: section.id,
          sectionId: section.id,
          documentPath: section.documentPath,
          heading: section.heading,
          content,
        })
        sectionsIndexed++
      }

      yield* bm25Store.add(docs)

      processedDocs++
      if (options.onProgress) {
        options.onProgress({ current: processedDocs, total: totalDocs })
      }
    }

    // Consolidate and save
    yield* bm25Store.consolidate()
    yield* bm25Store.save()

    return {
      sectionsIndexed,
      duration: Date.now() - startTime,
    }
  })