mdcontext 0.0.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (337) hide show
  1. package/.changeset/README.md +28 -0
  2. package/.changeset/config.json +11 -0
  3. package/.claude/settings.local.json +25 -0
  4. package/.github/workflows/ci.yml +83 -0
  5. package/.github/workflows/claude-code-review.yml +44 -0
  6. package/.github/workflows/claude.yml +85 -0
  7. package/.github/workflows/release.yml +113 -0
  8. package/.tldrignore +112 -0
  9. package/BACKLOG.md +338 -0
  10. package/CONTRIBUTING.md +186 -0
  11. package/NOTES/NOTES +44 -0
  12. package/README.md +434 -11
  13. package/biome.json +36 -0
  14. package/cspell.config.yaml +14 -0
  15. package/dist/chunk-23UPXDNL.js +3044 -0
  16. package/dist/chunk-2W7MO2DL.js +1366 -0
  17. package/dist/chunk-3NUAZGMA.js +1689 -0
  18. package/dist/chunk-7TOWB2XB.js +366 -0
  19. package/dist/chunk-7XOTOADQ.js +3065 -0
  20. package/dist/chunk-AH2PDM2K.js +3042 -0
  21. package/dist/chunk-BNXWSZ63.js +3742 -0
  22. package/dist/chunk-BTL5DJVU.js +3222 -0
  23. package/dist/chunk-HDHYG7E4.js +104 -0
  24. package/dist/chunk-HLR4KZBP.js +3234 -0
  25. package/dist/chunk-IP3FRFEB.js +1045 -0
  26. package/dist/chunk-KHU56VDO.js +3042 -0
  27. package/dist/chunk-KRYIFLQR.js +88 -0
  28. package/dist/chunk-LBSDNLEM.js +287 -0
  29. package/dist/chunk-MNTQ7HCP.js +2643 -0
  30. package/dist/chunk-MUJELQQ6.js +1387 -0
  31. package/dist/chunk-MXJGMSLV.js +2199 -0
  32. package/dist/chunk-N6QJGC3Z.js +2636 -0
  33. package/dist/chunk-OBELGBPM.js +1713 -0
  34. package/dist/chunk-OT7R5XTA.js +3192 -0
  35. package/dist/chunk-P7X4RA2T.js +106 -0
  36. package/dist/chunk-PIDUQNC2.js +3185 -0
  37. package/dist/chunk-POGCDIH4.js +3187 -0
  38. package/dist/chunk-PSIEOQGZ.js +3043 -0
  39. package/dist/chunk-PVRT3IHA.js +3238 -0
  40. package/dist/chunk-QNN4TT23.js +1430 -0
  41. package/dist/chunk-RE3R45RJ.js +3042 -0
  42. package/dist/chunk-S7E6TFX6.js +803 -0
  43. package/dist/chunk-SG6GLU4U.js +1378 -0
  44. package/dist/chunk-SJCDV2ST.js +274 -0
  45. package/dist/chunk-SYE5XLF3.js +104 -0
  46. package/dist/chunk-T5VLYBZD.js +103 -0
  47. package/dist/chunk-TOQB7VWU.js +3238 -0
  48. package/dist/chunk-VFNMZ4ZQ.js +3228 -0
  49. package/dist/chunk-VVTGZNBT.js +1629 -0
  50. package/dist/chunk-W7Q4RFEV.js +104 -0
  51. package/dist/chunk-XTYYVRLO.js +3190 -0
  52. package/dist/chunk-Y6MDYVJD.js +3063 -0
  53. package/dist/cli/main.d.ts +1 -0
  54. package/dist/cli/main.js +5458 -0
  55. package/dist/index.d.ts +653 -0
  56. package/dist/index.js +79 -0
  57. package/dist/mcp/server.d.ts +1 -0
  58. package/dist/mcp/server.js +472 -0
  59. package/dist/schema-BAWSG7KY.js +22 -0
  60. package/dist/schema-E3QUPL26.js +20 -0
  61. package/dist/schema-EHL7WUT6.js +20 -0
  62. package/docs/019-USAGE.md +625 -0
  63. package/docs/020-current-implementation.md +364 -0
  64. package/docs/021-DOGFOODING-FINDINGS.md +175 -0
  65. package/docs/BACKLOG.md +80 -0
  66. package/docs/CONFIG.md +1123 -0
  67. package/docs/DESIGN.md +439 -0
  68. package/docs/ERRORS.md +383 -0
  69. package/docs/PROJECT.md +88 -0
  70. package/docs/ROADMAP.md +407 -0
  71. package/docs/summarization.md +320 -0
  72. package/docs/test-links.md +9 -0
  73. package/justfile +40 -0
  74. package/package.json +74 -9
  75. package/pnpm-workspace.yaml +5 -0
  76. package/research/INDEX.md +315 -0
  77. package/research/code-review/README.md +90 -0
  78. package/research/code-review/cli-error-handling-review.md +979 -0
  79. package/research/code-review/code-review-validation-report.md +464 -0
  80. package/research/code-review/main-ts-review.md +1128 -0
  81. package/research/config-analysis/01-current-implementation.md +470 -0
  82. package/research/config-analysis/02-strategy-recommendation.md +428 -0
  83. package/research/config-analysis/03-task-candidates.md +715 -0
  84. package/research/config-analysis/033-research-configuration-management.md +828 -0
  85. package/research/config-analysis/034-research-effect-cli-config.md +1504 -0
  86. package/research/config-analysis/04-consolidated-task-candidates.md +277 -0
  87. package/research/config-docs/SUMMARY.md +357 -0
  88. package/research/config-docs/TEST-RESULTS.md +776 -0
  89. package/research/config-docs/TODO.md +542 -0
  90. package/research/config-docs/analysis.md +744 -0
  91. package/research/config-docs/fix-validation.md +502 -0
  92. package/research/config-docs/help-audit.md +264 -0
  93. package/research/config-docs/help-system-analysis.md +890 -0
  94. package/research/dogfood/consolidated-tool-evaluation.md +373 -0
  95. package/research/dogfood/strategy-a/a-synthesis.md +184 -0
  96. package/research/dogfood/strategy-a/a1-docs.md +226 -0
  97. package/research/dogfood/strategy-a/a2-amorphic.md +156 -0
  98. package/research/dogfood/strategy-a/a3-llm.md +164 -0
  99. package/research/dogfood/strategy-b/b-synthesis.md +228 -0
  100. package/research/dogfood/strategy-b/b1-architecture.md +207 -0
  101. package/research/dogfood/strategy-b/b2-gaps.md +258 -0
  102. package/research/dogfood/strategy-b/b3-workflows.md +250 -0
  103. package/research/dogfood/strategy-c/c-synthesis.md +451 -0
  104. package/research/dogfood/strategy-c/c1-explorer.md +192 -0
  105. package/research/dogfood/strategy-c/c2-diver-memory.md +145 -0
  106. package/research/dogfood/strategy-c/c3-diver-control.md +148 -0
  107. package/research/dogfood/strategy-c/c4-diver-failure.md +151 -0
  108. package/research/dogfood/strategy-c/c5-diver-execution.md +221 -0
  109. package/research/dogfood/strategy-c/c6-diver-org.md +221 -0
  110. package/research/effect-cli-error-handling.md +845 -0
  111. package/research/effect-errors-as-values.md +943 -0
  112. package/research/errors-task-analysis/00-consolidated-tasks.md +207 -0
  113. package/research/errors-task-analysis/cli-commands-analysis.md +909 -0
  114. package/research/errors-task-analysis/embeddings-analysis.md +709 -0
  115. package/research/errors-task-analysis/index-search-analysis.md +812 -0
  116. package/research/frontmatter/COMMENTS-ARE-SKIPPED.md +149 -0
  117. package/research/frontmatter/LLM-CODE-NAVIGATION.md +276 -0
  118. package/research/issue-review.md +603 -0
  119. package/research/llm-summarization/agent-cli-tools-2026.md +1082 -0
  120. package/research/llm-summarization/alternative-providers-2026.md +1428 -0
  121. package/research/llm-summarization/anthropic-2026.md +367 -0
  122. package/research/llm-summarization/claude-cli-integration.md +1706 -0
  123. package/research/llm-summarization/cli-integration-patterns.md +3155 -0
  124. package/research/llm-summarization/openai-2026.md +473 -0
  125. package/research/llm-summarization/openai-compatible-providers-2026.md +1022 -0
  126. package/research/llm-summarization/opencode-cli-integration.md +1552 -0
  127. package/research/llm-summarization/prompt-engineering-2026.md +1426 -0
  128. package/research/llm-summarization/prototype-results.md +56 -0
  129. package/research/llm-summarization/provider-switching-patterns-2026.md +2153 -0
  130. package/research/llm-summarization/typescript-llm-libraries-2026.md +2436 -0
  131. package/research/mdcontext-error-analysis.md +521 -0
  132. package/research/mdcontext-pudding/00-EXECUTIVE-SUMMARY.md +282 -0
  133. package/research/mdcontext-pudding/01-index-embed.md +956 -0
  134. package/research/mdcontext-pudding/02-search-COMMANDS.md +142 -0
  135. package/research/mdcontext-pudding/02-search-SUMMARY.md +146 -0
  136. package/research/mdcontext-pudding/02-search.md +970 -0
  137. package/research/mdcontext-pudding/03-context.md +779 -0
  138. package/research/mdcontext-pudding/04-navigation-and-analytics.md +803 -0
  139. package/research/mdcontext-pudding/04-tree.md +704 -0
  140. package/research/mdcontext-pudding/05-config.md +1038 -0
  141. package/research/mdcontext-pudding/06-links-summary.txt +87 -0
  142. package/research/mdcontext-pudding/06-links.md +679 -0
  143. package/research/mdcontext-pudding/07-stats.md +693 -0
  144. package/research/mdcontext-pudding/BUG-FIX-PLAN.md +388 -0
  145. package/research/mdcontext-pudding/P0-BUG-VALIDATION.md +167 -0
  146. package/research/mdcontext-pudding/README.md +168 -0
  147. package/research/mdcontext-pudding/TESTING-SUMMARY.md +128 -0
  148. package/research/npm_publish/011-npm-workflow-research-agent2.md +792 -0
  149. package/research/npm_publish/012-npm-workflow-research-agent1.md +530 -0
  150. package/research/npm_publish/013-npm-workflow-research-agent3.md +722 -0
  151. package/research/npm_publish/014-npm-workflow-synthesis.md +556 -0
  152. package/research/npm_publish/031-npm-workflow-task-analysis.md +134 -0
  153. package/research/research-quality-review.md +834 -0
  154. package/research/semantic-search/002-research-embedding-models.md +490 -0
  155. package/research/semantic-search/003-research-rag-alternatives.md +523 -0
  156. package/research/semantic-search/004-research-vector-search.md +841 -0
  157. package/research/semantic-search/032-research-semantic-search.md +427 -0
  158. package/research/semantic-search/embedding-text-analysis.md +156 -0
  159. package/research/semantic-search/multi-word-failure-reproduction.md +171 -0
  160. package/research/semantic-search/query-processing-analysis.md +207 -0
  161. package/research/semantic-search/root-cause-and-solution.md +114 -0
  162. package/research/semantic-search/threshold-validation-report.md +69 -0
  163. package/research/semantic-search/vector-search-analysis.md +63 -0
  164. package/research/task-management-2026/00-synthesis-recommendations.md +295 -0
  165. package/research/task-management-2026/01-ai-workflow-tools.md +416 -0
  166. package/research/task-management-2026/02-agent-framework-patterns.md +476 -0
  167. package/research/task-management-2026/03-lightweight-file-based.md +567 -0
  168. package/research/task-management-2026/04-established-tools-ai-features.md +541 -0
  169. package/research/task-management-2026/linear/01-core-features-workflow.md +771 -0
  170. package/research/task-management-2026/linear/02-api-integrations.md +930 -0
  171. package/research/task-management-2026/linear/03-ai-features.md +368 -0
  172. package/research/task-management-2026/linear/04-pricing-setup.md +205 -0
  173. package/research/task-management-2026/linear/05-usage-patterns-best-practices.md +605 -0
  174. package/research/test-path-issues.md +276 -0
  175. package/review/ALP-76/1-error-type-design.md +962 -0
  176. package/review/ALP-76/2-error-handling-patterns.md +906 -0
  177. package/review/ALP-76/3-error-presentation.md +624 -0
  178. package/review/ALP-76/4-test-coverage.md +625 -0
  179. package/review/ALP-76/5-migration-completeness.md +440 -0
  180. package/review/ALP-76/6-effect-best-practices.md +755 -0
  181. package/scripts/apply-branch-protection.sh +47 -0
  182. package/scripts/branch-protection-templates.json +79 -0
  183. package/scripts/prototype-summarization.ts +346 -0
  184. package/scripts/rebuild-hnswlib.js +58 -0
  185. package/scripts/setup-branch-protection.sh +64 -0
  186. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/active-provider.json +7 -0
  187. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.json +541 -0
  188. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.meta.json +5 -0
  189. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/config.json +8 -0
  190. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
  191. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
  192. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/documents.json +60 -0
  193. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/links.json +13 -0
  194. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/sections.json +1197 -0
  195. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/configuration-management.md +99 -0
  196. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/distributed-systems.md +92 -0
  197. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/error-handling.md +78 -0
  198. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/failure-automation.md +55 -0
  199. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/job-context.md +69 -0
  200. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/process-orchestration.md +99 -0
  201. package/src/cli/argv-preprocessor.test.ts +210 -0
  202. package/src/cli/argv-preprocessor.ts +202 -0
  203. package/src/cli/cli.test.ts +627 -0
  204. package/src/cli/commands/backlinks.ts +54 -0
  205. package/src/cli/commands/config-cmd.ts +642 -0
  206. package/src/cli/commands/context.ts +285 -0
  207. package/src/cli/commands/duplicates.ts +122 -0
  208. package/src/cli/commands/embeddings.ts +529 -0
  209. package/src/cli/commands/index-cmd.ts +480 -0
  210. package/src/cli/commands/index.ts +16 -0
  211. package/src/cli/commands/links.ts +52 -0
  212. package/src/cli/commands/search.ts +1281 -0
  213. package/src/cli/commands/stats.ts +149 -0
  214. package/src/cli/commands/tree.ts +128 -0
  215. package/src/cli/config-layer.ts +176 -0
  216. package/src/cli/error-handler.test.ts +235 -0
  217. package/src/cli/error-handler.ts +655 -0
  218. package/src/cli/flag-schemas.ts +341 -0
  219. package/src/cli/help.ts +588 -0
  220. package/src/cli/index.ts +9 -0
  221. package/src/cli/main.ts +435 -0
  222. package/src/cli/options.ts +41 -0
  223. package/src/cli/shared-error-handling.ts +199 -0
  224. package/src/cli/typo-suggester.test.ts +105 -0
  225. package/src/cli/typo-suggester.ts +130 -0
  226. package/src/cli/utils.ts +259 -0
  227. package/src/config/file-provider.test.ts +320 -0
  228. package/src/config/file-provider.ts +273 -0
  229. package/src/config/index.ts +72 -0
  230. package/src/config/integration.test.ts +667 -0
  231. package/src/config/precedence.test.ts +277 -0
  232. package/src/config/precedence.ts +451 -0
  233. package/src/config/schema.test.ts +414 -0
  234. package/src/config/schema.ts +603 -0
  235. package/src/config/service.test.ts +320 -0
  236. package/src/config/service.ts +243 -0
  237. package/src/config/testing.test.ts +264 -0
  238. package/src/config/testing.ts +110 -0
  239. package/src/core/index.ts +1 -0
  240. package/src/core/types.ts +113 -0
  241. package/src/duplicates/detector.test.ts +183 -0
  242. package/src/duplicates/detector.ts +414 -0
  243. package/src/duplicates/index.ts +18 -0
  244. package/src/embeddings/embedding-namespace.test.ts +300 -0
  245. package/src/embeddings/embedding-namespace.ts +947 -0
  246. package/src/embeddings/heading-boost.test.ts +222 -0
  247. package/src/embeddings/hnsw-build-options.test.ts +198 -0
  248. package/src/embeddings/hyde.test.ts +272 -0
  249. package/src/embeddings/hyde.ts +264 -0
  250. package/src/embeddings/index.ts +10 -0
  251. package/src/embeddings/openai-provider.ts +414 -0
  252. package/src/embeddings/pricing.json +22 -0
  253. package/src/embeddings/provider-constants.ts +204 -0
  254. package/src/embeddings/provider-errors.test.ts +967 -0
  255. package/src/embeddings/provider-errors.ts +565 -0
  256. package/src/embeddings/provider-factory.test.ts +240 -0
  257. package/src/embeddings/provider-factory.ts +225 -0
  258. package/src/embeddings/provider-integration.test.ts +788 -0
  259. package/src/embeddings/query-preprocessing.test.ts +187 -0
  260. package/src/embeddings/semantic-search-threshold.test.ts +508 -0
  261. package/src/embeddings/semantic-search.ts +1270 -0
  262. package/src/embeddings/types.ts +359 -0
  263. package/src/embeddings/vector-store.ts +708 -0
  264. package/src/embeddings/voyage-provider.ts +313 -0
  265. package/src/errors/errors.test.ts +845 -0
  266. package/src/errors/index.ts +533 -0
  267. package/src/index/ignore-patterns.test.ts +354 -0
  268. package/src/index/ignore-patterns.ts +305 -0
  269. package/src/index/index.ts +4 -0
  270. package/src/index/indexer.ts +684 -0
  271. package/src/index/storage.ts +260 -0
  272. package/src/index/types.ts +147 -0
  273. package/src/index/watcher.ts +189 -0
  274. package/src/index.ts +30 -0
  275. package/src/integration/search-keyword.test.ts +678 -0
  276. package/src/mcp/server.ts +612 -0
  277. package/src/parser/index.ts +1 -0
  278. package/src/parser/parser.test.ts +291 -0
  279. package/src/parser/parser.ts +394 -0
  280. package/src/parser/section-filter.test.ts +277 -0
  281. package/src/parser/section-filter.ts +392 -0
  282. package/src/search/__tests__/hybrid-search.test.ts +650 -0
  283. package/src/search/bm25-store.ts +366 -0
  284. package/src/search/cross-encoder.test.ts +253 -0
  285. package/src/search/cross-encoder.ts +406 -0
  286. package/src/search/fuzzy-search.test.ts +419 -0
  287. package/src/search/fuzzy-search.ts +273 -0
  288. package/src/search/hybrid-search.ts +448 -0
  289. package/src/search/path-matcher.test.ts +276 -0
  290. package/src/search/path-matcher.ts +33 -0
  291. package/src/search/query-parser.test.ts +260 -0
  292. package/src/search/query-parser.ts +319 -0
  293. package/src/search/searcher.test.ts +280 -0
  294. package/src/search/searcher.ts +724 -0
  295. package/src/search/wink-bm25.d.ts +30 -0
  296. package/src/summarization/cli-providers/claude.ts +202 -0
  297. package/src/summarization/cli-providers/detection.test.ts +273 -0
  298. package/src/summarization/cli-providers/detection.ts +118 -0
  299. package/src/summarization/cli-providers/index.ts +8 -0
  300. package/src/summarization/cost.test.ts +139 -0
  301. package/src/summarization/cost.ts +102 -0
  302. package/src/summarization/error-handler.test.ts +127 -0
  303. package/src/summarization/error-handler.ts +111 -0
  304. package/src/summarization/index.ts +102 -0
  305. package/src/summarization/pipeline.test.ts +498 -0
  306. package/src/summarization/pipeline.ts +231 -0
  307. package/src/summarization/prompts.test.ts +269 -0
  308. package/src/summarization/prompts.ts +133 -0
  309. package/src/summarization/provider-factory.test.ts +396 -0
  310. package/src/summarization/provider-factory.ts +178 -0
  311. package/src/summarization/types.ts +184 -0
  312. package/src/summarize/budget-bugs.test.ts +620 -0
  313. package/src/summarize/formatters.ts +419 -0
  314. package/src/summarize/index.ts +20 -0
  315. package/src/summarize/summarizer.test.ts +275 -0
  316. package/src/summarize/summarizer.ts +597 -0
  317. package/src/summarize/verify-bugs.test.ts +238 -0
  318. package/src/types/huggingface-transformers.d.ts +66 -0
  319. package/src/utils/index.ts +1 -0
  320. package/src/utils/tokens.test.ts +142 -0
  321. package/src/utils/tokens.ts +186 -0
  322. package/tests/fixtures/cli/.mdcontext/active-provider.json +7 -0
  323. package/tests/fixtures/cli/.mdcontext/config.json +8 -0
  324. package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
  325. package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
  326. package/tests/fixtures/cli/.mdcontext/indexes/documents.json +33 -0
  327. package/tests/fixtures/cli/.mdcontext/indexes/links.json +12 -0
  328. package/tests/fixtures/cli/.mdcontext/indexes/sections.json +247 -0
  329. package/tests/fixtures/cli/README.md +9 -0
  330. package/tests/fixtures/cli/api-reference.md +11 -0
  331. package/tests/fixtures/cli/getting-started.md +11 -0
  332. package/tests/integration/embed-index.test.ts +712 -0
  333. package/tests/integration/search-context.test.ts +469 -0
  334. package/tests/integration/search-semantic.test.ts +522 -0
  335. package/tsconfig.json +26 -0
  336. package/vitest.config.ts +16 -0
  337. package/vitest.setup.ts +12 -0
package/docs/DESIGN.md ADDED
@@ -0,0 +1,439 @@
1
+ # Design: @hw/mdcontext
2
+
3
+ ## Data Model
4
+
5
+ ### Document
6
+
7
+ ```typescript
8
+ interface MdDocument {
9
+ readonly id: string; // hash of path
10
+ readonly path: string; // relative to root
11
+ readonly title: string; // first H1 or filename
12
+ readonly frontmatter: Record<string, unknown>;
13
+ readonly sections: readonly MdSection[];
14
+ readonly links: readonly MdLink[];
15
+ readonly codeBlocks: readonly MdCodeBlock[];
16
+ readonly metadata: DocumentMetadata;
17
+ }
18
+
19
+ interface DocumentMetadata {
20
+ readonly wordCount: number;
21
+ readonly tokenCount: number; // estimated
22
+ readonly headingCount: number;
23
+ readonly linkCount: number;
24
+ readonly codeBlockCount: number;
25
+ readonly lastModified: Date;
26
+ readonly indexedAt: Date;
27
+ }
28
+ ```
29
+
30
+ ### Section
31
+
32
+ ```typescript
33
+ interface MdSection {
34
+ readonly id: string; // doc-id + heading slug
35
+ readonly heading: string; // heading text
36
+ readonly level: 1 | 2 | 3 | 4 | 5 | 6;
37
+ readonly content: string; // raw markdown content
38
+ readonly plainText: string; // stripped for embedding
39
+ readonly startLine: number;
40
+ readonly endLine: number;
41
+ readonly children: readonly MdSection[]; // nested sections
42
+ readonly metadata: SectionMetadata;
43
+ }
44
+
45
+ interface SectionMetadata {
46
+ readonly wordCount: number;
47
+ readonly tokenCount: number;
48
+ readonly hasCode: boolean;
49
+ readonly hasList: boolean;
50
+ readonly hasTable: boolean;
51
+ }
52
+ ```
53
+
54
+ ### Link
55
+
56
+ ```typescript
57
+ interface MdLink {
58
+ readonly type: "internal" | "external" | "image";
59
+ readonly href: string;
60
+ readonly text: string;
61
+ readonly sectionId: string; // which section contains this link
62
+ readonly line: number;
63
+ }
64
+ ```
65
+
66
+ ### Code Block
67
+
68
+ ```typescript
69
+ interface MdCodeBlock {
70
+ readonly language: string | null;
71
+ readonly content: string;
72
+ readonly sectionId: string;
73
+ readonly startLine: number;
74
+ readonly endLine: number;
75
+ }
76
+ ```
77
+
78
+ ---
79
+
80
+ ## Index Structure
81
+
82
+ ### File Layout
83
+
84
+ ```
85
+ .mdcontext/
86
+ config.json # Configuration
87
+ indexes/
88
+ documents.json # Document metadata index
89
+ sections.json # Section index
90
+ links.json # Link graph (forward + back)
91
+ vectors.faiss # Embedding vectors
92
+ vectors.meta.json # Vector ID → Section ID mapping
93
+ cache/
94
+ parsed/ # Cached parsed documents
95
+ <hash>.json
96
+ metrics/
97
+ queries.jsonl # Query log
98
+ stats.json # Aggregated stats
99
+ ```
100
+
101
+ ### Document Index
102
+
103
+ ```typescript
104
+ // documents.json
105
+ interface DocumentIndex {
106
+ readonly version: number;
107
+ readonly rootPath: string;
108
+ readonly documents: Record<string, DocumentEntry>;
109
+ }
110
+
111
+ interface DocumentEntry {
112
+ readonly path: string;
113
+ readonly title: string;
114
+ readonly mtime: number;
115
+ readonly hash: string; // content hash for change detection
116
+ readonly tokenCount: number;
117
+ readonly sectionCount: number;
118
+ }
119
+ ```
120
+
121
+ ### Section Index
122
+
123
+ ```typescript
124
+ // sections.json
125
+ interface SectionIndex {
126
+ readonly version: number;
127
+ readonly sections: Record<string, SectionEntry>;
128
+ readonly byHeading: Record<string, string[]>; // heading → section IDs
129
+ readonly byDocument: Record<string, string[]>; // doc ID → section IDs
130
+ }
131
+
132
+ interface SectionEntry {
133
+ readonly documentId: string;
134
+ readonly heading: string;
135
+ readonly level: number;
136
+ readonly startLine: number;
137
+ readonly tokenCount: number;
138
+ }
139
+ ```
140
+
141
+ ### Link Index
142
+
143
+ ```typescript
144
+ // links.json
145
+ interface LinkIndex {
146
+ readonly version: number;
147
+ readonly forward: Record<string, string[]>; // doc → docs it links to
148
+ readonly backward: Record<string, string[]>; // doc → docs that link to it
149
+ readonly broken: string[]; // links to non-existent docs
150
+ }
151
+ ```
152
+
153
+ ---
154
+
155
+ ## Embedding Strategy
156
+
157
+ ### What to Embed
158
+
159
+ | Unit | Pros | Cons |
160
+ | ---------- | --------------------- | --------------------------------------- |
161
+ | Document | Simple, fewer vectors | Too coarse, loses section relevance |
162
+ | Section | Good granularity | Many vectors, section boundaries matter |
163
+ | Paragraph | Fine-grained | Too many vectors, context loss |
164
+ | **Hybrid** | Best of both | More complexity |
165
+
166
+ **Decision: Section-level embeddings with document-level fallback**
167
+
168
+ - Each section gets embedded
169
+ - Very short sections (< 50 tokens) are merged with their parent section
170
+ - A document-level embedding is also generated as an additional signal
171
+
172
+ ### Embedding Content
173
+
174
+ For each section, embed:
175
+
176
+ ```
177
+ {heading}
178
+
179
+ {first 500 tokens of plainText}
180
+
181
+ Parent: {parent heading}
182
+ Document: {document title}
183
+ ```
184
+
185
+ Including the parent heading and the document title gives each embedding hierarchical context.
186
+
187
+ ### Vector Dimensions
188
+
189
+ | Model | Dimensions | Notes |
190
+ | ----------------------------- | ---------- | ----------------------- |
191
+ | OpenAI text-embedding-3-small | 1536 | Good quality, cheap |
192
+ | OpenAI text-embedding-3-large | 3072 | Best quality, expensive |
193
+ | BGE-large-en-v1.5 | 1024 | Local, good quality |
194
+
195
+ **Decision: Start with text-embedding-3-small, make pluggable**
196
+
197
+ ---
198
+
199
+ ## Summarization Strategy
200
+
201
+ ### Hierarchical Compression
202
+
203
+ ```
204
+ Level 0: Raw section content
205
+ ↓ (compress)
206
+ Level 1: Section summary (key sentences, ~20% of original)
207
+ ↓ (compress)
208
+ Level 2: Document summary (combined section summaries)
209
+ ↓ (compress)
210
+ Level 3: Collection summary (key documents, themes)
211
+ ```
212
+
213
+ ### Section Summary Algorithm
214
+
215
+ 1. **Extract key sentences** — First sentence, sentences with keywords, concluding sentence
216
+ 2. **Preserve structure markers** — Keep heading, list item starts, code block presence
217
+ 3. **Token budget** — Target 20% of original, min 50 tokens, max 500 tokens
218
+
219
+ ### Document Summary Template
220
+
221
+ ```markdown
222
+ # {title}
223
+
224
+ ## Overview
225
+
226
+ {first paragraph or extracted thesis}
227
+
228
+ ## Sections
229
+
230
+ - **{heading 1}**: {one-line summary}
231
+ - **{heading 2}**: {one-line summary}
232
+ ...
233
+
234
+ ## Key Points
235
+
236
+ - {extracted key point 1}
237
+ - {extracted key point 2}
238
+
239
+ ## Links
240
+
241
+ - References: {count} internal, {count} external
242
+ - Referenced by: {backlink count} documents
243
+
244
+ **Tokens:** {original} → {summary} ({percent}% reduction)
245
+ ```
246
+
247
+ ---
248
+
249
+ ## Analytics Design
250
+
251
+ ### Metrics Categories
252
+
253
+ #### Performance Metrics
254
+
255
+ | Metric | Type | Labels | Description |
256
+ | ----------------------------------- | --------- | ------- | ------------------------- |
257
+ | `mdcontext_parse_duration_ms` | Histogram | - | Time to parse a document |
258
+ | `mdcontext_index_build_duration_ms` | Histogram | `type` | Time to build index |
259
+ | `mdcontext_query_duration_ms` | Histogram | `type` | Query execution time |
260
+ | `mdcontext_embed_duration_ms` | Histogram | `model` | Embedding generation time |
261
+ | `mdcontext_cache_hit_total` | Counter | `cache` | Cache hits |
262
+ | `mdcontext_cache_miss_total` | Counter | `cache` | Cache misses |
263
+
264
+ #### Usage Metrics
265
+
266
+ | Metric | Type | Labels | Description |
267
+ | ------------------------------- | ------- | ------ | ------------------------ |
268
+ | `mdcontext_queries_total` | Counter | `type` | Total queries |
269
+ | `mdcontext_tokens_input_total` | Counter | - | Tokens sent to embedding |
270
+ | `mdcontext_tokens_output_total` | Counter | - | Tokens in responses |
271
+ | `mdcontext_documents_indexed` | Gauge | - | Documents in index |
272
+ | `mdcontext_sections_indexed` | Gauge | - | Sections in index |
273
+
274
+ #### Quality Metrics
275
+
276
+ | Metric | Type | Labels | Description |
277
+ | ----------------------------------- | --------- | ------- | ------------------------ |
278
+ | `mdcontext_search_results_returned` | Histogram | - | Results per query |
279
+ | `mdcontext_compression_ratio` | Histogram | `level` | Token reduction achieved |
280
+
281
+ ### Query Logging
282
+
283
+ ```typescript
284
+ interface QueryLogEntry {
285
+ readonly timestamp: Date;
286
+ readonly type: "search" | "context" | "structure";
287
+ readonly query: string;
288
+ readonly filters: Record<string, unknown>;
289
+ readonly resultCount: number;
290
+ readonly durationMs: number;
291
+ readonly tokensUsed: number;
292
+ readonly cacheHit: boolean;
293
+ }
294
+ ```
295
+
296
+ Stored as JSONL for easy streaming analysis.
297
+
298
+ ---
299
+
300
+ ## API Design
301
+
302
+ ### Core Functions
303
+
304
+ ```typescript
305
+ // Parsing
306
+ parse(content: string): Effect<MdDocument, ParseError>
307
+ parseFile(path: string): Effect<MdDocument, ParseError | IoError>
308
+
309
+ // Indexing
310
+ index(dir: string, options?: IndexOptions): Effect<IndexResult, IndexError>
311
+ reindex(paths: string[]): Effect<IndexResult, IndexError>
312
+
313
+ // Search
314
+ search(query: string, options?: SearchOptions): Effect<SearchResult[], SearchError>
315
+ structuralSearch(pattern: StructuralPattern): Effect<StructuralResult[], SearchError>
316
+
317
+ // Context
318
+ getContext(path: string, options?: ContextOptions): Effect<Context, ContextError>
319
+ assembleContext(sources: ContextSource[], budget: number): Effect<AssembledContext, ContextError>
320
+
321
+ // Summarization
322
+ summarize(doc: MdDocument, level: SummaryLevel): Effect<Summary, SummarizeError>
323
+
324
+ // Metrics
325
+ getMetrics(): Effect<Metrics, never>
326
+ ```
327
+
328
+ ### Options Types
329
+
330
+ ```typescript
331
+ interface IndexOptions {
332
+ readonly include?: string[]; // Glob patterns
333
+ readonly exclude?: string[]; // Glob patterns
334
+ readonly embeddings?: boolean; // Generate embeddings
335
+ readonly force?: boolean; // Rebuild even if cached
336
+ }
337
+
338
+ interface SearchOptions {
339
+ readonly limit?: number; // Max results (default 10)
340
+ readonly threshold?: number; // Min similarity (default 0.7)
341
+ readonly filter?: SearchFilter; // Structural filters
342
+ }
343
+
344
+ interface SearchFilter {
345
+ readonly paths?: string[]; // Limit to these paths
346
+ readonly headingLevel?: number[]; // Only these heading levels
347
+ readonly hasCode?: boolean; // Sections with code
348
+ readonly minTokens?: number; // Minimum section size
349
+ readonly maxTokens?: number; // Maximum section size
350
+ }
351
+
352
+ interface ContextOptions {
353
+ readonly level?: SummaryLevel; // 'full' | 'summary' | 'brief'
354
+ readonly maxTokens?: number; // Token budget
355
+ readonly sections?: string[]; // Specific sections
356
+ }
357
+ ```
358
+
359
+ ---
360
+
361
+ ## CLI Design
362
+
363
+ ```bash
364
+ # Indexing
365
+ mdcontext index [dir] # Index directory (default: .)
366
+ mdcontext index --watch # Index and watch for changes
367
+ mdcontext index --force # Force full rebuild
368
+
369
+ # Search
370
+ mdcontext search "query" # Semantic search
371
+ mdcontext search "query" --limit 5 # Limit results
372
+ mdcontext search "query" --json # JSON output
373
+
374
+ # Context
375
+ mdcontext context <path> # Full document context
376
+ mdcontext context <path> --brief # Brief summary
377
+ mdcontext context <path> --tokens 500 # Token budget
378
+
379
+ # Structure
380
+ mdcontext structure <path> # Show document structure
381
+ mdcontext structure <path> --tree # Tree view
382
+ mdcontext links <path> # Show link graph
383
+ mdcontext backlinks <path> # What links to this?
384
+
385
+ # Metrics
386
+ mdcontext metrics # Show current metrics
387
+ mdcontext metrics --json # JSON format
388
+ mdcontext metrics --reset # Reset counters
389
+
390
+ # Daemon
391
+ mdcontext daemon # Run as daemon
392
+ mdcontext daemon --port 8765 # Custom port
393
+ ```
394
+
395
+ ---
396
+
397
+ ## MCP Tools
398
+
399
+ ```typescript
400
+ const tools = [
401
+ {
402
+ name: "md_search",
403
+ description: "Search markdown documents by meaning",
404
+ parameters: {
405
+ query: { type: "string", required: true },
406
+ limit: { type: "number", default: 5 },
407
+ path_filter: { type: "string", description: "Glob pattern" },
408
+ },
409
+ },
410
+ {
411
+ name: "md_context",
412
+ description: "Get LLM-ready context from a markdown file",
413
+ parameters: {
414
+ path: { type: "string", required: true },
415
+ level: { type: "string", enum: ["full", "summary", "brief"] },
416
+ max_tokens: { type: "number" },
417
+ },
418
+ },
419
+ {
420
+ name: "md_structure",
421
+ description: "Get the structure/outline of a markdown file",
422
+ parameters: {
423
+ path: { type: "string", required: true },
424
+ },
425
+ },
426
+ {
427
+ name: "md_links",
428
+ description: "Get links to/from a markdown file",
429
+ parameters: {
430
+ path: { type: "string", required: true },
431
+ direction: { type: "string", enum: ["outgoing", "incoming", "both"] },
432
+ },
433
+ },
434
+ ];
435
+ ```
436
+
437
+ ---
438
+
439
+ _Created: 2025-01-18_