mdcontext 0.0.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (337) hide show
  1. package/.changeset/README.md +28 -0
  2. package/.changeset/config.json +11 -0
  3. package/.claude/settings.local.json +25 -0
  4. package/.github/workflows/ci.yml +83 -0
  5. package/.github/workflows/claude-code-review.yml +44 -0
  6. package/.github/workflows/claude.yml +85 -0
  7. package/.github/workflows/release.yml +113 -0
  8. package/.tldrignore +112 -0
  9. package/BACKLOG.md +338 -0
  10. package/CONTRIBUTING.md +186 -0
  11. package/NOTES/NOTES +44 -0
  12. package/README.md +434 -11
  13. package/biome.json +36 -0
  14. package/cspell.config.yaml +14 -0
  15. package/dist/chunk-23UPXDNL.js +3044 -0
  16. package/dist/chunk-2W7MO2DL.js +1366 -0
  17. package/dist/chunk-3NUAZGMA.js +1689 -0
  18. package/dist/chunk-7TOWB2XB.js +366 -0
  19. package/dist/chunk-7XOTOADQ.js +3065 -0
  20. package/dist/chunk-AH2PDM2K.js +3042 -0
  21. package/dist/chunk-BNXWSZ63.js +3742 -0
  22. package/dist/chunk-BTL5DJVU.js +3222 -0
  23. package/dist/chunk-HDHYG7E4.js +104 -0
  24. package/dist/chunk-HLR4KZBP.js +3234 -0
  25. package/dist/chunk-IP3FRFEB.js +1045 -0
  26. package/dist/chunk-KHU56VDO.js +3042 -0
  27. package/dist/chunk-KRYIFLQR.js +88 -0
  28. package/dist/chunk-LBSDNLEM.js +287 -0
  29. package/dist/chunk-MNTQ7HCP.js +2643 -0
  30. package/dist/chunk-MUJELQQ6.js +1387 -0
  31. package/dist/chunk-MXJGMSLV.js +2199 -0
  32. package/dist/chunk-N6QJGC3Z.js +2636 -0
  33. package/dist/chunk-OBELGBPM.js +1713 -0
  34. package/dist/chunk-OT7R5XTA.js +3192 -0
  35. package/dist/chunk-P7X4RA2T.js +106 -0
  36. package/dist/chunk-PIDUQNC2.js +3185 -0
  37. package/dist/chunk-POGCDIH4.js +3187 -0
  38. package/dist/chunk-PSIEOQGZ.js +3043 -0
  39. package/dist/chunk-PVRT3IHA.js +3238 -0
  40. package/dist/chunk-QNN4TT23.js +1430 -0
  41. package/dist/chunk-RE3R45RJ.js +3042 -0
  42. package/dist/chunk-S7E6TFX6.js +803 -0
  43. package/dist/chunk-SG6GLU4U.js +1378 -0
  44. package/dist/chunk-SJCDV2ST.js +274 -0
  45. package/dist/chunk-SYE5XLF3.js +104 -0
  46. package/dist/chunk-T5VLYBZD.js +103 -0
  47. package/dist/chunk-TOQB7VWU.js +3238 -0
  48. package/dist/chunk-VFNMZ4ZQ.js +3228 -0
  49. package/dist/chunk-VVTGZNBT.js +1629 -0
  50. package/dist/chunk-W7Q4RFEV.js +104 -0
  51. package/dist/chunk-XTYYVRLO.js +3190 -0
  52. package/dist/chunk-Y6MDYVJD.js +3063 -0
  53. package/dist/cli/main.d.ts +1 -0
  54. package/dist/cli/main.js +5458 -0
  55. package/dist/index.d.ts +653 -0
  56. package/dist/index.js +79 -0
  57. package/dist/mcp/server.d.ts +1 -0
  58. package/dist/mcp/server.js +472 -0
  59. package/dist/schema-BAWSG7KY.js +22 -0
  60. package/dist/schema-E3QUPL26.js +20 -0
  61. package/dist/schema-EHL7WUT6.js +20 -0
  62. package/docs/019-USAGE.md +625 -0
  63. package/docs/020-current-implementation.md +364 -0
  64. package/docs/021-DOGFOODING-FINDINGS.md +175 -0
  65. package/docs/BACKLOG.md +80 -0
  66. package/docs/CONFIG.md +1123 -0
  67. package/docs/DESIGN.md +439 -0
  68. package/docs/ERRORS.md +383 -0
  69. package/docs/PROJECT.md +88 -0
  70. package/docs/ROADMAP.md +407 -0
  71. package/docs/summarization.md +320 -0
  72. package/docs/test-links.md +9 -0
  73. package/justfile +40 -0
  74. package/package.json +74 -9
  75. package/pnpm-workspace.yaml +5 -0
  76. package/research/INDEX.md +315 -0
  77. package/research/code-review/README.md +90 -0
  78. package/research/code-review/cli-error-handling-review.md +979 -0
  79. package/research/code-review/code-review-validation-report.md +464 -0
  80. package/research/code-review/main-ts-review.md +1128 -0
  81. package/research/config-analysis/01-current-implementation.md +470 -0
  82. package/research/config-analysis/02-strategy-recommendation.md +428 -0
  83. package/research/config-analysis/03-task-candidates.md +715 -0
  84. package/research/config-analysis/033-research-configuration-management.md +828 -0
  85. package/research/config-analysis/034-research-effect-cli-config.md +1504 -0
  86. package/research/config-analysis/04-consolidated-task-candidates.md +277 -0
  87. package/research/config-docs/SUMMARY.md +357 -0
  88. package/research/config-docs/TEST-RESULTS.md +776 -0
  89. package/research/config-docs/TODO.md +542 -0
  90. package/research/config-docs/analysis.md +744 -0
  91. package/research/config-docs/fix-validation.md +502 -0
  92. package/research/config-docs/help-audit.md +264 -0
  93. package/research/config-docs/help-system-analysis.md +890 -0
  94. package/research/dogfood/consolidated-tool-evaluation.md +373 -0
  95. package/research/dogfood/strategy-a/a-synthesis.md +184 -0
  96. package/research/dogfood/strategy-a/a1-docs.md +226 -0
  97. package/research/dogfood/strategy-a/a2-amorphic.md +156 -0
  98. package/research/dogfood/strategy-a/a3-llm.md +164 -0
  99. package/research/dogfood/strategy-b/b-synthesis.md +228 -0
  100. package/research/dogfood/strategy-b/b1-architecture.md +207 -0
  101. package/research/dogfood/strategy-b/b2-gaps.md +258 -0
  102. package/research/dogfood/strategy-b/b3-workflows.md +250 -0
  103. package/research/dogfood/strategy-c/c-synthesis.md +451 -0
  104. package/research/dogfood/strategy-c/c1-explorer.md +192 -0
  105. package/research/dogfood/strategy-c/c2-diver-memory.md +145 -0
  106. package/research/dogfood/strategy-c/c3-diver-control.md +148 -0
  107. package/research/dogfood/strategy-c/c4-diver-failure.md +151 -0
  108. package/research/dogfood/strategy-c/c5-diver-execution.md +221 -0
  109. package/research/dogfood/strategy-c/c6-diver-org.md +221 -0
  110. package/research/effect-cli-error-handling.md +845 -0
  111. package/research/effect-errors-as-values.md +943 -0
  112. package/research/errors-task-analysis/00-consolidated-tasks.md +207 -0
  113. package/research/errors-task-analysis/cli-commands-analysis.md +909 -0
  114. package/research/errors-task-analysis/embeddings-analysis.md +709 -0
  115. package/research/errors-task-analysis/index-search-analysis.md +812 -0
  116. package/research/frontmatter/COMMENTS-ARE-SKIPPED.md +149 -0
  117. package/research/frontmatter/LLM-CODE-NAVIGATION.md +276 -0
  118. package/research/issue-review.md +603 -0
  119. package/research/llm-summarization/agent-cli-tools-2026.md +1082 -0
  120. package/research/llm-summarization/alternative-providers-2026.md +1428 -0
  121. package/research/llm-summarization/anthropic-2026.md +367 -0
  122. package/research/llm-summarization/claude-cli-integration.md +1706 -0
  123. package/research/llm-summarization/cli-integration-patterns.md +3155 -0
  124. package/research/llm-summarization/openai-2026.md +473 -0
  125. package/research/llm-summarization/openai-compatible-providers-2026.md +1022 -0
  126. package/research/llm-summarization/opencode-cli-integration.md +1552 -0
  127. package/research/llm-summarization/prompt-engineering-2026.md +1426 -0
  128. package/research/llm-summarization/prototype-results.md +56 -0
  129. package/research/llm-summarization/provider-switching-patterns-2026.md +2153 -0
  130. package/research/llm-summarization/typescript-llm-libraries-2026.md +2436 -0
  131. package/research/mdcontext-error-analysis.md +521 -0
  132. package/research/mdcontext-pudding/00-EXECUTIVE-SUMMARY.md +282 -0
  133. package/research/mdcontext-pudding/01-index-embed.md +956 -0
  134. package/research/mdcontext-pudding/02-search-COMMANDS.md +142 -0
  135. package/research/mdcontext-pudding/02-search-SUMMARY.md +146 -0
  136. package/research/mdcontext-pudding/02-search.md +970 -0
  137. package/research/mdcontext-pudding/03-context.md +779 -0
  138. package/research/mdcontext-pudding/04-navigation-and-analytics.md +803 -0
  139. package/research/mdcontext-pudding/04-tree.md +704 -0
  140. package/research/mdcontext-pudding/05-config.md +1038 -0
  141. package/research/mdcontext-pudding/06-links-summary.txt +87 -0
  142. package/research/mdcontext-pudding/06-links.md +679 -0
  143. package/research/mdcontext-pudding/07-stats.md +693 -0
  144. package/research/mdcontext-pudding/BUG-FIX-PLAN.md +388 -0
  145. package/research/mdcontext-pudding/P0-BUG-VALIDATION.md +167 -0
  146. package/research/mdcontext-pudding/README.md +168 -0
  147. package/research/mdcontext-pudding/TESTING-SUMMARY.md +128 -0
  148. package/research/npm_publish/011-npm-workflow-research-agent2.md +792 -0
  149. package/research/npm_publish/012-npm-workflow-research-agent1.md +530 -0
  150. package/research/npm_publish/013-npm-workflow-research-agent3.md +722 -0
  151. package/research/npm_publish/014-npm-workflow-synthesis.md +556 -0
  152. package/research/npm_publish/031-npm-workflow-task-analysis.md +134 -0
  153. package/research/research-quality-review.md +834 -0
  154. package/research/semantic-search/002-research-embedding-models.md +490 -0
  155. package/research/semantic-search/003-research-rag-alternatives.md +523 -0
  156. package/research/semantic-search/004-research-vector-search.md +841 -0
  157. package/research/semantic-search/032-research-semantic-search.md +427 -0
  158. package/research/semantic-search/embedding-text-analysis.md +156 -0
  159. package/research/semantic-search/multi-word-failure-reproduction.md +171 -0
  160. package/research/semantic-search/query-processing-analysis.md +207 -0
  161. package/research/semantic-search/root-cause-and-solution.md +114 -0
  162. package/research/semantic-search/threshold-validation-report.md +69 -0
  163. package/research/semantic-search/vector-search-analysis.md +63 -0
  164. package/research/task-management-2026/00-synthesis-recommendations.md +295 -0
  165. package/research/task-management-2026/01-ai-workflow-tools.md +416 -0
  166. package/research/task-management-2026/02-agent-framework-patterns.md +476 -0
  167. package/research/task-management-2026/03-lightweight-file-based.md +567 -0
  168. package/research/task-management-2026/04-established-tools-ai-features.md +541 -0
  169. package/research/task-management-2026/linear/01-core-features-workflow.md +771 -0
  170. package/research/task-management-2026/linear/02-api-integrations.md +930 -0
  171. package/research/task-management-2026/linear/03-ai-features.md +368 -0
  172. package/research/task-management-2026/linear/04-pricing-setup.md +205 -0
  173. package/research/task-management-2026/linear/05-usage-patterns-best-practices.md +605 -0
  174. package/research/test-path-issues.md +276 -0
  175. package/review/ALP-76/1-error-type-design.md +962 -0
  176. package/review/ALP-76/2-error-handling-patterns.md +906 -0
  177. package/review/ALP-76/3-error-presentation.md +624 -0
  178. package/review/ALP-76/4-test-coverage.md +625 -0
  179. package/review/ALP-76/5-migration-completeness.md +440 -0
  180. package/review/ALP-76/6-effect-best-practices.md +755 -0
  181. package/scripts/apply-branch-protection.sh +47 -0
  182. package/scripts/branch-protection-templates.json +79 -0
  183. package/scripts/prototype-summarization.ts +346 -0
  184. package/scripts/rebuild-hnswlib.js +58 -0
  185. package/scripts/setup-branch-protection.sh +64 -0
  186. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/active-provider.json +7 -0
  187. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.json +541 -0
  188. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.meta.json +5 -0
  189. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/config.json +8 -0
  190. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
  191. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
  192. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/documents.json +60 -0
  193. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/links.json +13 -0
  194. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/sections.json +1197 -0
  195. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/configuration-management.md +99 -0
  196. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/distributed-systems.md +92 -0
  197. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/error-handling.md +78 -0
  198. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/failure-automation.md +55 -0
  199. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/job-context.md +69 -0
  200. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/process-orchestration.md +99 -0
  201. package/src/cli/argv-preprocessor.test.ts +210 -0
  202. package/src/cli/argv-preprocessor.ts +202 -0
  203. package/src/cli/cli.test.ts +627 -0
  204. package/src/cli/commands/backlinks.ts +54 -0
  205. package/src/cli/commands/config-cmd.ts +642 -0
  206. package/src/cli/commands/context.ts +285 -0
  207. package/src/cli/commands/duplicates.ts +122 -0
  208. package/src/cli/commands/embeddings.ts +529 -0
  209. package/src/cli/commands/index-cmd.ts +480 -0
  210. package/src/cli/commands/index.ts +16 -0
  211. package/src/cli/commands/links.ts +52 -0
  212. package/src/cli/commands/search.ts +1281 -0
  213. package/src/cli/commands/stats.ts +149 -0
  214. package/src/cli/commands/tree.ts +128 -0
  215. package/src/cli/config-layer.ts +176 -0
  216. package/src/cli/error-handler.test.ts +235 -0
  217. package/src/cli/error-handler.ts +655 -0
  218. package/src/cli/flag-schemas.ts +341 -0
  219. package/src/cli/help.ts +588 -0
  220. package/src/cli/index.ts +9 -0
  221. package/src/cli/main.ts +435 -0
  222. package/src/cli/options.ts +41 -0
  223. package/src/cli/shared-error-handling.ts +199 -0
  224. package/src/cli/typo-suggester.test.ts +105 -0
  225. package/src/cli/typo-suggester.ts +130 -0
  226. package/src/cli/utils.ts +259 -0
  227. package/src/config/file-provider.test.ts +320 -0
  228. package/src/config/file-provider.ts +273 -0
  229. package/src/config/index.ts +72 -0
  230. package/src/config/integration.test.ts +667 -0
  231. package/src/config/precedence.test.ts +277 -0
  232. package/src/config/precedence.ts +451 -0
  233. package/src/config/schema.test.ts +414 -0
  234. package/src/config/schema.ts +603 -0
  235. package/src/config/service.test.ts +320 -0
  236. package/src/config/service.ts +243 -0
  237. package/src/config/testing.test.ts +264 -0
  238. package/src/config/testing.ts +110 -0
  239. package/src/core/index.ts +1 -0
  240. package/src/core/types.ts +113 -0
  241. package/src/duplicates/detector.test.ts +183 -0
  242. package/src/duplicates/detector.ts +414 -0
  243. package/src/duplicates/index.ts +18 -0
  244. package/src/embeddings/embedding-namespace.test.ts +300 -0
  245. package/src/embeddings/embedding-namespace.ts +947 -0
  246. package/src/embeddings/heading-boost.test.ts +222 -0
  247. package/src/embeddings/hnsw-build-options.test.ts +198 -0
  248. package/src/embeddings/hyde.test.ts +272 -0
  249. package/src/embeddings/hyde.ts +264 -0
  250. package/src/embeddings/index.ts +10 -0
  251. package/src/embeddings/openai-provider.ts +414 -0
  252. package/src/embeddings/pricing.json +22 -0
  253. package/src/embeddings/provider-constants.ts +204 -0
  254. package/src/embeddings/provider-errors.test.ts +967 -0
  255. package/src/embeddings/provider-errors.ts +565 -0
  256. package/src/embeddings/provider-factory.test.ts +240 -0
  257. package/src/embeddings/provider-factory.ts +225 -0
  258. package/src/embeddings/provider-integration.test.ts +788 -0
  259. package/src/embeddings/query-preprocessing.test.ts +187 -0
  260. package/src/embeddings/semantic-search-threshold.test.ts +508 -0
  261. package/src/embeddings/semantic-search.ts +1270 -0
  262. package/src/embeddings/types.ts +359 -0
  263. package/src/embeddings/vector-store.ts +708 -0
  264. package/src/embeddings/voyage-provider.ts +313 -0
  265. package/src/errors/errors.test.ts +845 -0
  266. package/src/errors/index.ts +533 -0
  267. package/src/index/ignore-patterns.test.ts +354 -0
  268. package/src/index/ignore-patterns.ts +305 -0
  269. package/src/index/index.ts +4 -0
  270. package/src/index/indexer.ts +684 -0
  271. package/src/index/storage.ts +260 -0
  272. package/src/index/types.ts +147 -0
  273. package/src/index/watcher.ts +189 -0
  274. package/src/index.ts +30 -0
  275. package/src/integration/search-keyword.test.ts +678 -0
  276. package/src/mcp/server.ts +612 -0
  277. package/src/parser/index.ts +1 -0
  278. package/src/parser/parser.test.ts +291 -0
  279. package/src/parser/parser.ts +394 -0
  280. package/src/parser/section-filter.test.ts +277 -0
  281. package/src/parser/section-filter.ts +392 -0
  282. package/src/search/__tests__/hybrid-search.test.ts +650 -0
  283. package/src/search/bm25-store.ts +366 -0
  284. package/src/search/cross-encoder.test.ts +253 -0
  285. package/src/search/cross-encoder.ts +406 -0
  286. package/src/search/fuzzy-search.test.ts +419 -0
  287. package/src/search/fuzzy-search.ts +273 -0
  288. package/src/search/hybrid-search.ts +448 -0
  289. package/src/search/path-matcher.test.ts +276 -0
  290. package/src/search/path-matcher.ts +33 -0
  291. package/src/search/query-parser.test.ts +260 -0
  292. package/src/search/query-parser.ts +319 -0
  293. package/src/search/searcher.test.ts +280 -0
  294. package/src/search/searcher.ts +724 -0
  295. package/src/search/wink-bm25.d.ts +30 -0
  296. package/src/summarization/cli-providers/claude.ts +202 -0
  297. package/src/summarization/cli-providers/detection.test.ts +273 -0
  298. package/src/summarization/cli-providers/detection.ts +118 -0
  299. package/src/summarization/cli-providers/index.ts +8 -0
  300. package/src/summarization/cost.test.ts +139 -0
  301. package/src/summarization/cost.ts +102 -0
  302. package/src/summarization/error-handler.test.ts +127 -0
  303. package/src/summarization/error-handler.ts +111 -0
  304. package/src/summarization/index.ts +102 -0
  305. package/src/summarization/pipeline.test.ts +498 -0
  306. package/src/summarization/pipeline.ts +231 -0
  307. package/src/summarization/prompts.test.ts +269 -0
  308. package/src/summarization/prompts.ts +133 -0
  309. package/src/summarization/provider-factory.test.ts +396 -0
  310. package/src/summarization/provider-factory.ts +178 -0
  311. package/src/summarization/types.ts +184 -0
  312. package/src/summarize/budget-bugs.test.ts +620 -0
  313. package/src/summarize/formatters.ts +419 -0
  314. package/src/summarize/index.ts +20 -0
  315. package/src/summarize/summarizer.test.ts +275 -0
  316. package/src/summarize/summarizer.ts +597 -0
  317. package/src/summarize/verify-bugs.test.ts +238 -0
  318. package/src/types/huggingface-transformers.d.ts +66 -0
  319. package/src/utils/index.ts +1 -0
  320. package/src/utils/tokens.test.ts +142 -0
  321. package/src/utils/tokens.ts +186 -0
  322. package/tests/fixtures/cli/.mdcontext/active-provider.json +7 -0
  323. package/tests/fixtures/cli/.mdcontext/config.json +8 -0
  324. package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
  325. package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
  326. package/tests/fixtures/cli/.mdcontext/indexes/documents.json +33 -0
  327. package/tests/fixtures/cli/.mdcontext/indexes/links.json +12 -0
  328. package/tests/fixtures/cli/.mdcontext/indexes/sections.json +247 -0
  329. package/tests/fixtures/cli/README.md +9 -0
  330. package/tests/fixtures/cli/api-reference.md +11 -0
  331. package/tests/fixtures/cli/getting-started.md +11 -0
  332. package/tests/integration/embed-index.test.ts +712 -0
  333. package/tests/integration/search-context.test.ts +469 -0
  334. package/tests/integration/search-semantic.test.ts +522 -0
  335. package/tsconfig.json +26 -0
  336. package/vitest.config.ts +16 -0
  337. package/vitest.setup.ts +12 -0
@@ -0,0 +1,291 @@
1
+ import { Effect } from 'effect'
2
+ import { describe, expect, it } from 'vitest'
3
+ import { parse } from './parser.js'
4
+
5
+ describe('markdown parser', () => {
6
+ describe('basic parsing', () => {
7
+ it('parses a simple markdown document', async () => {
8
+ const content = `# Hello World
9
+
10
+ This is a paragraph.
11
+
12
+ ## Section One
13
+
14
+ Content for section one.
15
+
16
+ ## Section Two
17
+
18
+ Content for section two.
19
+ `
20
+
21
+ const result = await Effect.runPromise(parse(content))
22
+
23
+ expect(result.title).toBe('Hello World')
24
+ expect(result.sections).toHaveLength(1) // One H1 as root
25
+ expect(result.sections[0]?.heading).toBe('Hello World')
26
+ expect(result.sections[0]?.children).toHaveLength(2) // Two H2 children
27
+ })
28
+
29
+ it('extracts frontmatter', async () => {
30
+ const content = `---
31
+ title: Custom Title
32
+ author: Test Author
33
+ tags:
34
+ - markdown
35
+ - parser
36
+ ---
37
+
38
+ # Heading
39
+
40
+ Content here.
41
+ `
42
+
43
+ const result = await Effect.runPromise(parse(content))
44
+
45
+ expect(result.frontmatter).toEqual({
46
+ title: 'Custom Title',
47
+ author: 'Test Author',
48
+ tags: ['markdown', 'parser'],
49
+ })
50
+ expect(result.title).toBe('Heading') // H1 takes precedence
51
+ })
52
+
53
+ it('uses frontmatter title when no H1 present', async () => {
54
+ const content = `---
55
+ title: Frontmatter Title
56
+ ---
57
+
58
+ ## Only an H2
59
+
60
+ Some content.
61
+ `
62
+
63
+ const result = await Effect.runPromise(parse(content))
64
+
65
+ expect(result.title).toBe('Frontmatter Title')
66
+ })
67
+
68
+ it('handles malformed YAML frontmatter gracefully', async () => {
69
+ const content = `---
70
+ title: Valid Start
71
+ But this is not valid YAML:
72
+ - missing colon here
73
+ invalid: [unclosed bracket
74
+ ---
75
+
76
+ # Actual Content
77
+
78
+ This should still parse.
79
+ `
80
+
81
+ const result = await Effect.runPromise(parse(content))
82
+
83
+ // Should not throw, should parse with empty frontmatter
84
+ expect(result.frontmatter).toEqual({})
85
+ expect(result.title).toBe('Actual Content')
86
+ })
87
+ })
88
+
89
+ describe('section hierarchy', () => {
90
+ it('builds proper section hierarchy', async () => {
91
+ const content = `# Root
92
+
93
+ ## Level 2 A
94
+
95
+ ### Level 3 A1
96
+
97
+ Content
98
+
99
+ ### Level 3 A2
100
+
101
+ Content
102
+
103
+ ## Level 2 B
104
+
105
+ Content
106
+ `
107
+
108
+ const result = await Effect.runPromise(parse(content))
109
+
110
+ expect(result.sections).toHaveLength(1)
111
+
112
+ const root = result.sections[0]!
113
+ expect(root.heading).toBe('Root')
114
+ expect(root.children).toHaveLength(2)
115
+
116
+ const level2A = root.children[0]!
117
+ expect(level2A.heading).toBe('Level 2 A')
118
+ expect(level2A.children).toHaveLength(2)
119
+
120
+ const level3A1 = level2A.children[0]!
121
+ expect(level3A1.heading).toBe('Level 3 A1')
122
+ expect(level3A1.children).toHaveLength(0)
123
+ })
124
+ })
125
+
126
+ describe('links', () => {
127
+ it('extracts internal links', async () => {
128
+ const content = `# Links
129
+
130
+ Check out [other doc](./other.md).
131
+
132
+ And [section link](#section).
133
+ `
134
+
135
+ const result = await Effect.runPromise(parse(content))
136
+
137
+ expect(result.links).toHaveLength(2)
138
+ expect(result.links[0]?.type).toBe('internal')
139
+ expect(result.links[0]?.href).toBe('./other.md')
140
+ expect(result.links[1]?.type).toBe('internal')
141
+ expect(result.links[1]?.href).toBe('#section')
142
+ })
143
+
144
+ it('extracts external links', async () => {
145
+ const content = `# External Links
146
+
147
+ Visit [Google](https://google.com).
148
+ `
149
+
150
+ const result = await Effect.runPromise(parse(content))
151
+
152
+ expect(result.links).toHaveLength(1)
153
+ expect(result.links[0]?.type).toBe('external')
154
+ expect(result.links[0]?.href).toBe('https://google.com')
155
+ })
156
+
157
+ it('extracts image links', async () => {
158
+ const content = `# Images
159
+
160
+ ![Alt text](./image.png)
161
+ `
162
+
163
+ const result = await Effect.runPromise(parse(content))
164
+
165
+ expect(result.links).toHaveLength(1)
166
+ expect(result.links[0]?.type).toBe('image')
167
+ expect(result.links[0]?.text).toBe('Alt text')
168
+ })
169
+ })
170
+
171
+ describe('code blocks', () => {
172
+ it('extracts code blocks with language', async () => {
173
+ const content = `# Code
174
+
175
+ \`\`\`typescript
176
+ const x = 1;
177
+ \`\`\`
178
+ `
179
+
180
+ const result = await Effect.runPromise(parse(content))
181
+
182
+ expect(result.codeBlocks).toHaveLength(1)
183
+ expect(result.codeBlocks[0]?.language).toBe('typescript')
184
+ expect(result.codeBlocks[0]?.content).toBe('const x = 1;')
185
+ })
186
+
187
+ it('extracts code blocks without language', async () => {
188
+ const content = `# Code
189
+
190
+ \`\`\`
191
+ plain text
192
+ \`\`\`
193
+ `
194
+
195
+ const result = await Effect.runPromise(parse(content))
196
+
197
+ expect(result.codeBlocks).toHaveLength(1)
198
+ expect(result.codeBlocks[0]?.language).toBeNull()
199
+ })
200
+ })
201
+
202
+ describe('GFM features', () => {
203
+ it('detects tables in sections', async () => {
204
+ const content = `# Tables
205
+
206
+ | Header 1 | Header 2 |
207
+ | -------- | -------- |
208
+ | Cell 1 | Cell 2 |
209
+ `
210
+
211
+ const result = await Effect.runPromise(parse(content))
212
+
213
+ expect(result.sections[0]?.metadata.hasTable).toBe(true)
214
+ })
215
+
216
+ it('detects lists in sections', async () => {
217
+ const content = `# Lists
218
+
219
+ - Item 1
220
+ - Item 2
221
+ - Item 3
222
+ `
223
+
224
+ const result = await Effect.runPromise(parse(content))
225
+
226
+ expect(result.sections[0]?.metadata.hasList).toBe(true)
227
+ })
228
+
229
+ it('detects task lists', async () => {
230
+ const content = `# Tasks
231
+
232
+ - [ ] Todo item
233
+ - [x] Completed item
234
+ `
235
+
236
+ const result = await Effect.runPromise(parse(content))
237
+
238
+ expect(result.sections[0]?.metadata.hasList).toBe(true)
239
+ })
240
+ })
241
+
242
+ describe('metadata', () => {
243
+ it('counts tokens and words', async () => {
244
+ const content = `# Document
245
+
246
+ This is some text content for testing token counting.
247
+ `
248
+
249
+ const result = await Effect.runPromise(parse(content))
250
+
251
+ expect(result.metadata.tokenCount).toBeGreaterThan(0)
252
+ expect(result.metadata.wordCount).toBeGreaterThan(0)
253
+ })
254
+
255
+ it('counts links and code blocks', async () => {
256
+ const content = `# Test
257
+
258
+ [Link 1](./a.md)
259
+ [Link 2](./b.md)
260
+
261
+ \`\`\`js
262
+ code
263
+ \`\`\`
264
+
265
+ \`\`\`py
266
+ code
267
+ \`\`\`
268
+ `
269
+
270
+ const result = await Effect.runPromise(parse(content))
271
+
272
+ expect(result.metadata.linkCount).toBe(2)
273
+ expect(result.metadata.codeBlockCount).toBe(2)
274
+ })
275
+
276
+ it('counts headings', async () => {
277
+ const content = `# H1
278
+
279
+ ## H2
280
+
281
+ ### H3
282
+
283
+ ## Another H2
284
+ `
285
+
286
+ const result = await Effect.runPromise(parse(content))
287
+
288
+ expect(result.metadata.headingCount).toBe(4)
289
+ })
290
+ })
291
+ })
@@ -0,0 +1,394 @@
1
+ /**
2
+ * Markdown parser using remark/unified
3
+ * Handles GFM (tables, task lists) and YAML frontmatter
4
+ */
5
+
6
+ import * as crypto from 'node:crypto'
7
+ import { Effect } from 'effect'
8
+ import matter from 'gray-matter'
9
+ import type { Code, Heading, Image, Link, Parent, Root, Text } from 'mdast'
10
+ import remarkGfm from 'remark-gfm'
11
+ import remarkParse from 'remark-parse'
12
+ import { unified } from 'unified'
13
+ import { visit } from 'unist-util-visit'
14
+
15
+ import type {
16
+ DocumentMetadata,
17
+ HeadingLevel,
18
+ MdCodeBlock,
19
+ MdDocument,
20
+ MdLink,
21
+ MdSection,
22
+ ParseError,
23
+ } from '../core/types.js'
24
+ import { FileReadError } from '../errors/index.js'
25
+ import { countTokensApprox, countWords } from '../utils/tokens.js'
26
+
27
+ // ============================================================================
28
+ // Parser Configuration
29
+ // ============================================================================
30
+
31
/**
 * Markdown pipeline used by this module: the remark parser with the
 * GFM plugin registered on top of it.
 */
const processor = unified()
  .use(remarkParse)
  .use(remarkGfm)
32
+
33
+ // ============================================================================
34
+ // Helper Functions
35
+ // ============================================================================
36
+
37
/**
 * Derive a short, deterministic identifier from an arbitrary string.
 *
 * @param input - Text to fingerprint.
 * @returns The first 12 hexadecimal characters of the MD5 digest of `input`.
 */
const generateId = (input: string): string => {
  // NOTE(review): MD5 appears to be used only as a compact fingerprint here,
  // not for anything security-sensitive — confirm against callers.
  const hexDigest = crypto.createHash('md5').update(input).digest('hex')
  return hexDigest.substring(0, 12)
}
40
+
41
/**
 * Convert heading text into a lowercase, hyphen-separated slug.
 *
 * Steps, in order:
 *  1. lowercase the text;
 *  2. drop every character that is not a word character (`\w`),
 *     whitespace, or a hyphen;
 *  3. trim surrounding whitespace;
 *  4. turn each whitespace run into a single hyphen;
 *  5. collapse runs of hyphens.
 *
 * Trimming has to happen before whitespace becomes hyphens: once step 4 has
 * run there is no whitespace left to trim, so padding (or whitespace left
 * behind after punctuation was stripped, as in `"Hello !"`) would otherwise
 * surface as a leading or trailing `-`.
 *
 * @param text - Heading text to slugify.
 * @returns The slug; empty when `text` contains no word characters or hyphens.
 */
const slugify = (text: string): string => {
  return text
    .toLowerCase()
    .replace(/[^\w\s-]/g, '')
    .trim()
    .replace(/\s+/g, '-')
    .replace(/-+/g, '-')
}
49
+
50
/**
 * Decide whether a link target stays inside the local document tree.
 *
 * Classification order:
 *  - `#fragment` links are internal;
 *  - protocol-relative URLs (`//host/...`) are external;
 *  - anything that starts with a URI scheme (`http:`, `https:`, `mailto:`,
 *    `ftp:`, `tel:`, ... in any letter case) is external;
 *  - targets ending in `.md` or containing `.md#` are internal;
 *  - everything else is internal unless it contains `://`.
 *
 * The scheme test runs before the `.md` test so that a remote markdown file
 * such as `ftp://host/doc.md` is not mistaken for a local one.
 *
 * @param href - Raw link destination as written in the markdown source.
 * @returns `true` for internal targets, `false` for external ones.
 */
const isInternalLink = (href: string): boolean => {
  if (href.startsWith('#')) return true
  if (href.startsWith('//')) return false
  // At least two scheme characters are required so that a Windows drive
  // prefix such as `C:` is still treated as a local path.
  if (/^[a-z][a-z0-9+.-]+:/i.test(href)) return false
  if (href.endsWith('.md') || href.includes('.md#')) return true
  return !href.includes('://')
}
57
+
58
/**
 * Flatten a subtree into plain text.
 *
 * Visits every node of type `text` beneath `node` and joins their values
 * with a single space. Nodes of any other type contribute nothing.
 *
 * @param node - Subtree root to read text from.
 * @returns The collected text; empty when the subtree has no `text` nodes.
 */
const extractPlainText = (node: Parent | Root): string => {
  const fragments: string[] = []
  const collect = (leaf: Text): void => {
    fragments.push(leaf.value)
  }
  visit(node, 'text', collect)
  return fragments.join(' ')
}
65
+
66
/**
 * Read the line number stored at `position.end.line` of a syntax-tree node.
 *
 * The parameter is `unknown` rather than `any`, so no lint suppression is
 * needed and every existing caller still type-checks. The value is narrowed
 * before it is returned, so the declared `number` result holds even for
 * malformed input.
 *
 * @param node - Any value; expected to be a node carrying positional info.
 * @returns The end line, or `0` when the node, its position, or a numeric
 *   line is missing.
 */
const getNodeEndLine = (node: unknown): number => {
  // The assertion only describes the shape being probed; optional chaining
  // keeps the access safe for null, undefined, and primitives.
  const line = (
    node as { position?: { end?: { line?: unknown } } } | null | undefined
  )?.position?.end?.line
  return typeof line === 'number' ? line : 0
}
70
+
71
/**
 * Read the line number stored at `position.start.line` of a syntax-tree node.
 *
 * The parameter is `unknown` rather than `any`, so no lint suppression is
 * needed and every existing caller still type-checks. The value is narrowed
 * before it is returned, so the declared `number` result holds even for
 * malformed input.
 *
 * @param node - Any value; expected to be a node carrying positional info.
 * @returns The start line, or `0` when the node, its position, or a numeric
 *   line is missing.
 */
const getNodeStartLine = (node: unknown): number => {
  // The assertion only describes the shape being probed; optional chaining
  // keeps the access safe for null, undefined, and primitives.
  const line = (
    node as { position?: { start?: { line?: unknown } } } | null | undefined
  )?.position?.start?.line
  return typeof line === 'number' ? line : 0
}
75
+
76
+ // ============================================================================
77
+ // Section Extraction
78
+ // ============================================================================
79
+
80
/**
 * Flat description of one heading and the nodes that follow it, produced
 * before sections are nested into a hierarchy.
 */
interface RawSection {
  /** Plain text of the heading. */
  heading: string
  /** Depth of the heading. */
  level: HeadingLevel
  /** Line on which the heading starts. */
  startLine: number
  /** Line on which the section body is taken to begin (the line after the heading). */
  contentStartLine: number
  /** Line on which the section's last content node ends. */
  endLine: number
  /** Top-level nodes located between this heading and the next one. */
  contentNodes: unknown[]
}
88
+
89
/**
 * Split a parsed document into a flat list of sections, one per heading.
 *
 * Only headings that are direct children of the root are considered. Each
 * section owns the top-level nodes between its heading and the next heading
 * (of any depth), or the end of the document.
 *
 * A heading may occupy more than one source line (a setext heading consists
 * of the text line plus its underline), so the heading's own end line is
 * used, rather than its start line, to work out where the body begins and
 * where an empty section ends. For single-line headings both are the same.
 *
 * @param tree - Root of the parsed markdown tree.
 * @returns Sections in document order; empty when the tree has no top-level
 *   headings.
 */
const extractRawSections = (tree: Root): RawSection[] => {
  const headings: {
    heading: string
    level: HeadingLevel
    line: number
    endLine: number
    index: number
  }[] = []

  // Pass 1: record every top-level heading with its position in the tree.
  tree.children.forEach((node, index) => {
    if (node.type !== 'heading') return
    const heading = node as Heading
    const line = getNodeStartLine(node)
    headings.push({
      heading: extractPlainText(heading),
      level: heading.depth as HeadingLevel,
      line,
      // Never let a missing end position place the end before the start.
      endLine: Math.max(line, getNodeEndLine(node)),
      index,
    })
  })

  // Pass 2: attach to each heading the nodes up to the next heading.
  return headings.map((h, i) => {
    const nextHeading = headings[i + 1]
    const endIndex = nextHeading ? nextHeading.index : tree.children.length

    const contentNodes = tree.children.slice(h.index + 1, endIndex)
    const lastContentNode = contentNodes[contentNodes.length - 1]
    // An empty section ends where its heading ends; otherwise it ends with
    // its last content node (never before the heading itself).
    const endLine = lastContentNode
      ? Math.max(h.endLine, getNodeEndLine(lastContentNode))
      : h.endLine

    return {
      heading: h.heading,
      level: h.level,
      startLine: h.line,
      endLine,
      contentStartLine: h.endLine + 1,
      contentNodes,
    }
  })
}
133
+
134
/**
 * Nest a flat, document-ordered list of sections into a tree.
 *
 * A section becomes a child of the nearest preceding section whose level is
 * strictly smaller than its own; sections with no such predecessor are
 * returned as roots.
 *
 * NOTE(review): a section id is built only from `docId` and the slugified
 * heading, so two sections with the same heading text receive the same id —
 * confirm whether consumers rely on ids being unique.
 *
 * @param rawSections - Flat sections in document order.
 * @param docId - Identifier used as the prefix of every section id.
 * @param lines - Source split into lines; section line numbers index into it
 *   as 1-based, inclusive positions.
 * @returns The root sections, with descendants attached through `children`.
 */
const buildSectionHierarchy = (
  rawSections: RawSection[],
  docId: string,
  lines: string[],
): MdSection[] => {
  const roots: MdSection[] = []
  // Chain of sections that can still receive children, outermost first.
  const openSections: { section: MdSection; level: number }[] = []

  for (const raw of rawSections) {
    // Only the section's own top-level nodes are inspected for these flags.
    const topLevelNodes = raw.contentNodes as { type: string }[]
    const containsNodeType = (kind: string): boolean =>
      topLevelNodes.some((candidate) => candidate.type === kind)

    // Raw source from the heading line through the last content line.
    const content = lines.slice(raw.startLine - 1, raw.endLine).join('\n')
    const plainText = extractSectionPlainText(raw.contentNodes as Parent[])

    const section: MdSection = {
      id: `${docId}-${slugify(raw.heading)}`,
      heading: raw.heading,
      level: raw.level,
      content,
      plainText,
      startLine: raw.startLine,
      endLine: raw.endLine,
      children: [],
      metadata: {
        wordCount: countWords(plainText),
        tokenCount: countTokensApprox(content),
        hasCode: containsNodeType('code'),
        hasList: containsNodeType('list'),
        hasTable: containsNodeType('table'),
      },
    }

    // Close every open section that is at the same depth or deeper; what
    // remains on top (if anything) is this section's parent.
    let enclosing = openSections[openSections.length - 1]
    while (enclosing !== undefined && enclosing.level >= raw.level) {
      openSections.pop()
      enclosing = openSections[openSections.length - 1]
    }

    if (enclosing === undefined) {
      roots.push(section)
    } else {
      const siblings = enclosing.section.children as MdSection[]
      siblings.push(section)
    }

    openSections.push({ section, level: raw.level })
  }

  return roots
}
192
+
193
/**
 * Build a single plain-text string from a section's content nodes.
 *
 * A node with a string `value` contributes that value; otherwise a node with
 * `children` contributes the result of `extractPlainText`. Nodes with neither
 * are skipped. Contributions are joined with a single space.
 */
const extractSectionPlainText = (nodes: Parent[]): string =>
  nodes
    .flatMap((node) => {
      if ('value' in node && typeof node.value === 'string') {
        return [node.value]
      }
      if ('children' in node) {
        return [extractPlainText(node)]
      }
      return []
    })
    .join(' ')
204
+
205
/**
 * Count every section in a hierarchy, including all nested descendants.
 */
const countAllSections = (sections: MdSection[]): number =>
  sections.reduce(
    (total, section) =>
      total + 1 + countAllSections(section.children as MdSection[]),
    0,
  )
213
+
214
+ // ============================================================================
215
+ // Link Extraction
216
+ // ============================================================================
217
+
218
/**
 * Collect every link and image in the tree, tagged with the section it appears in.
 *
 * The section id starts as the document id and is replaced each time a heading
 * is visited, so an entry is attributed to the most recently visited heading.
 *
 * @param tree - Parsed markdown root node
 * @param docId - Document id, also used as the section id before any heading
 * @returns Links and images in visit order
 */
const extractLinks = (tree: Root, docId: string): MdLink[] => {
  const collected: MdLink[] = []
  let enclosingSectionId = docId

  visit(tree, (node) => {
    switch (node.type) {
      case 'heading': {
        enclosingSectionId = `${docId}-${slugify(extractPlainText(node as Heading))}`
        break
      }
      case 'link': {
        const linkNode = node as Link
        const kind = isInternalLink(linkNode.url) ? 'internal' : 'external'
        collected.push({
          type: kind,
          href: linkNode.url,
          text: extractPlainText(linkNode),
          sectionId: enclosingSectionId,
          line: getNodeStartLine(node),
        })
        break
      }
      case 'image': {
        const imageNode = node as Image
        collected.push({
          type: 'image',
          href: imageNode.url,
          // Images without alt text are recorded with an empty label.
          text: imageNode.alt ?? '',
          sectionId: enclosingSectionId,
          line: getNodeStartLine(node),
        })
        break
      }
    }
  })

  return collected
}
253
+
254
+ // ============================================================================
255
+ // Code Block Extraction
256
+ // ============================================================================
257
+
258
/**
 * Collect every `code` node in the tree, tagged with the section it appears in.
 *
 * The section id starts as the document id and is replaced each time a heading
 * is visited.
 *
 * @param tree - Parsed markdown root node
 * @param docId - Document id, also used as the section id before any heading
 * @returns Code blocks in visit order
 */
const extractCodeBlocks = (tree: Root, docId: string): MdCodeBlock[] => {
  const found: MdCodeBlock[] = []
  let enclosingSectionId = docId

  visit(tree, (node) => {
    if (node.type === 'heading') {
      enclosingSectionId = `${docId}-${slugify(extractPlainText(node as Heading))}`
      return
    }
    if (node.type !== 'code') {
      return
    }

    const codeNode = node as Code
    found.push({
      // A block with no language is recorded as null.
      language: codeNode.lang ?? null,
      content: codeNode.value,
      sectionId: enclosingSectionId,
      startLine: getNodeStartLine(node),
      endLine: getNodeEndLine(node),
    })
  })

  return found
}
281
+
282
+ // ============================================================================
283
+ // Main Parser Function
284
+ // ============================================================================
285
+
286
/**
 * Optional inputs for `parse`.
 */
export interface ParseOptions {
  /**
   * Path of the source document. Used to generate the document id and as a
   * fallback for the title; `parse` substitutes `'unknown'` when omitted.
   */
  readonly path?: string
  /**
   * Modification time stored in the document metadata; `parse` uses the time
   * of parsing when omitted.
   */
  readonly lastModified?: Date
}
290
+
291
/**
 * Collect the raw content of every section in a hierarchy, depth-first, so
 * that nested sections appear right after their parent (document order).
 */
const collectSectionContents = (sections: readonly MdSection[]): string[] =>
  sections.flatMap((section) => [
    section.content,
    ...collectSectionContents(section.children as MdSection[]),
  ])

/**
 * Parse markdown text into an `MdDocument`.
 *
 * Frontmatter is extracted first; if that fails, a warning is logged and the
 * whole input is treated as markdown with empty frontmatter. The markdown is
 * then parsed to an AST from which sections, links and code blocks are taken.
 *
 * Title resolution order: first level-1 heading, then a string `title` in the
 * frontmatter, then the file name without a trailing `.md`, then `'Untitled'`.
 *
 * @param content - Full document text, including any frontmatter
 * @param options - Optional path and modification time (see `ParseOptions`)
 * @returns An Effect producing the parsed document
 */
export const parse = (
  content: string,
  options: ParseOptions = {},
): Effect.Effect<MdDocument, ParseError> =>
  Effect.gen(function* () {
    const path = options.path ?? 'unknown'
    const docId = generateId(path)
    const now = new Date()

    // Extract frontmatter (graceful handling for malformed YAML)
    let frontmatter: Record<string, unknown> = {}
    let markdownContent: string = content

    try {
      const parsed = matter(content)
      frontmatter = parsed.data
      markdownContent = parsed.content
    } catch (error) {
      // Malformed frontmatter - treat entire content as markdown
      const msg = error instanceof Error ? error.message : String(error)
      console.warn(
        `Warning: Malformed frontmatter in ${path}, skipping: ${msg.split('\n')[0]}`,
      )
    }

    // Parse markdown to AST
    const tree = processor.parse(markdownContent) as Root

    // Split content into lines for reference; line numbers in the AST refer to
    // markdownContent (frontmatter already removed), not to the raw input.
    const lines = markdownContent.split('\n')

    // Extract sections
    const rawSections = extractRawSections(tree)
    const sections = buildSectionHierarchy(rawSections, docId, lines)

    // Extract links and code blocks
    const links = extractLinks(tree, docId)
    const codeBlocks = extractCodeBlocks(tree, docId)

    // Determine title (first H1, then frontmatter title, then filename)
    const firstH1 = sections.find((s) => s.level === 1)
    const frontmatterTitle =
      typeof frontmatter.title === 'string' ? frontmatter.title : undefined
    const fileStem = path.split('/').pop()?.replace(/\.md$/, '')
    // `||` is deliberate: splitting a string never yields an undefined last
    // element, so an empty stem is the only case that needs the final fallback.
    const title =
      firstH1?.heading ?? frontmatterTitle ?? (fileStem || 'Untitled')

    // Calculate metadata. `sections` holds only root sections and each
    // section's content stops at the next heading, so the hierarchy must be
    // flattened or text under nested headings would not be counted.
    const totalContent = collectSectionContents(sections).join('\n')
    const metadata: DocumentMetadata = {
      wordCount: countWords(totalContent),
      tokenCount: countTokensApprox(content),
      headingCount: countAllSections(sections),
      linkCount: links.length,
      codeBlockCount: codeBlocks.length,
      lastModified: options.lastModified ?? now,
      indexedAt: now,
    }

    const document: MdDocument = {
      id: docId,
      path,
      title,
      frontmatter,
      sections,
      links,
      codeBlocks,
      metadata,
    }

    return document
  })
363
+
364
/**
 * Read a markdown file from disk and parse it.
 *
 * The file's text and its stat record are fetched together; the file's
 * modification time is passed to `parse` as `lastModified`, and the file path
 * as `path`.
 *
 * Failures are reported through the Effect error channel:
 * - FileReadError - reading or stat-ing the file failed
 * - ParseError - propagated from `parse`
 *
 * @param filePath - Path of the markdown file to read
 */
export const parseFile = (
  filePath: string,
): Effect.Effect<MdDocument, ParseError | FileReadError> =>
  Effect.gen(function* () {
    const { readFile, stat } = yield* Effect.promise(
      () => import('node:fs/promises'),
    )

    // Any rejection from either call is mapped to a single FileReadError.
    const readContentAndStats = Effect.tryPromise({
      try: () =>
        Promise.all([readFile(filePath, 'utf-8'), stat(filePath)] as const),
      catch: (error) =>
        new FileReadError({
          path: filePath,
          message: error instanceof Error ? error.message : 'Unknown error',
          cause: error,
        }),
    })

    const [content, stats] = yield* readContentAndStats

    return yield* parse(content, {
      path: filePath,
      lastModified: stats.mtime,
    })
  })