mdcontext 0.0.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (337)
  1. package/.changeset/README.md +28 -0
  2. package/.changeset/config.json +11 -0
  3. package/.claude/settings.local.json +25 -0
  4. package/.github/workflows/ci.yml +83 -0
  5. package/.github/workflows/claude-code-review.yml +44 -0
  6. package/.github/workflows/claude.yml +85 -0
  7. package/.github/workflows/release.yml +113 -0
  8. package/.tldrignore +112 -0
  9. package/BACKLOG.md +338 -0
  10. package/CONTRIBUTING.md +186 -0
  11. package/NOTES/NOTES +44 -0
  12. package/README.md +434 -11
  13. package/biome.json +36 -0
  14. package/cspell.config.yaml +14 -0
  15. package/dist/chunk-23UPXDNL.js +3044 -0
  16. package/dist/chunk-2W7MO2DL.js +1366 -0
  17. package/dist/chunk-3NUAZGMA.js +1689 -0
  18. package/dist/chunk-7TOWB2XB.js +366 -0
  19. package/dist/chunk-7XOTOADQ.js +3065 -0
  20. package/dist/chunk-AH2PDM2K.js +3042 -0
  21. package/dist/chunk-BNXWSZ63.js +3742 -0
  22. package/dist/chunk-BTL5DJVU.js +3222 -0
  23. package/dist/chunk-HDHYG7E4.js +104 -0
  24. package/dist/chunk-HLR4KZBP.js +3234 -0
  25. package/dist/chunk-IP3FRFEB.js +1045 -0
  26. package/dist/chunk-KHU56VDO.js +3042 -0
  27. package/dist/chunk-KRYIFLQR.js +88 -0
  28. package/dist/chunk-LBSDNLEM.js +287 -0
  29. package/dist/chunk-MNTQ7HCP.js +2643 -0
  30. package/dist/chunk-MUJELQQ6.js +1387 -0
  31. package/dist/chunk-MXJGMSLV.js +2199 -0
  32. package/dist/chunk-N6QJGC3Z.js +2636 -0
  33. package/dist/chunk-OBELGBPM.js +1713 -0
  34. package/dist/chunk-OT7R5XTA.js +3192 -0
  35. package/dist/chunk-P7X4RA2T.js +106 -0
  36. package/dist/chunk-PIDUQNC2.js +3185 -0
  37. package/dist/chunk-POGCDIH4.js +3187 -0
  38. package/dist/chunk-PSIEOQGZ.js +3043 -0
  39. package/dist/chunk-PVRT3IHA.js +3238 -0
  40. package/dist/chunk-QNN4TT23.js +1430 -0
  41. package/dist/chunk-RE3R45RJ.js +3042 -0
  42. package/dist/chunk-S7E6TFX6.js +803 -0
  43. package/dist/chunk-SG6GLU4U.js +1378 -0
  44. package/dist/chunk-SJCDV2ST.js +274 -0
  45. package/dist/chunk-SYE5XLF3.js +104 -0
  46. package/dist/chunk-T5VLYBZD.js +103 -0
  47. package/dist/chunk-TOQB7VWU.js +3238 -0
  48. package/dist/chunk-VFNMZ4ZQ.js +3228 -0
  49. package/dist/chunk-VVTGZNBT.js +1629 -0
  50. package/dist/chunk-W7Q4RFEV.js +104 -0
  51. package/dist/chunk-XTYYVRLO.js +3190 -0
  52. package/dist/chunk-Y6MDYVJD.js +3063 -0
  53. package/dist/cli/main.d.ts +1 -0
  54. package/dist/cli/main.js +5458 -0
  55. package/dist/index.d.ts +653 -0
  56. package/dist/index.js +79 -0
  57. package/dist/mcp/server.d.ts +1 -0
  58. package/dist/mcp/server.js +472 -0
  59. package/dist/schema-BAWSG7KY.js +22 -0
  60. package/dist/schema-E3QUPL26.js +20 -0
  61. package/dist/schema-EHL7WUT6.js +20 -0
  62. package/docs/019-USAGE.md +625 -0
  63. package/docs/020-current-implementation.md +364 -0
  64. package/docs/021-DOGFOODING-FINDINGS.md +175 -0
  65. package/docs/BACKLOG.md +80 -0
  66. package/docs/CONFIG.md +1123 -0
  67. package/docs/DESIGN.md +439 -0
  68. package/docs/ERRORS.md +383 -0
  69. package/docs/PROJECT.md +88 -0
  70. package/docs/ROADMAP.md +407 -0
  71. package/docs/summarization.md +320 -0
  72. package/docs/test-links.md +9 -0
  73. package/justfile +40 -0
  74. package/package.json +74 -9
  75. package/pnpm-workspace.yaml +5 -0
  76. package/research/INDEX.md +315 -0
  77. package/research/code-review/README.md +90 -0
  78. package/research/code-review/cli-error-handling-review.md +979 -0
  79. package/research/code-review/code-review-validation-report.md +464 -0
  80. package/research/code-review/main-ts-review.md +1128 -0
  81. package/research/config-analysis/01-current-implementation.md +470 -0
  82. package/research/config-analysis/02-strategy-recommendation.md +428 -0
  83. package/research/config-analysis/03-task-candidates.md +715 -0
  84. package/research/config-analysis/033-research-configuration-management.md +828 -0
  85. package/research/config-analysis/034-research-effect-cli-config.md +1504 -0
  86. package/research/config-analysis/04-consolidated-task-candidates.md +277 -0
  87. package/research/config-docs/SUMMARY.md +357 -0
  88. package/research/config-docs/TEST-RESULTS.md +776 -0
  89. package/research/config-docs/TODO.md +542 -0
  90. package/research/config-docs/analysis.md +744 -0
  91. package/research/config-docs/fix-validation.md +502 -0
  92. package/research/config-docs/help-audit.md +264 -0
  93. package/research/config-docs/help-system-analysis.md +890 -0
  94. package/research/dogfood/consolidated-tool-evaluation.md +373 -0
  95. package/research/dogfood/strategy-a/a-synthesis.md +184 -0
  96. package/research/dogfood/strategy-a/a1-docs.md +226 -0
  97. package/research/dogfood/strategy-a/a2-amorphic.md +156 -0
  98. package/research/dogfood/strategy-a/a3-llm.md +164 -0
  99. package/research/dogfood/strategy-b/b-synthesis.md +228 -0
  100. package/research/dogfood/strategy-b/b1-architecture.md +207 -0
  101. package/research/dogfood/strategy-b/b2-gaps.md +258 -0
  102. package/research/dogfood/strategy-b/b3-workflows.md +250 -0
  103. package/research/dogfood/strategy-c/c-synthesis.md +451 -0
  104. package/research/dogfood/strategy-c/c1-explorer.md +192 -0
  105. package/research/dogfood/strategy-c/c2-diver-memory.md +145 -0
  106. package/research/dogfood/strategy-c/c3-diver-control.md +148 -0
  107. package/research/dogfood/strategy-c/c4-diver-failure.md +151 -0
  108. package/research/dogfood/strategy-c/c5-diver-execution.md +221 -0
  109. package/research/dogfood/strategy-c/c6-diver-org.md +221 -0
  110. package/research/effect-cli-error-handling.md +845 -0
  111. package/research/effect-errors-as-values.md +943 -0
  112. package/research/errors-task-analysis/00-consolidated-tasks.md +207 -0
  113. package/research/errors-task-analysis/cli-commands-analysis.md +909 -0
  114. package/research/errors-task-analysis/embeddings-analysis.md +709 -0
  115. package/research/errors-task-analysis/index-search-analysis.md +812 -0
  116. package/research/frontmatter/COMMENTS-ARE-SKIPPED.md +149 -0
  117. package/research/frontmatter/LLM-CODE-NAVIGATION.md +276 -0
  118. package/research/issue-review.md +603 -0
  119. package/research/llm-summarization/agent-cli-tools-2026.md +1082 -0
  120. package/research/llm-summarization/alternative-providers-2026.md +1428 -0
  121. package/research/llm-summarization/anthropic-2026.md +367 -0
  122. package/research/llm-summarization/claude-cli-integration.md +1706 -0
  123. package/research/llm-summarization/cli-integration-patterns.md +3155 -0
  124. package/research/llm-summarization/openai-2026.md +473 -0
  125. package/research/llm-summarization/openai-compatible-providers-2026.md +1022 -0
  126. package/research/llm-summarization/opencode-cli-integration.md +1552 -0
  127. package/research/llm-summarization/prompt-engineering-2026.md +1426 -0
  128. package/research/llm-summarization/prototype-results.md +56 -0
  129. package/research/llm-summarization/provider-switching-patterns-2026.md +2153 -0
  130. package/research/llm-summarization/typescript-llm-libraries-2026.md +2436 -0
  131. package/research/mdcontext-error-analysis.md +521 -0
  132. package/research/mdcontext-pudding/00-EXECUTIVE-SUMMARY.md +282 -0
  133. package/research/mdcontext-pudding/01-index-embed.md +956 -0
  134. package/research/mdcontext-pudding/02-search-COMMANDS.md +142 -0
  135. package/research/mdcontext-pudding/02-search-SUMMARY.md +146 -0
  136. package/research/mdcontext-pudding/02-search.md +970 -0
  137. package/research/mdcontext-pudding/03-context.md +779 -0
  138. package/research/mdcontext-pudding/04-navigation-and-analytics.md +803 -0
  139. package/research/mdcontext-pudding/04-tree.md +704 -0
  140. package/research/mdcontext-pudding/05-config.md +1038 -0
  141. package/research/mdcontext-pudding/06-links-summary.txt +87 -0
  142. package/research/mdcontext-pudding/06-links.md +679 -0
  143. package/research/mdcontext-pudding/07-stats.md +693 -0
  144. package/research/mdcontext-pudding/BUG-FIX-PLAN.md +388 -0
  145. package/research/mdcontext-pudding/P0-BUG-VALIDATION.md +167 -0
  146. package/research/mdcontext-pudding/README.md +168 -0
  147. package/research/mdcontext-pudding/TESTING-SUMMARY.md +128 -0
  148. package/research/npm_publish/011-npm-workflow-research-agent2.md +792 -0
  149. package/research/npm_publish/012-npm-workflow-research-agent1.md +530 -0
  150. package/research/npm_publish/013-npm-workflow-research-agent3.md +722 -0
  151. package/research/npm_publish/014-npm-workflow-synthesis.md +556 -0
  152. package/research/npm_publish/031-npm-workflow-task-analysis.md +134 -0
  153. package/research/research-quality-review.md +834 -0
  154. package/research/semantic-search/002-research-embedding-models.md +490 -0
  155. package/research/semantic-search/003-research-rag-alternatives.md +523 -0
  156. package/research/semantic-search/004-research-vector-search.md +841 -0
  157. package/research/semantic-search/032-research-semantic-search.md +427 -0
  158. package/research/semantic-search/embedding-text-analysis.md +156 -0
  159. package/research/semantic-search/multi-word-failure-reproduction.md +171 -0
  160. package/research/semantic-search/query-processing-analysis.md +207 -0
  161. package/research/semantic-search/root-cause-and-solution.md +114 -0
  162. package/research/semantic-search/threshold-validation-report.md +69 -0
  163. package/research/semantic-search/vector-search-analysis.md +63 -0
  164. package/research/task-management-2026/00-synthesis-recommendations.md +295 -0
  165. package/research/task-management-2026/01-ai-workflow-tools.md +416 -0
  166. package/research/task-management-2026/02-agent-framework-patterns.md +476 -0
  167. package/research/task-management-2026/03-lightweight-file-based.md +567 -0
  168. package/research/task-management-2026/04-established-tools-ai-features.md +541 -0
  169. package/research/task-management-2026/linear/01-core-features-workflow.md +771 -0
  170. package/research/task-management-2026/linear/02-api-integrations.md +930 -0
  171. package/research/task-management-2026/linear/03-ai-features.md +368 -0
  172. package/research/task-management-2026/linear/04-pricing-setup.md +205 -0
  173. package/research/task-management-2026/linear/05-usage-patterns-best-practices.md +605 -0
  174. package/research/test-path-issues.md +276 -0
  175. package/review/ALP-76/1-error-type-design.md +962 -0
  176. package/review/ALP-76/2-error-handling-patterns.md +906 -0
  177. package/review/ALP-76/3-error-presentation.md +624 -0
  178. package/review/ALP-76/4-test-coverage.md +625 -0
  179. package/review/ALP-76/5-migration-completeness.md +440 -0
  180. package/review/ALP-76/6-effect-best-practices.md +755 -0
  181. package/scripts/apply-branch-protection.sh +47 -0
  182. package/scripts/branch-protection-templates.json +79 -0
  183. package/scripts/prototype-summarization.ts +346 -0
  184. package/scripts/rebuild-hnswlib.js +58 -0
  185. package/scripts/setup-branch-protection.sh +64 -0
  186. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/active-provider.json +7 -0
  187. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.json +541 -0
  188. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.meta.json +5 -0
  189. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/config.json +8 -0
  190. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
  191. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
  192. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/documents.json +60 -0
  193. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/links.json +13 -0
  194. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/sections.json +1197 -0
  195. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/configuration-management.md +99 -0
  196. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/distributed-systems.md +92 -0
  197. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/error-handling.md +78 -0
  198. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/failure-automation.md +55 -0
  199. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/job-context.md +69 -0
  200. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/process-orchestration.md +99 -0
  201. package/src/cli/argv-preprocessor.test.ts +210 -0
  202. package/src/cli/argv-preprocessor.ts +202 -0
  203. package/src/cli/cli.test.ts +627 -0
  204. package/src/cli/commands/backlinks.ts +54 -0
  205. package/src/cli/commands/config-cmd.ts +642 -0
  206. package/src/cli/commands/context.ts +285 -0
  207. package/src/cli/commands/duplicates.ts +122 -0
  208. package/src/cli/commands/embeddings.ts +529 -0
  209. package/src/cli/commands/index-cmd.ts +480 -0
  210. package/src/cli/commands/index.ts +16 -0
  211. package/src/cli/commands/links.ts +52 -0
  212. package/src/cli/commands/search.ts +1281 -0
  213. package/src/cli/commands/stats.ts +149 -0
  214. package/src/cli/commands/tree.ts +128 -0
  215. package/src/cli/config-layer.ts +176 -0
  216. package/src/cli/error-handler.test.ts +235 -0
  217. package/src/cli/error-handler.ts +655 -0
  218. package/src/cli/flag-schemas.ts +341 -0
  219. package/src/cli/help.ts +588 -0
  220. package/src/cli/index.ts +9 -0
  221. package/src/cli/main.ts +435 -0
  222. package/src/cli/options.ts +41 -0
  223. package/src/cli/shared-error-handling.ts +199 -0
  224. package/src/cli/typo-suggester.test.ts +105 -0
  225. package/src/cli/typo-suggester.ts +130 -0
  226. package/src/cli/utils.ts +259 -0
  227. package/src/config/file-provider.test.ts +320 -0
  228. package/src/config/file-provider.ts +273 -0
  229. package/src/config/index.ts +72 -0
  230. package/src/config/integration.test.ts +667 -0
  231. package/src/config/precedence.test.ts +277 -0
  232. package/src/config/precedence.ts +451 -0
  233. package/src/config/schema.test.ts +414 -0
  234. package/src/config/schema.ts +603 -0
  235. package/src/config/service.test.ts +320 -0
  236. package/src/config/service.ts +243 -0
  237. package/src/config/testing.test.ts +264 -0
  238. package/src/config/testing.ts +110 -0
  239. package/src/core/index.ts +1 -0
  240. package/src/core/types.ts +113 -0
  241. package/src/duplicates/detector.test.ts +183 -0
  242. package/src/duplicates/detector.ts +414 -0
  243. package/src/duplicates/index.ts +18 -0
  244. package/src/embeddings/embedding-namespace.test.ts +300 -0
  245. package/src/embeddings/embedding-namespace.ts +947 -0
  246. package/src/embeddings/heading-boost.test.ts +222 -0
  247. package/src/embeddings/hnsw-build-options.test.ts +198 -0
  248. package/src/embeddings/hyde.test.ts +272 -0
  249. package/src/embeddings/hyde.ts +264 -0
  250. package/src/embeddings/index.ts +10 -0
  251. package/src/embeddings/openai-provider.ts +414 -0
  252. package/src/embeddings/pricing.json +22 -0
  253. package/src/embeddings/provider-constants.ts +204 -0
  254. package/src/embeddings/provider-errors.test.ts +967 -0
  255. package/src/embeddings/provider-errors.ts +565 -0
  256. package/src/embeddings/provider-factory.test.ts +240 -0
  257. package/src/embeddings/provider-factory.ts +225 -0
  258. package/src/embeddings/provider-integration.test.ts +788 -0
  259. package/src/embeddings/query-preprocessing.test.ts +187 -0
  260. package/src/embeddings/semantic-search-threshold.test.ts +508 -0
  261. package/src/embeddings/semantic-search.ts +1270 -0
  262. package/src/embeddings/types.ts +359 -0
  263. package/src/embeddings/vector-store.ts +708 -0
  264. package/src/embeddings/voyage-provider.ts +313 -0
  265. package/src/errors/errors.test.ts +845 -0
  266. package/src/errors/index.ts +533 -0
  267. package/src/index/ignore-patterns.test.ts +354 -0
  268. package/src/index/ignore-patterns.ts +305 -0
  269. package/src/index/index.ts +4 -0
  270. package/src/index/indexer.ts +684 -0
  271. package/src/index/storage.ts +260 -0
  272. package/src/index/types.ts +147 -0
  273. package/src/index/watcher.ts +189 -0
  274. package/src/index.ts +30 -0
  275. package/src/integration/search-keyword.test.ts +678 -0
  276. package/src/mcp/server.ts +612 -0
  277. package/src/parser/index.ts +1 -0
  278. package/src/parser/parser.test.ts +291 -0
  279. package/src/parser/parser.ts +394 -0
  280. package/src/parser/section-filter.test.ts +277 -0
  281. package/src/parser/section-filter.ts +392 -0
  282. package/src/search/__tests__/hybrid-search.test.ts +650 -0
  283. package/src/search/bm25-store.ts +366 -0
  284. package/src/search/cross-encoder.test.ts +253 -0
  285. package/src/search/cross-encoder.ts +406 -0
  286. package/src/search/fuzzy-search.test.ts +419 -0
  287. package/src/search/fuzzy-search.ts +273 -0
  288. package/src/search/hybrid-search.ts +448 -0
  289. package/src/search/path-matcher.test.ts +276 -0
  290. package/src/search/path-matcher.ts +33 -0
  291. package/src/search/query-parser.test.ts +260 -0
  292. package/src/search/query-parser.ts +319 -0
  293. package/src/search/searcher.test.ts +280 -0
  294. package/src/search/searcher.ts +724 -0
  295. package/src/search/wink-bm25.d.ts +30 -0
  296. package/src/summarization/cli-providers/claude.ts +202 -0
  297. package/src/summarization/cli-providers/detection.test.ts +273 -0
  298. package/src/summarization/cli-providers/detection.ts +118 -0
  299. package/src/summarization/cli-providers/index.ts +8 -0
  300. package/src/summarization/cost.test.ts +139 -0
  301. package/src/summarization/cost.ts +102 -0
  302. package/src/summarization/error-handler.test.ts +127 -0
  303. package/src/summarization/error-handler.ts +111 -0
  304. package/src/summarization/index.ts +102 -0
  305. package/src/summarization/pipeline.test.ts +498 -0
  306. package/src/summarization/pipeline.ts +231 -0
  307. package/src/summarization/prompts.test.ts +269 -0
  308. package/src/summarization/prompts.ts +133 -0
  309. package/src/summarization/provider-factory.test.ts +396 -0
  310. package/src/summarization/provider-factory.ts +178 -0
  311. package/src/summarization/types.ts +184 -0
  312. package/src/summarize/budget-bugs.test.ts +620 -0
  313. package/src/summarize/formatters.ts +419 -0
  314. package/src/summarize/index.ts +20 -0
  315. package/src/summarize/summarizer.test.ts +275 -0
  316. package/src/summarize/summarizer.ts +597 -0
  317. package/src/summarize/verify-bugs.test.ts +238 -0
  318. package/src/types/huggingface-transformers.d.ts +66 -0
  319. package/src/utils/index.ts +1 -0
  320. package/src/utils/tokens.test.ts +142 -0
  321. package/src/utils/tokens.ts +186 -0
  322. package/tests/fixtures/cli/.mdcontext/active-provider.json +7 -0
  323. package/tests/fixtures/cli/.mdcontext/config.json +8 -0
  324. package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
  325. package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
  326. package/tests/fixtures/cli/.mdcontext/indexes/documents.json +33 -0
  327. package/tests/fixtures/cli/.mdcontext/indexes/links.json +12 -0
  328. package/tests/fixtures/cli/.mdcontext/indexes/sections.json +247 -0
  329. package/tests/fixtures/cli/README.md +9 -0
  330. package/tests/fixtures/cli/api-reference.md +11 -0
  331. package/tests/fixtures/cli/getting-started.md +11 -0
  332. package/tests/integration/embed-index.test.ts +712 -0
  333. package/tests/integration/search-context.test.ts +469 -0
  334. package/tests/integration/search-semantic.test.ts +522 -0
  335. package/tsconfig.json +26 -0
  336. package/vitest.config.ts +16 -0
  337. package/vitest.setup.ts +12 -0
@@ -0,0 +1,277 @@
1
+ /**
2
+ * Tests for section filtering utilities
3
+ */
4
+
5
+ import { describe, expect, it } from 'vitest'
6
+ import type { HeadingLevel, MdDocument, MdSection } from '../core/types.js'
7
+ import {
8
+ buildSectionList,
9
+ extractSectionContent,
10
+ filterDocumentSections,
11
+ filterExcludedSections,
12
+ } from './section-filter.js'
13
+
14
+ // Test helper: builds a minimal MdSection fixture. Only heading, level, children and tokenCount vary; every other field is a fixed placeholder.
15
+ const createSection = (
16
+ heading: string,
17
+ level: HeadingLevel,
18
+ children: MdSection[] = [], // nested subsections; omitted means a leaf section
19
+ tokenCount: number = 100, // stored below as metadata.tokenCount
20
+ ): MdSection => ({
21
+ id: `section-${heading.toLowerCase().replace(/\s+/g, '-')}`,
22
+ heading,
23
+ level, // NOTE: the content string below always uses a single # marker, whatever the level
24
+ content: `# ${heading}\n\nContent for ${heading}`,
25
+ plainText: `Content for ${heading}`,
26
+ startLine: 1, // placeholder line range, the same for every fixture
27
+ endLine: 10,
28
+ children,
29
+ metadata: {
30
+ wordCount: 10, // fixed placeholder, not derived from the content
31
+ tokenCount,
32
+ hasCode: false,
33
+ hasList: false,
34
+ hasTable: false,
35
+ },
36
+ })
37
+
38
+ // Test helper: wraps the given top-level sections in a minimal MdDocument fixture with fixed id, path and title.
39
+ const createDocument = (sections: MdSection[]): MdDocument => ({
40
+ id: 'test-doc',
41
+ path: '/test/doc.md',
42
+ title: 'Test Document',
43
+ sections,
44
+ links: [],
45
+ codeBlocks: [],
46
+ metadata: {
47
+ tokenCount: sections.reduce((acc, s) => acc + s.metadata.tokenCount, 0), // sums top-level sections only; children are not added
48
+ headingCount: sections.length, // counts top-level sections only
49
+ linkCount: 0,
50
+ codeBlockCount: 0,
51
+ wordCount: 100, // fixed placeholder
52
+ lastModified: new Date(), // current time when the fixture is built
53
+ indexedAt: new Date(),
54
+ },
55
+ frontmatter: {},
56
+ })
57
+
58
+ describe('section-filter', () => {
59
+ describe('filterExcludedSections', () => { // operates on a flat section list, not on a document tree
60
+ const sectionList = [ // 8 flat entries: 4 top-level sections and 4 subsections
61
+ { number: '1', heading: 'Introduction', level: 1, tokenCount: 100 },
62
+ { number: '1.1', heading: 'Overview', level: 2, tokenCount: 50 },
63
+ { number: '2', heading: 'Installation', level: 1, tokenCount: 200 },
64
+ { number: '2.1', heading: 'Requirements', level: 2, tokenCount: 75 },
65
+ { number: '2.2', heading: 'Setup Steps', level: 2, tokenCount: 80 },
66
+ { number: '3', heading: 'API Reference', level: 1, tokenCount: 500 },
67
+ { number: '3.1', heading: 'Methods', level: 2, tokenCount: 300 },
68
+ { number: '4', heading: 'License', level: 1, tokenCount: 50 },
69
+ ]
70
+
71
+ it('returns all sections when no exclusion patterns provided', () => {
72
+ const result = filterExcludedSections(sectionList, [])
73
+ expect(result).toEqual(sectionList)
74
+ })
75
+
76
+ it('excludes sections by exact heading match', () => {
77
+ const result = filterExcludedSections(sectionList, ['License'])
78
+ expect(result).toHaveLength(7) // 8 entries minus License
79
+ expect(result.find((s) => s.heading === 'License')).toBeUndefined()
80
+ })
81
+
82
+ it('excludes sections by partial heading match', () => {
83
+ const result = filterExcludedSections(sectionList, ['Setup']) // Setup is only a substring of the heading Setup Steps
84
+ expect(result).toHaveLength(7)
85
+ expect(result.find((s) => s.heading === 'Setup Steps')).toBeUndefined()
86
+ })
87
+
88
+ it('excludes sections by glob pattern', () => {
89
+ const result = filterExcludedSections(sectionList, ['*Reference*'])
90
+ expect(result).toHaveLength(7) // only the matching entry is dropped; its subsection 3.1 stays in the flat list
91
+ expect(result.find((s) => s.heading === 'API Reference')).toBeUndefined()
92
+ })
93
+
94
+ it('excludes sections by section number', () => {
95
+ const result = filterExcludedSections(sectionList, ['2.1']) // pattern is a section number rather than a heading
96
+ expect(result).toHaveLength(7)
97
+ expect(result.find((s) => s.number === '2.1')).toBeUndefined()
98
+ })
99
+
100
+ it('excludes multiple sections with multiple patterns', () => {
101
+ const result = filterExcludedSections(sectionList, [
102
+ 'License',
103
+ 'Overview',
104
+ ])
105
+ expect(result).toHaveLength(6) // 8 entries minus the two matches
106
+ expect(result.find((s) => s.heading === 'License')).toBeUndefined()
107
+ expect(result.find((s) => s.heading === 'Overview')).toBeUndefined()
108
+ })
109
+
110
+ it('handles case-insensitive matching', () => {
111
+ const result = filterExcludedSections(sectionList, ['LICENSE']) // upper-case pattern against the heading License
112
+ expect(result).toHaveLength(7)
113
+ expect(result.find((s) => s.heading === 'License')).toBeUndefined()
114
+ })
115
+ })
116
+
117
+ describe('extractSectionContent with exclusion', () => {
118
+ const doc = createDocument([ // numbering asserted below: 1 Introduction (1.1, 1.2), 2 API (2.1, 2.2), 3 License
119
+ createSection('Introduction', 1, [
120
+ createSection('Getting Started', 2),
121
+ createSection('Quick Start', 2),
122
+ ]),
123
+ createSection('API', 1, [
124
+ createSection('Methods', 2),
125
+ createSection('Properties', 2),
126
+ ]),
127
+ createSection('License', 1),
128
+ ])
129
+
130
+ it('extracts all matching sections without exclusion', () => {
131
+ const result = extractSectionContent(doc, '*')
132
+ expect(result.matchedNumbers).toHaveLength(7) // 3 top-level sections plus 4 subsections
133
+ expect(result.excludedNumbers).toHaveLength(0)
134
+ })
135
+
136
+ it('excludes sections matching exclusion pattern', () => {
137
+ const result = extractSectionContent(doc, '*', {
138
+ exclude: ['License'],
139
+ })
140
+ expect(result.matchedNumbers).toHaveLength(6) // everything except License
141
+ expect(result.excludedNumbers).toEqual(['3']) // License is the third top-level section
142
+ expect(
143
+ result.sections.find((s) => s.heading === 'License'),
144
+ ).toBeUndefined()
145
+ })
146
+
147
+ it('reports excluded sections in excludedNumbers', () => {
148
+ const result = extractSectionContent(doc, '*', {
149
+ exclude: ['Quick Start', 'Properties'],
150
+ })
151
+ expect(result.excludedNumbers).toContain('1.2') // Quick Start
152
+ expect(result.excludedNumbers).toContain('2.2') // Properties
153
+ })
154
+
155
+ it('combines shallow and exclude options', () => {
156
+ const result = extractSectionContent(doc, 'Introduction', {
157
+ shallow: true,
158
+ exclude: ['Getting Started'],
159
+ })
160
+ // With shallow, we only get Introduction without children
161
+ // The exclude pattern only affects the matched sections list (this test makes no assertion on excludedNumbers)
162
+ expect(result.sections).toHaveLength(1)
163
+ expect(result.sections[0]?.heading).toBe('Introduction')
164
+ })
165
+ })
166
+
167
+ describe('filterDocumentSections', () => { // operates on the document tree and reports how many sections were removed
168
+ const doc = createDocument([ // 3 top-level sections; only Introduction has children
169
+ createSection('Introduction', 1, [
170
+ createSection('Overview', 2),
171
+ createSection('Goals', 2),
172
+ ]),
173
+ createSection('Installation', 1),
174
+ createSection('License', 1),
175
+ ])
176
+
177
+ it('returns original document when no exclusion patterns', () => {
178
+ const result = filterDocumentSections(doc, [])
179
+ expect(result.document).toBe(doc) // toBe checks identity: the input object itself comes back
180
+ expect(result.excludedCount).toBe(0)
181
+ })
182
+
183
+ it('filters out matching sections from document', () => {
184
+ const result = filterDocumentSections(doc, ['License'])
185
+ expect(result.excludedCount).toBe(1)
186
+ expect(result.document.sections).toHaveLength(2) // Introduction and Installation remain
187
+ expect(
188
+ result.document.sections.find((s) => s.heading === 'License'),
189
+ ).toBeUndefined()
190
+ })
191
+
192
+ it('filters out nested sections', () => {
193
+ const result = filterDocumentSections(doc, ['Overview'])
194
+ expect(result.excludedCount).toBe(1) // Overview has no children, so exactly one section is counted
195
+ // Find Introduction section
196
+ const intro = result.document.sections.find(
197
+ (s) => s.heading === 'Introduction',
198
+ )
199
+ expect(intro).toBeDefined()
200
+ // Overview should be removed from children
201
+ expect(
202
+ intro?.children.find((c) => c.heading === 'Overview'),
203
+ ).toBeUndefined()
204
+ // Goals should still be there
205
+ expect(intro?.children.find((c) => c.heading === 'Goals')).toBeDefined()
206
+ })
207
+
208
+ it('filters multiple sections with glob pattern', () => {
209
+ const result = filterDocumentSections(doc, ['*stallation*', 'License'])
210
+ expect(result.excludedCount).toBe(2) // Installation (via the glob) and License
211
+ expect(result.document.sections).toHaveLength(1)
212
+ expect(result.document.sections[0]?.heading).toBe('Introduction')
213
+ })
214
+
215
+ it('preserves document structure for non-matching sections', () => {
216
+ const result = filterDocumentSections(doc, ['NonExistent'])
217
+ expect(result.document).toBe(doc) // identity again: nothing matched, so the input object is returned as is
218
+ expect(result.excludedCount).toBe(0)
219
+ })
220
+
221
+ it('counts descendants when parent section is excluded', () => {
222
+ // Introduction has 2 children (Overview, Goals), so excluding Introduction
223
+ // should count 3 total excluded sections
224
+ const result = filterDocumentSections(doc, ['Introduction'])
225
+ expect(result.excludedCount).toBe(3) // Introduction + Overview + Goals
226
+ expect(result.document.sections).toHaveLength(2) // Installation + License
227
+ expect(
228
+ result.document.sections.find((s) => s.heading === 'Introduction'),
229
+ ).toBeUndefined()
230
+ })
231
+
232
+ it('counts deeply nested descendants correctly', () => {
233
+ const deepDoc = createDocument([ // three levels deep under Root, plus one unrelated sibling
234
+ createSection('Root', 1, [
235
+ createSection('Child 1', 2, [
236
+ createSection('Grandchild 1', 3),
237
+ createSection('Grandchild 2', 3),
238
+ ]),
239
+ createSection('Child 2', 2),
240
+ ]),
241
+ createSection('Other', 1),
242
+ ])
243
+ const result = filterDocumentSections(deepDoc, ['Root'])
244
+ // Root + Child 1 + Grandchild 1 + Grandchild 2 + Child 2 = 5
245
+ expect(result.excludedCount).toBe(5)
246
+ expect(result.document.sections).toHaveLength(1)
247
+ expect(result.document.sections[0]?.heading).toBe('Other')
248
+ })
249
+
250
+ it('does not double-count when multiple patterns match same section', () => {
251
+ const result = filterDocumentSections(doc, ['Introduction', 'Intro*'])
252
+ // Both patterns match Introduction, but should only count once
253
+ // Introduction + Overview + Goals = 3
254
+ expect(result.excludedCount).toBe(3)
255
+ })
256
+ })
257
+
258
+ describe('buildSectionList', () => {
259
+ const doc = createDocument([ // tree: A (A.1 (A.1.1), A.2) and B
260
+ createSection('A', 1, [
261
+ createSection('A.1', 2, [createSection('A.1.1', 3)]),
262
+ createSection('A.2', 2),
263
+ ]),
264
+ createSection('B', 1),
265
+ ])
266
+
267
+ it('assigns correct hierarchical numbers', () => {
268
+ const list = buildSectionList(doc)
269
+ expect(list).toHaveLength(5) // every section at every depth gets one entry
270
+ expect(list[0]).toMatchObject({ number: '1', heading: 'A' }) // entries are expected in depth-first document order
271
+ expect(list[1]).toMatchObject({ number: '1.1', heading: 'A.1' })
272
+ expect(list[2]).toMatchObject({ number: '1.1.1', heading: 'A.1.1' })
273
+ expect(list[3]).toMatchObject({ number: '1.2', heading: 'A.2' })
274
+ expect(list[4]).toMatchObject({ number: '2', heading: 'B' })
275
+ })
276
+ })
277
+ })
@@ -0,0 +1,392 @@
1
+ /**
2
+ * Section filtering utilities for extracting specific sections from markdown documents
3
+ */
4
+
5
+ import type { MdDocument, MdSection } from '../core/types.js'
6
+
7
+ // ============================================================================
8
+ // Simple Glob Matching
9
+ // ============================================================================
10
+
11
/**
 * Case-insensitive glob test of `text` against `pattern`.
 *
 * `*` stands for any run of characters (including none) and `?` for exactly
 * one character. Every other character is matched literally, and the pattern
 * must cover the whole of `text`.
 */
const globMatch = (text: string, pattern: string): boolean => {
  // Characters that carry meaning in a regular expression and therefore need
  // a backslash to be matched literally.
  const regexSpecials = '.+^${}()|[]\\'

  // Translate the glob to a regex source one character at a time.
  let regexSource = ''
  for (const ch of pattern) {
    if (ch === '*') {
      regexSource += '.*'
    } else if (ch === '?') {
      regexSource += '.'
    } else if (regexSpecials.includes(ch)) {
      regexSource += `\\${ch}`
    } else {
      regexSource += ch
    }
  }

  // Anchor both ends so a partial hit does not count as a match.
  return new RegExp(`^${regexSource}$`, 'i').test(text)
}
24
+
25
+ // ============================================================================
26
+ // Types
27
+ // ============================================================================
28
+
29
/**
 * One row of the flattened section listing produced by `buildSectionList`.
 */
export interface SectionListItem {
  /** Dotted hierarchical position of the section, e.g. "1", "1.2", "2.1.3" */
  readonly number: string
  /** Heading text of the section */
  readonly heading: string
  /** Heading level as recorded on the source section */
  readonly level: number
  /** Token count taken from the section's metadata */
  readonly tokenCount: number
}
35
+
36
/**
 * Optional switches accepted by `extractSectionContent`.
 */
export interface SectionFilterOptions {
  /** When true, matched sections are returned without their nested subsections */
  readonly shallow?: boolean
  /** Selectors (number, name, or glob) for matched sections that should be dropped from the results */
  readonly exclude?: readonly string[]
}
42
+
43
+ // ============================================================================
44
+ // Section Map Building
45
+ // ============================================================================
46
+
47
/**
 * Flatten a document's section tree into a depth-first list, tagging every
 * section with its hierarchical number ("1", "1.1", "1.2", "2", ...).
 *
 * @param document - Document whose `sections` tree is walked
 * @returns One entry per section, parents before their children
 */
export const buildSectionList = (document: MdDocument): SectionListItem[] => {
  const flattened: SectionListItem[] = []

  // Visit one sibling group; `parentNumber` is '' for the top level.
  const walk = (
    siblings: readonly MdSection[],
    parentNumber: string,
  ): void => {
    let position = 0
    for (const current of siblings) {
      position += 1 // numbering is 1-based within each sibling group
      const number =
        parentNumber === '' ? `${position}` : `${parentNumber}.${position}`

      flattened.push({
        number,
        heading: current.heading,
        level: current.level,
        tokenCount: current.metadata.tokenCount,
      })

      // Descend before moving to the next sibling so the list stays pre-order.
      walk(current.children, number)
    }
  }

  walk(document.sections, '')
  return flattened
}
80
+
81
/**
 * Render a section listing as text, one line per section.
 *
 * Each line reads `<number>. <heading> (<tokenCount> tokens)` and is indented
 * by one unit for every dot in the section number.
 *
 * @param sections - Flattened listing, typically from `buildSectionList`
 * @returns The lines joined with newlines ('' for an empty listing)
 */
export const formatSectionList = (sections: SectionListItem[]): string =>
  sections
    .map((entry) => {
      // "1" -> depth 0, "1.2" -> depth 1, "1.2.3" -> depth 2, ...
      const depth = entry.number.split('.').length - 1
      const indent = ' '.repeat(depth)
      return `${indent}${entry.number}. ${entry.heading} (${entry.tokenCount} tokens)`
    })
    .join('\n')
98
+
99
+ // ============================================================================
100
+ // Section Matching
101
+ // ============================================================================
102
+
103
/**
 * Decide whether a section is selected by `selector`.
 *
 * Rules are tried in this order:
 *  1. A blank (empty or whitespace-only) selector selects nothing.
 *  2. A selector made only of digits and dots (e.g. "5.3") is compared
 *     against the section number and must be identical.
 *  3. A selector equal to the heading, ignoring case, matches.
 *  4. A selector containing `*` or `?` is treated as a glob over the heading.
 *  5. Otherwise the selector matches when the heading contains it,
 *     ignoring case.
 *
 * @param section - Listing entry to test
 * @param selector - Section number, heading text, or glob pattern
 * @returns true when the section is selected
 */
const matchesSelector = (
  section: SectionListItem,
  selector: string,
): boolean => {
  // Without this guard a blank selector would reach the substring rule, and
  // every string contains the empty string, so it would select every section
  // (or, used as an exclusion pattern, remove every section).
  if (selector.trim() === '') {
    return false
  }

  // Number selectors never fall through to heading matching.
  if (/^[\d.]+$/.test(selector)) {
    return section.number === selector
  }

  const heading = section.heading.toLowerCase()
  const needle = selector.toLowerCase()

  // The exact comparison runs before the glob rule so a heading that literally
  // contains '*' or '?' can still be selected by typing it verbatim.
  if (heading === needle) {
    return true
  }

  if (selector.includes('*') || selector.includes('?')) {
    return globMatch(section.heading, selector)
  }

  // Fallback: case-insensitive substring match.
  return heading.includes(needle)
}
129
+
130
/**
 * Report whether at least one exclusion pattern selects the section.
 * Patterns follow the same rules as any other selector.
 */
const matchesExclusionPatterns = (
  section: SectionListItem,
  excludePatterns: readonly string[],
): boolean => {
  for (const candidatePattern of excludePatterns) {
    if (matchesSelector(section, candidatePattern)) {
      return true // first hit is enough
    }
  }
  return false
}
139
+
140
/**
 * Collect every entry of `sectionList` that `selector` selects, keeping the
 * original order.
 *
 * @param sectionList - Flattened section listing to search
 * @param selector - Section number, heading text, or glob pattern
 * @returns A new array containing only the selected entries
 */
export const findMatchingSections = (
  sectionList: SectionListItem[],
  selector: string,
): SectionListItem[] => {
  const hits: SectionListItem[] = []
  for (const entry of sectionList) {
    if (matchesSelector(entry, selector)) {
      hits.push(entry)
    }
  }
  return hits
}
149
+
150
/**
 * Drop every entry that is selected by any exclusion pattern.
 *
 * @param sectionList - Entries to filter
 * @param excludePatterns - Selectors naming the entries to remove
 * @returns The surviving entries in their original order. When no patterns
 *   are given, the input array itself is returned rather than a copy.
 */
export const filterExcludedSections = (
  sectionList: SectionListItem[],
  excludePatterns: readonly string[],
): SectionListItem[] => {
  const nothingToExclude = excludePatterns.length === 0
  if (nothingToExclude) {
    return sectionList
  }

  const survivors: SectionListItem[] = []
  for (const entry of sectionList) {
    if (!matchesExclusionPatterns(entry, excludePatterns)) {
      survivors.push(entry)
    }
  }
  return survivors
}
165
+
166
/**
 * Collect the numbers of all sections nested (at any depth) beneath the
 * section numbered `parentNumber`. The parent itself is not included.
 *
 * @param sectionList - Flattened section listing to scan
 * @param parentNumber - Hierarchical number of the ancestor, e.g. "1.2"
 * @returns Set of descendant section numbers, in listing order
 */
const getDescendantNumbers = (
  sectionList: SectionListItem[],
  parentNumber: string,
): Set<string> => {
  // The trailing dot keeps "1" from claiming "10" or "11.2" as descendants.
  const childPrefix = `${parentNumber}.`

  return new Set<string>(
    sectionList
      .map((entry) => entry.number)
      .filter((candidate) => candidate.startsWith(childPrefix)),
  )
}
184
+
185
+ // ============================================================================
186
+ // Section Content Extraction
187
+ // ============================================================================
188
+
189
/**
 * Extract the sections of a document that are selected by `selector`.
 *
 * Steps:
 *  1. Flatten the document and find every section the selector matches.
 *  2. Remove matches that are also selected by `options.exclude`, recording
 *     their numbers in `excludedNumbers`. Exclusion is applied only to the
 *     directly matched sections; subsections carried along inside a kept
 *     match are not filtered here.
 *  3. Resolve the surviving matches back to their `MdSection` objects.
 *
 * In shallow mode each match is returned as a copy with `children` emptied.
 * Otherwise each match is returned with its full subtree, and a match that is
 * nested beneath another match is not emitted a second time, because it is
 * already present in that ancestor's `children`.
 *
 * @param document - Document to extract from
 * @param selector - Section number, heading text, or glob pattern
 * @param options - `shallow` and `exclude` switches (both optional)
 * @returns `sections`: the extracted sections in document order;
 *   `matchedNumbers`: numbers of every match that survived exclusion
 *   (including matches nested under another match);
 *   `excludedNumbers`: numbers of matches removed by `exclude`
 */
export const extractSectionContent = (
  document: MdDocument,
  selector: string,
  options: SectionFilterOptions = {},
): {
  sections: MdSection[]
  matchedNumbers: string[]
  excludedNumbers: string[]
} => {
  const sectionList = buildSectionList(document)
  const candidates = findMatchingSections(sectionList, selector)

  // Apply exclusion patterns if provided.
  const excludePatterns = options.exclude ?? []
  const matchedSections = filterExcludedSections(candidates, excludePatterns)

  // Section numbers are unique within a document, so a Set of the surviving
  // numbers identifies what was filtered out without a quadratic array scan.
  const survivingNumbers = new Set(matchedSections.map((s) => s.number))
  const excludedNumbers = candidates
    .filter((s) => !survivingNumbers.has(s.number))
    .map((s) => s.number)

  if (matchedSections.length === 0) {
    return { sections: [], matchedNumbers: [], excludedNumbers }
  }

  const matchedNumbers = matchedSections.map((s) => s.number)

  // In deep mode a matched section already brings its whole subtree along.
  // Record every descendant of a match so that a nested match is not emitted
  // again as a separate top-level entry, which would duplicate its content.
  const coveredByAncestor = new Set<string>()
  if (!options.shallow) {
    for (const number of matchedNumbers) {
      for (const descendant of getDescendantNumbers(sectionList, number)) {
        coveredByAncestor.add(descendant)
      }
    }
  }

  // Build a map from section number to section for efficient lookup. The
  // numbering scheme mirrors buildSectionList (1-based, dot-separated).
  const numberToSection = new Map<string, MdSection>()

  const mapSections = (
    sections: readonly MdSection[],
    prefix: string,
  ): void => {
    sections.forEach((section, i) => {
      const number = prefix ? `${prefix}.${i + 1}` : `${i + 1}`
      numberToSection.set(number, section)
      mapSections(section.children, number)
    })
  }

  mapSections(document.sections, '')

  // Resolve the matches, in document order.
  const extractedSections: MdSection[] = []

  for (const number of matchedNumbers) {
    if (coveredByAncestor.has(number)) {
      continue // already included via a matched ancestor's children
    }

    const section = numberToSection.get(number)
    if (!section) {
      continue
    }

    // Shallow mode hands back a copy so the source tree keeps its children.
    extractedSections.push(
      options.shallow ? { ...section, children: [] } : section,
    )
  }

  return { sections: extractedSections, matchedNumbers, excludedNumbers }
}
277
+
278
/**
 * Render extracted sections back into markdown text.
 *
 * Every section becomes a heading line (`#` repeated `level` times, then the
 * heading text), a blank line, the section body, and then each child section
 * rendered the same way, separated by blank lines. Top-level sections are
 * separated from one another by a blank line.
 *
 * @param sections - Sections to render, children included
 * @returns The combined markdown ('' when `sections` is empty)
 */
export const formatExtractedSections = (sections: MdSection[]): string => {
  const render = (section: MdSection): string => {
    const headingLine = `${'#'.repeat(section.level)} ${section.heading}`

    // The heading is emitted above, so drop the first content line when it is
    // itself a heading line; later '#' lines are left untouched.
    const rawLines = section.content.split('\n')
    const bodyLines = rawLines[0]?.startsWith('#')
      ? rawLines.slice(1)
      : rawLines
    const body = bodyLines.join('\n').trim()

    const parts: string[] = [headingLine, '']
    if (body) {
      parts.push(body)
    }

    // Children follow their parent, each preceded by an empty entry.
    for (const child of section.children) {
      parts.push('', render(child))
    }

    return parts.join('\n')
  }

  return sections.map((section) => render(section)).join('\n\n')
}
316
+
317
+ // ============================================================================
318
+ // Document Section Filtering
319
+ // ============================================================================
320
+
321
/**
 * Remove from a document every section selected by an exclusion pattern,
 * together with all of that section's descendants.
 *
 * @param document - Document to filter; it is not modified
 * @param excludePatterns - Selectors (number, name, or glob) to remove
 * @returns `document`: a copy with the selected sections pruned, or the
 *   original object when there are no patterns or nothing matched;
 *   `excludedCount`: number of distinct sections removed, descendants included
 */
export const filterDocumentSections = (
  document: MdDocument,
  excludePatterns: readonly string[],
): { document: MdDocument; excludedCount: number } => {
  if (excludePatterns.length === 0) {
    return { document, excludedCount: 0 }
  }

  const sectionList = buildSectionList(document)

  // Numbers of the sections that a pattern selects directly.
  const directHits = sectionList
    .filter((entry) => matchesExclusionPatterns(entry, excludePatterns))
    .map((entry) => entry.number)

  // Expand each direct hit to itself plus everything nested beneath it. A Set
  // keeps a section from being counted twice when several hits cover it.
  const numbersToExclude = new Set<string>()
  for (const hit of directHits) {
    const descendantPrefix = `${hit}.`
    for (const { number } of sectionList) {
      if (number === hit || number.startsWith(descendantPrefix)) {
        numbersToExclude.add(number)
      }
    }
  }

  if (numbersToExclude.size === 0) {
    return { document, excludedCount: 0 }
  }

  // Rebuild the tree without the excluded numbers. Numbering follows the
  // original positions so it lines up with `sectionList`.
  const prune = (
    siblings: readonly MdSection[],
    parentNumber: string,
  ): MdSection[] => {
    const kept: MdSection[] = []
    let position = 0

    for (const section of siblings) {
      position += 1
      const number = parentNumber
        ? `${parentNumber}.${position}`
        : `${position}`

      if (numbersToExclude.has(number)) {
        continue // dropped together with its whole subtree
      }

      kept.push({ ...section, children: prune(section.children, number) })
    }

    return kept
  }

  return {
    document: { ...document, sections: prune(document.sections, '') },
    excludedCount: numbersToExclude.size,
  }
}