mdcontext 0.0.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (337) hide show
  1. package/.changeset/README.md +28 -0
  2. package/.changeset/config.json +11 -0
  3. package/.claude/settings.local.json +25 -0
  4. package/.github/workflows/ci.yml +83 -0
  5. package/.github/workflows/claude-code-review.yml +44 -0
  6. package/.github/workflows/claude.yml +85 -0
  7. package/.github/workflows/release.yml +113 -0
  8. package/.tldrignore +112 -0
  9. package/BACKLOG.md +338 -0
  10. package/CONTRIBUTING.md +186 -0
  11. package/NOTES/NOTES +44 -0
  12. package/README.md +434 -11
  13. package/biome.json +36 -0
  14. package/cspell.config.yaml +14 -0
  15. package/dist/chunk-23UPXDNL.js +3044 -0
  16. package/dist/chunk-2W7MO2DL.js +1366 -0
  17. package/dist/chunk-3NUAZGMA.js +1689 -0
  18. package/dist/chunk-7TOWB2XB.js +366 -0
  19. package/dist/chunk-7XOTOADQ.js +3065 -0
  20. package/dist/chunk-AH2PDM2K.js +3042 -0
  21. package/dist/chunk-BNXWSZ63.js +3742 -0
  22. package/dist/chunk-BTL5DJVU.js +3222 -0
  23. package/dist/chunk-HDHYG7E4.js +104 -0
  24. package/dist/chunk-HLR4KZBP.js +3234 -0
  25. package/dist/chunk-IP3FRFEB.js +1045 -0
  26. package/dist/chunk-KHU56VDO.js +3042 -0
  27. package/dist/chunk-KRYIFLQR.js +88 -0
  28. package/dist/chunk-LBSDNLEM.js +287 -0
  29. package/dist/chunk-MNTQ7HCP.js +2643 -0
  30. package/dist/chunk-MUJELQQ6.js +1387 -0
  31. package/dist/chunk-MXJGMSLV.js +2199 -0
  32. package/dist/chunk-N6QJGC3Z.js +2636 -0
  33. package/dist/chunk-OBELGBPM.js +1713 -0
  34. package/dist/chunk-OT7R5XTA.js +3192 -0
  35. package/dist/chunk-P7X4RA2T.js +106 -0
  36. package/dist/chunk-PIDUQNC2.js +3185 -0
  37. package/dist/chunk-POGCDIH4.js +3187 -0
  38. package/dist/chunk-PSIEOQGZ.js +3043 -0
  39. package/dist/chunk-PVRT3IHA.js +3238 -0
  40. package/dist/chunk-QNN4TT23.js +1430 -0
  41. package/dist/chunk-RE3R45RJ.js +3042 -0
  42. package/dist/chunk-S7E6TFX6.js +803 -0
  43. package/dist/chunk-SG6GLU4U.js +1378 -0
  44. package/dist/chunk-SJCDV2ST.js +274 -0
  45. package/dist/chunk-SYE5XLF3.js +104 -0
  46. package/dist/chunk-T5VLYBZD.js +103 -0
  47. package/dist/chunk-TOQB7VWU.js +3238 -0
  48. package/dist/chunk-VFNMZ4ZQ.js +3228 -0
  49. package/dist/chunk-VVTGZNBT.js +1629 -0
  50. package/dist/chunk-W7Q4RFEV.js +104 -0
  51. package/dist/chunk-XTYYVRLO.js +3190 -0
  52. package/dist/chunk-Y6MDYVJD.js +3063 -0
  53. package/dist/cli/main.d.ts +1 -0
  54. package/dist/cli/main.js +5458 -0
  55. package/dist/index.d.ts +653 -0
  56. package/dist/index.js +79 -0
  57. package/dist/mcp/server.d.ts +1 -0
  58. package/dist/mcp/server.js +472 -0
  59. package/dist/schema-BAWSG7KY.js +22 -0
  60. package/dist/schema-E3QUPL26.js +20 -0
  61. package/dist/schema-EHL7WUT6.js +20 -0
  62. package/docs/019-USAGE.md +625 -0
  63. package/docs/020-current-implementation.md +364 -0
  64. package/docs/021-DOGFOODING-FINDINGS.md +175 -0
  65. package/docs/BACKLOG.md +80 -0
  66. package/docs/CONFIG.md +1123 -0
  67. package/docs/DESIGN.md +439 -0
  68. package/docs/ERRORS.md +383 -0
  69. package/docs/PROJECT.md +88 -0
  70. package/docs/ROADMAP.md +407 -0
  71. package/docs/summarization.md +320 -0
  72. package/docs/test-links.md +9 -0
  73. package/justfile +40 -0
  74. package/package.json +74 -9
  75. package/pnpm-workspace.yaml +5 -0
  76. package/research/INDEX.md +315 -0
  77. package/research/code-review/README.md +90 -0
  78. package/research/code-review/cli-error-handling-review.md +979 -0
  79. package/research/code-review/code-review-validation-report.md +464 -0
  80. package/research/code-review/main-ts-review.md +1128 -0
  81. package/research/config-analysis/01-current-implementation.md +470 -0
  82. package/research/config-analysis/02-strategy-recommendation.md +428 -0
  83. package/research/config-analysis/03-task-candidates.md +715 -0
  84. package/research/config-analysis/033-research-configuration-management.md +828 -0
  85. package/research/config-analysis/034-research-effect-cli-config.md +1504 -0
  86. package/research/config-analysis/04-consolidated-task-candidates.md +277 -0
  87. package/research/config-docs/SUMMARY.md +357 -0
  88. package/research/config-docs/TEST-RESULTS.md +776 -0
  89. package/research/config-docs/TODO.md +542 -0
  90. package/research/config-docs/analysis.md +744 -0
  91. package/research/config-docs/fix-validation.md +502 -0
  92. package/research/config-docs/help-audit.md +264 -0
  93. package/research/config-docs/help-system-analysis.md +890 -0
  94. package/research/dogfood/consolidated-tool-evaluation.md +373 -0
  95. package/research/dogfood/strategy-a/a-synthesis.md +184 -0
  96. package/research/dogfood/strategy-a/a1-docs.md +226 -0
  97. package/research/dogfood/strategy-a/a2-amorphic.md +156 -0
  98. package/research/dogfood/strategy-a/a3-llm.md +164 -0
  99. package/research/dogfood/strategy-b/b-synthesis.md +228 -0
  100. package/research/dogfood/strategy-b/b1-architecture.md +207 -0
  101. package/research/dogfood/strategy-b/b2-gaps.md +258 -0
  102. package/research/dogfood/strategy-b/b3-workflows.md +250 -0
  103. package/research/dogfood/strategy-c/c-synthesis.md +451 -0
  104. package/research/dogfood/strategy-c/c1-explorer.md +192 -0
  105. package/research/dogfood/strategy-c/c2-diver-memory.md +145 -0
  106. package/research/dogfood/strategy-c/c3-diver-control.md +148 -0
  107. package/research/dogfood/strategy-c/c4-diver-failure.md +151 -0
  108. package/research/dogfood/strategy-c/c5-diver-execution.md +221 -0
  109. package/research/dogfood/strategy-c/c6-diver-org.md +221 -0
  110. package/research/effect-cli-error-handling.md +845 -0
  111. package/research/effect-errors-as-values.md +943 -0
  112. package/research/errors-task-analysis/00-consolidated-tasks.md +207 -0
  113. package/research/errors-task-analysis/cli-commands-analysis.md +909 -0
  114. package/research/errors-task-analysis/embeddings-analysis.md +709 -0
  115. package/research/errors-task-analysis/index-search-analysis.md +812 -0
  116. package/research/frontmatter/COMMENTS-ARE-SKIPPED.md +149 -0
  117. package/research/frontmatter/LLM-CODE-NAVIGATION.md +276 -0
  118. package/research/issue-review.md +603 -0
  119. package/research/llm-summarization/agent-cli-tools-2026.md +1082 -0
  120. package/research/llm-summarization/alternative-providers-2026.md +1428 -0
  121. package/research/llm-summarization/anthropic-2026.md +367 -0
  122. package/research/llm-summarization/claude-cli-integration.md +1706 -0
  123. package/research/llm-summarization/cli-integration-patterns.md +3155 -0
  124. package/research/llm-summarization/openai-2026.md +473 -0
  125. package/research/llm-summarization/openai-compatible-providers-2026.md +1022 -0
  126. package/research/llm-summarization/opencode-cli-integration.md +1552 -0
  127. package/research/llm-summarization/prompt-engineering-2026.md +1426 -0
  128. package/research/llm-summarization/prototype-results.md +56 -0
  129. package/research/llm-summarization/provider-switching-patterns-2026.md +2153 -0
  130. package/research/llm-summarization/typescript-llm-libraries-2026.md +2436 -0
  131. package/research/mdcontext-error-analysis.md +521 -0
  132. package/research/mdcontext-pudding/00-EXECUTIVE-SUMMARY.md +282 -0
  133. package/research/mdcontext-pudding/01-index-embed.md +956 -0
  134. package/research/mdcontext-pudding/02-search-COMMANDS.md +142 -0
  135. package/research/mdcontext-pudding/02-search-SUMMARY.md +146 -0
  136. package/research/mdcontext-pudding/02-search.md +970 -0
  137. package/research/mdcontext-pudding/03-context.md +779 -0
  138. package/research/mdcontext-pudding/04-navigation-and-analytics.md +803 -0
  139. package/research/mdcontext-pudding/04-tree.md +704 -0
  140. package/research/mdcontext-pudding/05-config.md +1038 -0
  141. package/research/mdcontext-pudding/06-links-summary.txt +87 -0
  142. package/research/mdcontext-pudding/06-links.md +679 -0
  143. package/research/mdcontext-pudding/07-stats.md +693 -0
  144. package/research/mdcontext-pudding/BUG-FIX-PLAN.md +388 -0
  145. package/research/mdcontext-pudding/P0-BUG-VALIDATION.md +167 -0
  146. package/research/mdcontext-pudding/README.md +168 -0
  147. package/research/mdcontext-pudding/TESTING-SUMMARY.md +128 -0
  148. package/research/npm_publish/011-npm-workflow-research-agent2.md +792 -0
  149. package/research/npm_publish/012-npm-workflow-research-agent1.md +530 -0
  150. package/research/npm_publish/013-npm-workflow-research-agent3.md +722 -0
  151. package/research/npm_publish/014-npm-workflow-synthesis.md +556 -0
  152. package/research/npm_publish/031-npm-workflow-task-analysis.md +134 -0
  153. package/research/research-quality-review.md +834 -0
  154. package/research/semantic-search/002-research-embedding-models.md +490 -0
  155. package/research/semantic-search/003-research-rag-alternatives.md +523 -0
  156. package/research/semantic-search/004-research-vector-search.md +841 -0
  157. package/research/semantic-search/032-research-semantic-search.md +427 -0
  158. package/research/semantic-search/embedding-text-analysis.md +156 -0
  159. package/research/semantic-search/multi-word-failure-reproduction.md +171 -0
  160. package/research/semantic-search/query-processing-analysis.md +207 -0
  161. package/research/semantic-search/root-cause-and-solution.md +114 -0
  162. package/research/semantic-search/threshold-validation-report.md +69 -0
  163. package/research/semantic-search/vector-search-analysis.md +63 -0
  164. package/research/task-management-2026/00-synthesis-recommendations.md +295 -0
  165. package/research/task-management-2026/01-ai-workflow-tools.md +416 -0
  166. package/research/task-management-2026/02-agent-framework-patterns.md +476 -0
  167. package/research/task-management-2026/03-lightweight-file-based.md +567 -0
  168. package/research/task-management-2026/04-established-tools-ai-features.md +541 -0
  169. package/research/task-management-2026/linear/01-core-features-workflow.md +771 -0
  170. package/research/task-management-2026/linear/02-api-integrations.md +930 -0
  171. package/research/task-management-2026/linear/03-ai-features.md +368 -0
  172. package/research/task-management-2026/linear/04-pricing-setup.md +205 -0
  173. package/research/task-management-2026/linear/05-usage-patterns-best-practices.md +605 -0
  174. package/research/test-path-issues.md +276 -0
  175. package/review/ALP-76/1-error-type-design.md +962 -0
  176. package/review/ALP-76/2-error-handling-patterns.md +906 -0
  177. package/review/ALP-76/3-error-presentation.md +624 -0
  178. package/review/ALP-76/4-test-coverage.md +625 -0
  179. package/review/ALP-76/5-migration-completeness.md +440 -0
  180. package/review/ALP-76/6-effect-best-practices.md +755 -0
  181. package/scripts/apply-branch-protection.sh +47 -0
  182. package/scripts/branch-protection-templates.json +79 -0
  183. package/scripts/prototype-summarization.ts +346 -0
  184. package/scripts/rebuild-hnswlib.js +58 -0
  185. package/scripts/setup-branch-protection.sh +64 -0
  186. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/active-provider.json +7 -0
  187. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.json +541 -0
  188. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.meta.json +5 -0
  189. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/config.json +8 -0
  190. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
  191. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
  192. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/documents.json +60 -0
  193. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/links.json +13 -0
  194. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/sections.json +1197 -0
  195. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/configuration-management.md +99 -0
  196. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/distributed-systems.md +92 -0
  197. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/error-handling.md +78 -0
  198. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/failure-automation.md +55 -0
  199. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/job-context.md +69 -0
  200. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/process-orchestration.md +99 -0
  201. package/src/cli/argv-preprocessor.test.ts +210 -0
  202. package/src/cli/argv-preprocessor.ts +202 -0
  203. package/src/cli/cli.test.ts +627 -0
  204. package/src/cli/commands/backlinks.ts +54 -0
  205. package/src/cli/commands/config-cmd.ts +642 -0
  206. package/src/cli/commands/context.ts +285 -0
  207. package/src/cli/commands/duplicates.ts +122 -0
  208. package/src/cli/commands/embeddings.ts +529 -0
  209. package/src/cli/commands/index-cmd.ts +480 -0
  210. package/src/cli/commands/index.ts +16 -0
  211. package/src/cli/commands/links.ts +52 -0
  212. package/src/cli/commands/search.ts +1281 -0
  213. package/src/cli/commands/stats.ts +149 -0
  214. package/src/cli/commands/tree.ts +128 -0
  215. package/src/cli/config-layer.ts +176 -0
  216. package/src/cli/error-handler.test.ts +235 -0
  217. package/src/cli/error-handler.ts +655 -0
  218. package/src/cli/flag-schemas.ts +341 -0
  219. package/src/cli/help.ts +588 -0
  220. package/src/cli/index.ts +9 -0
  221. package/src/cli/main.ts +435 -0
  222. package/src/cli/options.ts +41 -0
  223. package/src/cli/shared-error-handling.ts +199 -0
  224. package/src/cli/typo-suggester.test.ts +105 -0
  225. package/src/cli/typo-suggester.ts +130 -0
  226. package/src/cli/utils.ts +259 -0
  227. package/src/config/file-provider.test.ts +320 -0
  228. package/src/config/file-provider.ts +273 -0
  229. package/src/config/index.ts +72 -0
  230. package/src/config/integration.test.ts +667 -0
  231. package/src/config/precedence.test.ts +277 -0
  232. package/src/config/precedence.ts +451 -0
  233. package/src/config/schema.test.ts +414 -0
  234. package/src/config/schema.ts +603 -0
  235. package/src/config/service.test.ts +320 -0
  236. package/src/config/service.ts +243 -0
  237. package/src/config/testing.test.ts +264 -0
  238. package/src/config/testing.ts +110 -0
  239. package/src/core/index.ts +1 -0
  240. package/src/core/types.ts +113 -0
  241. package/src/duplicates/detector.test.ts +183 -0
  242. package/src/duplicates/detector.ts +414 -0
  243. package/src/duplicates/index.ts +18 -0
  244. package/src/embeddings/embedding-namespace.test.ts +300 -0
  245. package/src/embeddings/embedding-namespace.ts +947 -0
  246. package/src/embeddings/heading-boost.test.ts +222 -0
  247. package/src/embeddings/hnsw-build-options.test.ts +198 -0
  248. package/src/embeddings/hyde.test.ts +272 -0
  249. package/src/embeddings/hyde.ts +264 -0
  250. package/src/embeddings/index.ts +10 -0
  251. package/src/embeddings/openai-provider.ts +414 -0
  252. package/src/embeddings/pricing.json +22 -0
  253. package/src/embeddings/provider-constants.ts +204 -0
  254. package/src/embeddings/provider-errors.test.ts +967 -0
  255. package/src/embeddings/provider-errors.ts +565 -0
  256. package/src/embeddings/provider-factory.test.ts +240 -0
  257. package/src/embeddings/provider-factory.ts +225 -0
  258. package/src/embeddings/provider-integration.test.ts +788 -0
  259. package/src/embeddings/query-preprocessing.test.ts +187 -0
  260. package/src/embeddings/semantic-search-threshold.test.ts +508 -0
  261. package/src/embeddings/semantic-search.ts +1270 -0
  262. package/src/embeddings/types.ts +359 -0
  263. package/src/embeddings/vector-store.ts +708 -0
  264. package/src/embeddings/voyage-provider.ts +313 -0
  265. package/src/errors/errors.test.ts +845 -0
  266. package/src/errors/index.ts +533 -0
  267. package/src/index/ignore-patterns.test.ts +354 -0
  268. package/src/index/ignore-patterns.ts +305 -0
  269. package/src/index/index.ts +4 -0
  270. package/src/index/indexer.ts +684 -0
  271. package/src/index/storage.ts +260 -0
  272. package/src/index/types.ts +147 -0
  273. package/src/index/watcher.ts +189 -0
  274. package/src/index.ts +30 -0
  275. package/src/integration/search-keyword.test.ts +678 -0
  276. package/src/mcp/server.ts +612 -0
  277. package/src/parser/index.ts +1 -0
  278. package/src/parser/parser.test.ts +291 -0
  279. package/src/parser/parser.ts +394 -0
  280. package/src/parser/section-filter.test.ts +277 -0
  281. package/src/parser/section-filter.ts +392 -0
  282. package/src/search/__tests__/hybrid-search.test.ts +650 -0
  283. package/src/search/bm25-store.ts +366 -0
  284. package/src/search/cross-encoder.test.ts +253 -0
  285. package/src/search/cross-encoder.ts +406 -0
  286. package/src/search/fuzzy-search.test.ts +419 -0
  287. package/src/search/fuzzy-search.ts +273 -0
  288. package/src/search/hybrid-search.ts +448 -0
  289. package/src/search/path-matcher.test.ts +276 -0
  290. package/src/search/path-matcher.ts +33 -0
  291. package/src/search/query-parser.test.ts +260 -0
  292. package/src/search/query-parser.ts +319 -0
  293. package/src/search/searcher.test.ts +280 -0
  294. package/src/search/searcher.ts +724 -0
  295. package/src/search/wink-bm25.d.ts +30 -0
  296. package/src/summarization/cli-providers/claude.ts +202 -0
  297. package/src/summarization/cli-providers/detection.test.ts +273 -0
  298. package/src/summarization/cli-providers/detection.ts +118 -0
  299. package/src/summarization/cli-providers/index.ts +8 -0
  300. package/src/summarization/cost.test.ts +139 -0
  301. package/src/summarization/cost.ts +102 -0
  302. package/src/summarization/error-handler.test.ts +127 -0
  303. package/src/summarization/error-handler.ts +111 -0
  304. package/src/summarization/index.ts +102 -0
  305. package/src/summarization/pipeline.test.ts +498 -0
  306. package/src/summarization/pipeline.ts +231 -0
  307. package/src/summarization/prompts.test.ts +269 -0
  308. package/src/summarization/prompts.ts +133 -0
  309. package/src/summarization/provider-factory.test.ts +396 -0
  310. package/src/summarization/provider-factory.ts +178 -0
  311. package/src/summarization/types.ts +184 -0
  312. package/src/summarize/budget-bugs.test.ts +620 -0
  313. package/src/summarize/formatters.ts +419 -0
  314. package/src/summarize/index.ts +20 -0
  315. package/src/summarize/summarizer.test.ts +275 -0
  316. package/src/summarize/summarizer.ts +597 -0
  317. package/src/summarize/verify-bugs.test.ts +238 -0
  318. package/src/types/huggingface-transformers.d.ts +66 -0
  319. package/src/utils/index.ts +1 -0
  320. package/src/utils/tokens.test.ts +142 -0
  321. package/src/utils/tokens.ts +186 -0
  322. package/tests/fixtures/cli/.mdcontext/active-provider.json +7 -0
  323. package/tests/fixtures/cli/.mdcontext/config.json +8 -0
  324. package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
  325. package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
  326. package/tests/fixtures/cli/.mdcontext/indexes/documents.json +33 -0
  327. package/tests/fixtures/cli/.mdcontext/indexes/links.json +12 -0
  328. package/tests/fixtures/cli/.mdcontext/indexes/sections.json +247 -0
  329. package/tests/fixtures/cli/README.md +9 -0
  330. package/tests/fixtures/cli/api-reference.md +11 -0
  331. package/tests/fixtures/cli/getting-started.md +11 -0
  332. package/tests/integration/embed-index.test.ts +712 -0
  333. package/tests/integration/search-context.test.ts +469 -0
  334. package/tests/integration/search-semantic.test.ts +522 -0
  335. package/tsconfig.json +26 -0
  336. package/vitest.config.ts +16 -0
  337. package/vitest.setup.ts +12 -0
@@ -0,0 +1,597 @@
1
+ /**
2
+ * Summarization engine for mdcontext
3
+ *
4
+ * Provides hierarchical summarization and multi-document context assembly
5
+ */
6
+
7
+ import * as fs from 'node:fs/promises'
8
+ import * as path from 'node:path'
9
+ import { Effect } from 'effect'
10
+ import type { MdDocument, MdSection, ParseError } from '../core/types.js'
11
+ import type { FileReadError } from '../errors/index.js'
12
+ import { parseFile } from '../parser/parser.js'
13
+ import { filterDocumentSections } from '../parser/section-filter.js'
14
+ import { countTokensApprox } from '../utils/tokens.js'
15
+ import { formatSummary as formatSummaryImpl } from './formatters.js'
16
+
17
/**
 * Failure channel of `parseFile`.
 *
 * Both members are imported from the project's shared type/error modules
 * (`../core/types.js` and `../errors/index.js`) rather than declared here.
 */
type ParseFileError = FileReadError | ParseError
22
+
23
+ // ============================================================================
24
+ // Types
25
+ // ============================================================================
26
+
27
/**
 * How aggressively section content is condensed.
 *
 * - `brief`: oversized sections are reduced to a `[code, list, table]` marker
 * - `summary`: oversized sections are reduced to a few key sentences
 * - `full`: section plain text is kept as-is
 */
export type CompressionLevel =
  | 'brief'
  | 'summary'
  | 'full'
28
+
29
/** Options accepted by `summarizeDocument`. Every field is optional. */
export interface SummarizeOptions {
  /** Compression level; `summarizeDocument` falls back to `'summary'` when omitted */
  readonly level?: CompressionLevel | undefined
  /** Maximum tokens for output; defaults to the budget of the chosen level */
  readonly maxTokens?: number | undefined
  /** Section patterns to exclude from output */
  readonly exclude?: ReadonlyArray<string> | undefined
}
37
+
38
/** Summary of one section together with the summaries of its subsections. */
export interface SectionSummary {
  /** Heading text copied from the source section */
  readonly heading: string
  /** Level copied from the source section */
  readonly level: number
  /** Token count recorded in the source section's metadata */
  readonly originalTokens: number
  /** Approximate token count of `summary` (children are not included) */
  readonly summaryTokens: number
  /** Condensed text for this section only */
  readonly summary: string
  /** Summaries of the nested subsections */
  readonly children: ReadonlyArray<SectionSummary>
  /** Content flags copied from the source section's metadata */
  readonly hasCode: boolean
  readonly hasList: boolean
  readonly hasTable: boolean
}
49
+
50
/** Result of summarizing a whole document. */
export interface DocumentSummary {
  /** Path of the source document */
  readonly path: string
  /** Title of the source document */
  readonly title: string
  /** Token count recorded in the source document's metadata */
  readonly originalTokens: number
  /** Sum of `summaryTokens` over all included sections (formatting overhead excluded) */
  readonly summaryTokens: number
  /** `1 - summaryTokens / originalTokens`, or 0 when the document has no tokens */
  readonly compressionRatio: number
  /** Top-level section summaries that made it into the output */
  readonly sections: ReadonlyArray<SectionSummary>
  /** Topics derived from headings and frontmatter tags */
  readonly keyTopics: ReadonlyArray<string>
  /** True if content was truncated to fit budget */
  readonly truncated?: boolean
  /** Number of sections that were omitted due to budget constraints */
  readonly truncatedCount?: number
}
63
+
64
/** Options for assembling context from multiple sources under one budget. */
export interface AssembleContextOptions {
  /** Total token budget */
  readonly budget: number
  /** Compression level for each source */
  readonly level?: CompressionLevel | undefined
  /** Section patterns to exclude from output */
  readonly exclude?: ReadonlyArray<string> | undefined
}
72
+
73
/** Multi-source context produced under a token budget. */
export interface AssembledContext {
  /** Per-source content that was included */
  readonly sources: ReadonlyArray<SourceContext>
  /** Tokens consumed by the assembled output */
  readonly totalTokens: number
  /** Budget the context was assembled against */
  readonly budget: number
  /** NOTE(review): presumably identifies sources that did not fit; confirm against the assembler */
  readonly overflow: ReadonlyArray<string>
}
79
+
80
/** Content contributed by a single source to an `AssembledContext`. */
export interface SourceContext {
  /** Path of the source */
  readonly path: string
  /** Title of the source */
  readonly title: string
  /** Token count attributed to `content` */
  readonly tokens: number
  /** Text included for this source */
  readonly content: string
}
86
+
87
+ // ============================================================================
88
+ // Constants
89
+ // ============================================================================
90
+
91
/** Output token ceiling for each compression level; `full` is uncapped. */
const TOKEN_BUDGETS: Record<CompressionLevel, number> = {
  brief: 100,
  summary: 500,
  full: Number.POSITIVE_INFINITY,
}

/** Sentences at or below this many characters (after trimming) are ignored. */
const MIN_SENTENCE_LENGTH = 10

// Sentence importance heuristics: points awarded per matching trait.
/** Sentence contains a colon (likely a definition). */
const SENTENCE_SCORE_DEFINITION = 2
/** Sentence starts with a capital letter. */
const SENTENCE_SCORE_PROPER_START = 1
/** Sentence length falls inside the ideal range below. */
const SENTENCE_SCORE_MEDIUM_LENGTH = 1
/** Sentence contains bold emphasis or inline code. */
const SENTENCE_SCORE_EMPHASIS = 1

// Ideal sentence length range for summaries, in characters (both bounds exclusive).
const SENTENCE_LENGTH_MIN = 50
const SENTENCE_LENGTH_MAX = 200

/** Share of a section's original tokens that a summary aims for (30%). */
const SUMMARY_COMPRESSION_RATIO = 0.3

/** Floor for any section's target token count. */
const MIN_SECTION_TOKENS = 20

/** A summary always asks for at least this many sentences. */
const MIN_SUMMARY_SENTENCES = 2

/** Rough tokens per sentence, used to turn a token target into a sentence count. */
const TOKENS_PER_SENTENCE_ESTIMATE = 30

// A cleaned heading becomes a topic only if its length lies strictly between these.
const MIN_TOPIC_LENGTH = 2
const MAX_TOPIC_LENGTH = 50

/** Cap on the number of topics reported for a document. */
const MAX_TOPICS = 10

/** Minimum remaining budget to include partial content. */
const MIN_PARTIAL_BUDGET = 50
132
+
133
+ // ============================================================================
134
+ // Section Summarization
135
+ // ============================================================================
136
+
137
/**
 * Pick up to `maxSentences` of the most informative sentences from `content`.
 *
 * Newlines are collapsed to spaces, the text is split on whitespace that
 * follows `.`, `!` or `?`, and fragments whose trimmed length is not greater
 * than MIN_SENTENCE_LENGTH are discarded. If no more than `maxSentences`
 * sentences remain they are all returned. Otherwise each sentence is scored
 * (colon, capitalised start, medium length, bold/inline-code emphasis) and the
 * highest-scoring ones are kept.
 *
 * The selected sentences are returned in their original document order so the
 * caller can join them into text that still reads naturally.
 *
 * @param content - Plain text to extract sentences from
 * @param maxSentences - Upper bound on the number of sentences returned;
 *   negative values are treated as 0
 * @returns The selected sentences, in document order
 */
const extractKeyPoints = (content: string, maxSentences: number): string[] => {
  const sentences = content
    .replace(/\n+/g, ' ')
    .split(/(?<=[.!?])\s+/)
    .filter((s) => s.trim().length > MIN_SENTENCE_LENGTH)

  if (sentences.length <= maxSentences) {
    return sentences
  }

  // A negative limit would make slice() count from the end of the array.
  const limit = Math.max(0, Math.floor(maxSentences))

  // Simple heuristic: prefer sentences with key indicators
  const ranked = sentences.map((sentence, position) => {
    let score = 0
    if (sentence.includes(':')) score += SENTENCE_SCORE_DEFINITION
    if (/^[A-Z]/.test(sentence)) score += SENTENCE_SCORE_PROPER_START
    if (
      sentence.length > SENTENCE_LENGTH_MIN &&
      sentence.length < SENTENCE_LENGTH_MAX
    ) {
      score += SENTENCE_SCORE_MEDIUM_LENGTH
    }
    if (/\*\*|`/.test(sentence)) score += SENTENCE_SCORE_EMPHASIS
    return { sentence, position, score }
  })

  return ranked
    // Best score first; earlier sentences win ties.
    .sort((a, b) => b.score - a.score || a.position - b.position)
    .slice(0, limit)
    // Restore reading order among the winners.
    .sort((a, b) => a.position - b.position)
    .map((entry) => entry.sentence)
}
164
+
165
/**
 * Summarize one section and, recursively, all of its subsections.
 *
 * The target size is the smaller of the level's token budget and
 * `max(originalTokens * SUMMARY_COMPRESSION_RATIO, MIN_SECTION_TOKENS)`.
 *
 * - `full`, or a section already within the target: the section's plain text
 *   is used unchanged (plain text rather than raw content, because the heading
 *   is emitted separately by the formatter).
 * - `brief`: a bracketed list of content markers such as `[code, list]`, or an
 *   empty string when the section has none.
 * - `summary`: key sentences joined by spaces; if no usable sentence is found,
 *   the first `targetTokens` whitespace-separated words followed by `...` when
 *   words were cut.
 *
 * @param section - Parsed section to summarize
 * @param level - Compression level applied to this section and its children
 * @returns Summary for the section with nested child summaries
 */
const summarizeSection = (
  section: MdSection,
  level: CompressionLevel,
): SectionSummary => {
  const { metadata, plainText } = section
  const originalTokens = metadata.tokenCount

  // Children are summarized first, at the same level as the parent.
  const children = section.children.map((child) =>
    summarizeSection(child, level),
  )

  const scaledTarget = Math.max(
    originalTokens * SUMMARY_COMPRESSION_RATIO,
    MIN_SECTION_TOKENS,
  )
  const targetTokens = Math.min(TOKEN_BUDGETS[level], scaledTarget)

  const buildSummary = (): string => {
    if (level === 'full' || originalTokens <= targetTokens) {
      return plainText
    }

    if (level === 'brief') {
      // Only advertise what kind of content the section holds.
      const labels = ['code', 'list', 'table']
      const flags = [metadata.hasCode, metadata.hasList, metadata.hasTable]
      const present = labels.filter((_, i) => flags[i])
      return present.length > 0 ? `[${present.join(', ')}]` : ''
    }

    // Summary level: extract key points
    const maxSentences = Math.max(
      MIN_SUMMARY_SENTENCES,
      Math.floor(targetTokens / TOKENS_PER_SENTENCE_ESTIMATE),
    )
    const keyPoints = extractKeyPoints(plainText, maxSentences)
    if (keyPoints.length > 0) {
      return keyPoints.join(' ')
    }

    // Fallback: keep the leading words and flag the cut with an ellipsis.
    const allWords = plainText.split(/\s+/)
    const keptWords = allWords.slice(0, targetTokens)
    const ellipsis = keptWords.length < allWords.length ? '...' : ''
    return keptWords.join(' ') + ellipsis
  }

  const summary = buildSummary()

  return {
    heading: section.heading,
    level: section.level,
    originalTokens,
    summaryTokens: countTokensApprox(summary),
    summary,
    children,
    hasCode: metadata.hasCode,
    hasList: metadata.hasList,
    hasTable: metadata.hasTable,
  }
}
229
+
230
+ // ============================================================================
231
+ // Document Summarization
232
+ // ============================================================================
233
+
234
/**
 * Collect up to MAX_TOPICS distinct topics for a document.
 *
 * Topics come from two places, in this order:
 * 1. Every heading in the section tree (parents before children). `:`, `#`,
 *    `-` and `_` are replaced by spaces, the result is trimmed and
 *    lower-cased, and it is kept only if its length lies strictly between
 *    MIN_TOPIC_LENGTH and MAX_TOPIC_LENGTH.
 * 2. String entries of a `tags` array in the frontmatter, lower-cased.
 *
 * Duplicates are dropped. Because headings are collected first, tags can be
 * cut off by the MAX_TOPICS cap on heading-rich documents.
 *
 * @param document - Parsed document to inspect
 * @returns Distinct topics in discovery order
 */
const extractTopics = (document: MdDocument): string[] => {
  const topics = new Set<string>()

  const collectHeadings = (section: MdSection): void => {
    const cleanHeading = section.heading
      .replace(/[:#\-_]/g, ' ')
      .trim()
      .toLowerCase()
    if (
      cleanHeading.length > MIN_TOPIC_LENGTH &&
      cleanHeading.length < MAX_TOPIC_LENGTH
    ) {
      topics.add(cleanHeading)
    }

    for (const child of section.children) {
      collectHeadings(child)
    }
  }

  for (const section of document.sections) {
    collectHeadings(section)
  }

  // The cast hides whether frontmatter can be absent, so guard the access
  // instead of assuming an object is always present.
  const frontmatter = document.frontmatter as
    | Record<string, unknown>
    | null
    | undefined
  const tags = frontmatter?.tags
  if (Array.isArray(tags)) {
    for (const tag of tags) {
      if (typeof tag === 'string') {
        topics.add(tag.toLowerCase())
      }
    }
  }

  return Array.from(topics).slice(0, MAX_TOPICS)
}
272
+
273
/**
 * Build a hierarchical summary of a parsed markdown document.
 *
 * Every section is summarized at the requested level. The content budget is
 * `maxTokens` minus an estimate of the formatter's overhead (header, topics
 * line and truncation warning, padded by 20% plus 20 tokens). If the combined
 * section summaries exceed that budget, sections are visited in document
 * order and kept greedily while they fit. A section that does not fit is
 * dropped and counted, but its children are still tried and, if they fit,
 * promoted to the dropped parent's position.
 *
 * NOTE: when the overhead estimate alone reaches `maxTokens` (content budget
 * <= 0), no truncation is attempted and every section is returned.
 *
 * `options.exclude` is not read by this function.
 *
 * @param document - Parsed document to summarize
 * @param options - `level` defaults to `'summary'`; `maxTokens` defaults to
 *   the token budget of the chosen level
 * @returns The document summary; `truncated` and `truncatedCount` are present
 *   only when at least one section was dropped
 */
export const summarizeDocument = (
  document: MdDocument,
  options: SummarizeOptions = {},
): DocumentSummary => {
  const level = options.level ?? 'summary'
  const maxTokens = options.maxTokens ?? TOKEN_BUDGETS[level]

  // Summarize all sections
  const allSections = document.sections.map((s) => summarizeSection(s, level))

  const originalTokens = document.metadata.tokenCount

  // Add up each section's own summary tokens across the whole tree.
  let totalSummaryTokens = 0
  const addTokens = (section: SectionSummary): void => {
    totalSummaryTokens += section.summaryTokens
    for (const child of section.children) {
      addTokens(child)
    }
  }
  for (const section of allSections) {
    addTokens(section)
  }

  // Estimate formatting overhead from worst-case placeholder text:
  // header ("# {title}\nPath: {path}\nTokens: X (Y% reduction from Z)\n"),
  // the topics line if any, and a possible truncation warning.
  const topics = extractTopics(document)
  const headerTemplate = `# ${document.title}\nPath: ${document.path}\nTokens: 9999 (99% reduction from ${originalTokens})\n`
  const topicsLine =
    topics.length > 0 ? `\n**Topics:** ${topics.join(', ')}\n` : ''
  const truncationWarning =
    '\n⚠️ TRUNCATED: 999 sections omitted to fit token budget'
  const baseOverhead = countTokensApprox(
    headerTemplate + topicsLine + truncationWarning,
  )
  // Safety margin (20% of overhead + 20 base) for variance in token estimation
  const formattingOverhead = Math.ceil(baseOverhead * 1.2) + 20
  const contentBudget = maxTokens - formattingOverhead

  let truncated = false
  let truncatedCount = 0
  let sections: SectionSummary[]
  let summaryTokens: number

  if (totalSummaryTokens > contentBudget && contentBudget > 0) {
    let tokensUsed = 0

    // Greedy walk with orphan rescue: a parent that does not fit is dropped,
    // but its children are still tried and promoted into the parent's list.
    const truncateSections = (
      sectionList: readonly SectionSummary[],
    ): SectionSummary[] => {
      const kept: SectionSummary[] = []

      for (const section of sectionList) {
        if (tokensUsed + section.summaryTokens <= contentBudget) {
          // Charge the parent before descending so children see the
          // reduced budget.
          tokensUsed += section.summaryTokens
          kept.push({
            ...section,
            children: truncateSections(section.children),
          })
        } else {
          truncatedCount++
          kept.push(...truncateSections(section.children))
        }
      }

      return kept
    }

    sections = truncateSections(allSections)
    summaryTokens = tokensUsed
    truncated = truncatedCount > 0
  } else {
    sections = allSections
    summaryTokens = totalSummaryTokens
  }

  const compressionRatio =
    originalTokens > 0 ? 1 - summaryTokens / originalTokens : 0

  const result: DocumentSummary = {
    path: document.path,
    title: document.title,
    originalTokens,
    summaryTokens,
    compressionRatio,
    sections,
    keyTopics: topics,
  }

  // Truncation fields are only attached when something was actually dropped.
  return truncated ? { ...result, truncated: true, truncatedCount } : result
}
389
+
390
/**
 * Parse a markdown file and produce its summary.
 *
 * When `options.exclude` holds at least one pattern, the parsed document is
 * first passed through `filterDocumentSections` and the filtered document is
 * summarized instead of the full one.
 *
 * The returned effect fails with `ParseFileError` when `parseFile` fails
 * (per the original notes: ParseError if the content cannot be parsed,
 * FileReadError if the file cannot be read from the filesystem).
 *
 * @param filePath - Path of the markdown file handed to `parseFile`
 * @param options - Summarization options, forwarded to `summarizeDocument`
 * @returns Effect yielding the `DocumentSummary` for the file
 */
export const summarizeFile = (
  filePath: string,
  options: SummarizeOptions = {},
): Effect.Effect<DocumentSummary, ParseFileError> =>
  Effect.gen(function* () {
    const parsed = yield* parseFile(filePath)
    const patterns = options.exclude

    // Only run the exclusion filter when there is something to exclude
    const target =
      patterns && patterns.length > 0
        ? filterDocumentSections(parsed, patterns).document
        : parsed

    return summarizeDocument(target, options)
  })
414
+
415
+ // ============================================================================
416
+ // Format Summary for Output (re-exported from formatters.ts)
417
+ // ============================================================================
418
+
419
+ export { type FormatSummaryOptions, formatSummary } from './formatters.js'
420
+
421
+ // ============================================================================
422
+ // Multi-Document Context Assembly
423
+ // ============================================================================
424
+
425
/**
 * Assemble context from multiple markdown files within a token budget.
 *
 * The budget is split evenly across `sourcePaths`; each file is summarized at
 * `options.level` (default `'summary'`) with that per-source share. A file
 * whose formatted summary no longer fits in the overall budget is retried at
 * the `'brief'` level with whatever budget remains, provided more than
 * `MIN_PARTIAL_BUDGET` tokens are left. Files that cannot be included are
 * listed in `overflow`.
 *
 * Individual file failures never fail the returned effect: they are logged
 * via `Effect.logError` and the file is recorded in `overflow`.
 *
 * All paths in `sources` and `overflow` are reported relative to `rootPath`.
 *
 * @param rootPath - Directory against which relative source paths are resolved
 * @param sourcePaths - Files to include; absolute paths are used as-is
 * @param options - Budget, compression level, and exclusion patterns
 * @returns Effect yielding the included sources, total formatted tokens,
 *          the budget, and the overflow list
 */
export const assembleContext = (
  rootPath: string,
  sourcePaths: readonly string[],
  options: AssembleContextOptions,
): Effect.Effect<AssembledContext, ParseFileError> =>
  Effect.gen(function* () {
    const budget = options.budget
    const level = options.level ?? 'summary'
    const excludePatterns = options.exclude ?? []

    const sources: SourceContext[] = []
    const overflow: string[] = []
    let totalTokens = 0

    // Even distribution; guard the empty list so we never divide by zero
    const perSourceBudget =
      sourcePaths.length > 0 ? Math.floor(budget / sourcePaths.length) : 0

    // Summarize one file, degrading to `null` on failure. catchAll is
    // intentional for batch processing: a single bad file is logged and then
    // reported through `overflow` instead of stopping the whole assembly.
    const summarizeOrNull = (
      resolvedPath: string,
      summarizeOptions: SummarizeOptions,
      failureMessage: string,
    ) =>
      summarizeFile(resolvedPath, summarizeOptions).pipe(
        Effect.map((s): DocumentSummary | null => s),
        // Log error for observability before gracefully degrading
        Effect.tapError((error) => Effect.logError(failureMessage, error)),
        Effect.catchAll(() => Effect.succeed(null as DocumentSummary | null)),
      )

    for (const sourcePath of sourcePaths) {
      const resolvedPath = path.isAbsolute(sourcePath)
        ? sourcePath
        : path.join(rootPath, sourcePath)
      // Single canonical form used for both `sources` and `overflow`
      const relativePath = path.relative(rootPath, resolvedPath)

      const summary = yield* summarizeOrNull(
        resolvedPath,
        { level, maxTokens: perSourceBudget, exclude: excludePatterns },
        `Failed to summarize ${sourcePath}`,
      )

      if (!summary) {
        overflow.push(relativePath)
        continue
      }

      const content = formatSummaryImpl(summary)
      // Count actual formatted output tokens, not pre-format summary tokens
      const tokens = countTokensApprox(content)

      if (totalTokens + tokens <= budget) {
        sources.push({
          path: relativePath,
          title: summary.title,
          tokens,
          content,
        })
        totalTokens += tokens
        continue
      }

      // Over budget: only attempt a partial entry if meaningful room is left
      const remaining = budget - totalTokens
      if (remaining <= MIN_PARTIAL_BUDGET) {
        overflow.push(relativePath)
        continue
      }

      const briefSummary = yield* summarizeOrNull(
        resolvedPath,
        { level: 'brief', maxTokens: remaining, exclude: excludePatterns },
        `Failed to create brief summary for ${sourcePath}`,
      )

      if (!briefSummary) {
        overflow.push(relativePath)
        continue
      }

      const briefContent = formatSummaryImpl(briefSummary)
      // Count actual formatted output tokens, not pre-format summary tokens
      const briefTokens = countTokensApprox(briefContent)

      // The summarizer's budget is approximate, so the formatted brief can
      // still be larger than what is left. Never exceed the overall budget.
      if (briefTokens > remaining) {
        overflow.push(relativePath)
        continue
      }

      sources.push({
        path: relativePath,
        title: briefSummary.title,
        tokens: briefTokens,
        content: briefContent,
      })
      totalTokens += briefTokens
    }

    return {
      sources,
      totalTokens,
      budget,
      overflow,
    }
  })
539
+
540
+ // ============================================================================
541
+ // Format Assembled Context (re-exported from formatters.ts)
542
+ // ============================================================================
543
+
544
+ export { formatAssembledContext } from './formatters.js'
545
+
546
+ // ============================================================================
547
+ // Measure Token Reduction
548
+ // ============================================================================
549
+
550
/**
 * Result of comparing a file's raw token count with its summary's.
 */
export interface TokenReductionReport {
  /** Approximate token count of the original file content */
  readonly originalTokens: number
  /** Token count of the summary (equals `originalTokens` when summarizing failed) */
  readonly summaryTokens: number
  /** `originalTokens - summaryTokens` */
  readonly reduction: number
  /**
   * `reduction / originalTokens` — a fraction, not scaled by 100.
   * 0 when `originalTokens` is 0 or summarizing failed.
   */
  readonly reductionPercent: number
}
556
+
557
/**
 * Measure how many tokens summarizing a file saves.
 *
 * The original size is the approximate token count of the raw file text; the
 * summary size is taken from the `summaryTokens` of the file's summary.
 *
 * Summarization failures are logged and reported as "no reduction" rather
 * than thrown. Reading the file itself is not guarded, so a failed
 * `fs.readFile` rejects the returned promise.
 *
 * @param filePath - File to read and summarize
 * @param level - Compression level passed to `summarizeFile`
 * @returns Token counts plus the absolute and fractional reduction
 */
export const measureReduction = async (
  filePath: string,
  level: CompressionLevel = 'summary',
): Promise<TokenReductionReport> => {
  // Baseline: approximate token count of the untouched file
  const rawText = await fs.readFile(filePath, 'utf-8')
  const originalTokens = countTokensApprox(rawText)

  // catchAll is intentional: this is a utility where a failed summary should
  // yield default values (no reduction) instead of an exception
  const summaryOrNull = summarizeFile(filePath, { level }).pipe(
    // Log error for observability before gracefully degrading
    Effect.tapError((error) =>
      Effect.logError(`Failed to measure reduction for ${filePath}`, error),
    ),
    Effect.catchAll(() => Effect.succeed(null)),
  )
  const summary = await Effect.runPromise(summaryOrNull)

  if (!summary) {
    return {
      originalTokens,
      summaryTokens: originalTokens,
      reduction: 0,
      reductionPercent: 0,
    }
  }

  const { summaryTokens } = summary
  const reduction = originalTokens - summaryTokens

  return {
    originalTokens,
    summaryTokens,
    reduction,
    // Avoid dividing by zero for empty files
    reductionPercent: originalTokens > 0 ? reduction / originalTokens : 0,
  }
}