mdcontext 0.0.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (337) hide show
  1. package/.changeset/README.md +28 -0
  2. package/.changeset/config.json +11 -0
  3. package/.claude/settings.local.json +25 -0
  4. package/.github/workflows/ci.yml +83 -0
  5. package/.github/workflows/claude-code-review.yml +44 -0
  6. package/.github/workflows/claude.yml +85 -0
  7. package/.github/workflows/release.yml +113 -0
  8. package/.tldrignore +112 -0
  9. package/BACKLOG.md +338 -0
  10. package/CONTRIBUTING.md +186 -0
  11. package/NOTES/NOTES +44 -0
  12. package/README.md +434 -11
  13. package/biome.json +36 -0
  14. package/cspell.config.yaml +14 -0
  15. package/dist/chunk-23UPXDNL.js +3044 -0
  16. package/dist/chunk-2W7MO2DL.js +1366 -0
  17. package/dist/chunk-3NUAZGMA.js +1689 -0
  18. package/dist/chunk-7TOWB2XB.js +366 -0
  19. package/dist/chunk-7XOTOADQ.js +3065 -0
  20. package/dist/chunk-AH2PDM2K.js +3042 -0
  21. package/dist/chunk-BNXWSZ63.js +3742 -0
  22. package/dist/chunk-BTL5DJVU.js +3222 -0
  23. package/dist/chunk-HDHYG7E4.js +104 -0
  24. package/dist/chunk-HLR4KZBP.js +3234 -0
  25. package/dist/chunk-IP3FRFEB.js +1045 -0
  26. package/dist/chunk-KHU56VDO.js +3042 -0
  27. package/dist/chunk-KRYIFLQR.js +88 -0
  28. package/dist/chunk-LBSDNLEM.js +287 -0
  29. package/dist/chunk-MNTQ7HCP.js +2643 -0
  30. package/dist/chunk-MUJELQQ6.js +1387 -0
  31. package/dist/chunk-MXJGMSLV.js +2199 -0
  32. package/dist/chunk-N6QJGC3Z.js +2636 -0
  33. package/dist/chunk-OBELGBPM.js +1713 -0
  34. package/dist/chunk-OT7R5XTA.js +3192 -0
  35. package/dist/chunk-P7X4RA2T.js +106 -0
  36. package/dist/chunk-PIDUQNC2.js +3185 -0
  37. package/dist/chunk-POGCDIH4.js +3187 -0
  38. package/dist/chunk-PSIEOQGZ.js +3043 -0
  39. package/dist/chunk-PVRT3IHA.js +3238 -0
  40. package/dist/chunk-QNN4TT23.js +1430 -0
  41. package/dist/chunk-RE3R45RJ.js +3042 -0
  42. package/dist/chunk-S7E6TFX6.js +803 -0
  43. package/dist/chunk-SG6GLU4U.js +1378 -0
  44. package/dist/chunk-SJCDV2ST.js +274 -0
  45. package/dist/chunk-SYE5XLF3.js +104 -0
  46. package/dist/chunk-T5VLYBZD.js +103 -0
  47. package/dist/chunk-TOQB7VWU.js +3238 -0
  48. package/dist/chunk-VFNMZ4ZQ.js +3228 -0
  49. package/dist/chunk-VVTGZNBT.js +1629 -0
  50. package/dist/chunk-W7Q4RFEV.js +104 -0
  51. package/dist/chunk-XTYYVRLO.js +3190 -0
  52. package/dist/chunk-Y6MDYVJD.js +3063 -0
  53. package/dist/cli/main.d.ts +1 -0
  54. package/dist/cli/main.js +5458 -0
  55. package/dist/index.d.ts +653 -0
  56. package/dist/index.js +79 -0
  57. package/dist/mcp/server.d.ts +1 -0
  58. package/dist/mcp/server.js +472 -0
  59. package/dist/schema-BAWSG7KY.js +22 -0
  60. package/dist/schema-E3QUPL26.js +20 -0
  61. package/dist/schema-EHL7WUT6.js +20 -0
  62. package/docs/019-USAGE.md +625 -0
  63. package/docs/020-current-implementation.md +364 -0
  64. package/docs/021-DOGFOODING-FINDINGS.md +175 -0
  65. package/docs/BACKLOG.md +80 -0
  66. package/docs/CONFIG.md +1123 -0
  67. package/docs/DESIGN.md +439 -0
  68. package/docs/ERRORS.md +383 -0
  69. package/docs/PROJECT.md +88 -0
  70. package/docs/ROADMAP.md +407 -0
  71. package/docs/summarization.md +320 -0
  72. package/docs/test-links.md +9 -0
  73. package/justfile +40 -0
  74. package/package.json +74 -9
  75. package/pnpm-workspace.yaml +5 -0
  76. package/research/INDEX.md +315 -0
  77. package/research/code-review/README.md +90 -0
  78. package/research/code-review/cli-error-handling-review.md +979 -0
  79. package/research/code-review/code-review-validation-report.md +464 -0
  80. package/research/code-review/main-ts-review.md +1128 -0
  81. package/research/config-analysis/01-current-implementation.md +470 -0
  82. package/research/config-analysis/02-strategy-recommendation.md +428 -0
  83. package/research/config-analysis/03-task-candidates.md +715 -0
  84. package/research/config-analysis/033-research-configuration-management.md +828 -0
  85. package/research/config-analysis/034-research-effect-cli-config.md +1504 -0
  86. package/research/config-analysis/04-consolidated-task-candidates.md +277 -0
  87. package/research/config-docs/SUMMARY.md +357 -0
  88. package/research/config-docs/TEST-RESULTS.md +776 -0
  89. package/research/config-docs/TODO.md +542 -0
  90. package/research/config-docs/analysis.md +744 -0
  91. package/research/config-docs/fix-validation.md +502 -0
  92. package/research/config-docs/help-audit.md +264 -0
  93. package/research/config-docs/help-system-analysis.md +890 -0
  94. package/research/dogfood/consolidated-tool-evaluation.md +373 -0
  95. package/research/dogfood/strategy-a/a-synthesis.md +184 -0
  96. package/research/dogfood/strategy-a/a1-docs.md +226 -0
  97. package/research/dogfood/strategy-a/a2-amorphic.md +156 -0
  98. package/research/dogfood/strategy-a/a3-llm.md +164 -0
  99. package/research/dogfood/strategy-b/b-synthesis.md +228 -0
  100. package/research/dogfood/strategy-b/b1-architecture.md +207 -0
  101. package/research/dogfood/strategy-b/b2-gaps.md +258 -0
  102. package/research/dogfood/strategy-b/b3-workflows.md +250 -0
  103. package/research/dogfood/strategy-c/c-synthesis.md +451 -0
  104. package/research/dogfood/strategy-c/c1-explorer.md +192 -0
  105. package/research/dogfood/strategy-c/c2-diver-memory.md +145 -0
  106. package/research/dogfood/strategy-c/c3-diver-control.md +148 -0
  107. package/research/dogfood/strategy-c/c4-diver-failure.md +151 -0
  108. package/research/dogfood/strategy-c/c5-diver-execution.md +221 -0
  109. package/research/dogfood/strategy-c/c6-diver-org.md +221 -0
  110. package/research/effect-cli-error-handling.md +845 -0
  111. package/research/effect-errors-as-values.md +943 -0
  112. package/research/errors-task-analysis/00-consolidated-tasks.md +207 -0
  113. package/research/errors-task-analysis/cli-commands-analysis.md +909 -0
  114. package/research/errors-task-analysis/embeddings-analysis.md +709 -0
  115. package/research/errors-task-analysis/index-search-analysis.md +812 -0
  116. package/research/frontmatter/COMMENTS-ARE-SKIPPED.md +149 -0
  117. package/research/frontmatter/LLM-CODE-NAVIGATION.md +276 -0
  118. package/research/issue-review.md +603 -0
  119. package/research/llm-summarization/agent-cli-tools-2026.md +1082 -0
  120. package/research/llm-summarization/alternative-providers-2026.md +1428 -0
  121. package/research/llm-summarization/anthropic-2026.md +367 -0
  122. package/research/llm-summarization/claude-cli-integration.md +1706 -0
  123. package/research/llm-summarization/cli-integration-patterns.md +3155 -0
  124. package/research/llm-summarization/openai-2026.md +473 -0
  125. package/research/llm-summarization/openai-compatible-providers-2026.md +1022 -0
  126. package/research/llm-summarization/opencode-cli-integration.md +1552 -0
  127. package/research/llm-summarization/prompt-engineering-2026.md +1426 -0
  128. package/research/llm-summarization/prototype-results.md +56 -0
  129. package/research/llm-summarization/provider-switching-patterns-2026.md +2153 -0
  130. package/research/llm-summarization/typescript-llm-libraries-2026.md +2436 -0
  131. package/research/mdcontext-error-analysis.md +521 -0
  132. package/research/mdcontext-pudding/00-EXECUTIVE-SUMMARY.md +282 -0
  133. package/research/mdcontext-pudding/01-index-embed.md +956 -0
  134. package/research/mdcontext-pudding/02-search-COMMANDS.md +142 -0
  135. package/research/mdcontext-pudding/02-search-SUMMARY.md +146 -0
  136. package/research/mdcontext-pudding/02-search.md +970 -0
  137. package/research/mdcontext-pudding/03-context.md +779 -0
  138. package/research/mdcontext-pudding/04-navigation-and-analytics.md +803 -0
  139. package/research/mdcontext-pudding/04-tree.md +704 -0
  140. package/research/mdcontext-pudding/05-config.md +1038 -0
  141. package/research/mdcontext-pudding/06-links-summary.txt +87 -0
  142. package/research/mdcontext-pudding/06-links.md +679 -0
  143. package/research/mdcontext-pudding/07-stats.md +693 -0
  144. package/research/mdcontext-pudding/BUG-FIX-PLAN.md +388 -0
  145. package/research/mdcontext-pudding/P0-BUG-VALIDATION.md +167 -0
  146. package/research/mdcontext-pudding/README.md +168 -0
  147. package/research/mdcontext-pudding/TESTING-SUMMARY.md +128 -0
  148. package/research/npm_publish/011-npm-workflow-research-agent2.md +792 -0
  149. package/research/npm_publish/012-npm-workflow-research-agent1.md +530 -0
  150. package/research/npm_publish/013-npm-workflow-research-agent3.md +722 -0
  151. package/research/npm_publish/014-npm-workflow-synthesis.md +556 -0
  152. package/research/npm_publish/031-npm-workflow-task-analysis.md +134 -0
  153. package/research/research-quality-review.md +834 -0
  154. package/research/semantic-search/002-research-embedding-models.md +490 -0
  155. package/research/semantic-search/003-research-rag-alternatives.md +523 -0
  156. package/research/semantic-search/004-research-vector-search.md +841 -0
  157. package/research/semantic-search/032-research-semantic-search.md +427 -0
  158. package/research/semantic-search/embedding-text-analysis.md +156 -0
  159. package/research/semantic-search/multi-word-failure-reproduction.md +171 -0
  160. package/research/semantic-search/query-processing-analysis.md +207 -0
  161. package/research/semantic-search/root-cause-and-solution.md +114 -0
  162. package/research/semantic-search/threshold-validation-report.md +69 -0
  163. package/research/semantic-search/vector-search-analysis.md +63 -0
  164. package/research/task-management-2026/00-synthesis-recommendations.md +295 -0
  165. package/research/task-management-2026/01-ai-workflow-tools.md +416 -0
  166. package/research/task-management-2026/02-agent-framework-patterns.md +476 -0
  167. package/research/task-management-2026/03-lightweight-file-based.md +567 -0
  168. package/research/task-management-2026/04-established-tools-ai-features.md +541 -0
  169. package/research/task-management-2026/linear/01-core-features-workflow.md +771 -0
  170. package/research/task-management-2026/linear/02-api-integrations.md +930 -0
  171. package/research/task-management-2026/linear/03-ai-features.md +368 -0
  172. package/research/task-management-2026/linear/04-pricing-setup.md +205 -0
  173. package/research/task-management-2026/linear/05-usage-patterns-best-practices.md +605 -0
  174. package/research/test-path-issues.md +276 -0
  175. package/review/ALP-76/1-error-type-design.md +962 -0
  176. package/review/ALP-76/2-error-handling-patterns.md +906 -0
  177. package/review/ALP-76/3-error-presentation.md +624 -0
  178. package/review/ALP-76/4-test-coverage.md +625 -0
  179. package/review/ALP-76/5-migration-completeness.md +440 -0
  180. package/review/ALP-76/6-effect-best-practices.md +755 -0
  181. package/scripts/apply-branch-protection.sh +47 -0
  182. package/scripts/branch-protection-templates.json +79 -0
  183. package/scripts/prototype-summarization.ts +346 -0
  184. package/scripts/rebuild-hnswlib.js +58 -0
  185. package/scripts/setup-branch-protection.sh +64 -0
  186. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/active-provider.json +7 -0
  187. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.json +541 -0
  188. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.meta.json +5 -0
  189. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/config.json +8 -0
  190. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
  191. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
  192. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/documents.json +60 -0
  193. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/links.json +13 -0
  194. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/sections.json +1197 -0
  195. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/configuration-management.md +99 -0
  196. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/distributed-systems.md +92 -0
  197. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/error-handling.md +78 -0
  198. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/failure-automation.md +55 -0
  199. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/job-context.md +69 -0
  200. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/process-orchestration.md +99 -0
  201. package/src/cli/argv-preprocessor.test.ts +210 -0
  202. package/src/cli/argv-preprocessor.ts +202 -0
  203. package/src/cli/cli.test.ts +627 -0
  204. package/src/cli/commands/backlinks.ts +54 -0
  205. package/src/cli/commands/config-cmd.ts +642 -0
  206. package/src/cli/commands/context.ts +285 -0
  207. package/src/cli/commands/duplicates.ts +122 -0
  208. package/src/cli/commands/embeddings.ts +529 -0
  209. package/src/cli/commands/index-cmd.ts +480 -0
  210. package/src/cli/commands/index.ts +16 -0
  211. package/src/cli/commands/links.ts +52 -0
  212. package/src/cli/commands/search.ts +1281 -0
  213. package/src/cli/commands/stats.ts +149 -0
  214. package/src/cli/commands/tree.ts +128 -0
  215. package/src/cli/config-layer.ts +176 -0
  216. package/src/cli/error-handler.test.ts +235 -0
  217. package/src/cli/error-handler.ts +655 -0
  218. package/src/cli/flag-schemas.ts +341 -0
  219. package/src/cli/help.ts +588 -0
  220. package/src/cli/index.ts +9 -0
  221. package/src/cli/main.ts +435 -0
  222. package/src/cli/options.ts +41 -0
  223. package/src/cli/shared-error-handling.ts +199 -0
  224. package/src/cli/typo-suggester.test.ts +105 -0
  225. package/src/cli/typo-suggester.ts +130 -0
  226. package/src/cli/utils.ts +259 -0
  227. package/src/config/file-provider.test.ts +320 -0
  228. package/src/config/file-provider.ts +273 -0
  229. package/src/config/index.ts +72 -0
  230. package/src/config/integration.test.ts +667 -0
  231. package/src/config/precedence.test.ts +277 -0
  232. package/src/config/precedence.ts +451 -0
  233. package/src/config/schema.test.ts +414 -0
  234. package/src/config/schema.ts +603 -0
  235. package/src/config/service.test.ts +320 -0
  236. package/src/config/service.ts +243 -0
  237. package/src/config/testing.test.ts +264 -0
  238. package/src/config/testing.ts +110 -0
  239. package/src/core/index.ts +1 -0
  240. package/src/core/types.ts +113 -0
  241. package/src/duplicates/detector.test.ts +183 -0
  242. package/src/duplicates/detector.ts +414 -0
  243. package/src/duplicates/index.ts +18 -0
  244. package/src/embeddings/embedding-namespace.test.ts +300 -0
  245. package/src/embeddings/embedding-namespace.ts +947 -0
  246. package/src/embeddings/heading-boost.test.ts +222 -0
  247. package/src/embeddings/hnsw-build-options.test.ts +198 -0
  248. package/src/embeddings/hyde.test.ts +272 -0
  249. package/src/embeddings/hyde.ts +264 -0
  250. package/src/embeddings/index.ts +10 -0
  251. package/src/embeddings/openai-provider.ts +414 -0
  252. package/src/embeddings/pricing.json +22 -0
  253. package/src/embeddings/provider-constants.ts +204 -0
  254. package/src/embeddings/provider-errors.test.ts +967 -0
  255. package/src/embeddings/provider-errors.ts +565 -0
  256. package/src/embeddings/provider-factory.test.ts +240 -0
  257. package/src/embeddings/provider-factory.ts +225 -0
  258. package/src/embeddings/provider-integration.test.ts +788 -0
  259. package/src/embeddings/query-preprocessing.test.ts +187 -0
  260. package/src/embeddings/semantic-search-threshold.test.ts +508 -0
  261. package/src/embeddings/semantic-search.ts +1270 -0
  262. package/src/embeddings/types.ts +359 -0
  263. package/src/embeddings/vector-store.ts +708 -0
  264. package/src/embeddings/voyage-provider.ts +313 -0
  265. package/src/errors/errors.test.ts +845 -0
  266. package/src/errors/index.ts +533 -0
  267. package/src/index/ignore-patterns.test.ts +354 -0
  268. package/src/index/ignore-patterns.ts +305 -0
  269. package/src/index/index.ts +4 -0
  270. package/src/index/indexer.ts +684 -0
  271. package/src/index/storage.ts +260 -0
  272. package/src/index/types.ts +147 -0
  273. package/src/index/watcher.ts +189 -0
  274. package/src/index.ts +30 -0
  275. package/src/integration/search-keyword.test.ts +678 -0
  276. package/src/mcp/server.ts +612 -0
  277. package/src/parser/index.ts +1 -0
  278. package/src/parser/parser.test.ts +291 -0
  279. package/src/parser/parser.ts +394 -0
  280. package/src/parser/section-filter.test.ts +277 -0
  281. package/src/parser/section-filter.ts +392 -0
  282. package/src/search/__tests__/hybrid-search.test.ts +650 -0
  283. package/src/search/bm25-store.ts +366 -0
  284. package/src/search/cross-encoder.test.ts +253 -0
  285. package/src/search/cross-encoder.ts +406 -0
  286. package/src/search/fuzzy-search.test.ts +419 -0
  287. package/src/search/fuzzy-search.ts +273 -0
  288. package/src/search/hybrid-search.ts +448 -0
  289. package/src/search/path-matcher.test.ts +276 -0
  290. package/src/search/path-matcher.ts +33 -0
  291. package/src/search/query-parser.test.ts +260 -0
  292. package/src/search/query-parser.ts +319 -0
  293. package/src/search/searcher.test.ts +280 -0
  294. package/src/search/searcher.ts +724 -0
  295. package/src/search/wink-bm25.d.ts +30 -0
  296. package/src/summarization/cli-providers/claude.ts +202 -0
  297. package/src/summarization/cli-providers/detection.test.ts +273 -0
  298. package/src/summarization/cli-providers/detection.ts +118 -0
  299. package/src/summarization/cli-providers/index.ts +8 -0
  300. package/src/summarization/cost.test.ts +139 -0
  301. package/src/summarization/cost.ts +102 -0
  302. package/src/summarization/error-handler.test.ts +127 -0
  303. package/src/summarization/error-handler.ts +111 -0
  304. package/src/summarization/index.ts +102 -0
  305. package/src/summarization/pipeline.test.ts +498 -0
  306. package/src/summarization/pipeline.ts +231 -0
  307. package/src/summarization/prompts.test.ts +269 -0
  308. package/src/summarization/prompts.ts +133 -0
  309. package/src/summarization/provider-factory.test.ts +396 -0
  310. package/src/summarization/provider-factory.ts +178 -0
  311. package/src/summarization/types.ts +184 -0
  312. package/src/summarize/budget-bugs.test.ts +620 -0
  313. package/src/summarize/formatters.ts +419 -0
  314. package/src/summarize/index.ts +20 -0
  315. package/src/summarize/summarizer.test.ts +275 -0
  316. package/src/summarize/summarizer.ts +597 -0
  317. package/src/summarize/verify-bugs.test.ts +238 -0
  318. package/src/types/huggingface-transformers.d.ts +66 -0
  319. package/src/utils/index.ts +1 -0
  320. package/src/utils/tokens.test.ts +142 -0
  321. package/src/utils/tokens.ts +186 -0
  322. package/tests/fixtures/cli/.mdcontext/active-provider.json +7 -0
  323. package/tests/fixtures/cli/.mdcontext/config.json +8 -0
  324. package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
  325. package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
  326. package/tests/fixtures/cli/.mdcontext/indexes/documents.json +33 -0
  327. package/tests/fixtures/cli/.mdcontext/indexes/links.json +12 -0
  328. package/tests/fixtures/cli/.mdcontext/indexes/sections.json +247 -0
  329. package/tests/fixtures/cli/README.md +9 -0
  330. package/tests/fixtures/cli/api-reference.md +11 -0
  331. package/tests/fixtures/cli/getting-started.md +11 -0
  332. package/tests/integration/embed-index.test.ts +712 -0
  333. package/tests/integration/search-context.test.ts +469 -0
  334. package/tests/integration/search-semantic.test.ts +522 -0
  335. package/tsconfig.json +26 -0
  336. package/vitest.config.ts +16 -0
  337. package/vitest.setup.ts +12 -0
@@ -0,0 +1,419 @@
1
+ /**
2
+ * Formatting functions for summarization output
3
+ *
4
+ * Responsible for converting summary data structures into human-readable text
5
+ */
6
+
7
+ import { countTokensApprox } from '../utils/tokens.js'
8
+ import type {
9
+ AssembledContext,
10
+ DocumentSummary,
11
+ SectionSummary,
12
+ } from './summarizer.js'
13
+
14
/**
 * Options accepted by `formatSummary`.
 */
export interface FormatSummaryOptions {
  /**
   * Upper bound, in tokens, for the formatted output. When the formatted
   * summary would exceed it, sections are left out. Omit it (or pass
   * `undefined`) to format every section.
   */
  readonly maxTokens?: number | undefined
}
18
+
19
/**
 * Format a document summary for display
 *
 * Outputs a markdown-formatted summary with:
 * - Title and path
 * - A token line whose count is measured on the formatted output itself
 * - Key topics
 * - Section summaries, flattened depth-first (children follow their parent)
 *
 * When `options.maxTokens` is set, sections are selected greedily in document
 * order using conservative (+15%) estimates. The result is then re-measured
 * and trimmed from the end until it fits. If it is still too large, the
 * topics line and finally the truncation warning are dropped. The title, path
 * and token line are never removed, so a budget smaller than that header can
 * still be exceeded.
 *
 * TRUNCATION UX: When truncated, shows a warning at the TOP with:
 * - Percentage of tokens shown
 * - List of sections included/excluded (in document order)
 * - Actionable guidance for getting more content
 *
 * @param summary - Document summary to render.
 * @param options - Optional formatting options; see `maxTokens`.
 * @returns The formatted summary text.
 */
export const formatSummary = (
  summary: DocumentSummary,
  options: FormatSummaryOptions = {},
): string => {
  const maxTokens = options.maxTokens

  type FlatSection = {
    section: SectionSummary
    depth: number
    number: string
  }

  type TruncationInfo = {
    showWarning: boolean
    truncatedCount: number
    includedNumbers: readonly string[]
    excludedNumbers: readonly string[]
    tokensShown: number
    tokensTotal: number
  }

  // Flatten sections depth-first so children follow parents - this enables
  // "orphan rescue", where a child can still be included even if its parent
  // was too large. Each entry carries a dotted number ("2.1") for the
  // included/excluded listing.
  const flatSections: FlatSection[] = []

  const collectSections = (
    section: SectionSummary,
    depth: number = 0,
    parentNumber: string = '',
    index: number = 0,
  ): void => {
    const number = parentNumber
      ? `${parentNumber}.${index + 1}`
      : `${index + 1}`
    flatSections.push({ section, depth, number })
    section.children.forEach((child, i) => {
      collectSections(child, depth + 1, number, i)
    })
  }

  summary.sections.forEach((section, i) => {
    collectSections(section, 0, '', i)
  })

  // Token line format: "Tokens: XXX (YY% reduction from ZZZ)"
  const tokenLine = (count: number): string =>
    `Tokens: ${count} (${(summary.compressionRatio * 100).toFixed(0)}% reduction from ${summary.originalTokens})`

  // Single source of truth for how a section is rendered, used both for
  // budget estimation and for the final output so the two cannot drift.
  const renderSection = (section: SectionSummary, depth: number): string[] => {
    const indent = ' '.repeat(depth)
    const headingLine = `${indent}${'#'.repeat(section.level)} ${section.heading}`
    return section.summary
      ? [headingLine, `${indent}${section.summary}`]
      : [headingLine]
  }

  // Lists of up to six numbers are shown in full; longer lists show the
  // first five followed by a "+N more" marker.
  const abbreviate = (numbers: readonly string[]): string =>
    numbers.length <= 6
      ? numbers.join(', ')
      : numbers.slice(0, 5).join(', ') +
        `, ... (+${numbers.length - 5} more)`

  // Build the full output for a given set of section indices.
  const buildOutput = (
    includedSectionIndices: Set<number>,
    truncationInfo: TruncationInfo,
    includeTopics: boolean,
  ): string => {
    const lines: string[] = []

    // TRUNCATION WARNING AT TOP (when truncated)
    if (
      truncationInfo.showWarning &&
      truncationInfo.truncatedCount > 0 &&
      truncationInfo.tokensTotal > 0
    ) {
      const pct = Math.round(
        (truncationInfo.tokensShown / truncationInfo.tokensTotal) * 100,
      )
      lines.push(
        `⚠️ Truncated: Showing ~${truncationInfo.tokensShown}/${truncationInfo.tokensTotal} tokens (${pct}%)`,
      )

      if (truncationInfo.includedNumbers.length > 0) {
        lines.push(
          `Sections included: ${abbreviate(truncationInfo.includedNumbers)}`,
        )
      }

      if (truncationInfo.excludedNumbers.length > 0) {
        lines.push(
          `Sections excluded: ${abbreviate(truncationInfo.excludedNumbers)}`,
        )
      }

      lines.push(
        'Use --full for complete content or --section to target specific sections.',
      )
      lines.push('')
    }

    lines.push(`# ${summary.title}`)
    lines.push(`Path: ${summary.path}`)

    // Reserve the slot for the token line; it is filled in by index once the
    // rest of the output is known. Substituting by index (instead of a
    // string search for a marker) cannot hit a look-alike in title or path.
    const tokenLineIndex = lines.length
    lines.push('')
    lines.push('')

    const fullTopicsLine =
      summary.keyTopics.length > 0
        ? `**Topics:** ${summary.keyTopics.join(', ')}`
        : ''

    if (includeTopics && fullTopicsLine) {
      lines.push(fullTopicsLine)
      lines.push('')
    }

    // Section content, in document order
    const sectionLines: string[] = []
    flatSections.forEach(({ section, depth }, i) => {
      if (includedSectionIndices.has(i)) {
        sectionLines.push(...renderSection(section, depth))
      }
    })
    lines.push(sectionLines.join('\n'))

    // Token count of everything except the token line itself
    const tokensWithoutLine = countTokensApprox(lines.join('\n'))

    // The token line itself adds tokens - iterate to find a stable count
    let estimatedTotal = tokensWithoutLine + 8 // Initial estimate for token line
    for (let iter = 0; iter < 3; iter++) {
      lines[tokenLineIndex] = tokenLine(estimatedTotal)
      const actualTotal = countTokensApprox(lines.join('\n'))
      if (actualTotal === estimatedTotal) break
      estimatedTotal = actualTotal
    }

    // Final token line with the converged (or last measured) count
    lines[tokenLineIndex] = tokenLine(estimatedTotal)

    return lines.join('\n')
  }

  // If no budget, include everything
  if (maxTokens === undefined) {
    const allIndices = new Set(flatSections.map((_, i) => i))
    const hasPriorTruncation = summary.truncated && summary.truncatedCount
    return buildOutput(
      allIndices,
      {
        showWarning: !!hasPriorTruncation,
        truncatedCount: summary.truncatedCount ?? 0,
        includedNumbers: flatSections.map((s) => s.number),
        excludedNumbers: [],
        tokensShown: summary.summaryTokens,
        tokensTotal: summary.originalTokens,
      },
      true,
    )
  }

  // With budget: estimate what fits, then validate and trim if needed.
  // Every estimate carries a 15% safety margin.
  const SAFETY_MARGIN = 1.15

  // Minimum header overhead (title, path, token line)
  const minHeaderTemplate = [
    `# ${summary.title}`,
    `Path: ${summary.path}`,
    tokenLine(9999),
    '',
    '',
  ].join('\n')
  const minHeaderTokens = Math.ceil(
    countTokensApprox(minHeaderTemplate) * SAFETY_MARGIN,
  )

  // Topics overhead
  const topicsTemplate =
    summary.keyTopics.length > 0
      ? `**Topics:** ${summary.keyTopics.join(', ')}\n`
      : ''
  const topicsTokens = topicsTemplate
    ? Math.ceil(countTokensApprox(topicsTemplate) * SAFETY_MARGIN)
    : 0

  // Truncation warning overhead (including the section lists)
  const truncationWarningTokens = Math.ceil(
    countTokensApprox(
      `⚠️ Truncated: Showing ~9999/9999 tokens (99%)\nSections included: 1, 2, 3, 4, 5, ... (+99 more)\nSections excluded: 6, 7, 8, 9, 10, ... (+99 more)\nUse --full for complete content or --section to target specific sections.\n`,
    ) * SAFETY_MARGIN,
  )

  // Start with header + topics; if that alone reaches the budget, drop topics
  let includeTopics = true
  let headerTokens = minHeaderTokens + topicsTokens
  if (headerTokens >= maxTokens) {
    includeTopics = false
    headerTokens = minHeaderTokens
  }

  const sectionCosts = flatSections.map(({ section, depth }) =>
    Math.ceil(
      countTokensApprox(renderSection(section, depth).join('\n')) *
        SAFETY_MARGIN,
    ),
  )

  // Greedy selection in document order: take every section that still fits.
  const selectWithin = (budget: number): Set<number> => {
    const chosen = new Set<number>()
    let used = 0
    sectionCosts.forEach((cost, i) => {
      if (used + cost <= budget) {
        chosen.add(i)
        used += cost
      }
    })
    return chosen
  }

  // The truncation warning is only rendered when something is left out, so
  // first try without reserving room for it. Only when not everything fits
  // do we redo the selection with the warning's space set aside.
  const unreservedBudget = maxTokens - headerTokens
  const unreservedSelection = selectWithin(unreservedBudget)
  const includedIndices =
    unreservedSelection.size === flatSections.length
      ? unreservedSelection
      : selectWithin(unreservedBudget - truncationWarningTokens)

  const isTruncated = (): boolean => includedIndices.size < flatSections.length

  // Derive the warning data from the current selection so the included and
  // excluded lists always stay in document order, even after trimming.
  const truncationInfoFor = (showWarning: boolean): TruncationInfo => {
    const includedNumbers: string[] = []
    const excludedNumbers: string[] = []
    let tokensShown = 0
    flatSections.forEach(({ section, number }, i) => {
      if (includedIndices.has(i)) {
        includedNumbers.push(number)
        tokensShown += section.summaryTokens
      } else {
        excludedNumbers.push(number)
      }
    })
    return {
      showWarning,
      truncatedCount: excludedNumbers.length,
      includedNumbers,
      excludedNumbers,
      tokensShown,
      tokensTotal: summary.originalTokens,
    }
  }

  const render = (showWarning: boolean): string =>
    buildOutput(includedIndices, truncationInfoFor(showWarning), includeTopics)

  // Build output and validate it fits
  let output = render(isTruncated())
  let actualTokens = countTokensApprox(output)

  // Final validation: remove sections from the end until we fit.
  // This handles any estimation errors.
  const removalOrder = Array.from(includedIndices).sort((a, b) => b - a)
  for (const indexToRemove of removalOrder) {
    if (actualTokens <= maxTokens) break
    includedIndices.delete(indexToRemove)
    output = render(true)
    actualTokens = countTokensApprox(output)
  }

  // If still over budget and we haven't dropped topics yet, try that
  if (actualTokens > maxTokens && includeTopics) {
    includeTopics = false
    output = render(isTruncated())
    actualTokens = countTokensApprox(output)
  }

  // Last resort: drop the truncation warning itself
  if (actualTokens > maxTokens && isTruncated()) {
    output = render(false)
  }

  return output
}
386
+
387
/**
 * Render an assembled context as a single text block.
 *
 * Layout, top to bottom:
 * - a "# Context Assembly" header with `totalTokens/budget` and the source count
 * - each source's `content`, preceded by a `---` divider and a blank line
 * - when `overflow` is non-empty, a final divider and a bulleted list of the
 *   overflow entries under an "Overflow" heading
 *
 * @param context - The assembled context to render.
 * @returns The rendered text, lines joined with `\n`.
 */
export const formatAssembledContext = (context: AssembledContext): string => {
  const out: string[] = [
    '# Context Assembly',
    `Total tokens: ${context.totalTokens}/${context.budget}`,
    `Sources: ${context.sources.length}`,
    '',
  ]

  for (const { content } of context.sources) {
    out.push('---', '', content)
  }

  // Nothing overflowed: the output ends after the last source.
  if (context.overflow.length === 0) {
    return out.join('\n')
  }

  out.push('---', '', '## Overflow (not included due to budget)')
  for (const overflowPath of context.overflow) {
    out.push(`- ${overflowPath}`)
  }

  return out.join('\n')
}
@@ -0,0 +1,20 @@
1
+ /**
2
+ * Summarization module exports
3
+ */
4
+
5
+ export {
6
+ type AssembleContextOptions,
7
+ type AssembledContext,
8
+ assembleContext,
9
+ type CompressionLevel,
10
+ type DocumentSummary,
11
+ formatAssembledContext,
12
+ formatSummary,
13
+ measureReduction,
14
+ type SectionSummary,
15
+ type SourceContext,
16
+ type SummarizeOptions,
17
+ summarizeDocument,
18
+ summarizeFile,
19
+ type TokenReductionReport,
20
+ } from './summarizer.js'
@@ -0,0 +1,275 @@
1
+ /**
2
+ * Tests for summarization engine
3
+ *
4
+ * Focuses on token count accuracy - ensuring displayed counts match actual output
5
+ */
6
+
7
+ import { describe, expect, it } from 'vitest'
8
+ import { countTokensApprox } from '../utils/tokens.js'
9
+ import { formatSummary } from './formatters.js'
10
+ import type { DocumentSummary, SectionSummary } from './summarizer.js'
11
+
12
/** Captures the numeric token count from a `Tokens: <digits>` fragment in formatted output. */
const DISPLAYED_TOKENS_PATTERN = /Tokens: (\d+)/

/**
 * Build a `SectionSummary` fixture.
 *
 * Defaults describe a level-2 leaf section with no code, list, or table;
 * callers override whatever the individual test cares about.
 */
const makeSection = (
  overrides: Partial<SectionSummary> = {},
): SectionSummary => ({
  heading: 'Section 1',
  level: 2,
  originalTokens: 200,
  summaryTokens: 100,
  summary: '',
  children: [],
  hasCode: false,
  hasList: false,
  hasTable: false,
  ...overrides,
})

/**
 * Build a `DocumentSummary` fixture.
 *
 * Defaults describe a document at `/test/file.md` with no sections and no
 * key topics; callers override whatever the individual test cares about.
 */
const makeSummary = (
  overrides: Partial<DocumentSummary> = {},
): DocumentSummary => ({
  path: '/test/file.md',
  title: 'Test',
  originalTokens: 1000,
  summaryTokens: 100,
  compressionRatio: 0.9,
  sections: [],
  keyTopics: [],
  ...overrides,
})

/**
 * Extract the token count displayed in formatted output.
 *
 * Fails the current test (via `expect`) when no `Tokens: <n>` fragment is
 * present, so callers never need a non-null assertion on the match.
 */
const readDisplayedTokens = (output: string): number => {
  const match = DISPLAYED_TOKENS_PATTERN.exec(output)
  expect(match).toBeTruthy()
  return Number.parseInt(match?.[1] ?? '', 10)
}

describe('summarizer token counting', () => {
  describe('formatSummary token accuracy', () => {
    it('displays token count matching actual output', () => {
      const mockSummary = makeSummary({
        title: 'Test Document',
        originalTokens: 1000,
        summaryTokens: 100, // This is the pre-format count
        compressionRatio: 0.9,
        sections: [
          makeSection({
            heading: 'Section 1',
            originalTokens: 500,
            summaryTokens: 50,
            summary: 'This is the summary of section 1.',
          }),
        ],
        keyTopics: ['topic1', 'topic2'],
      })

      const output = formatSummary(mockSummary)
      const displayedTokens = readDisplayedTokens(output)
      const actualTokens = countTokensApprox(output)

      // The displayed count should be close to actual: within 10%, with a
      // floor of 5 tokens because the token line itself adds tokens.
      const tolerance = Math.max(actualTokens * 0.1, 5)
      expect(Math.abs(displayedTokens - actualTokens)).toBeLessThan(tolerance)
    })

    it('handles document with no topics', () => {
      const mockSummary = makeSummary({
        title: 'Simple Doc',
        originalTokens: 100,
        summaryTokens: 50,
        compressionRatio: 0.5,
      })

      const output = formatSummary(mockSummary)

      // Should not contain Topics line
      expect(output).not.toContain('**Topics:**')

      // Token count should still be accurate
      const displayedTokens = readDisplayedTokens(output)
      const actualTokens = countTokensApprox(output)

      expect(Math.abs(displayedTokens - actualTokens)).toBeLessThan(5)
    })

    it('handles nested sections', () => {
      const childSection = makeSection({
        heading: 'Child Section',
        level: 3,
        originalTokens: 100,
        summaryTokens: 20,
        summary: 'Child summary content.',
        hasCode: true,
      })

      const mockSummary = makeSummary({
        path: '/test/nested.md',
        title: 'Nested Document',
        originalTokens: 500,
        summaryTokens: 100,
        compressionRatio: 0.8,
        sections: [
          makeSection({
            heading: 'Parent Section',
            originalTokens: 300,
            summaryTokens: 60,
            summary: 'Parent summary content.',
            children: [childSection],
            hasList: true,
          }),
        ],
        keyTopics: ['parent', 'child'],
      })

      const output = formatSummary(mockSummary)

      // Verify structure is present
      expect(output).toContain('## Parent Section')
      expect(output).toContain('### Child Section')

      // Token count should still be accurate
      const displayedTokens = readDisplayedTokens(output)
      const actualTokens = countTokensApprox(output)

      // Allow slightly more tolerance (15%) for nested content
      const tolerance = Math.max(actualTokens * 0.15, 5)
      expect(Math.abs(displayedTokens - actualTokens)).toBeLessThan(tolerance)
    })

    it('includes compression ratio in output', () => {
      const mockSummary = makeSummary({
        title: 'Test',
        originalTokens: 1000,
        summaryTokens: 200,
        compressionRatio: 0.8, // 80% reduction
      })

      const output = formatSummary(mockSummary)

      // Should show 80% reduction
      expect(output).toContain('80% reduction')
      expect(output).toContain('from 1000')
    })

    it('respects maxTokens budget', () => {
      const mockSummary = makeSummary({
        title: 'Test Document',
        originalTokens: 1000,
        summaryTokens: 500,
        compressionRatio: 0.5,
        sections: [
          makeSection({
            heading: 'Section 1',
            originalTokens: 200,
            summaryTokens: 100,
            summary:
              'This is a longer summary that contains many words to test token budget enforcement.',
          }),
          makeSection({
            heading: 'Section 2',
            originalTokens: 200,
            summaryTokens: 100,
            summary: 'Another section with substantial content for testing.',
          }),
        ],
        keyTopics: ['test', 'budget'],
      })

      const output = formatSummary(mockSummary, { maxTokens: 100 })
      const actualTokens = countTokensApprox(output)

      // Output should stay within budget
      expect(actualTokens).toBeLessThanOrEqual(100)
    })

    it('shows truncation warning when sections are omitted', () => {
      const mockSummary = makeSummary({
        title: 'Test',
        originalTokens: 1000,
        summaryTokens: 500,
        compressionRatio: 0.5,
        sections: [
          makeSection({
            heading: 'Section 1',
            originalTokens: 200,
            summaryTokens: 100,
            summary: 'Long content '.repeat(50),
          }),
        ],
      })

      // Increased budget to account for enhanced truncation warning with section lists
      const output = formatSummary(mockSummary, { maxTokens: 150 })

      // Should show the truncation warning. `toContain` is case-sensitive,
      // so this requires the capitalised word "Truncated" in the output.
      expect(output).toContain('Truncated')
    })
  })

  describe('token budget edge cases', () => {
    it('handles very tight budget gracefully', () => {
      const mockSummary = makeSummary({
        title: 'Test',
        originalTokens: 100,
        summaryTokens: 50,
        compressionRatio: 0.5,
        sections: [
          makeSection({
            heading: 'Section',
            originalTokens: 50,
            summaryTokens: 25,
            summary: 'Content',
          }),
        ],
      })

      // Should not throw with very tight budget, and should still emit
      // non-empty output
      const output = formatSummary(mockSummary, { maxTokens: 30 })
      expect(output).toBeTruthy()
    })

    it('handles long file paths in overhead calculation', () => {
      const mockSummary = makeSummary({
        path: '/very/long/path/to/some/deeply/nested/directory/structure/file.md',
        title: 'A Very Long Document Title That Takes Up Many Tokens',
        originalTokens: 1000,
        summaryTokens: 100,
        compressionRatio: 0.9,
        keyTopics: [
          'topic1',
          'topic2',
          'topic3',
          'topic4',
          'topic5',
          'another-long-topic',
        ],
      })

      const output = formatSummary(mockSummary, { maxTokens: 200 })
      const actualTokens = countTokensApprox(output)

      // Should stay within budget even with long paths/titles
      expect(actualTokens).toBeLessThanOrEqual(200)
    })
  })
})