mdcontext 0.0.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.changeset/README.md +28 -0
- package/.changeset/config.json +11 -0
- package/.claude/settings.local.json +25 -0
- package/.github/workflows/ci.yml +83 -0
- package/.github/workflows/claude-code-review.yml +44 -0
- package/.github/workflows/claude.yml +85 -0
- package/.github/workflows/release.yml +113 -0
- package/.tldrignore +112 -0
- package/BACKLOG.md +338 -0
- package/CONTRIBUTING.md +186 -0
- package/NOTES/NOTES +44 -0
- package/README.md +434 -11
- package/biome.json +36 -0
- package/cspell.config.yaml +14 -0
- package/dist/chunk-23UPXDNL.js +3044 -0
- package/dist/chunk-2W7MO2DL.js +1366 -0
- package/dist/chunk-3NUAZGMA.js +1689 -0
- package/dist/chunk-7TOWB2XB.js +366 -0
- package/dist/chunk-7XOTOADQ.js +3065 -0
- package/dist/chunk-AH2PDM2K.js +3042 -0
- package/dist/chunk-BNXWSZ63.js +3742 -0
- package/dist/chunk-BTL5DJVU.js +3222 -0
- package/dist/chunk-HDHYG7E4.js +104 -0
- package/dist/chunk-HLR4KZBP.js +3234 -0
- package/dist/chunk-IP3FRFEB.js +1045 -0
- package/dist/chunk-KHU56VDO.js +3042 -0
- package/dist/chunk-KRYIFLQR.js +88 -0
- package/dist/chunk-LBSDNLEM.js +287 -0
- package/dist/chunk-MNTQ7HCP.js +2643 -0
- package/dist/chunk-MUJELQQ6.js +1387 -0
- package/dist/chunk-MXJGMSLV.js +2199 -0
- package/dist/chunk-N6QJGC3Z.js +2636 -0
- package/dist/chunk-OBELGBPM.js +1713 -0
- package/dist/chunk-OT7R5XTA.js +3192 -0
- package/dist/chunk-P7X4RA2T.js +106 -0
- package/dist/chunk-PIDUQNC2.js +3185 -0
- package/dist/chunk-POGCDIH4.js +3187 -0
- package/dist/chunk-PSIEOQGZ.js +3043 -0
- package/dist/chunk-PVRT3IHA.js +3238 -0
- package/dist/chunk-QNN4TT23.js +1430 -0
- package/dist/chunk-RE3R45RJ.js +3042 -0
- package/dist/chunk-S7E6TFX6.js +803 -0
- package/dist/chunk-SG6GLU4U.js +1378 -0
- package/dist/chunk-SJCDV2ST.js +274 -0
- package/dist/chunk-SYE5XLF3.js +104 -0
- package/dist/chunk-T5VLYBZD.js +103 -0
- package/dist/chunk-TOQB7VWU.js +3238 -0
- package/dist/chunk-VFNMZ4ZQ.js +3228 -0
- package/dist/chunk-VVTGZNBT.js +1629 -0
- package/dist/chunk-W7Q4RFEV.js +104 -0
- package/dist/chunk-XTYYVRLO.js +3190 -0
- package/dist/chunk-Y6MDYVJD.js +3063 -0
- package/dist/cli/main.d.ts +1 -0
- package/dist/cli/main.js +5458 -0
- package/dist/index.d.ts +653 -0
- package/dist/index.js +79 -0
- package/dist/mcp/server.d.ts +1 -0
- package/dist/mcp/server.js +472 -0
- package/dist/schema-BAWSG7KY.js +22 -0
- package/dist/schema-E3QUPL26.js +20 -0
- package/dist/schema-EHL7WUT6.js +20 -0
- package/docs/019-USAGE.md +625 -0
- package/docs/020-current-implementation.md +364 -0
- package/docs/021-DOGFOODING-FINDINGS.md +175 -0
- package/docs/BACKLOG.md +80 -0
- package/docs/CONFIG.md +1123 -0
- package/docs/DESIGN.md +439 -0
- package/docs/ERRORS.md +383 -0
- package/docs/PROJECT.md +88 -0
- package/docs/ROADMAP.md +407 -0
- package/docs/summarization.md +320 -0
- package/docs/test-links.md +9 -0
- package/justfile +40 -0
- package/package.json +74 -9
- package/pnpm-workspace.yaml +5 -0
- package/research/INDEX.md +315 -0
- package/research/code-review/README.md +90 -0
- package/research/code-review/cli-error-handling-review.md +979 -0
- package/research/code-review/code-review-validation-report.md +464 -0
- package/research/code-review/main-ts-review.md +1128 -0
- package/research/config-analysis/01-current-implementation.md +470 -0
- package/research/config-analysis/02-strategy-recommendation.md +428 -0
- package/research/config-analysis/03-task-candidates.md +715 -0
- package/research/config-analysis/033-research-configuration-management.md +828 -0
- package/research/config-analysis/034-research-effect-cli-config.md +1504 -0
- package/research/config-analysis/04-consolidated-task-candidates.md +277 -0
- package/research/config-docs/SUMMARY.md +357 -0
- package/research/config-docs/TEST-RESULTS.md +776 -0
- package/research/config-docs/TODO.md +542 -0
- package/research/config-docs/analysis.md +744 -0
- package/research/config-docs/fix-validation.md +502 -0
- package/research/config-docs/help-audit.md +264 -0
- package/research/config-docs/help-system-analysis.md +890 -0
- package/research/dogfood/consolidated-tool-evaluation.md +373 -0
- package/research/dogfood/strategy-a/a-synthesis.md +184 -0
- package/research/dogfood/strategy-a/a1-docs.md +226 -0
- package/research/dogfood/strategy-a/a2-amorphic.md +156 -0
- package/research/dogfood/strategy-a/a3-llm.md +164 -0
- package/research/dogfood/strategy-b/b-synthesis.md +228 -0
- package/research/dogfood/strategy-b/b1-architecture.md +207 -0
- package/research/dogfood/strategy-b/b2-gaps.md +258 -0
- package/research/dogfood/strategy-b/b3-workflows.md +250 -0
- package/research/dogfood/strategy-c/c-synthesis.md +451 -0
- package/research/dogfood/strategy-c/c1-explorer.md +192 -0
- package/research/dogfood/strategy-c/c2-diver-memory.md +145 -0
- package/research/dogfood/strategy-c/c3-diver-control.md +148 -0
- package/research/dogfood/strategy-c/c4-diver-failure.md +151 -0
- package/research/dogfood/strategy-c/c5-diver-execution.md +221 -0
- package/research/dogfood/strategy-c/c6-diver-org.md +221 -0
- package/research/effect-cli-error-handling.md +845 -0
- package/research/effect-errors-as-values.md +943 -0
- package/research/errors-task-analysis/00-consolidated-tasks.md +207 -0
- package/research/errors-task-analysis/cli-commands-analysis.md +909 -0
- package/research/errors-task-analysis/embeddings-analysis.md +709 -0
- package/research/errors-task-analysis/index-search-analysis.md +812 -0
- package/research/frontmatter/COMMENTS-ARE-SKIPPED.md +149 -0
- package/research/frontmatter/LLM-CODE-NAVIGATION.md +276 -0
- package/research/issue-review.md +603 -0
- package/research/llm-summarization/agent-cli-tools-2026.md +1082 -0
- package/research/llm-summarization/alternative-providers-2026.md +1428 -0
- package/research/llm-summarization/anthropic-2026.md +367 -0
- package/research/llm-summarization/claude-cli-integration.md +1706 -0
- package/research/llm-summarization/cli-integration-patterns.md +3155 -0
- package/research/llm-summarization/openai-2026.md +473 -0
- package/research/llm-summarization/openai-compatible-providers-2026.md +1022 -0
- package/research/llm-summarization/opencode-cli-integration.md +1552 -0
- package/research/llm-summarization/prompt-engineering-2026.md +1426 -0
- package/research/llm-summarization/prototype-results.md +56 -0
- package/research/llm-summarization/provider-switching-patterns-2026.md +2153 -0
- package/research/llm-summarization/typescript-llm-libraries-2026.md +2436 -0
- package/research/mdcontext-error-analysis.md +521 -0
- package/research/mdcontext-pudding/00-EXECUTIVE-SUMMARY.md +282 -0
- package/research/mdcontext-pudding/01-index-embed.md +956 -0
- package/research/mdcontext-pudding/02-search-COMMANDS.md +142 -0
- package/research/mdcontext-pudding/02-search-SUMMARY.md +146 -0
- package/research/mdcontext-pudding/02-search.md +970 -0
- package/research/mdcontext-pudding/03-context.md +779 -0
- package/research/mdcontext-pudding/04-navigation-and-analytics.md +803 -0
- package/research/mdcontext-pudding/04-tree.md +704 -0
- package/research/mdcontext-pudding/05-config.md +1038 -0
- package/research/mdcontext-pudding/06-links-summary.txt +87 -0
- package/research/mdcontext-pudding/06-links.md +679 -0
- package/research/mdcontext-pudding/07-stats.md +693 -0
- package/research/mdcontext-pudding/BUG-FIX-PLAN.md +388 -0
- package/research/mdcontext-pudding/P0-BUG-VALIDATION.md +167 -0
- package/research/mdcontext-pudding/README.md +168 -0
- package/research/mdcontext-pudding/TESTING-SUMMARY.md +128 -0
- package/research/npm_publish/011-npm-workflow-research-agent2.md +792 -0
- package/research/npm_publish/012-npm-workflow-research-agent1.md +530 -0
- package/research/npm_publish/013-npm-workflow-research-agent3.md +722 -0
- package/research/npm_publish/014-npm-workflow-synthesis.md +556 -0
- package/research/npm_publish/031-npm-workflow-task-analysis.md +134 -0
- package/research/research-quality-review.md +834 -0
- package/research/semantic-search/002-research-embedding-models.md +490 -0
- package/research/semantic-search/003-research-rag-alternatives.md +523 -0
- package/research/semantic-search/004-research-vector-search.md +841 -0
- package/research/semantic-search/032-research-semantic-search.md +427 -0
- package/research/semantic-search/embedding-text-analysis.md +156 -0
- package/research/semantic-search/multi-word-failure-reproduction.md +171 -0
- package/research/semantic-search/query-processing-analysis.md +207 -0
- package/research/semantic-search/root-cause-and-solution.md +114 -0
- package/research/semantic-search/threshold-validation-report.md +69 -0
- package/research/semantic-search/vector-search-analysis.md +63 -0
- package/research/task-management-2026/00-synthesis-recommendations.md +295 -0
- package/research/task-management-2026/01-ai-workflow-tools.md +416 -0
- package/research/task-management-2026/02-agent-framework-patterns.md +476 -0
- package/research/task-management-2026/03-lightweight-file-based.md +567 -0
- package/research/task-management-2026/04-established-tools-ai-features.md +541 -0
- package/research/task-management-2026/linear/01-core-features-workflow.md +771 -0
- package/research/task-management-2026/linear/02-api-integrations.md +930 -0
- package/research/task-management-2026/linear/03-ai-features.md +368 -0
- package/research/task-management-2026/linear/04-pricing-setup.md +205 -0
- package/research/task-management-2026/linear/05-usage-patterns-best-practices.md +605 -0
- package/research/test-path-issues.md +276 -0
- package/review/ALP-76/1-error-type-design.md +962 -0
- package/review/ALP-76/2-error-handling-patterns.md +906 -0
- package/review/ALP-76/3-error-presentation.md +624 -0
- package/review/ALP-76/4-test-coverage.md +625 -0
- package/review/ALP-76/5-migration-completeness.md +440 -0
- package/review/ALP-76/6-effect-best-practices.md +755 -0
- package/scripts/apply-branch-protection.sh +47 -0
- package/scripts/branch-protection-templates.json +79 -0
- package/scripts/prototype-summarization.ts +346 -0
- package/scripts/rebuild-hnswlib.js +58 -0
- package/scripts/setup-branch-protection.sh +64 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/active-provider.json +7 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.json +541 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.meta.json +5 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/config.json +8 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/documents.json +60 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/links.json +13 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/sections.json +1197 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/configuration-management.md +99 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/distributed-systems.md +92 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/error-handling.md +78 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/failure-automation.md +55 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/job-context.md +69 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/process-orchestration.md +99 -0
- package/src/cli/argv-preprocessor.test.ts +210 -0
- package/src/cli/argv-preprocessor.ts +202 -0
- package/src/cli/cli.test.ts +627 -0
- package/src/cli/commands/backlinks.ts +54 -0
- package/src/cli/commands/config-cmd.ts +642 -0
- package/src/cli/commands/context.ts +285 -0
- package/src/cli/commands/duplicates.ts +122 -0
- package/src/cli/commands/embeddings.ts +529 -0
- package/src/cli/commands/index-cmd.ts +480 -0
- package/src/cli/commands/index.ts +16 -0
- package/src/cli/commands/links.ts +52 -0
- package/src/cli/commands/search.ts +1281 -0
- package/src/cli/commands/stats.ts +149 -0
- package/src/cli/commands/tree.ts +128 -0
- package/src/cli/config-layer.ts +176 -0
- package/src/cli/error-handler.test.ts +235 -0
- package/src/cli/error-handler.ts +655 -0
- package/src/cli/flag-schemas.ts +341 -0
- package/src/cli/help.ts +588 -0
- package/src/cli/index.ts +9 -0
- package/src/cli/main.ts +435 -0
- package/src/cli/options.ts +41 -0
- package/src/cli/shared-error-handling.ts +199 -0
- package/src/cli/typo-suggester.test.ts +105 -0
- package/src/cli/typo-suggester.ts +130 -0
- package/src/cli/utils.ts +259 -0
- package/src/config/file-provider.test.ts +320 -0
- package/src/config/file-provider.ts +273 -0
- package/src/config/index.ts +72 -0
- package/src/config/integration.test.ts +667 -0
- package/src/config/precedence.test.ts +277 -0
- package/src/config/precedence.ts +451 -0
- package/src/config/schema.test.ts +414 -0
- package/src/config/schema.ts +603 -0
- package/src/config/service.test.ts +320 -0
- package/src/config/service.ts +243 -0
- package/src/config/testing.test.ts +264 -0
- package/src/config/testing.ts +110 -0
- package/src/core/index.ts +1 -0
- package/src/core/types.ts +113 -0
- package/src/duplicates/detector.test.ts +183 -0
- package/src/duplicates/detector.ts +414 -0
- package/src/duplicates/index.ts +18 -0
- package/src/embeddings/embedding-namespace.test.ts +300 -0
- package/src/embeddings/embedding-namespace.ts +947 -0
- package/src/embeddings/heading-boost.test.ts +222 -0
- package/src/embeddings/hnsw-build-options.test.ts +198 -0
- package/src/embeddings/hyde.test.ts +272 -0
- package/src/embeddings/hyde.ts +264 -0
- package/src/embeddings/index.ts +10 -0
- package/src/embeddings/openai-provider.ts +414 -0
- package/src/embeddings/pricing.json +22 -0
- package/src/embeddings/provider-constants.ts +204 -0
- package/src/embeddings/provider-errors.test.ts +967 -0
- package/src/embeddings/provider-errors.ts +565 -0
- package/src/embeddings/provider-factory.test.ts +240 -0
- package/src/embeddings/provider-factory.ts +225 -0
- package/src/embeddings/provider-integration.test.ts +788 -0
- package/src/embeddings/query-preprocessing.test.ts +187 -0
- package/src/embeddings/semantic-search-threshold.test.ts +508 -0
- package/src/embeddings/semantic-search.ts +1270 -0
- package/src/embeddings/types.ts +359 -0
- package/src/embeddings/vector-store.ts +708 -0
- package/src/embeddings/voyage-provider.ts +313 -0
- package/src/errors/errors.test.ts +845 -0
- package/src/errors/index.ts +533 -0
- package/src/index/ignore-patterns.test.ts +354 -0
- package/src/index/ignore-patterns.ts +305 -0
- package/src/index/index.ts +4 -0
- package/src/index/indexer.ts +684 -0
- package/src/index/storage.ts +260 -0
- package/src/index/types.ts +147 -0
- package/src/index/watcher.ts +189 -0
- package/src/index.ts +30 -0
- package/src/integration/search-keyword.test.ts +678 -0
- package/src/mcp/server.ts +612 -0
- package/src/parser/index.ts +1 -0
- package/src/parser/parser.test.ts +291 -0
- package/src/parser/parser.ts +394 -0
- package/src/parser/section-filter.test.ts +277 -0
- package/src/parser/section-filter.ts +392 -0
- package/src/search/__tests__/hybrid-search.test.ts +650 -0
- package/src/search/bm25-store.ts +366 -0
- package/src/search/cross-encoder.test.ts +253 -0
- package/src/search/cross-encoder.ts +406 -0
- package/src/search/fuzzy-search.test.ts +419 -0
- package/src/search/fuzzy-search.ts +273 -0
- package/src/search/hybrid-search.ts +448 -0
- package/src/search/path-matcher.test.ts +276 -0
- package/src/search/path-matcher.ts +33 -0
- package/src/search/query-parser.test.ts +260 -0
- package/src/search/query-parser.ts +319 -0
- package/src/search/searcher.test.ts +280 -0
- package/src/search/searcher.ts +724 -0
- package/src/search/wink-bm25.d.ts +30 -0
- package/src/summarization/cli-providers/claude.ts +202 -0
- package/src/summarization/cli-providers/detection.test.ts +273 -0
- package/src/summarization/cli-providers/detection.ts +118 -0
- package/src/summarization/cli-providers/index.ts +8 -0
- package/src/summarization/cost.test.ts +139 -0
- package/src/summarization/cost.ts +102 -0
- package/src/summarization/error-handler.test.ts +127 -0
- package/src/summarization/error-handler.ts +111 -0
- package/src/summarization/index.ts +102 -0
- package/src/summarization/pipeline.test.ts +498 -0
- package/src/summarization/pipeline.ts +231 -0
- package/src/summarization/prompts.test.ts +269 -0
- package/src/summarization/prompts.ts +133 -0
- package/src/summarization/provider-factory.test.ts +396 -0
- package/src/summarization/provider-factory.ts +178 -0
- package/src/summarization/types.ts +184 -0
- package/src/summarize/budget-bugs.test.ts +620 -0
- package/src/summarize/formatters.ts +419 -0
- package/src/summarize/index.ts +20 -0
- package/src/summarize/summarizer.test.ts +275 -0
- package/src/summarize/summarizer.ts +597 -0
- package/src/summarize/verify-bugs.test.ts +238 -0
- package/src/types/huggingface-transformers.d.ts +66 -0
- package/src/utils/index.ts +1 -0
- package/src/utils/tokens.test.ts +142 -0
- package/src/utils/tokens.ts +186 -0
- package/tests/fixtures/cli/.mdcontext/active-provider.json +7 -0
- package/tests/fixtures/cli/.mdcontext/config.json +8 -0
- package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
- package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
- package/tests/fixtures/cli/.mdcontext/indexes/documents.json +33 -0
- package/tests/fixtures/cli/.mdcontext/indexes/links.json +12 -0
- package/tests/fixtures/cli/.mdcontext/indexes/sections.json +247 -0
- package/tests/fixtures/cli/README.md +9 -0
- package/tests/fixtures/cli/api-reference.md +11 -0
- package/tests/fixtures/cli/getting-started.md +11 -0
- package/tests/integration/embed-index.test.ts +712 -0
- package/tests/integration/search-context.test.ts +469 -0
- package/tests/integration/search-semantic.test.ts +522 -0
- package/tsconfig.json +26 -0
- package/vitest.config.ts +16 -0
- package/vitest.setup.ts +12 -0
|
@@ -0,0 +1,419 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Formatting functions for summarization output
|
|
3
|
+
*
|
|
4
|
+
* Responsible for converting summary data structures into human-readable text
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import { countTokensApprox } from '../utils/tokens.js'
|
|
8
|
+
import type {
|
|
9
|
+
AssembledContext,
|
|
10
|
+
DocumentSummary,
|
|
11
|
+
SectionSummary,
|
|
12
|
+
} from './summarizer.js'
|
|
13
|
+
|
|
14
|
+
/**
 * Options accepted by `formatSummary` (passed as its second argument,
 * which defaults to an empty object).
 */
export interface FormatSummaryOptions {
|
|
15
|
+
/**
 * Optional token budget for the formatted output.
 *
 * When left undefined, `formatSummary` includes every section.
 * When set, only the sections that fit within the budget are kept and the
 * remaining sections are left out of the output.
 */
|
|
16
|
+
readonly maxTokens?: number | undefined
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
/**
|
|
20
|
+
* Format a document summary for display
|
|
21
|
+
*
|
|
22
|
+
* Outputs a markdown-formatted summary with:
|
|
23
|
+
* - Title and path
|
|
24
|
+
* - Accurate token count (of the formatted output)
|
|
25
|
+
* - Key topics
|
|
26
|
+
* - Hierarchical section summaries
|
|
27
|
+
*
|
|
28
|
+
* When maxTokens is specified, strictly enforces the budget by iteratively
|
|
29
|
+
* removing sections until the output fits.
|
|
30
|
+
*
|
|
31
|
+
* TRUNCATION UX: When truncated, shows a warning at the TOP with:
|
|
32
|
+
* - Percentage of tokens shown
|
|
33
|
+
* - List of sections included/excluded
|
|
34
|
+
* - Actionable guidance for getting more content
|
|
35
|
+
*/
|
|
36
|
+
export const formatSummary = (
|
|
37
|
+
summary: DocumentSummary,
|
|
38
|
+
options: FormatSummaryOptions = {},
|
|
39
|
+
): string => {
|
|
40
|
+
const maxTokens = options.maxTokens
|
|
41
|
+
|
|
42
|
+
// Flatten sections in order for incremental building
|
|
43
|
+
// Uses depth-first order so children follow parents - this enables "orphan rescue"
|
|
44
|
+
// where children can still be included even if their parent was too large
|
|
45
|
+
const flatSections: {
|
|
46
|
+
section: SectionSummary
|
|
47
|
+
depth: number
|
|
48
|
+
number: string
|
|
49
|
+
}[] = []
|
|
50
|
+
|
|
51
|
+
// Track section numbers for included/excluded listing
|
|
52
|
+
const collectSections = (
|
|
53
|
+
section: SectionSummary,
|
|
54
|
+
depth: number = 0,
|
|
55
|
+
parentNumber: string = '',
|
|
56
|
+
index: number = 0,
|
|
57
|
+
) => {
|
|
58
|
+
const number = parentNumber
|
|
59
|
+
? `${parentNumber}.${index + 1}`
|
|
60
|
+
: `${index + 1}`
|
|
61
|
+
flatSections.push({ section, depth, number })
|
|
62
|
+
section.children.forEach((child, i) => {
|
|
63
|
+
collectSections(child, depth + 1, number, i)
|
|
64
|
+
})
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
summary.sections.forEach((section, i) => {
|
|
68
|
+
collectSections(section, 0, '', i)
|
|
69
|
+
})
|
|
70
|
+
|
|
71
|
+
// Helper to build output with a given set of section indices
|
|
72
|
+
const buildOutput = (
|
|
73
|
+
includedSectionIndices: Set<number>,
|
|
74
|
+
truncationInfo: {
|
|
75
|
+
showWarning: boolean
|
|
76
|
+
truncatedCount: number
|
|
77
|
+
includedNumbers: string[]
|
|
78
|
+
excludedNumbers: string[]
|
|
79
|
+
tokensShown: number
|
|
80
|
+
tokensTotal: number
|
|
81
|
+
},
|
|
82
|
+
includeTopics: boolean,
|
|
83
|
+
): string => {
|
|
84
|
+
const lines: string[] = []
|
|
85
|
+
|
|
86
|
+
// TRUNCATION WARNING AT TOP (when truncated)
|
|
87
|
+
if (
|
|
88
|
+
truncationInfo.showWarning &&
|
|
89
|
+
truncationInfo.truncatedCount > 0 &&
|
|
90
|
+
truncationInfo.tokensTotal > 0
|
|
91
|
+
) {
|
|
92
|
+
const pct = Math.round(
|
|
93
|
+
(truncationInfo.tokensShown / truncationInfo.tokensTotal) * 100,
|
|
94
|
+
)
|
|
95
|
+
lines.push(
|
|
96
|
+
`⚠️ Truncated: Showing ~${truncationInfo.tokensShown}/${truncationInfo.tokensTotal} tokens (${pct}%)`,
|
|
97
|
+
)
|
|
98
|
+
|
|
99
|
+
// Show included sections (first few)
|
|
100
|
+
if (truncationInfo.includedNumbers.length > 0) {
|
|
101
|
+
const includedDisplay =
|
|
102
|
+
truncationInfo.includedNumbers.length <= 6
|
|
103
|
+
? truncationInfo.includedNumbers.join(', ')
|
|
104
|
+
: truncationInfo.includedNumbers.slice(0, 5).join(', ') +
|
|
105
|
+
`, ... (+${truncationInfo.includedNumbers.length - 5} more)`
|
|
106
|
+
lines.push(`Sections included: ${includedDisplay}`)
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
// Show excluded sections (first few)
|
|
110
|
+
if (truncationInfo.excludedNumbers.length > 0) {
|
|
111
|
+
const excludedDisplay =
|
|
112
|
+
truncationInfo.excludedNumbers.length <= 6
|
|
113
|
+
? truncationInfo.excludedNumbers.join(', ')
|
|
114
|
+
: truncationInfo.excludedNumbers.slice(0, 5).join(', ') +
|
|
115
|
+
`, ... (+${truncationInfo.excludedNumbers.length - 5} more)`
|
|
116
|
+
lines.push(`Sections excluded: ${excludedDisplay}`)
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
lines.push(
|
|
120
|
+
'Use --full for complete content or --section to target specific sections.',
|
|
121
|
+
)
|
|
122
|
+
lines.push('')
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
lines.push(`# ${summary.title}`)
|
|
126
|
+
lines.push(`Path: ${summary.path}`)
|
|
127
|
+
|
|
128
|
+
// Placeholder for token line - we'll calculate actual tokens after building
|
|
129
|
+
const tokenLineIndex = lines.length
|
|
130
|
+
lines.push('PLACEHOLDER')
|
|
131
|
+
lines.push('')
|
|
132
|
+
|
|
133
|
+
const fullTopicsLine =
|
|
134
|
+
summary.keyTopics.length > 0
|
|
135
|
+
? `**Topics:** ${summary.keyTopics.join(', ')}`
|
|
136
|
+
: ''
|
|
137
|
+
|
|
138
|
+
if (includeTopics && fullTopicsLine) {
|
|
139
|
+
lines.push(fullTopicsLine)
|
|
140
|
+
lines.push('')
|
|
141
|
+
}
|
|
142
|
+
|
|
143
|
+
// Build section content
|
|
144
|
+
const sectionLines: string[] = []
|
|
145
|
+
for (let i = 0; i < flatSections.length; i++) {
|
|
146
|
+
if (!includedSectionIndices.has(i)) continue
|
|
147
|
+
const { section, depth } = flatSections[i]!
|
|
148
|
+
const indent = ' '.repeat(depth)
|
|
149
|
+
const prefix = '#'.repeat(section.level)
|
|
150
|
+
sectionLines.push(`${indent}${prefix} ${section.heading}`)
|
|
151
|
+
if (section.summary) {
|
|
152
|
+
sectionLines.push(`${indent}${section.summary}`)
|
|
153
|
+
}
|
|
154
|
+
}
|
|
155
|
+
|
|
156
|
+
lines.push(sectionLines.join('\n'))
|
|
157
|
+
|
|
158
|
+
// Calculate actual token count for this output
|
|
159
|
+
// Build output without token line first
|
|
160
|
+
const tempOutput = lines.join('\n')
|
|
161
|
+
const tokensWithoutLine = countTokensApprox(
|
|
162
|
+
tempOutput.replace('PLACEHOLDER', ''),
|
|
163
|
+
)
|
|
164
|
+
|
|
165
|
+
// The token line itself adds tokens - iterate to find stable count
|
|
166
|
+
// Token line format: "Tokens: XXX (YY% reduction from ZZZ)"
|
|
167
|
+
let estimatedTotal = tokensWithoutLine + 8 // Initial estimate for token line
|
|
168
|
+
for (let iter = 0; iter < 3; iter++) {
|
|
169
|
+
const testTokenLine = `Tokens: ${estimatedTotal} (${(summary.compressionRatio * 100).toFixed(0)}% reduction from ${summary.originalTokens})`
|
|
170
|
+
const testOutput = tempOutput.replace('PLACEHOLDER', testTokenLine)
|
|
171
|
+
const actualTotal = countTokensApprox(testOutput)
|
|
172
|
+
if (actualTotal === estimatedTotal) break
|
|
173
|
+
estimatedTotal = actualTotal
|
|
174
|
+
}
|
|
175
|
+
|
|
176
|
+
// Final token line with converged count
|
|
177
|
+
const finalTokenLine = `Tokens: ${estimatedTotal} (${(summary.compressionRatio * 100).toFixed(0)}% reduction from ${summary.originalTokens})`
|
|
178
|
+
lines[tokenLineIndex] = finalTokenLine
|
|
179
|
+
|
|
180
|
+
return lines.join('\n')
|
|
181
|
+
}
|
|
182
|
+
|
|
183
|
+
// If no budget, include everything
|
|
184
|
+
if (maxTokens === undefined) {
|
|
185
|
+
const allIndices = new Set(flatSections.map((_, i) => i))
|
|
186
|
+
const hasPriorTruncation = summary.truncated && summary.truncatedCount
|
|
187
|
+
return buildOutput(
|
|
188
|
+
allIndices,
|
|
189
|
+
{
|
|
190
|
+
showWarning: !!hasPriorTruncation,
|
|
191
|
+
truncatedCount: summary.truncatedCount ?? 0,
|
|
192
|
+
includedNumbers: flatSections.map((s) => s.number),
|
|
193
|
+
excludedNumbers: [],
|
|
194
|
+
tokensShown: summary.summaryTokens,
|
|
195
|
+
tokensTotal: summary.originalTokens,
|
|
196
|
+
},
|
|
197
|
+
true,
|
|
198
|
+
)
|
|
199
|
+
}
|
|
200
|
+
|
|
201
|
+
// With budget: greedily add sections, then validate and trim if needed
|
|
202
|
+
const includedIndices = new Set<number>()
|
|
203
|
+
let truncatedCount = 0
|
|
204
|
+
let includeTopics = true
|
|
205
|
+
|
|
206
|
+
// First pass: estimate what fits using conservative token counting
|
|
207
|
+
// Add 15% safety margin to each section's token count
|
|
208
|
+
const SAFETY_MARGIN = 1.15
|
|
209
|
+
|
|
210
|
+
// Calculate minimum header overhead (title, path, token line)
|
|
211
|
+
const minHeaderTemplate = [
|
|
212
|
+
`# ${summary.title}`,
|
|
213
|
+
`Path: ${summary.path}`,
|
|
214
|
+
`Tokens: 9999 (${(summary.compressionRatio * 100).toFixed(0)}% reduction from ${summary.originalTokens})`,
|
|
215
|
+
'',
|
|
216
|
+
'',
|
|
217
|
+
].join('\n')
|
|
218
|
+
const minHeaderTokens = Math.ceil(
|
|
219
|
+
countTokensApprox(minHeaderTemplate) * SAFETY_MARGIN,
|
|
220
|
+
)
|
|
221
|
+
|
|
222
|
+
// Calculate topics overhead
|
|
223
|
+
const fullTopicsLine =
|
|
224
|
+
summary.keyTopics.length > 0
|
|
225
|
+
? `**Topics:** ${summary.keyTopics.join(', ')}\n`
|
|
226
|
+
: ''
|
|
227
|
+
const topicsTokens = fullTopicsLine
|
|
228
|
+
? Math.ceil(countTokensApprox(fullTopicsLine) * SAFETY_MARGIN)
|
|
229
|
+
: 0
|
|
230
|
+
|
|
231
|
+
// Truncation warning overhead (larger now with section lists)
|
|
232
|
+
const truncationWarningTokens = Math.ceil(
|
|
233
|
+
countTokensApprox(
|
|
234
|
+
`⚠️ Truncated: Showing ~9999/9999 tokens (99%)\nSections included: 1, 2, 3, 4, 5, ... (+99 more)\nSections excluded: 6, 7, 8, 9, 10, ... (+99 more)\nUse --full for complete content or --section to target specific sections.\n`,
|
|
235
|
+
) * SAFETY_MARGIN,
|
|
236
|
+
)
|
|
237
|
+
|
|
238
|
+
// Start with header + topics
|
|
239
|
+
let headerTokens = minHeaderTokens + topicsTokens
|
|
240
|
+
|
|
241
|
+
// If header alone exceeds budget, drop topics
|
|
242
|
+
if (headerTokens >= maxTokens) {
|
|
243
|
+
includeTopics = false
|
|
244
|
+
headerTokens = minHeaderTokens
|
|
245
|
+
}
|
|
246
|
+
|
|
247
|
+
// Calculate content budget (reserve space for potential truncation warning)
|
|
248
|
+
let contentBudget = maxTokens - headerTokens - truncationWarningTokens
|
|
249
|
+
let tokensUsed = 0
|
|
250
|
+
|
|
251
|
+
// Greedy section selection
|
|
252
|
+
for (let i = 0; i < flatSections.length; i++) {
|
|
253
|
+
const { section, depth } = flatSections[i]!
|
|
254
|
+
const indent = ' '.repeat(depth)
|
|
255
|
+
const prefix = '#'.repeat(section.level)
|
|
256
|
+
const sectionContent = section.summary
|
|
257
|
+
? `${indent}${prefix} ${section.heading}\n${indent}${section.summary}`
|
|
258
|
+
: `${indent}${prefix} ${section.heading}`
|
|
259
|
+
|
|
260
|
+
const sectionTokens = Math.ceil(
|
|
261
|
+
countTokensApprox(sectionContent) * SAFETY_MARGIN,
|
|
262
|
+
)
|
|
263
|
+
|
|
264
|
+
if (tokensUsed + sectionTokens <= contentBudget) {
|
|
265
|
+
includedIndices.add(i)
|
|
266
|
+
tokensUsed += sectionTokens
|
|
267
|
+
} else {
|
|
268
|
+
truncatedCount++
|
|
269
|
+
}
|
|
270
|
+
}
|
|
271
|
+
|
|
272
|
+
// If nothing was truncated, we can use the full content budget
|
|
273
|
+
if (truncatedCount === 0) {
|
|
274
|
+
contentBudget += truncationWarningTokens
|
|
275
|
+
}
|
|
276
|
+
|
|
277
|
+
// Collect included/excluded section numbers
|
|
278
|
+
const includedNumbers: string[] = []
|
|
279
|
+
const excludedNumbers: string[] = []
|
|
280
|
+
for (let i = 0; i < flatSections.length; i++) {
|
|
281
|
+
if (includedIndices.has(i)) {
|
|
282
|
+
includedNumbers.push(flatSections[i]!.number)
|
|
283
|
+
} else {
|
|
284
|
+
excludedNumbers.push(flatSections[i]!.number)
|
|
285
|
+
}
|
|
286
|
+
}
|
|
287
|
+
|
|
288
|
+
// Calculate tokens shown vs total
|
|
289
|
+
let tokensShown = 0
|
|
290
|
+
for (const idx of includedIndices) {
|
|
291
|
+
tokensShown += flatSections[idx]!.section.summaryTokens
|
|
292
|
+
}
|
|
293
|
+
|
|
294
|
+
// Build output and validate it fits
|
|
295
|
+
let output = buildOutput(
|
|
296
|
+
includedIndices,
|
|
297
|
+
{
|
|
298
|
+
showWarning: truncatedCount > 0,
|
|
299
|
+
truncatedCount,
|
|
300
|
+
includedNumbers,
|
|
301
|
+
excludedNumbers,
|
|
302
|
+
tokensShown,
|
|
303
|
+
tokensTotal: summary.originalTokens,
|
|
304
|
+
},
|
|
305
|
+
includeTopics,
|
|
306
|
+
)
|
|
307
|
+
let actualTokens = countTokensApprox(output)
|
|
308
|
+
|
|
309
|
+
// Final validation loop: remove sections from the end until we fit
|
|
310
|
+
// This handles any estimation errors
|
|
311
|
+
const sortedIndices = Array.from(includedIndices).sort((a, b) => b - a) // Reverse order
|
|
312
|
+
let removalIndex = 0
|
|
313
|
+
|
|
314
|
+
while (actualTokens > maxTokens && removalIndex < sortedIndices.length) {
|
|
315
|
+
// Remove the last section
|
|
316
|
+
const indexToRemove = sortedIndices[removalIndex]!
|
|
317
|
+
includedIndices.delete(indexToRemove)
|
|
318
|
+
truncatedCount++
|
|
319
|
+
removalIndex++
|
|
320
|
+
|
|
321
|
+
// Update included/excluded lists
|
|
322
|
+
const removedNumber = flatSections[indexToRemove]!.number
|
|
323
|
+
const includedIdx = includedNumbers.indexOf(removedNumber)
|
|
324
|
+
if (includedIdx !== -1) {
|
|
325
|
+
includedNumbers.splice(includedIdx, 1)
|
|
326
|
+
excludedNumbers.push(removedNumber)
|
|
327
|
+
}
|
|
328
|
+
|
|
329
|
+
// Update tokens shown
|
|
330
|
+
tokensShown -= flatSections[indexToRemove]!.section.summaryTokens
|
|
331
|
+
|
|
332
|
+
// Rebuild and recheck
|
|
333
|
+
output = buildOutput(
|
|
334
|
+
includedIndices,
|
|
335
|
+
{
|
|
336
|
+
showWarning: true,
|
|
337
|
+
truncatedCount,
|
|
338
|
+
includedNumbers,
|
|
339
|
+
excludedNumbers,
|
|
340
|
+
tokensShown,
|
|
341
|
+
tokensTotal: summary.originalTokens,
|
|
342
|
+
},
|
|
343
|
+
includeTopics,
|
|
344
|
+
)
|
|
345
|
+
actualTokens = countTokensApprox(output)
|
|
346
|
+
}
|
|
347
|
+
|
|
348
|
+
// If still over budget and we haven't dropped topics yet, try that
|
|
349
|
+
if (actualTokens > maxTokens && includeTopics) {
|
|
350
|
+
includeTopics = false
|
|
351
|
+
output = buildOutput(
|
|
352
|
+
includedIndices,
|
|
353
|
+
{
|
|
354
|
+
showWarning: truncatedCount > 0,
|
|
355
|
+
truncatedCount,
|
|
356
|
+
includedNumbers,
|
|
357
|
+
excludedNumbers,
|
|
358
|
+
tokensShown,
|
|
359
|
+
tokensTotal: summary.originalTokens,
|
|
360
|
+
},
|
|
361
|
+
includeTopics,
|
|
362
|
+
)
|
|
363
|
+
actualTokens = countTokensApprox(output)
|
|
364
|
+
}
|
|
365
|
+
|
|
366
|
+
// If still over budget, try dropping the truncation warning as last resort
|
|
367
|
+
// (only if we're showing it and have truncated sections)
|
|
368
|
+
if (actualTokens > maxTokens && truncatedCount > 0) {
|
|
369
|
+
output = buildOutput(
|
|
370
|
+
includedIndices,
|
|
371
|
+
{
|
|
372
|
+
showWarning: false,
|
|
373
|
+
truncatedCount,
|
|
374
|
+
includedNumbers,
|
|
375
|
+
excludedNumbers,
|
|
376
|
+
tokensShown,
|
|
377
|
+
tokensTotal: summary.originalTokens,
|
|
378
|
+
},
|
|
379
|
+
includeTopics,
|
|
380
|
+
)
|
|
381
|
+
actualTokens = countTokensApprox(output)
|
|
382
|
+
}
|
|
383
|
+
|
|
384
|
+
return output
|
|
385
|
+
}
|
|
386
|
+
|
|
387
|
+
/**
 * Render an assembled context as one display string.
 *
 * Layout of the result, top to bottom:
 * - a title line, the token total against the budget, and the source count,
 *   followed by a blank line
 * - for every source: a `---` divider, a blank line, then the source content
 * - only when some paths overflowed: a divider, a blank line, an overflow
 *   heading, and one bullet per overflow path
 *
 * @param context - The assembled context to render.
 * @returns All lines joined with `\n`.
 */
export const formatAssembledContext = (context: AssembledContext): string => {
  const headerLines: string[] = [
    '# Context Assembly',
    `Total tokens: ${context.totalTokens}/${context.budget}`,
    `Sources: ${context.sources.length}`,
    '',
  ]

  // Each source contributes a divider, a spacer line, and its content.
  const sourceLines: string[] = []
  for (const { content } of context.sources) {
    sourceLines.push('---', '', content)
  }

  // The overflow section is omitted entirely when nothing overflowed.
  const overflowLines: string[] = []
  if (context.overflow.length > 0) {
    overflowLines.push('---', '', '## Overflow (not included due to budget)')
    for (const overflowPath of context.overflow) {
      overflowLines.push(`- ${overflowPath}`)
    }
  }

  return [...headerLines, ...sourceLines, ...overflowLines].join('\n')
}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Summarization module exports
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
export {
|
|
6
|
+
type AssembleContextOptions,
|
|
7
|
+
type AssembledContext,
|
|
8
|
+
assembleContext,
|
|
9
|
+
type CompressionLevel,
|
|
10
|
+
type DocumentSummary,
|
|
11
|
+
formatAssembledContext,
|
|
12
|
+
formatSummary,
|
|
13
|
+
measureReduction,
|
|
14
|
+
type SectionSummary,
|
|
15
|
+
type SourceContext,
|
|
16
|
+
type SummarizeOptions,
|
|
17
|
+
summarizeDocument,
|
|
18
|
+
summarizeFile,
|
|
19
|
+
type TokenReductionReport,
|
|
20
|
+
} from './summarizer.js'
|
|
@@ -0,0 +1,275 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tests for summarization engine
|
|
3
|
+
*
|
|
4
|
+
* Focuses on token count accuracy - ensuring displayed counts match actual output
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import { describe, expect, it } from 'vitest'
|
|
8
|
+
import { countTokensApprox } from '../utils/tokens.js'
|
|
9
|
+
import { formatSummary } from './formatters.js'
|
|
10
|
+
import type { DocumentSummary, SectionSummary } from './summarizer.js'
|
|
11
|
+
|
|
12
|
+
describe('summarizer token counting', () => {
  type SectionCore = Pick<
    SectionSummary,
    'heading' | 'level' | 'originalTokens' | 'summaryTokens' | 'summary'
  >
  type SectionExtras = Partial<
    Pick<SectionSummary, 'children' | 'hasCode' | 'hasList' | 'hasTable'>
  >

  /**
   * Build a section fixture. Unless overridden through `extras`, the section
   * has no children and all content flags are false.
   */
  const buildSection = (
    core: SectionCore,
    extras: SectionExtras = {},
  ): SectionSummary => ({
    ...core,
    children: [],
    hasCode: false,
    hasList: false,
    hasTable: false,
    ...extras,
  })

  /**
   * Read the number from the first "Tokens: N" occurrence in rendered output.
   * Asserts that such an occurrence exists before parsing it.
   */
  const readDisplayedTokens = (rendered: string): number => {
    const found = rendered.match(/Tokens: (\d+)/)
    expect(found).toBeTruthy()
    return parseInt(found![1]!, 10)
  }

  describe('formatSummary token accuracy', () => {
    it('displays token count matching actual output', () => {
      const doc: DocumentSummary = {
        path: '/test/file.md',
        title: 'Test Document',
        originalTokens: 1000,
        summaryTokens: 100, // count recorded before formatting
        compressionRatio: 0.9,
        sections: [
          buildSection({
            heading: 'Section 1',
            level: 2,
            originalTokens: 500,
            summaryTokens: 50,
            summary: 'This is the summary of section 1.',
          }),
        ],
        keyTopics: ['topic1', 'topic2'],
      }

      const rendered = formatSummary(doc)

      // Number printed in the output vs. number measured on the output itself
      const shown = readDisplayedTokens(rendered)
      const measured = countTokensApprox(rendered)

      // Accept a gap below 10% of the measured count (never tighter than 5),
      // since the token line itself contributes tokens.
      const allowedGap = Math.max(measured * 0.1, 5)
      expect(Math.abs(shown - measured)).toBeLessThan(allowedGap)
    })

    it('handles document with no topics', () => {
      const doc: DocumentSummary = {
        path: '/test/file.md',
        title: 'Simple Doc',
        originalTokens: 100,
        summaryTokens: 50,
        compressionRatio: 0.5,
        sections: [],
        keyTopics: [],
      }

      const rendered = formatSummary(doc)

      // With no topics there must be no Topics line
      expect(rendered).not.toContain('**Topics:**')

      // The printed count must still track the measured count
      const shown = readDisplayedTokens(rendered)
      const measured = countTokensApprox(rendered)

      expect(Math.abs(shown - measured)).toBeLessThan(5)
    })

    it('handles nested sections', () => {
      const childSection: SectionSummary = buildSection(
        {
          heading: 'Child Section',
          level: 3,
          originalTokens: 100,
          summaryTokens: 20,
          summary: 'Child summary content.',
        },
        { hasCode: true },
      )

      const doc: DocumentSummary = {
        path: '/test/nested.md',
        title: 'Nested Document',
        originalTokens: 500,
        summaryTokens: 100,
        compressionRatio: 0.8,
        sections: [
          buildSection(
            {
              heading: 'Parent Section',
              level: 2,
              originalTokens: 300,
              summaryTokens: 60,
              summary: 'Parent summary content.',
            },
            { children: [childSection], hasList: true },
          ),
        ],
        keyTopics: ['parent', 'child'],
      }

      const rendered = formatSummary(doc)

      // Both heading levels must appear in the output
      expect(rendered).toContain('## Parent Section')
      expect(rendered).toContain('### Child Section')

      // The printed count must still track the measured count
      const shown = readDisplayedTokens(rendered)
      const measured = countTokensApprox(rendered)

      // Nested content gets a slightly wider margin (15%, never tighter than 5)
      const allowedGap = Math.max(measured * 0.15, 5)
      expect(Math.abs(shown - measured)).toBeLessThan(allowedGap)
    })

    it('includes compression ratio in output', () => {
      const doc: DocumentSummary = {
        path: '/test/file.md',
        title: 'Test',
        originalTokens: 1000,
        summaryTokens: 200,
        compressionRatio: 0.8, // i.e. an 80% reduction
        sections: [],
        keyTopics: [],
      }

      const rendered = formatSummary(doc)

      // The ratio is reported as a percentage alongside the original count
      expect(rendered).toContain('80% reduction')
      expect(rendered).toContain('from 1000')
    })

    it('respects maxTokens budget', () => {
      const doc: DocumentSummary = {
        path: '/test/file.md',
        title: 'Test Document',
        originalTokens: 1000,
        summaryTokens: 500,
        compressionRatio: 0.5,
        sections: [
          buildSection({
            heading: 'Section 1',
            level: 2,
            originalTokens: 200,
            summaryTokens: 100,
            summary:
              'This is a longer summary that contains many words to test token budget enforcement.',
          }),
          buildSection({
            heading: 'Section 2',
            level: 2,
            originalTokens: 200,
            summaryTokens: 100,
            summary: 'Another section with substantial content for testing.',
          }),
        ],
        keyTopics: ['test', 'budget'],
      }

      const rendered = formatSummary(doc, { maxTokens: 100 })
      const measured = countTokensApprox(rendered)

      // The measured size of the output must not exceed the budget
      expect(measured).toBeLessThanOrEqual(100)
    })

    it('shows truncation warning when sections are omitted', () => {
      const doc: DocumentSummary = {
        path: '/test/file.md',
        title: 'Test',
        originalTokens: 1000,
        summaryTokens: 500,
        compressionRatio: 0.5,
        sections: [
          buildSection({
            heading: 'Section 1',
            level: 2,
            originalTokens: 200,
            summaryTokens: 100,
            summary: 'Long content '.repeat(50),
          }),
        ],
        keyTopics: [],
      }

      // Budget leaves room for the truncation warning and its section lists
      const rendered = formatSummary(doc, { maxTokens: 150 })

      // The warning is recognised by the 'Truncated' marker text
      expect(rendered).toContain('Truncated')
    })
  })

  describe('token budget edge cases', () => {
    it('handles very tight budget gracefully', () => {
      const doc: DocumentSummary = {
        path: '/test/file.md',
        title: 'Test',
        originalTokens: 100,
        summaryTokens: 50,
        compressionRatio: 0.5,
        sections: [
          buildSection({
            heading: 'Section',
            level: 2,
            originalTokens: 50,
            summaryTokens: 25,
            summary: 'Content',
          }),
        ],
        keyTopics: [],
      }

      // A very small budget must not throw and must still yield some output
      const rendered = formatSummary(doc, { maxTokens: 30 })
      expect(rendered).toBeTruthy()
    })

    it('handles long file paths in overhead calculation', () => {
      const doc: DocumentSummary = {
        path: '/very/long/path/to/some/deeply/nested/directory/structure/file.md',
        title: 'A Very Long Document Title That Takes Up Many Tokens',
        originalTokens: 1000,
        summaryTokens: 100,
        compressionRatio: 0.9,
        sections: [],
        keyTopics: [
          'topic1',
          'topic2',
          'topic3',
          'topic4',
          'topic5',
          'another-long-topic',
        ],
      }

      const rendered = formatSummary(doc, { maxTokens: 200 })
      const measured = countTokensApprox(rendered)

      // Long paths and titles must not push the output past the budget
      expect(measured).toBeLessThanOrEqual(200)
    })
  })
})
|