mdcontext 0.0.1 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (140) hide show
  1. package/.changeset/README.md +28 -0
  2. package/.changeset/config.json +11 -0
  3. package/.github/workflows/ci.yml +83 -0
  4. package/.github/workflows/release.yml +113 -0
  5. package/.tldrignore +112 -0
  6. package/AGENTS.md +46 -0
  7. package/BACKLOG.md +338 -0
  8. package/README.md +231 -11
  9. package/biome.json +36 -0
  10. package/cspell.config.yaml +14 -0
  11. package/dist/chunk-KRYIFLQR.js +92 -0
  12. package/dist/chunk-S7E6TFX6.js +742 -0
  13. package/dist/chunk-VVTGZNBT.js +1519 -0
  14. package/dist/cli/main.d.ts +1 -0
  15. package/dist/cli/main.js +2015 -0
  16. package/dist/index.d.ts +266 -0
  17. package/dist/index.js +86 -0
  18. package/dist/mcp/server.d.ts +1 -0
  19. package/dist/mcp/server.js +376 -0
  20. package/docs/019-USAGE.md +586 -0
  21. package/docs/020-current-implementation.md +364 -0
  22. package/docs/021-DOGFOODING-FINDINGS.md +175 -0
  23. package/docs/BACKLOG.md +80 -0
  24. package/docs/DESIGN.md +439 -0
  25. package/docs/PROJECT.md +88 -0
  26. package/docs/ROADMAP.md +407 -0
  27. package/docs/test-links.md +9 -0
  28. package/package.json +69 -10
  29. package/pnpm-workspace.yaml +5 -0
  30. package/research/config-analysis/01-current-implementation.md +470 -0
  31. package/research/config-analysis/02-strategy-recommendation.md +428 -0
  32. package/research/config-analysis/03-task-candidates.md +715 -0
  33. package/research/config-analysis/033-research-configuration-management.md +828 -0
  34. package/research/config-analysis/034-research-effect-cli-config.md +1504 -0
  35. package/research/config-analysis/04-consolidated-task-candidates.md +277 -0
  36. package/research/dogfood/consolidated-tool-evaluation.md +373 -0
  37. package/research/dogfood/strategy-a/a-synthesis.md +184 -0
  38. package/research/dogfood/strategy-a/a1-docs.md +226 -0
  39. package/research/dogfood/strategy-a/a2-amorphic.md +156 -0
  40. package/research/dogfood/strategy-a/a3-llm.md +164 -0
  41. package/research/dogfood/strategy-b/b-synthesis.md +228 -0
  42. package/research/dogfood/strategy-b/b1-architecture.md +207 -0
  43. package/research/dogfood/strategy-b/b2-gaps.md +258 -0
  44. package/research/dogfood/strategy-b/b3-workflows.md +250 -0
  45. package/research/dogfood/strategy-c/c-synthesis.md +451 -0
  46. package/research/dogfood/strategy-c/c1-explorer.md +192 -0
  47. package/research/dogfood/strategy-c/c2-diver-memory.md +145 -0
  48. package/research/dogfood/strategy-c/c3-diver-control.md +148 -0
  49. package/research/dogfood/strategy-c/c4-diver-failure.md +151 -0
  50. package/research/dogfood/strategy-c/c5-diver-execution.md +221 -0
  51. package/research/dogfood/strategy-c/c6-diver-org.md +221 -0
  52. package/research/effect-cli-error-handling.md +845 -0
  53. package/research/effect-errors-as-values.md +943 -0
  54. package/research/errors-task-analysis/00-consolidated-tasks.md +207 -0
  55. package/research/errors-task-analysis/cli-commands-analysis.md +909 -0
  56. package/research/errors-task-analysis/embeddings-analysis.md +709 -0
  57. package/research/errors-task-analysis/index-search-analysis.md +812 -0
  58. package/research/mdcontext-error-analysis.md +521 -0
  59. package/research/npm_publish/011-npm-workflow-research-agent2.md +792 -0
  60. package/research/npm_publish/012-npm-workflow-research-agent1.md +530 -0
  61. package/research/npm_publish/013-npm-workflow-research-agent3.md +722 -0
  62. package/research/npm_publish/014-npm-workflow-synthesis.md +556 -0
  63. package/research/npm_publish/031-npm-workflow-task-analysis.md +134 -0
  64. package/research/semantic-search/002-research-embedding-models.md +490 -0
  65. package/research/semantic-search/003-research-rag-alternatives.md +523 -0
  66. package/research/semantic-search/004-research-vector-search.md +841 -0
  67. package/research/semantic-search/032-research-semantic-search.md +427 -0
  68. package/research/task-management-2026/00-synthesis-recommendations.md +295 -0
  69. package/research/task-management-2026/01-ai-workflow-tools.md +416 -0
  70. package/research/task-management-2026/02-agent-framework-patterns.md +476 -0
  71. package/research/task-management-2026/03-lightweight-file-based.md +567 -0
  72. package/research/task-management-2026/04-established-tools-ai-features.md +541 -0
  73. package/research/task-management-2026/linear/01-core-features-workflow.md +771 -0
  74. package/research/task-management-2026/linear/02-api-integrations.md +930 -0
  75. package/research/task-management-2026/linear/03-ai-features.md +368 -0
  76. package/research/task-management-2026/linear/04-pricing-setup.md +205 -0
  77. package/research/task-management-2026/linear/05-usage-patterns-best-practices.md +605 -0
  78. package/scripts/rebuild-hnswlib.js +63 -0
  79. package/src/cli/argv-preprocessor.test.ts +210 -0
  80. package/src/cli/argv-preprocessor.ts +202 -0
  81. package/src/cli/cli.test.ts +430 -0
  82. package/src/cli/commands/backlinks.ts +54 -0
  83. package/src/cli/commands/context.ts +197 -0
  84. package/src/cli/commands/index-cmd.ts +300 -0
  85. package/src/cli/commands/index.ts +13 -0
  86. package/src/cli/commands/links.ts +52 -0
  87. package/src/cli/commands/search.ts +451 -0
  88. package/src/cli/commands/stats.ts +146 -0
  89. package/src/cli/commands/tree.ts +107 -0
  90. package/src/cli/flag-schemas.ts +275 -0
  91. package/src/cli/help.ts +386 -0
  92. package/src/cli/index.ts +9 -0
  93. package/src/cli/main.ts +145 -0
  94. package/src/cli/options.ts +31 -0
  95. package/src/cli/typo-suggester.test.ts +105 -0
  96. package/src/cli/typo-suggester.ts +130 -0
  97. package/src/cli/utils.ts +126 -0
  98. package/src/core/index.ts +1 -0
  99. package/src/core/types.ts +140 -0
  100. package/src/embeddings/index.ts +8 -0
  101. package/src/embeddings/openai-provider.ts +165 -0
  102. package/src/embeddings/semantic-search.ts +583 -0
  103. package/src/embeddings/types.ts +82 -0
  104. package/src/embeddings/vector-store.ts +299 -0
  105. package/src/index/index.ts +4 -0
  106. package/src/index/indexer.ts +446 -0
  107. package/src/index/storage.ts +196 -0
  108. package/src/index/types.ts +109 -0
  109. package/src/index/watcher.ts +131 -0
  110. package/src/index.ts +8 -0
  111. package/src/mcp/server.ts +483 -0
  112. package/src/parser/index.ts +1 -0
  113. package/src/parser/parser.test.ts +291 -0
  114. package/src/parser/parser.ts +395 -0
  115. package/src/parser/section-filter.ts +270 -0
  116. package/src/search/query-parser.test.ts +260 -0
  117. package/src/search/query-parser.ts +319 -0
  118. package/src/search/searcher.test.ts +182 -0
  119. package/src/search/searcher.ts +602 -0
  120. package/src/summarize/budget-bugs.test.ts +620 -0
  121. package/src/summarize/formatters.ts +419 -0
  122. package/src/summarize/index.ts +20 -0
  123. package/src/summarize/summarizer.test.ts +275 -0
  124. package/src/summarize/summarizer.ts +528 -0
  125. package/src/summarize/verify-bugs.test.ts +238 -0
  126. package/src/utils/index.ts +1 -0
  127. package/src/utils/tokens.test.ts +142 -0
  128. package/src/utils/tokens.ts +186 -0
  129. package/tests/fixtures/cli/.mdcontext/config.json +8 -0
  130. package/tests/fixtures/cli/.mdcontext/indexes/documents.json +33 -0
  131. package/tests/fixtures/cli/.mdcontext/indexes/links.json +12 -0
  132. package/tests/fixtures/cli/.mdcontext/indexes/sections.json +233 -0
  133. package/tests/fixtures/cli/.mdcontext/vectors.bin +0 -0
  134. package/tests/fixtures/cli/.mdcontext/vectors.meta.json +1264 -0
  135. package/tests/fixtures/cli/README.md +9 -0
  136. package/tests/fixtures/cli/api-reference.md +11 -0
  137. package/tests/fixtures/cli/getting-started.md +11 -0
  138. package/tsconfig.json +26 -0
  139. package/vitest.config.ts +21 -0
  140. package/vitest.setup.ts +12 -0
@@ -0,0 +1,528 @@
1
+ /**
2
+ * Summarization engine for mdcontext
3
+ *
4
+ * Provides hierarchical summarization and multi-document context assembly
5
+ */
6
+
7
+ import * as fs from 'node:fs/promises'
8
+ import * as path from 'node:path'
9
+ import { Effect } from 'effect'
10
+ import type { MdDocument, MdSection } from '../core/types.js'
11
+ import { parseFile } from '../parser/parser.js'
12
+ import { countTokensApprox } from '../utils/tokens.js'
13
+ import { formatSummary as formatSummaryImpl } from './formatters.js'
14
+
15
+ // ============================================================================
16
+ // Types
17
+ // ============================================================================
18
+
19
+ export type CompressionLevel = 'brief' | 'summary' | 'full' // 'brief': content-type tags only; 'summary': extracted key sentences; 'full': complete plain text (sections already within their token target keep their plain text at every level)
20
+
21
+ export interface SummarizeOptions {
22
+ /** Compression level; summarizeDocument uses 'summary' when omitted */
23
+ readonly level?: CompressionLevel | undefined
24
+ /** Maximum tokens for output; summarizeDocument uses the TOKEN_BUDGETS entry for the level when omitted */
25
+ readonly maxTokens?: number | undefined
26
+ }
27
+
28
+ export interface SectionSummary {
29
+ readonly heading: string // heading copied from the source section
30
+ readonly level: number // `level` copied from the source section
31
+ readonly originalTokens: number // the source section's metadata.tokenCount
32
+ readonly summaryTokens: number // approximate token count of `summary` only (children carry their own counts)
33
+ readonly summary: string // reduced text for this section; may be empty at 'brief' level
34
+ readonly children: readonly SectionSummary[] // summaries of the section's children
35
+ readonly hasCode: boolean // copied from the source section's metadata
36
+ readonly hasList: boolean // copied from the source section's metadata
37
+ readonly hasTable: boolean // copied from the source section's metadata
38
+ }
39
+
40
+ export interface DocumentSummary {
41
+ readonly path: string // copied from the source document
42
+ readonly title: string // copied from the source document
43
+ readonly originalTokens: number // the source document's metadata.tokenCount
44
+ readonly summaryTokens: number // sum of summaryTokens over the sections that were kept (formatting overhead not included)
45
+ readonly compressionRatio: number // 1 - summaryTokens / originalTokens, or 0 when originalTokens is 0
46
+ readonly sections: readonly SectionSummary[] // kept sections; children of an omitted section may appear in its place
47
+ readonly keyTopics: readonly string[] // lowercased headings and frontmatter tags, at most MAX_TOPICS entries
48
+ /** True if content was truncated to fit budget (only set when at least one section was omitted) */
49
+ readonly truncated?: boolean
50
+ /** Number of sections that were omitted due to budget constraints (only set together with `truncated`) */
51
+ readonly truncatedCount?: number
52
+ }
53
+
54
+ export interface AssembleContextOptions {
55
+ /** Total token budget shared by all sources; each source is first summarized with an equal share of it */
56
+ readonly budget: number
57
+ /** Compression level for each source; assembleContext uses 'summary' when omitted */
58
+ readonly level?: CompressionLevel | undefined
59
+ }
60
+
61
+ export interface AssembledContext {
62
+ readonly sources: readonly SourceContext[] // sources that were included, in input order
63
+ readonly totalTokens: number // sum of `tokens` over the included sources
64
+ readonly budget: number // the budget the caller asked for
65
+ readonly overflow: readonly string[] // paths of sources that were left out
66
+ }
67
+
68
+ export interface SourceContext {
69
+ readonly path: string // source path relative to the root path given to assembleContext
70
+ readonly title: string // title of the summarized document
71
+ readonly tokens: number // approximate token count of `content` (the formatted output)
72
+ readonly content: string // formatted summary text
73
+ }
74
+
75
+ // ============================================================================
76
+ // Constants
77
+ // ============================================================================
78
+
79
+ /** Token budgets per compression level: default document budget and upper cap for a single section's target size */
80
+ const TOKEN_BUDGETS: Record<CompressionLevel, number> = {
81
+ brief: 100,
82
+ summary: 500,
83
+ full: Infinity,
84
+ }
85
+
86
+ /** Minimum character length for a sentence to be considered meaningful (trimmed length must be strictly greater) */
87
+ const MIN_SENTENCE_LENGTH = 10
88
+
89
+ /** Score weights for sentence importance heuristics (weights add up when several apply) */
90
+ const SENTENCE_SCORE_DEFINITION = 2 // sentences containing a colon (likely definitions)
91
+ const SENTENCE_SCORE_PROPER_START = 1 // sentences starting with an ASCII capital letter
92
+ const SENTENCE_SCORE_MEDIUM_LENGTH = 1 // sentences in ideal length range
93
+ const SENTENCE_SCORE_EMPHASIS = 1 // sentences containing `**` or a backtick
94
+
95
+ /** Ideal sentence length range for summaries, in characters (both bounds exclusive) */
96
+ const SENTENCE_LENGTH_MIN = 50
97
+ const SENTENCE_LENGTH_MAX = 200
98
+
99
+ /** Target compression ratio for summaries (30% of original) */
100
+ const SUMMARY_COMPRESSION_RATIO = 0.3
101
+
102
+ /** Minimum target tokens for any section summary (floor applied before the per-level cap) */
103
+ const MIN_SECTION_TOKENS = 20
104
+
105
+ /** Minimum sentences to request when extracting key points for a summary */
106
+ const MIN_SUMMARY_SENTENCES = 2
107
+
108
+ /** Approximate tokens per sentence (for calculating max sentences) */
109
+ const TOKENS_PER_SENTENCE_ESTIMATE = 30
110
+
111
+ /** Topic heading length constraints, in characters (both bounds exclusive) */
112
+ const MIN_TOPIC_LENGTH = 2
113
+ const MAX_TOPIC_LENGTH = 50
114
+
115
+ /** Maximum topics to extract from a document */
116
+ const MAX_TOPICS = 10
117
+
118
+ /** Minimum remaining budget to include partial content (remaining tokens must be strictly greater) */
119
+ const MIN_PARTIAL_BUDGET = 50
120
+
121
+ // ============================================================================
122
+ // Section Summarization
123
+ // ============================================================================
124
+
125
/**
 * Pick up to `maxSentences` representative sentences from a block of text.
 *
 * Newlines are flattened to spaces and the text is split after `.`, `!` or `?`
 * followed by whitespace. Fragments whose trimmed length is not greater than
 * MIN_SENTENCE_LENGTH are dropped. When more candidates remain than requested,
 * each one is scored with the SENTENCE_SCORE_* heuristics and the
 * highest-scoring ones are kept (earlier sentences win ties).
 *
 * The kept sentences are always returned in the order they appear in the
 * text, so that joining them produces a summary that reads in document order
 * instead of in score order.
 *
 * @param content - Plain text to extract sentences from
 * @param maxSentences - Upper bound on the number of sentences returned
 * @returns The selected sentences, in their original order
 */
const extractKeyPoints = (content: string, maxSentences: number): string[] => {
  // Split into sentences
  const sentences = content
    .replace(/\n+/g, ' ')
    .split(/(?<=[.!?])\s+/)
    .filter((s) => s.trim().length > MIN_SENTENCE_LENGTH)

  if (sentences.length <= maxSentences) {
    return sentences
  }

  // Simple heuristic: prefer sentences with key indicators.
  // The original position is kept so document order can be restored below.
  const scored = sentences.map((sentence, index) => {
    let score = 0
    if (sentence.includes(':')) score += SENTENCE_SCORE_DEFINITION
    if (/^[A-Z]/.test(sentence)) score += SENTENCE_SCORE_PROPER_START
    if (
      sentence.length > SENTENCE_LENGTH_MIN &&
      sentence.length < SENTENCE_LENGTH_MAX
    )
      score += SENTENCE_SCORE_MEDIUM_LENGTH
    if (/\*\*|`/.test(sentence)) score += SENTENCE_SCORE_EMPHASIS
    return { sentence, index, score }
  })

  // `scored` is a fresh array, so sorting it in place does not touch `sentences`
  return scored
    .sort((a, b) => b.score - a.score || a.index - b.index)
    .slice(0, maxSentences)
    .sort((a, b) => a.index - b.index)
    .map((entry) => entry.sentence)
}
152
+
153
/**
 * Build a SectionSummary for one section and, recursively, for its children.
 *
 * The section's target size is SUMMARY_COMPRESSION_RATIO of its token count,
 * raised to at least MIN_SECTION_TOKENS and capped by TOKEN_BUDGETS[level].
 * The summary text is then chosen as follows:
 * - 'full' level, or a section already within its target: the plain text
 *   (plain text is used so the heading markdown is not repeated).
 * - 'brief' level: a bracketed list of content tags (`code`, `list`, `table`),
 *   or an empty string when none apply.
 * - 'summary' level: key sentences joined with spaces; if no sentence
 *   qualifies, the first words of the plain text followed by `...` when cut.
 *
 * @param section - Parsed section to summarize
 * @param level - Compression level applied to this section and its children
 * @returns Summary of the section including summaries of all children
 */
const summarizeSection = (
  section: MdSection,
  level: CompressionLevel,
): SectionSummary => {
  const { metadata, plainText } = section
  const originalTokens = metadata.tokenCount

  // Children are summarized first; their text does not affect this section's own summary
  const children = section.children.map((child) =>
    summarizeSection(child, level),
  )

  const scaledTarget = Math.max(
    originalTokens * SUMMARY_COMPRESSION_RATIO,
    MIN_SECTION_TOKENS,
  )
  const targetTokens = Math.min(TOKEN_BUDGETS[level], scaledTarget)

  const buildSummary = (): string => {
    if (level === 'full' || originalTokens <= targetTokens) {
      return plainText
    }

    if (level === 'brief') {
      const flags: ReadonlyArray<readonly [string, boolean]> = [
        ['code', metadata.hasCode],
        ['list', metadata.hasList],
        ['table', metadata.hasTable],
      ]
      const tags = flags.filter(([, present]) => present).map(([tag]) => tag)
      return tags.length > 0 ? `[${tags.join(', ')}]` : ''
    }

    // 'summary' level: pick key sentences, sized from the token target
    const sentenceLimit = Math.max(
      MIN_SUMMARY_SENTENCES,
      Math.floor(targetTokens / TOKENS_PER_SENTENCE_ESTIMATE),
    )
    const keyPoints = extractKeyPoints(plainText, sentenceLimit)
    if (keyPoints.length > 0) {
      return keyPoints.join(' ')
    }

    // No usable sentence: fall back to a word-level cut of the plain text
    const allWords = plainText.split(/\s+/)
    const keptWords = allWords.slice(0, targetTokens)
    const ellipsis = keptWords.length < allWords.length ? '...' : ''
    return keptWords.join(' ') + ellipsis
  }

  const summary = buildSummary()

  return {
    heading: section.heading,
    level: section.level,
    originalTokens,
    summaryTokens: countTokensApprox(summary),
    summary,
    children,
    hasCode: metadata.hasCode,
    hasList: metadata.hasList,
    hasTable: metadata.hasTable,
  }
}
217
+
218
+ // ============================================================================
219
+ // Document Summarization
220
+ // ============================================================================
221
+
222
/**
 * Collect a short list of topic strings for a document.
 *
 * Topics come from two places, in this order:
 * 1. Every section heading (depth-first), with `:`, `#`, `-` and `_` replaced
 *    by spaces, whitespace runs collapsed to a single space, trimmed and
 *    lowercased. A heading is kept only when the cleaned text is longer than
 *    MIN_TOPIC_LENGTH and shorter than MAX_TOPIC_LENGTH characters.
 * 2. String entries of a `tags` array in the frontmatter, lowercased.
 *
 * Duplicates are removed and at most MAX_TOPICS topics are returned, so tags
 * can be crowded out by headings in documents with many sections.
 *
 * @param document - Parsed document to inspect
 * @returns De-duplicated topics in first-seen order
 */
const extractTopics = (document: MdDocument): string[] => {
  const topics = new Set<string>()

  // Extract from headings
  const collectHeadings = (section: MdSection): void => {
    const cleanHeading = section.heading
      .replace(/[:#\-_]/g, ' ')
      // Replaced punctuation leaves runs of spaces ("API - Ref" -> "API   Ref"); collapse them
      .replace(/\s+/g, ' ')
      .trim()
      .toLowerCase()
    if (
      cleanHeading.length > MIN_TOPIC_LENGTH &&
      cleanHeading.length < MAX_TOPIC_LENGTH
    ) {
      topics.add(cleanHeading)
    }

    for (const child of section.children) {
      collectHeadings(child)
    }
  }

  for (const section of document.sections) {
    collectHeadings(section)
  }

  // Also extract from frontmatter tags if present.
  // The frontmatter itself may be absent, so it is accessed defensively.
  const frontmatter = document.frontmatter as
    | Record<string, unknown>
    | null
    | undefined
  const tags = frontmatter?.tags
  if (Array.isArray(tags)) {
    for (const tag of tags) {
      if (typeof tag === 'string') {
        topics.add(tag.toLowerCase())
      }
    }
  }

  return Array.from(topics).slice(0, MAX_TOPICS)
}
260
+
261
/**
 * Summarize a parsed document and trim the result to a token budget.
 *
 * Every section is summarized at the requested level (default 'summary').
 * The budget (default TOKEN_BUDGETS[level]) is reduced by an estimate of the
 * formatting overhead (header, topics line, truncation warning, plus a 20%
 * and 20-token safety margin). What remains is the content budget.
 *
 * When the section summaries exceed the content budget, sections are visited
 * depth-first and kept greedily while they fit. A section that does not fit
 * is counted as omitted, but its children are still tried and, if they fit,
 * take the omitted parent's place in the result.
 *
 * A budget smaller than the formatting overhead yields a content budget of 0:
 * only sections whose summary costs no tokens are kept and the rest are
 * reported as truncated. The budget is never ignored.
 *
 * @param document - Parsed document to summarize
 * @param options - Compression level and maximum output tokens
 * @returns The document summary; `truncated` and `truncatedCount` are present
 *   only when at least one section was omitted
 */
export const summarizeDocument = (
  document: MdDocument,
  options: SummarizeOptions = {},
): DocumentSummary => {
  const level = options.level ?? 'summary'
  const maxTokens = options.maxTokens ?? TOKEN_BUDGETS[level]

  // Summarize all sections
  const allSections = document.sections.map((s) => summarizeSection(s, level))

  // Total summary tokens across the whole section tree
  const originalTokens = document.metadata.tokenCount
  let totalSummaryTokens = 0

  const addSectionTokens = (section: SectionSummary): void => {
    totalSummaryTokens += section.summaryTokens
    for (const child of section.children) {
      addSectionTokens(child)
    }
  }

  for (const section of allSections) {
    addSectionTokens(section)
  }

  // Calculate formatting overhead dynamically based on actual content
  // Header includes: "# {title}\nPath: {path}\nTokens: X (Y% reduction from Z)\n"
  // Plus topics line if present, plus possible truncation warning
  const topics = extractTopics(document)
  const headerTemplate = `# ${document.title}\nPath: ${document.path}\nTokens: 9999 (99% reduction from ${document.metadata.tokenCount})\n`
  const topicsLine =
    topics.length > 0 ? `\n**Topics:** ${topics.join(', ')}\n` : ''
  const truncationWarning =
    '\n⚠️ TRUNCATED: 999 sections omitted to fit token budget'
  // Add all possible overhead plus a generous safety margin (20% of overhead + 20 base)
  // This accounts for variance in token estimation
  const baseOverhead = countTokensApprox(
    headerTemplate + topicsLine + truncationWarning,
  )
  const formattingOverhead = Math.ceil(baseOverhead * 1.2) + 20
  // Clamp at 0 so a budget below the overhead still triggers truncation
  // instead of silently disabling it.
  const contentBudget = Math.max(0, maxTokens - formattingOverhead)

  let truncatedCount = 0
  let sections: SectionSummary[] = allSections
  let summaryTokens = totalSummaryTokens

  if (totalSummaryTokens > contentBudget) {
    // Need to truncate - use greedy tree traversal that can include children
    // even when parent doesn't fit (orphan rescue)
    let tokensUsed = 0

    const truncateSections = (
      sectionList: readonly SectionSummary[],
    ): SectionSummary[] => {
      const kept: SectionSummary[] = []

      for (const section of sectionList) {
        const fitsInBudget =
          tokensUsed + section.summaryTokens <= contentBudget

        if (fitsInBudget) {
          // Section fits - include it and recursively process children
          tokensUsed += section.summaryTokens
          kept.push({
            ...section,
            children: truncateSections(section.children),
          })
        } else {
          // Section doesn't fit - count it, but promote any children that do fit
          truncatedCount++
          kept.push(...truncateSections(section.children))
        }
      }

      return kept
    }

    sections = truncateSections(allSections)
    summaryTokens = tokensUsed
  }

  const truncated = truncatedCount > 0

  const compressionRatio =
    originalTokens > 0 ? 1 - summaryTokens / originalTokens : 0

  const result: DocumentSummary = {
    path: document.path,
    title: document.title,
    originalTokens,
    summaryTokens,
    compressionRatio,
    sections,
    keyTopics: topics,
  }

  if (truncated) {
    return {
      ...result,
      truncated: true,
      truncatedCount,
    }
  }

  return result
}
377
+
378
/**
 * Parse a markdown file and summarize it.
 *
 * Parse failures are converted to a plain `Error` whose message is the
 * original error's tag and message joined as `"<tag>: <message>"`.
 *
 * @param filePath - Path of the file to parse
 * @param options - Options forwarded unchanged to summarizeDocument
 * @returns An Effect yielding the document summary, or failing with `Error`
 */
export const summarizeFile = (
  filePath: string,
  options: SummarizeOptions = {},
): Effect.Effect<DocumentSummary, Error> =>
  Effect.gen(function* () {
    // Built inside the generator so parsing is only set up when the effect runs
    const parsed = parseFile(filePath)
    const parsedWithPlainError = Effect.mapError(
      parsed,
      (e) => new Error(`${e._tag}: ${e.message}`),
    )

    const document = yield* parsedWithPlainError
    return summarizeDocument(document, options)
  })
389
+
390
+ // ============================================================================
391
+ // Format Summary for Output (re-exported from formatters.ts)
392
+ // ============================================================================
393
+
394
+ export { type FormatSummaryOptions, formatSummary } from './formatters.js'
395
+
396
+ // ============================================================================
397
+ // Multi-Document Context Assembly
398
+ // ============================================================================
399
+
400
/**
 * Summarize several files and pack the formatted summaries into one token budget.
 *
 * The budget is split evenly to give each source its initial `maxTokens`.
 * Sources are processed in order:
 * - If the formatted summary fits in what is left of the total budget, it is added.
 * - Otherwise, if more than MIN_PARTIAL_BUDGET tokens remain, the file is
 *   re-summarized at 'brief' level with the remaining tokens as its budget
 *   and that result is added.
 * - Otherwise the source's root-relative path is recorded in `overflow`.
 *
 * A source that cannot be summarized is skipped and recorded in `overflow`
 * under the path the caller passed in. Failures of the summarize effect are
 * recovered with `Effect.catchAll`: a `try/catch` inside `Effect.gen` does not
 * see failures of yielded effects, so without this one bad file would fail
 * the whole assembly.
 *
 * @param rootPath - Directory that relative source paths are resolved against
 * @param sourcePaths - Files to include; absolute paths are used as-is
 * @param options - Total budget and compression level (default 'summary')
 * @returns An Effect yielding the included sources, token total, budget and overflow list
 */
export const assembleContext = (
  rootPath: string,
  sourcePaths: readonly string[],
  options: AssembleContextOptions,
): Effect.Effect<AssembledContext, Error> =>
  Effect.gen(function* () {
    const budget = options.budget
    const level = options.level ?? 'summary'

    const sources: SourceContext[] = []
    const overflow: string[] = []
    let totalTokens = 0

    // Calculate per-source budget (even distribution).
    // With no sources this value is never used because the loop does not run.
    const perSourceBudget = Math.floor(budget / sourcePaths.length)

    // Turn a summarize failure into `null` so the loop can skip the file
    const summarizeOrNull = (
      filePath: string,
      summarizeOptions: SummarizeOptions,
    ) =>
      summarizeFile(filePath, summarizeOptions).pipe(
        Effect.catchAll(() => Effect.succeed(null)),
      )

    for (const sourcePath of sourcePaths) {
      const resolvedPath = path.isAbsolute(sourcePath)
        ? sourcePath
        : path.join(rootPath, sourcePath)

      try {
        const summary = yield* summarizeOrNull(resolvedPath, {
          level,
          maxTokens: perSourceBudget,
        })
        if (summary === null) {
          // Skip files that can't be processed
          overflow.push(sourcePath)
          continue
        }

        const content = formatSummaryImpl(summary)
        // Count actual formatted output tokens, not pre-format summary tokens
        const tokens = countTokensApprox(content)

        if (totalTokens + tokens <= budget) {
          sources.push({
            path: path.relative(rootPath, resolvedPath),
            title: summary.title,
            tokens,
            content,
          })
          totalTokens += tokens
          continue
        }

        // Over budget
        const remaining = budget - totalTokens
        if (remaining <= MIN_PARTIAL_BUDGET) {
          overflow.push(path.relative(rootPath, resolvedPath))
          continue
        }

        // Include partial if we have some room
        const briefSummary = yield* summarizeOrNull(resolvedPath, {
          level: 'brief',
          maxTokens: remaining,
        })
        if (briefSummary === null) {
          overflow.push(sourcePath)
          continue
        }

        const briefContent = formatSummaryImpl(briefSummary)
        // Count actual formatted output tokens, not pre-format summary tokens
        const briefTokens = countTokensApprox(briefContent)

        sources.push({
          path: path.relative(rootPath, resolvedPath),
          title: briefSummary.title,
          tokens: briefTokens,
          content: briefContent,
        })
        totalTokens += briefTokens
      } catch (_e) {
        // Synchronous throws (e.g. while formatting) still land here
        overflow.push(sourcePath)
      }
    }

    return {
      sources,
      totalTokens,
      budget,
      overflow,
    }
  })
476
+
477
+ // ============================================================================
478
+ // Format Assembled Context (re-exported from formatters.ts)
479
+ // ============================================================================
480
+
481
+ export { formatAssembledContext } from './formatters.js'
482
+
483
+ // ============================================================================
484
+ // Measure Token Reduction
485
+ // ============================================================================
486
+
487
+ export interface TokenReductionReport {
488
+ readonly originalTokens: number // approximate token count of the raw file content
489
+ readonly summaryTokens: number // token count of the summary; equals originalTokens when summarizing failed
490
+ readonly reduction: number // originalTokens - summaryTokens
491
+ readonly reductionPercent: number // reduction / originalTokens as a fraction (not multiplied by 100); 0 when originalTokens is 0
492
+ }
493
+
494
/**
 * Measure how many tokens summarizing a file saves.
 *
 * The original size is the approximate token count of the raw file content.
 * If the file cannot be summarized, the error is swallowed and the report
 * shows no reduction (summary size equal to the original size). A failure to
 * read the file still rejects the returned promise.
 *
 * Note that `reductionPercent` is a fraction of the original size, not a
 * value multiplied by 100.
 *
 * @param filePath - File to read and summarize
 * @param level - Compression level to measure (default 'summary')
 * @returns Token counts before and after, and the absolute and relative reduction
 */
export const measureReduction = async (
  filePath: string,
  level: CompressionLevel = 'summary',
): Promise<TokenReductionReport> => {
  // Size of the untouched file
  const rawContent = await fs.readFile(filePath, 'utf-8')
  const originalTokens = countTokensApprox(rawContent)

  // Summarize, mapping any failure to null
  const summarizeOrNull = summarizeFile(filePath, { level }).pipe(
    Effect.catchAll(() => Effect.succeed(null)),
  )
  const summary = await Effect.runPromise(summarizeOrNull)

  // A failed summary counts as "nothing saved"
  const summaryTokens = summary ? summary.summaryTokens : originalTokens
  const reduction = originalTokens - summaryTokens
  const reductionPercent = originalTokens > 0 ? reduction / originalTokens : 0

  return { originalTokens, summaryTokens, reduction, reductionPercent }
}