mdcontext 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (251) hide show
  1. package/.changeset/config.json +9 -9
  2. package/.claude/settings.local.json +25 -0
  3. package/.github/workflows/claude-code-review.yml +44 -0
  4. package/.github/workflows/claude.yml +85 -0
  5. package/CONTRIBUTING.md +186 -0
  6. package/NOTES/NOTES +44 -0
  7. package/README.md +206 -3
  8. package/biome.json +1 -1
  9. package/dist/chunk-23UPXDNL.js +3044 -0
  10. package/dist/chunk-2W7MO2DL.js +1366 -0
  11. package/dist/chunk-3NUAZGMA.js +1689 -0
  12. package/dist/chunk-7TOWB2XB.js +366 -0
  13. package/dist/chunk-7XOTOADQ.js +3065 -0
  14. package/dist/chunk-AH2PDM2K.js +3042 -0
  15. package/dist/chunk-BNXWSZ63.js +3742 -0
  16. package/dist/chunk-BTL5DJVU.js +3222 -0
  17. package/dist/chunk-HDHYG7E4.js +104 -0
  18. package/dist/chunk-HLR4KZBP.js +3234 -0
  19. package/dist/chunk-IP3FRFEB.js +1045 -0
  20. package/dist/chunk-KHU56VDO.js +3042 -0
  21. package/dist/chunk-KRYIFLQR.js +85 -89
  22. package/dist/chunk-LBSDNLEM.js +287 -0
  23. package/dist/chunk-MNTQ7HCP.js +2643 -0
  24. package/dist/chunk-MUJELQQ6.js +1387 -0
  25. package/dist/chunk-MXJGMSLV.js +2199 -0
  26. package/dist/chunk-N6QJGC3Z.js +2636 -0
  27. package/dist/chunk-OBELGBPM.js +1713 -0
  28. package/dist/chunk-OT7R5XTA.js +3192 -0
  29. package/dist/chunk-P7X4RA2T.js +106 -0
  30. package/dist/chunk-PIDUQNC2.js +3185 -0
  31. package/dist/chunk-POGCDIH4.js +3187 -0
  32. package/dist/chunk-PSIEOQGZ.js +3043 -0
  33. package/dist/chunk-PVRT3IHA.js +3238 -0
  34. package/dist/chunk-QNN4TT23.js +1430 -0
  35. package/dist/chunk-RE3R45RJ.js +3042 -0
  36. package/dist/chunk-S7E6TFX6.js +718 -657
  37. package/dist/chunk-SG6GLU4U.js +1378 -0
  38. package/dist/chunk-SJCDV2ST.js +274 -0
  39. package/dist/chunk-SYE5XLF3.js +104 -0
  40. package/dist/chunk-T5VLYBZD.js +103 -0
  41. package/dist/chunk-TOQB7VWU.js +3238 -0
  42. package/dist/chunk-VFNMZ4ZQ.js +3228 -0
  43. package/dist/chunk-VVTGZNBT.js +1533 -1423
  44. package/dist/chunk-W7Q4RFEV.js +104 -0
  45. package/dist/chunk-XTYYVRLO.js +3190 -0
  46. package/dist/chunk-Y6MDYVJD.js +3063 -0
  47. package/dist/cli/main.js +4072 -629
  48. package/dist/index.d.ts +420 -33
  49. package/dist/index.js +8 -15
  50. package/dist/mcp/server.js +103 -7
  51. package/dist/schema-BAWSG7KY.js +22 -0
  52. package/dist/schema-E3QUPL26.js +20 -0
  53. package/dist/schema-EHL7WUT6.js +20 -0
  54. package/docs/019-USAGE.md +44 -5
  55. package/docs/020-current-implementation.md +8 -8
  56. package/docs/021-DOGFOODING-FINDINGS.md +1 -1
  57. package/docs/CONFIG.md +1123 -0
  58. package/docs/ERRORS.md +383 -0
  59. package/docs/summarization.md +320 -0
  60. package/justfile +40 -0
  61. package/package.json +39 -33
  62. package/research/INDEX.md +315 -0
  63. package/research/code-review/README.md +90 -0
  64. package/research/code-review/cli-error-handling-review.md +979 -0
  65. package/research/code-review/code-review-validation-report.md +464 -0
  66. package/research/code-review/main-ts-review.md +1128 -0
  67. package/research/config-docs/SUMMARY.md +357 -0
  68. package/research/config-docs/TEST-RESULTS.md +776 -0
  69. package/research/config-docs/TODO.md +542 -0
  70. package/research/config-docs/analysis.md +744 -0
  71. package/research/config-docs/fix-validation.md +502 -0
  72. package/research/config-docs/help-audit.md +264 -0
  73. package/research/config-docs/help-system-analysis.md +890 -0
  74. package/research/frontmatter/COMMENTS-ARE-SKIPPED.md +149 -0
  75. package/research/frontmatter/LLM-CODE-NAVIGATION.md +276 -0
  76. package/research/issue-review.md +603 -0
  77. package/research/llm-summarization/agent-cli-tools-2026.md +1082 -0
  78. package/research/llm-summarization/alternative-providers-2026.md +1428 -0
  79. package/research/llm-summarization/anthropic-2026.md +367 -0
  80. package/research/llm-summarization/claude-cli-integration.md +1706 -0
  81. package/research/llm-summarization/cli-integration-patterns.md +3155 -0
  82. package/research/llm-summarization/openai-2026.md +473 -0
  83. package/research/llm-summarization/openai-compatible-providers-2026.md +1022 -0
  84. package/research/llm-summarization/opencode-cli-integration.md +1552 -0
  85. package/research/llm-summarization/prompt-engineering-2026.md +1426 -0
  86. package/research/llm-summarization/prototype-results.md +56 -0
  87. package/research/llm-summarization/provider-switching-patterns-2026.md +2153 -0
  88. package/research/llm-summarization/typescript-llm-libraries-2026.md +2436 -0
  89. package/research/mdcontext-pudding/00-EXECUTIVE-SUMMARY.md +282 -0
  90. package/research/mdcontext-pudding/01-index-embed.md +956 -0
  91. package/research/mdcontext-pudding/02-search-COMMANDS.md +142 -0
  92. package/research/mdcontext-pudding/02-search-SUMMARY.md +146 -0
  93. package/research/mdcontext-pudding/02-search.md +970 -0
  94. package/research/mdcontext-pudding/03-context.md +779 -0
  95. package/research/mdcontext-pudding/04-navigation-and-analytics.md +803 -0
  96. package/research/mdcontext-pudding/04-tree.md +704 -0
  97. package/research/mdcontext-pudding/05-config.md +1038 -0
  98. package/research/mdcontext-pudding/06-links-summary.txt +87 -0
  99. package/research/mdcontext-pudding/06-links.md +679 -0
  100. package/research/mdcontext-pudding/07-stats.md +693 -0
  101. package/research/mdcontext-pudding/BUG-FIX-PLAN.md +388 -0
  102. package/research/mdcontext-pudding/P0-BUG-VALIDATION.md +167 -0
  103. package/research/mdcontext-pudding/README.md +168 -0
  104. package/research/mdcontext-pudding/TESTING-SUMMARY.md +128 -0
  105. package/research/research-quality-review.md +834 -0
  106. package/research/semantic-search/embedding-text-analysis.md +156 -0
  107. package/research/semantic-search/multi-word-failure-reproduction.md +171 -0
  108. package/research/semantic-search/query-processing-analysis.md +207 -0
  109. package/research/semantic-search/root-cause-and-solution.md +114 -0
  110. package/research/semantic-search/threshold-validation-report.md +69 -0
  111. package/research/semantic-search/vector-search-analysis.md +63 -0
  112. package/research/test-path-issues.md +276 -0
  113. package/review/ALP-76/1-error-type-design.md +962 -0
  114. package/review/ALP-76/2-error-handling-patterns.md +906 -0
  115. package/review/ALP-76/3-error-presentation.md +624 -0
  116. package/review/ALP-76/4-test-coverage.md +625 -0
  117. package/review/ALP-76/5-migration-completeness.md +440 -0
  118. package/review/ALP-76/6-effect-best-practices.md +755 -0
  119. package/scripts/apply-branch-protection.sh +47 -0
  120. package/scripts/branch-protection-templates.json +79 -0
  121. package/scripts/prototype-summarization.ts +346 -0
  122. package/scripts/rebuild-hnswlib.js +32 -37
  123. package/scripts/setup-branch-protection.sh +64 -0
  124. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/active-provider.json +7 -0
  125. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.json +541 -0
  126. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.meta.json +5 -0
  127. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/config.json +8 -0
  128. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
  129. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
  130. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/documents.json +60 -0
  131. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/links.json +13 -0
  132. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/sections.json +1197 -0
  133. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/configuration-management.md +99 -0
  134. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/distributed-systems.md +92 -0
  135. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/error-handling.md +78 -0
  136. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/failure-automation.md +55 -0
  137. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/job-context.md +69 -0
  138. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/process-orchestration.md +99 -0
  139. package/src/cli/argv-preprocessor.test.ts +2 -2
  140. package/src/cli/cli.test.ts +230 -33
  141. package/src/cli/commands/config-cmd.ts +642 -0
  142. package/src/cli/commands/context.ts +97 -9
  143. package/src/cli/commands/duplicates.ts +122 -0
  144. package/src/cli/commands/embeddings.ts +529 -0
  145. package/src/cli/commands/index-cmd.ts +210 -30
  146. package/src/cli/commands/index.ts +3 -0
  147. package/src/cli/commands/search.ts +894 -64
  148. package/src/cli/commands/stats.ts +3 -0
  149. package/src/cli/commands/tree.ts +26 -5
  150. package/src/cli/config-layer.ts +176 -0
  151. package/src/cli/error-handler.test.ts +235 -0
  152. package/src/cli/error-handler.ts +655 -0
  153. package/src/cli/flag-schemas.ts +66 -0
  154. package/src/cli/help.ts +209 -7
  155. package/src/cli/main.ts +348 -58
  156. package/src/cli/options.ts +10 -0
  157. package/src/cli/shared-error-handling.ts +199 -0
  158. package/src/cli/utils.ts +150 -17
  159. package/src/config/file-provider.test.ts +320 -0
  160. package/src/config/file-provider.ts +273 -0
  161. package/src/config/index.ts +72 -0
  162. package/src/config/integration.test.ts +667 -0
  163. package/src/config/precedence.test.ts +277 -0
  164. package/src/config/precedence.ts +451 -0
  165. package/src/config/schema.test.ts +414 -0
  166. package/src/config/schema.ts +603 -0
  167. package/src/config/service.test.ts +320 -0
  168. package/src/config/service.ts +243 -0
  169. package/src/config/testing.test.ts +264 -0
  170. package/src/config/testing.ts +110 -0
  171. package/src/core/types.ts +6 -33
  172. package/src/duplicates/detector.test.ts +183 -0
  173. package/src/duplicates/detector.ts +414 -0
  174. package/src/duplicates/index.ts +18 -0
  175. package/src/embeddings/embedding-namespace.test.ts +300 -0
  176. package/src/embeddings/embedding-namespace.ts +947 -0
  177. package/src/embeddings/heading-boost.test.ts +222 -0
  178. package/src/embeddings/hnsw-build-options.test.ts +198 -0
  179. package/src/embeddings/hyde.test.ts +272 -0
  180. package/src/embeddings/hyde.ts +264 -0
  181. package/src/embeddings/index.ts +2 -0
  182. package/src/embeddings/openai-provider.ts +332 -83
  183. package/src/embeddings/pricing.json +22 -0
  184. package/src/embeddings/provider-constants.ts +204 -0
  185. package/src/embeddings/provider-errors.test.ts +967 -0
  186. package/src/embeddings/provider-errors.ts +565 -0
  187. package/src/embeddings/provider-factory.test.ts +240 -0
  188. package/src/embeddings/provider-factory.ts +225 -0
  189. package/src/embeddings/provider-integration.test.ts +788 -0
  190. package/src/embeddings/query-preprocessing.test.ts +187 -0
  191. package/src/embeddings/semantic-search-threshold.test.ts +508 -0
  192. package/src/embeddings/semantic-search.ts +780 -93
  193. package/src/embeddings/types.ts +293 -16
  194. package/src/embeddings/vector-store.ts +486 -77
  195. package/src/embeddings/voyage-provider.ts +313 -0
  196. package/src/errors/errors.test.ts +845 -0
  197. package/src/errors/index.ts +533 -0
  198. package/src/index/ignore-patterns.test.ts +354 -0
  199. package/src/index/ignore-patterns.ts +305 -0
  200. package/src/index/indexer.ts +286 -48
  201. package/src/index/storage.ts +94 -30
  202. package/src/index/types.ts +40 -2
  203. package/src/index/watcher.ts +67 -9
  204. package/src/index.ts +22 -0
  205. package/src/integration/search-keyword.test.ts +678 -0
  206. package/src/mcp/server.ts +135 -6
  207. package/src/parser/parser.ts +18 -19
  208. package/src/parser/section-filter.test.ts +277 -0
  209. package/src/parser/section-filter.ts +125 -3
  210. package/src/search/__tests__/hybrid-search.test.ts +650 -0
  211. package/src/search/bm25-store.ts +366 -0
  212. package/src/search/cross-encoder.test.ts +253 -0
  213. package/src/search/cross-encoder.ts +406 -0
  214. package/src/search/fuzzy-search.test.ts +419 -0
  215. package/src/search/fuzzy-search.ts +273 -0
  216. package/src/search/hybrid-search.ts +448 -0
  217. package/src/search/path-matcher.test.ts +276 -0
  218. package/src/search/path-matcher.ts +33 -0
  219. package/src/search/searcher.test.ts +99 -1
  220. package/src/search/searcher.ts +189 -67
  221. package/src/search/wink-bm25.d.ts +30 -0
  222. package/src/summarization/cli-providers/claude.ts +202 -0
  223. package/src/summarization/cli-providers/detection.test.ts +273 -0
  224. package/src/summarization/cli-providers/detection.ts +118 -0
  225. package/src/summarization/cli-providers/index.ts +8 -0
  226. package/src/summarization/cost.test.ts +139 -0
  227. package/src/summarization/cost.ts +102 -0
  228. package/src/summarization/error-handler.test.ts +127 -0
  229. package/src/summarization/error-handler.ts +111 -0
  230. package/src/summarization/index.ts +102 -0
  231. package/src/summarization/pipeline.test.ts +498 -0
  232. package/src/summarization/pipeline.ts +231 -0
  233. package/src/summarization/prompts.test.ts +269 -0
  234. package/src/summarization/prompts.ts +133 -0
  235. package/src/summarization/provider-factory.test.ts +396 -0
  236. package/src/summarization/provider-factory.ts +178 -0
  237. package/src/summarization/types.ts +184 -0
  238. package/src/summarize/summarizer.ts +104 -35
  239. package/src/types/huggingface-transformers.d.ts +66 -0
  240. package/tests/fixtures/cli/.mdcontext/active-provider.json +7 -0
  241. package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
  242. package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
  243. package/tests/fixtures/cli/.mdcontext/indexes/documents.json +4 -4
  244. package/tests/fixtures/cli/.mdcontext/indexes/sections.json +14 -0
  245. package/tests/integration/embed-index.test.ts +712 -0
  246. package/tests/integration/search-context.test.ts +469 -0
  247. package/tests/integration/search-semantic.test.ts +522 -0
  248. package/vitest.config.ts +1 -6
  249. package/AGENTS.md +0 -46
  250. package/tests/fixtures/cli/.mdcontext/vectors.bin +0 -0
  251. package/tests/fixtures/cli/.mdcontext/vectors.meta.json +0 -1264
@@ -0,0 +1,184 @@
1
+ /**
2
+ * Summarization Module Types
3
+ *
4
+ * Core interfaces for AI-powered summarization of search results.
5
+ * Supports both CLI-based providers (free with subscriptions) and
6
+ * API-based providers (pay-per-use via Vercel AI SDK).
7
+ */
8
+
9
+ /**
10
+ * Summarization mode - CLI providers are free, API providers cost money
11
+ */
12
+ export type SummarizationMode = 'cli' | 'api'
13
+
14
+ /**
15
+ * Known CLI tools that can be used for summarization
16
+ */
17
+ export type CLIProviderName =
18
+ | 'claude'
19
+ | 'copilot'
20
+ | 'cline'
21
+ | 'aider'
22
+ | 'opencode'
23
+ | 'amp'
24
+
25
+ /**
26
+ * Known API providers for summarization
27
+ */
28
+ export type APIProviderName =
29
+ | 'deepseek'
30
+ | 'anthropic'
31
+ | 'openai'
32
+ | 'gemini'
33
+ | 'qwen'
34
+
35
+ /**
36
+ * Information about a detected CLI tool
37
+ */
38
+ export interface CLIInfo {
39
+ /** Internal name identifier */
40
+ readonly name: CLIProviderName
41
+ /** Command to execute */
42
+ readonly command: string
43
+ /** Display name for UI */
44
+ readonly displayName: string
45
+ /** Arguments to pass to the CLI for summarization */
46
+ readonly args: readonly string[]
47
+ /** Whether to use stdin for input */
48
+ readonly useStdin: boolean
49
+ }
50
+
51
+ /**
52
+ * Static description of an API summarization provider: token pricing plus the default model, base URL and API-key environment variable.
53
+ */
54
+ export interface APIProviderPricing {
55
+ /** Provider name (one of the APIProviderName identifiers) */
56
+ readonly provider: APIProviderName
57
+ /** Human-readable display name */
58
+ readonly displayName: string
59
+ /** Cost per million input tokens (currency not stated here; presumably USD like SummaryResult.estimatedCost - TODO confirm) */
60
+ readonly inputCostPer1M: number
61
+ /** Cost per million output tokens (same currency assumption as inputCostPer1M - TODO confirm) */
62
+ readonly outputCostPer1M: number
63
+ /** Default model to use */
64
+ readonly defaultModel: string
65
+ /** Base URL for the API */
66
+ readonly baseURL: string
67
+ /** Name of the environment variable that holds the API key */
68
+ readonly apiKeyEnvVar: string
69
+ }
70
+
71
+ /**
72
+ * Result from a summarization operation
73
+ */
74
+ export interface SummaryResult {
75
+ /** The generated summary text */
76
+ readonly summary: string
77
+ /** Provider that generated the summary */
78
+ readonly provider: CLIProviderName | APIProviderName
79
+ /** Mode used (cli or api) */
80
+ readonly mode: SummarizationMode
81
+ /** Estimated cost in USD (0 for CLI providers) */
82
+ readonly estimatedCost: number
83
+ /** Time taken in milliseconds */
84
+ readonly durationMs: number
85
+ /** Token count estimates (if available) */
86
+ readonly tokens?: {
87
+ readonly input: number
88
+ readonly output: number
89
+ }
90
+ }
91
+
92
+ /**
93
+ * Core summarizer interface - simple and focused
94
+ *
95
+ * Each provider just needs to implement this interface.
96
+ * CLI providers return 0 for cost, API providers calculate actual costs.
97
+ */
98
+ export interface Summarizer {
99
+ /** Generate a summary from input text */
100
+ summarize(input: string, prompt: string): Promise<SummaryResult>
101
+
102
+ /** Estimate cost before running (optional, defaults to 0 for CLI) */
103
+ estimateCost?(inputTokens: number): number
104
+
105
+ /** Check if the provider is available/configured */
106
+ isAvailable(): Promise<boolean>
107
+ }
108
+
109
+ /**
110
+ * Options for streaming summarization
111
+ */
112
+ export interface StreamOptions {
113
+ /** Callback for each chunk of text */
114
+ onChunk: (chunk: string) => void
115
+ /** Callback when streaming completes */
116
+ onComplete?: (result: SummaryResult) => void
117
+ /** Callback on error */
118
+ onError?: (error: Error) => void
119
+ }
120
+
121
+ /**
122
+ * Extended summarizer interface with streaming support
123
+ */
124
+ export interface StreamingSummarizer extends Summarizer {
125
+ /** Generate a summary with streaming output */
126
+ summarizeStream(
127
+ input: string,
128
+ prompt: string,
129
+ options: StreamOptions,
130
+ ): Promise<void>
131
+ }
132
+
133
+ /**
134
+ * Configuration for AI summarization (distinct from existing SummarizationConfig)
135
+ *
136
+ * This configures the AI provider for generating summaries, not the
137
+ * token budget settings in the existing SummarizationConfig.
138
+ */
139
+ export interface AISummarizationConfig {
140
+ /** Mode: 'cli' (free) or 'api' (pay-per-use) */
141
+ readonly mode: SummarizationMode
142
+ /** Provider name */
143
+ readonly provider: CLIProviderName | APIProviderName
144
+ /** Model name (for API providers) */
145
+ readonly model?: string
146
+ /** Enable streaming output */
147
+ readonly stream?: boolean
148
+ /** Custom API base URL */
149
+ readonly baseURL?: string
150
+ /** API key (for API providers, usually from env) */
151
+ readonly apiKey?: string
152
+ }
153
+
154
+ /**
155
+ * Factory function type for creating summarizers
156
+ */
157
+ export type SummarizerFactory = (
158
+ config: AISummarizationConfig,
159
+ ) => Promise<Summarizer>
160
+
161
+ /**
162
+ * Error class for summarization failures. Carries a machine-readable `code` (see SummarizationErrorCode) and, optionally, the `provider` name and the underlying `cause`.
163
+ */
164
+ export class SummarizationError extends Error {
165
+ constructor(
166
+ message: string,
167
+ public readonly code: SummarizationErrorCode,
168
+ public readonly provider?: string,
169
+ public readonly cause?: Error,
170
+ ) {
171
+ super(message)
172
+ this.name = 'SummarizationError'
173
+ }
174
+ }
175
+
176
+ export type SummarizationErrorCode =
177
+ | 'PROVIDER_NOT_FOUND'
178
+ | 'PROVIDER_NOT_AVAILABLE'
179
+ | 'CLI_EXECUTION_FAILED'
180
+ | 'API_REQUEST_FAILED'
181
+ | 'RATE_LIMITED'
182
+ | 'INVALID_RESPONSE'
183
+ | 'TIMEOUT'
184
+ | 'NO_API_KEY'
@@ -7,11 +7,19 @@
7
7
  import * as fs from 'node:fs/promises'
8
8
  import * as path from 'node:path'
9
9
  import { Effect } from 'effect'
10
- import type { MdDocument, MdSection } from '../core/types.js'
10
+ import type { MdDocument, MdSection, ParseError } from '../core/types.js'
11
+ import type { FileReadError } from '../errors/index.js'
11
12
  import { parseFile } from '../parser/parser.js'
13
+ import { filterDocumentSections } from '../parser/section-filter.js'
12
14
  import { countTokensApprox } from '../utils/tokens.js'
13
15
  import { formatSummary as formatSummaryImpl } from './formatters.js'
14
16
 
17
+ /**
18
+ * Error type from parseFile function
19
+ * Uses centralized errors from src/errors/index.ts
20
+ */
21
+ type ParseFileError = ParseError | FileReadError
22
+
15
23
  // ============================================================================
16
24
  // Types
17
25
  // ============================================================================
@@ -23,6 +31,8 @@ export interface SummarizeOptions {
23
31
  readonly level?: CompressionLevel | undefined
24
32
  /** Maximum tokens for output */
25
33
  readonly maxTokens?: number | undefined
34
+ /** Section patterns to exclude from output */
35
+ readonly exclude?: readonly string[] | undefined
26
36
  }
27
37
 
28
38
  export interface SectionSummary {
@@ -56,6 +66,8 @@ export interface AssembleContextOptions {
56
66
  readonly budget: number
57
67
  /** Compression level for each source */
58
68
  readonly level?: CompressionLevel | undefined
69
+ /** Section patterns to exclude from output */
70
+ readonly exclude?: readonly string[] | undefined
59
71
  }
60
72
 
61
73
  export interface AssembledContext {
@@ -375,14 +387,27 @@ export const summarizeDocument = (
375
387
  return result
376
388
  }
377
389
 
390
+ /**
391
+ * Summarize a markdown file: parse it, drop sections matching `options.exclude` (when non-empty), then delegate to summarizeDocument.
392
+ * Failures are typed in the returned Effect's error channel (ParseFileError = ParseError | FileReadError) and propagate from parseFile unchanged:
393
+ * @throws ParseError - File content cannot be parsed
394
+ * @throws FileReadError - File cannot be read from filesystem
395
+ */
378
396
  export const summarizeFile = (
379
397
  filePath: string,
380
398
  options: SummarizeOptions = {},
381
- ): Effect.Effect<DocumentSummary, Error> =>
399
+ ): Effect.Effect<DocumentSummary, ParseFileError> =>
382
400
  Effect.gen(function* () {
383
- const document = yield* parseFile(filePath).pipe(
384
- Effect.mapError((e) => new Error(`${e._tag}: ${e.message}`)),
385
- )
401
+ let document = yield* parseFile(filePath)
402
+
403
+ // Apply exclusion filter only when at least one pattern was provided; otherwise the parsed document is summarized as-is
404
+ if (options.exclude && options.exclude.length > 0) {
405
+ const { document: filteredDoc } = filterDocumentSections(
406
+ document,
407
+ options.exclude,
408
+ )
409
+ document = filteredDoc
410
+ }
386
411
 
387
412
  return summarizeDocument(document, options)
388
413
  })
@@ -397,14 +422,21 @@ export { type FormatSummaryOptions, formatSummary } from './formatters.js'
397
422
  // Multi-Document Context Assembly
398
423
  // ============================================================================
399
424
 
425
+ /**
426
+ * Assemble context from multiple markdown files within a token budget
427
+ *
428
+ * @throws ParseError - File content cannot be parsed
429
+ * @throws FileReadError - File cannot be read from filesystem
430
+ */
400
431
  export const assembleContext = (
401
432
  rootPath: string,
402
433
  sourcePaths: readonly string[],
403
434
  options: AssembleContextOptions,
404
- ): Effect.Effect<AssembledContext, Error> =>
435
+ ): Effect.Effect<AssembledContext, ParseFileError> =>
405
436
  Effect.gen(function* () {
406
437
  const budget = options.budget
407
438
  const level = options.level ?? 'summary'
439
+ const excludePatterns = options.exclude ?? []
408
440
 
409
441
  const sources: SourceContext[] = []
410
442
  const overflow: string[] = []
@@ -418,33 +450,65 @@ export const assembleContext = (
418
450
  ? sourcePath
419
451
  : path.join(rootPath, sourcePath)
420
452
 
421
- try {
422
- const summary = yield* summarizeFile(resolvedPath, {
423
- level,
424
- maxTokens: perSourceBudget,
425
- })
426
-
427
- const content = formatSummaryImpl(summary)
428
- // Count actual formatted output tokens, not pre-format summary tokens
429
- const tokens = countTokensApprox(content)
453
+ // Use catchAll for graceful degradation - individual file failures
454
+ // shouldn't stop the entire context assembly operation
455
+ const summaryResult = yield* summarizeFile(resolvedPath, {
456
+ level,
457
+ maxTokens: perSourceBudget,
458
+ exclude: excludePatterns,
459
+ }).pipe(
460
+ Effect.map((s): DocumentSummary | null => s),
461
+ // Log error for observability before gracefully degrading
462
+ Effect.tapError((error) =>
463
+ Effect.logError(`Failed to summarize ${sourcePath}`, error),
464
+ ),
465
+ // Note: catchAll intentional for batch processing - individual file
466
+ // failures add to overflow instead of stopping assembly
467
+ Effect.catchAll(() => Effect.succeed(null as DocumentSummary | null)),
468
+ )
469
+
470
+ if (!summaryResult) {
471
+ overflow.push(sourcePath)
472
+ continue
473
+ }
430
474
 
431
- if (totalTokens + tokens <= budget) {
432
- sources.push({
433
- path: path.relative(rootPath, resolvedPath),
434
- title: summary.title,
435
- tokens,
436
- content,
437
- })
438
- totalTokens += tokens
439
- } else {
440
- // Over budget
441
- const remaining = budget - totalTokens
442
- if (remaining > MIN_PARTIAL_BUDGET) {
443
- // Include partial if we have some room
444
- const briefSummary = yield* summarizeFile(resolvedPath, {
445
- level: 'brief',
446
- maxTokens: remaining,
447
- })
475
+ const summary = summaryResult
476
+ const content = formatSummaryImpl(summary)
477
+ // Count actual formatted output tokens, not pre-format summary tokens
478
+ const tokens = countTokensApprox(content)
479
+
480
+ if (totalTokens + tokens <= budget) {
481
+ sources.push({
482
+ path: path.relative(rootPath, resolvedPath),
483
+ title: summary.title,
484
+ tokens,
485
+ content,
486
+ })
487
+ totalTokens += tokens
488
+ } else {
489
+ // Over budget
490
+ const remaining = budget - totalTokens
491
+ if (remaining > MIN_PARTIAL_BUDGET) {
492
+ // Include partial if we have some room
493
+ const briefSummary = yield* summarizeFile(resolvedPath, {
494
+ level: 'brief',
495
+ maxTokens: remaining,
496
+ exclude: excludePatterns,
497
+ }).pipe(
498
+ Effect.map((s): DocumentSummary | null => s),
499
+ // Log error for observability before gracefully degrading
500
+ Effect.tapError((error) =>
501
+ Effect.logError(
502
+ `Failed to create brief summary for ${sourcePath}`,
503
+ error,
504
+ ),
505
+ ),
506
+ Effect.catchAll(() =>
507
+ Effect.succeed(null as DocumentSummary | null),
508
+ ),
509
+ )
510
+
511
+ if (briefSummary) {
448
512
  const briefContent = formatSummaryImpl(briefSummary)
449
513
  // Count actual formatted output tokens, not pre-format summary tokens
450
514
  const briefTokens = countTokensApprox(briefContent)
@@ -459,10 +523,9 @@ export const assembleContext = (
459
523
  } else {
460
524
  overflow.push(path.relative(rootPath, resolvedPath))
461
525
  }
526
+ } else {
527
+ overflow.push(path.relative(rootPath, resolvedPath))
462
528
  }
463
- } catch (_e) {
464
- // Skip files that can't be processed
465
- overflow.push(sourcePath)
466
529
  }
467
530
  }
468
531
 
@@ -500,8 +563,14 @@ export const measureReduction = async (
500
563
  const originalTokens = countTokensApprox(originalContent)
501
564
 
502
565
  // Get summary
566
+ // Note: catchAll is intentional - measureReduction is a utility function
567
+ // where failures should return default values (no reduction) rather than throw
503
568
  const result = await Effect.runPromise(
504
569
  summarizeFile(filePath, { level }).pipe(
570
+ // Log error for observability before gracefully degrading
571
+ Effect.tapError((error) =>
572
+ Effect.logError(`Failed to measure reduction for ${filePath}`, error),
573
+ ),
505
574
  Effect.catchAll(() => Effect.succeed(null)),
506
575
  ),
507
576
  )
@@ -0,0 +1,66 @@
1
+ /**
2
+ * Type declarations for @huggingface/transformers (optional dependency)
3
+ *
4
+ * This package is an optional peer dependency used for cross-encoder re-ranking.
5
+ * Users who want re-ranking can install it with: npm install @huggingface/transformers
6
+ */
7
+
8
+ declare module '@huggingface/transformers' {
9
+ export interface ProgressCallbackData {
10
+ file?: string
11
+ progress?: number
12
+ }
13
+
14
+ export type ProgressCallback = (data: ProgressCallbackData) => void
15
+
16
+ export interface AutoModelOptions {
17
+ progress_callback?: ProgressCallback | undefined
18
+ }
19
+
20
+ export interface AutoTokenizerOptions {
21
+ progress_callback?: ProgressCallback | undefined
22
+ }
23
+
24
+ export interface TokenizerOutput {
25
+ input_ids: unknown
26
+ attention_mask: unknown
27
+ }
28
+
29
+ export interface ModelOutput {
30
+ logits: {
31
+ data: Float32Array
32
+ }
33
+ }
34
+
35
+ export const env: {
36
+ cacheDir: string
37
+ }
38
+
39
+ export type AutoTokenizerInstance = (
40
+ texts: string[],
41
+ options: {
42
+ text_pair?: string[]
43
+ padding?: boolean
44
+ truncation?: boolean
45
+ max_length?: number
46
+ },
47
+ ) => TokenizerOutput
48
+
49
+ export type AutoModelInstance = (
50
+ input: TokenizerOutput,
51
+ ) => Promise<ModelOutput>
52
+
53
+ export const AutoTokenizer: {
54
+ from_pretrained(
55
+ model: string,
56
+ options?: AutoTokenizerOptions,
57
+ ): Promise<AutoTokenizerInstance>
58
+ }
59
+
60
+ export const AutoModelForSequenceClassification: {
61
+ from_pretrained(
62
+ model: string,
63
+ options?: AutoModelOptions,
64
+ ): Promise<AutoModelInstance>
65
+ }
66
+ }
@@ -0,0 +1,7 @@
1
+ {
2
+ "namespace": "openai_text-embedding-3-small_512",
3
+ "provider": "openai",
4
+ "model": "text-embedding-3-small",
5
+ "dimensions": 512,
6
+ "activatedAt": "2026-01-27T07:43:50.361Z"
7
+ }
@@ -1,12 +1,12 @@
1
1
  {
2
2
  "version": 1,
3
- "rootPath": "/Users/alphab/Dev/LLM/DEV/mdcontext/tests/fixtures/cli",
3
+ "rootPath": "tests/fixtures/cli",
4
4
  "documents": {
5
5
  "README.md": {
6
6
  "id": "04c6e90faac2",
7
7
  "path": "README.md",
8
8
  "title": "Test Project",
9
- "mtime": 1768999918845,
9
+ "mtime": 1769492431399,
10
10
  "hash": "54872b0fdbf6858a",
11
11
  "tokenCount": 76,
12
12
  "sectionCount": 2
@@ -15,7 +15,7 @@
15
15
  "id": "392e93c0f22d",
16
16
  "path": "api-reference.md",
17
17
  "title": "API Reference",
18
- "mtime": 1768999919391,
18
+ "mtime": 1769492431399,
19
19
  "hash": "4a879da54a831235",
20
20
  "tokenCount": 109,
21
21
  "sectionCount": 2
@@ -24,7 +24,7 @@
24
24
  "id": "b6885e1f8555",
25
25
  "path": "getting-started.md",
26
26
  "title": "Getting Started",
27
- "mtime": 1769013198631,
27
+ "mtime": 1769492431400,
28
28
  "hash": "2d44a41d5d2579f2",
29
29
  "tokenCount": 66,
30
30
  "sectionCount": 3
@@ -110,6 +110,8 @@
110
110
  "04c6e90faac2-test-project",
111
111
  "04c6e90faac2-test-project",
112
112
  "04c6e90faac2-test-project",
113
+ "04c6e90faac2-test-project",
114
+ "04c6e90faac2-test-project",
113
115
  "04c6e90faac2-test-project"
114
116
  ],
115
117
  "overview": [
@@ -128,6 +130,8 @@
128
130
  "04c6e90faac2-overview",
129
131
  "04c6e90faac2-overview",
130
132
  "04c6e90faac2-overview",
133
+ "04c6e90faac2-overview",
134
+ "04c6e90faac2-overview",
131
135
  "04c6e90faac2-overview"
132
136
  ],
133
137
  "api reference": [
@@ -146,6 +150,8 @@
146
150
  "392e93c0f22d-api-reference",
147
151
  "392e93c0f22d-api-reference",
148
152
  "392e93c0f22d-api-reference",
153
+ "392e93c0f22d-api-reference",
154
+ "392e93c0f22d-api-reference",
149
155
  "392e93c0f22d-api-reference"
150
156
  ],
151
157
  "endpoints": [
@@ -164,6 +170,8 @@
164
170
  "392e93c0f22d-endpoints",
165
171
  "392e93c0f22d-endpoints",
166
172
  "392e93c0f22d-endpoints",
173
+ "392e93c0f22d-endpoints",
174
+ "392e93c0f22d-endpoints",
167
175
  "392e93c0f22d-endpoints"
168
176
  ],
169
177
  "getting started": [
@@ -182,6 +190,8 @@
182
190
  "b6885e1f8555-getting-started",
183
191
  "b6885e1f8555-getting-started",
184
192
  "b6885e1f8555-getting-started",
193
+ "b6885e1f8555-getting-started",
194
+ "b6885e1f8555-getting-started",
185
195
  "b6885e1f8555-getting-started"
186
196
  ],
187
197
  "installation": [
@@ -200,6 +210,8 @@
200
210
  "b6885e1f8555-installation",
201
211
  "b6885e1f8555-installation",
202
212
  "b6885e1f8555-installation",
213
+ "b6885e1f8555-installation",
214
+ "b6885e1f8555-installation",
203
215
  "b6885e1f8555-installation"
204
216
  ],
205
217
  "usage": [
@@ -218,6 +230,8 @@
218
230
  "b6885e1f8555-usage",
219
231
  "b6885e1f8555-usage",
220
232
  "b6885e1f8555-usage",
233
+ "b6885e1f8555-usage",
234
+ "b6885e1f8555-usage",
221
235
  "b6885e1f8555-usage"
222
236
  ]
223
237
  },