mdcontext 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.changeset/config.json +9 -9
- package/.claude/settings.local.json +25 -0
- package/.github/workflows/claude-code-review.yml +44 -0
- package/.github/workflows/claude.yml +85 -0
- package/CONTRIBUTING.md +186 -0
- package/NOTES/NOTES +44 -0
- package/README.md +206 -3
- package/biome.json +1 -1
- package/dist/chunk-23UPXDNL.js +3044 -0
- package/dist/chunk-2W7MO2DL.js +1366 -0
- package/dist/chunk-3NUAZGMA.js +1689 -0
- package/dist/chunk-7TOWB2XB.js +366 -0
- package/dist/chunk-7XOTOADQ.js +3065 -0
- package/dist/chunk-AH2PDM2K.js +3042 -0
- package/dist/chunk-BNXWSZ63.js +3742 -0
- package/dist/chunk-BTL5DJVU.js +3222 -0
- package/dist/chunk-HDHYG7E4.js +104 -0
- package/dist/chunk-HLR4KZBP.js +3234 -0
- package/dist/chunk-IP3FRFEB.js +1045 -0
- package/dist/chunk-KHU56VDO.js +3042 -0
- package/dist/chunk-KRYIFLQR.js +85 -89
- package/dist/chunk-LBSDNLEM.js +287 -0
- package/dist/chunk-MNTQ7HCP.js +2643 -0
- package/dist/chunk-MUJELQQ6.js +1387 -0
- package/dist/chunk-MXJGMSLV.js +2199 -0
- package/dist/chunk-N6QJGC3Z.js +2636 -0
- package/dist/chunk-OBELGBPM.js +1713 -0
- package/dist/chunk-OT7R5XTA.js +3192 -0
- package/dist/chunk-P7X4RA2T.js +106 -0
- package/dist/chunk-PIDUQNC2.js +3185 -0
- package/dist/chunk-POGCDIH4.js +3187 -0
- package/dist/chunk-PSIEOQGZ.js +3043 -0
- package/dist/chunk-PVRT3IHA.js +3238 -0
- package/dist/chunk-QNN4TT23.js +1430 -0
- package/dist/chunk-RE3R45RJ.js +3042 -0
- package/dist/chunk-S7E6TFX6.js +718 -657
- package/dist/chunk-SG6GLU4U.js +1378 -0
- package/dist/chunk-SJCDV2ST.js +274 -0
- package/dist/chunk-SYE5XLF3.js +104 -0
- package/dist/chunk-T5VLYBZD.js +103 -0
- package/dist/chunk-TOQB7VWU.js +3238 -0
- package/dist/chunk-VFNMZ4ZQ.js +3228 -0
- package/dist/chunk-VVTGZNBT.js +1533 -1423
- package/dist/chunk-W7Q4RFEV.js +104 -0
- package/dist/chunk-XTYYVRLO.js +3190 -0
- package/dist/chunk-Y6MDYVJD.js +3063 -0
- package/dist/cli/main.js +4072 -629
- package/dist/index.d.ts +420 -33
- package/dist/index.js +8 -15
- package/dist/mcp/server.js +103 -7
- package/dist/schema-BAWSG7KY.js +22 -0
- package/dist/schema-E3QUPL26.js +20 -0
- package/dist/schema-EHL7WUT6.js +20 -0
- package/docs/019-USAGE.md +44 -5
- package/docs/020-current-implementation.md +8 -8
- package/docs/021-DOGFOODING-FINDINGS.md +1 -1
- package/docs/CONFIG.md +1123 -0
- package/docs/ERRORS.md +383 -0
- package/docs/summarization.md +320 -0
- package/justfile +40 -0
- package/package.json +39 -33
- package/research/INDEX.md +315 -0
- package/research/code-review/README.md +90 -0
- package/research/code-review/cli-error-handling-review.md +979 -0
- package/research/code-review/code-review-validation-report.md +464 -0
- package/research/code-review/main-ts-review.md +1128 -0
- package/research/config-docs/SUMMARY.md +357 -0
- package/research/config-docs/TEST-RESULTS.md +776 -0
- package/research/config-docs/TODO.md +542 -0
- package/research/config-docs/analysis.md +744 -0
- package/research/config-docs/fix-validation.md +502 -0
- package/research/config-docs/help-audit.md +264 -0
- package/research/config-docs/help-system-analysis.md +890 -0
- package/research/frontmatter/COMMENTS-ARE-SKIPPED.md +149 -0
- package/research/frontmatter/LLM-CODE-NAVIGATION.md +276 -0
- package/research/issue-review.md +603 -0
- package/research/llm-summarization/agent-cli-tools-2026.md +1082 -0
- package/research/llm-summarization/alternative-providers-2026.md +1428 -0
- package/research/llm-summarization/anthropic-2026.md +367 -0
- package/research/llm-summarization/claude-cli-integration.md +1706 -0
- package/research/llm-summarization/cli-integration-patterns.md +3155 -0
- package/research/llm-summarization/openai-2026.md +473 -0
- package/research/llm-summarization/openai-compatible-providers-2026.md +1022 -0
- package/research/llm-summarization/opencode-cli-integration.md +1552 -0
- package/research/llm-summarization/prompt-engineering-2026.md +1426 -0
- package/research/llm-summarization/prototype-results.md +56 -0
- package/research/llm-summarization/provider-switching-patterns-2026.md +2153 -0
- package/research/llm-summarization/typescript-llm-libraries-2026.md +2436 -0
- package/research/mdcontext-pudding/00-EXECUTIVE-SUMMARY.md +282 -0
- package/research/mdcontext-pudding/01-index-embed.md +956 -0
- package/research/mdcontext-pudding/02-search-COMMANDS.md +142 -0
- package/research/mdcontext-pudding/02-search-SUMMARY.md +146 -0
- package/research/mdcontext-pudding/02-search.md +970 -0
- package/research/mdcontext-pudding/03-context.md +779 -0
- package/research/mdcontext-pudding/04-navigation-and-analytics.md +803 -0
- package/research/mdcontext-pudding/04-tree.md +704 -0
- package/research/mdcontext-pudding/05-config.md +1038 -0
- package/research/mdcontext-pudding/06-links-summary.txt +87 -0
- package/research/mdcontext-pudding/06-links.md +679 -0
- package/research/mdcontext-pudding/07-stats.md +693 -0
- package/research/mdcontext-pudding/BUG-FIX-PLAN.md +388 -0
- package/research/mdcontext-pudding/P0-BUG-VALIDATION.md +167 -0
- package/research/mdcontext-pudding/README.md +168 -0
- package/research/mdcontext-pudding/TESTING-SUMMARY.md +128 -0
- package/research/research-quality-review.md +834 -0
- package/research/semantic-search/embedding-text-analysis.md +156 -0
- package/research/semantic-search/multi-word-failure-reproduction.md +171 -0
- package/research/semantic-search/query-processing-analysis.md +207 -0
- package/research/semantic-search/root-cause-and-solution.md +114 -0
- package/research/semantic-search/threshold-validation-report.md +69 -0
- package/research/semantic-search/vector-search-analysis.md +63 -0
- package/research/test-path-issues.md +276 -0
- package/review/ALP-76/1-error-type-design.md +962 -0
- package/review/ALP-76/2-error-handling-patterns.md +906 -0
- package/review/ALP-76/3-error-presentation.md +624 -0
- package/review/ALP-76/4-test-coverage.md +625 -0
- package/review/ALP-76/5-migration-completeness.md +440 -0
- package/review/ALP-76/6-effect-best-practices.md +755 -0
- package/scripts/apply-branch-protection.sh +47 -0
- package/scripts/branch-protection-templates.json +79 -0
- package/scripts/prototype-summarization.ts +346 -0
- package/scripts/rebuild-hnswlib.js +32 -37
- package/scripts/setup-branch-protection.sh +64 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/active-provider.json +7 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.json +541 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.meta.json +5 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/config.json +8 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/documents.json +60 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/links.json +13 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/sections.json +1197 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/configuration-management.md +99 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/distributed-systems.md +92 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/error-handling.md +78 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/failure-automation.md +55 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/job-context.md +69 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/process-orchestration.md +99 -0
- package/src/cli/argv-preprocessor.test.ts +2 -2
- package/src/cli/cli.test.ts +230 -33
- package/src/cli/commands/config-cmd.ts +642 -0
- package/src/cli/commands/context.ts +97 -9
- package/src/cli/commands/duplicates.ts +122 -0
- package/src/cli/commands/embeddings.ts +529 -0
- package/src/cli/commands/index-cmd.ts +210 -30
- package/src/cli/commands/index.ts +3 -0
- package/src/cli/commands/search.ts +894 -64
- package/src/cli/commands/stats.ts +3 -0
- package/src/cli/commands/tree.ts +26 -5
- package/src/cli/config-layer.ts +176 -0
- package/src/cli/error-handler.test.ts +235 -0
- package/src/cli/error-handler.ts +655 -0
- package/src/cli/flag-schemas.ts +66 -0
- package/src/cli/help.ts +209 -7
- package/src/cli/main.ts +348 -58
- package/src/cli/options.ts +10 -0
- package/src/cli/shared-error-handling.ts +199 -0
- package/src/cli/utils.ts +150 -17
- package/src/config/file-provider.test.ts +320 -0
- package/src/config/file-provider.ts +273 -0
- package/src/config/index.ts +72 -0
- package/src/config/integration.test.ts +667 -0
- package/src/config/precedence.test.ts +277 -0
- package/src/config/precedence.ts +451 -0
- package/src/config/schema.test.ts +414 -0
- package/src/config/schema.ts +603 -0
- package/src/config/service.test.ts +320 -0
- package/src/config/service.ts +243 -0
- package/src/config/testing.test.ts +264 -0
- package/src/config/testing.ts +110 -0
- package/src/core/types.ts +6 -33
- package/src/duplicates/detector.test.ts +183 -0
- package/src/duplicates/detector.ts +414 -0
- package/src/duplicates/index.ts +18 -0
- package/src/embeddings/embedding-namespace.test.ts +300 -0
- package/src/embeddings/embedding-namespace.ts +947 -0
- package/src/embeddings/heading-boost.test.ts +222 -0
- package/src/embeddings/hnsw-build-options.test.ts +198 -0
- package/src/embeddings/hyde.test.ts +272 -0
- package/src/embeddings/hyde.ts +264 -0
- package/src/embeddings/index.ts +2 -0
- package/src/embeddings/openai-provider.ts +332 -83
- package/src/embeddings/pricing.json +22 -0
- package/src/embeddings/provider-constants.ts +204 -0
- package/src/embeddings/provider-errors.test.ts +967 -0
- package/src/embeddings/provider-errors.ts +565 -0
- package/src/embeddings/provider-factory.test.ts +240 -0
- package/src/embeddings/provider-factory.ts +225 -0
- package/src/embeddings/provider-integration.test.ts +788 -0
- package/src/embeddings/query-preprocessing.test.ts +187 -0
- package/src/embeddings/semantic-search-threshold.test.ts +508 -0
- package/src/embeddings/semantic-search.ts +780 -93
- package/src/embeddings/types.ts +293 -16
- package/src/embeddings/vector-store.ts +486 -77
- package/src/embeddings/voyage-provider.ts +313 -0
- package/src/errors/errors.test.ts +845 -0
- package/src/errors/index.ts +533 -0
- package/src/index/ignore-patterns.test.ts +354 -0
- package/src/index/ignore-patterns.ts +305 -0
- package/src/index/indexer.ts +286 -48
- package/src/index/storage.ts +94 -30
- package/src/index/types.ts +40 -2
- package/src/index/watcher.ts +67 -9
- package/src/index.ts +22 -0
- package/src/integration/search-keyword.test.ts +678 -0
- package/src/mcp/server.ts +135 -6
- package/src/parser/parser.ts +18 -19
- package/src/parser/section-filter.test.ts +277 -0
- package/src/parser/section-filter.ts +125 -3
- package/src/search/__tests__/hybrid-search.test.ts +650 -0
- package/src/search/bm25-store.ts +366 -0
- package/src/search/cross-encoder.test.ts +253 -0
- package/src/search/cross-encoder.ts +406 -0
- package/src/search/fuzzy-search.test.ts +419 -0
- package/src/search/fuzzy-search.ts +273 -0
- package/src/search/hybrid-search.ts +448 -0
- package/src/search/path-matcher.test.ts +276 -0
- package/src/search/path-matcher.ts +33 -0
- package/src/search/searcher.test.ts +99 -1
- package/src/search/searcher.ts +189 -67
- package/src/search/wink-bm25.d.ts +30 -0
- package/src/summarization/cli-providers/claude.ts +202 -0
- package/src/summarization/cli-providers/detection.test.ts +273 -0
- package/src/summarization/cli-providers/detection.ts +118 -0
- package/src/summarization/cli-providers/index.ts +8 -0
- package/src/summarization/cost.test.ts +139 -0
- package/src/summarization/cost.ts +102 -0
- package/src/summarization/error-handler.test.ts +127 -0
- package/src/summarization/error-handler.ts +111 -0
- package/src/summarization/index.ts +102 -0
- package/src/summarization/pipeline.test.ts +498 -0
- package/src/summarization/pipeline.ts +231 -0
- package/src/summarization/prompts.test.ts +269 -0
- package/src/summarization/prompts.ts +133 -0
- package/src/summarization/provider-factory.test.ts +396 -0
- package/src/summarization/provider-factory.ts +178 -0
- package/src/summarization/types.ts +184 -0
- package/src/summarize/summarizer.ts +104 -35
- package/src/types/huggingface-transformers.d.ts +66 -0
- package/tests/fixtures/cli/.mdcontext/active-provider.json +7 -0
- package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
- package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
- package/tests/fixtures/cli/.mdcontext/indexes/documents.json +4 -4
- package/tests/fixtures/cli/.mdcontext/indexes/sections.json +14 -0
- package/tests/integration/embed-index.test.ts +712 -0
- package/tests/integration/search-context.test.ts +469 -0
- package/tests/integration/search-semantic.test.ts +522 -0
- package/vitest.config.ts +1 -6
- package/AGENTS.md +0 -46
- package/tests/fixtures/cli/.mdcontext/vectors.bin +0 -0
- package/tests/fixtures/cli/.mdcontext/vectors.meta.json +0 -1264
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Summarization Module Types
|
|
3
|
+
*
|
|
4
|
+
* Core interfaces for AI-powered summarization of search results.
|
|
5
|
+
* Supports both CLI-based providers (free with subscriptions) and
|
|
6
|
+
* API-based providers (pay-per-use via Vercel AI SDK).
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
/**
|
|
10
|
+
* Summarization mode - CLI providers are free, API providers cost money
|
|
11
|
+
*/
|
|
12
|
+
export type SummarizationMode = 'cli' | 'api'
|
|
13
|
+
|
|
14
|
+
/**
|
|
15
|
+
* Known CLI tools that can be used for summarization
|
|
16
|
+
*/
|
|
17
|
+
export type CLIProviderName =
|
|
18
|
+
| 'claude'
|
|
19
|
+
| 'copilot'
|
|
20
|
+
| 'cline'
|
|
21
|
+
| 'aider'
|
|
22
|
+
| 'opencode'
|
|
23
|
+
| 'amp'
|
|
24
|
+
|
|
25
|
+
/**
|
|
26
|
+
* Known API providers for summarization
|
|
27
|
+
*/
|
|
28
|
+
export type APIProviderName =
|
|
29
|
+
| 'deepseek'
|
|
30
|
+
| 'anthropic'
|
|
31
|
+
| 'openai'
|
|
32
|
+
| 'gemini'
|
|
33
|
+
| 'qwen'
|
|
34
|
+
|
|
35
|
+
/**
|
|
36
|
+
* Information about a detected CLI tool
|
|
37
|
+
*/
|
|
38
|
+
export interface CLIInfo {
|
|
39
|
+
/** Internal name identifier */
|
|
40
|
+
readonly name: CLIProviderName
|
|
41
|
+
/** Command to execute */
|
|
42
|
+
readonly command: string
|
|
43
|
+
/** Display name for UI */
|
|
44
|
+
readonly displayName: string
|
|
45
|
+
/** Arguments to pass to the CLI for summarization */
|
|
46
|
+
readonly args: readonly string[]
|
|
47
|
+
/** Whether to use stdin for input */
|
|
48
|
+
readonly useStdin: boolean
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
/**
|
|
52
|
+
* API provider pricing information
|
|
53
|
+
*/
|
|
54
|
+
export interface APIProviderPricing {
|
|
55
|
+
/** Provider name */
|
|
56
|
+
readonly provider: APIProviderName
|
|
57
|
+
/** Display name */
|
|
58
|
+
readonly displayName: string
|
|
59
|
+
/** Cost per million input tokens */
|
|
60
|
+
readonly inputCostPer1M: number
|
|
61
|
+
/** Cost per million output tokens */
|
|
62
|
+
readonly outputCostPer1M: number
|
|
63
|
+
/** Default model to use */
|
|
64
|
+
readonly defaultModel: string
|
|
65
|
+
/** Base URL for the API */
|
|
66
|
+
readonly baseURL: string
|
|
67
|
+
/** Environment variable for API key */
|
|
68
|
+
readonly apiKeyEnvVar: string
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
/**
|
|
72
|
+
* Result from a summarization operation
|
|
73
|
+
*/
|
|
74
|
+
export interface SummaryResult {
|
|
75
|
+
/** The generated summary text */
|
|
76
|
+
readonly summary: string
|
|
77
|
+
/** Provider that generated the summary */
|
|
78
|
+
readonly provider: CLIProviderName | APIProviderName
|
|
79
|
+
/** Mode used (cli or api) */
|
|
80
|
+
readonly mode: SummarizationMode
|
|
81
|
+
/** Estimated cost in USD (0 for CLI providers) */
|
|
82
|
+
readonly estimatedCost: number
|
|
83
|
+
/** Time taken in milliseconds */
|
|
84
|
+
readonly durationMs: number
|
|
85
|
+
/** Token count estimates (if available) */
|
|
86
|
+
readonly tokens?: {
|
|
87
|
+
readonly input: number
|
|
88
|
+
readonly output: number
|
|
89
|
+
}
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
/**
|
|
93
|
+
* Core summarizer interface - simple and focused
|
|
94
|
+
*
|
|
95
|
+
* Each provider just needs to implement this interface.
|
|
96
|
+
* CLI providers return 0 for cost, API providers calculate actual costs.
|
|
97
|
+
*/
|
|
98
|
+
export interface Summarizer {
|
|
99
|
+
/** Generate a summary from input text */
|
|
100
|
+
summarize(input: string, prompt: string): Promise<SummaryResult>
|
|
101
|
+
|
|
102
|
+
/** Estimate cost before running (optional, defaults to 0 for CLI) */
|
|
103
|
+
estimateCost?(inputTokens: number): number
|
|
104
|
+
|
|
105
|
+
/** Check if the provider is available/configured */
|
|
106
|
+
isAvailable(): Promise<boolean>
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
/**
|
|
110
|
+
* Options for streaming summarization
|
|
111
|
+
*/
|
|
112
|
+
export interface StreamOptions {
|
|
113
|
+
/** Callback for each chunk of text */
|
|
114
|
+
onChunk: (chunk: string) => void
|
|
115
|
+
/** Callback when streaming completes */
|
|
116
|
+
onComplete?: (result: SummaryResult) => void
|
|
117
|
+
/** Callback on error */
|
|
118
|
+
onError?: (error: Error) => void
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
/**
|
|
122
|
+
* Extended summarizer interface with streaming support
|
|
123
|
+
*/
|
|
124
|
+
export interface StreamingSummarizer extends Summarizer {
|
|
125
|
+
/** Generate a summary with streaming output */
|
|
126
|
+
summarizeStream(
|
|
127
|
+
input: string,
|
|
128
|
+
prompt: string,
|
|
129
|
+
options: StreamOptions,
|
|
130
|
+
): Promise<void>
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
/**
|
|
134
|
+
* Configuration for AI summarization (distinct from existing SummarizationConfig)
|
|
135
|
+
*
|
|
136
|
+
* This configures the AI provider for generating summaries, not the
|
|
137
|
+
* token budget settings in the existing SummarizationConfig.
|
|
138
|
+
*/
|
|
139
|
+
export interface AISummarizationConfig {
|
|
140
|
+
/** Mode: 'cli' (free) or 'api' (pay-per-use) */
|
|
141
|
+
readonly mode: SummarizationMode
|
|
142
|
+
/** Provider name */
|
|
143
|
+
readonly provider: CLIProviderName | APIProviderName
|
|
144
|
+
/** Model name (for API providers) */
|
|
145
|
+
readonly model?: string
|
|
146
|
+
/** Enable streaming output */
|
|
147
|
+
readonly stream?: boolean
|
|
148
|
+
/** Custom API base URL */
|
|
149
|
+
readonly baseURL?: string
|
|
150
|
+
/** API key (for API providers, usually from env) */
|
|
151
|
+
readonly apiKey?: string
|
|
152
|
+
}
|
|
153
|
+
|
|
154
|
+
/**
|
|
155
|
+
* Factory function type for creating summarizers
|
|
156
|
+
*/
|
|
157
|
+
export type SummarizerFactory = (
|
|
158
|
+
config: AISummarizationConfig,
|
|
159
|
+
) => Promise<Summarizer>
|
|
160
|
+
|
|
161
|
+
/**
|
|
162
|
+
* Error types specific to summarization
|
|
163
|
+
*/
|
|
164
|
+
export class SummarizationError extends Error {
|
|
165
|
+
constructor(
|
|
166
|
+
message: string,
|
|
167
|
+
public readonly code: SummarizationErrorCode,
|
|
168
|
+
public readonly provider?: string,
|
|
169
|
+
public readonly cause?: Error,
|
|
170
|
+
) {
|
|
171
|
+
super(message)
|
|
172
|
+
this.name = 'SummarizationError'
|
|
173
|
+
}
|
|
174
|
+
}
|
|
175
|
+
|
|
176
|
+
export type SummarizationErrorCode =
|
|
177
|
+
| 'PROVIDER_NOT_FOUND'
|
|
178
|
+
| 'PROVIDER_NOT_AVAILABLE'
|
|
179
|
+
| 'CLI_EXECUTION_FAILED'
|
|
180
|
+
| 'API_REQUEST_FAILED'
|
|
181
|
+
| 'RATE_LIMITED'
|
|
182
|
+
| 'INVALID_RESPONSE'
|
|
183
|
+
| 'TIMEOUT'
|
|
184
|
+
| 'NO_API_KEY'
|
|
@@ -7,11 +7,19 @@
|
|
|
7
7
|
import * as fs from 'node:fs/promises'
|
|
8
8
|
import * as path from 'node:path'
|
|
9
9
|
import { Effect } from 'effect'
|
|
10
|
-
import type { MdDocument, MdSection } from '../core/types.js'
|
|
10
|
+
import type { MdDocument, MdSection, ParseError } from '../core/types.js'
|
|
11
|
+
import type { FileReadError } from '../errors/index.js'
|
|
11
12
|
import { parseFile } from '../parser/parser.js'
|
|
13
|
+
import { filterDocumentSections } from '../parser/section-filter.js'
|
|
12
14
|
import { countTokensApprox } from '../utils/tokens.js'
|
|
13
15
|
import { formatSummary as formatSummaryImpl } from './formatters.js'
|
|
14
16
|
|
|
17
|
+
/**
|
|
18
|
+
* Error type from parseFile function
|
|
19
|
+
* Uses centralized errors from src/errors/index.ts
|
|
20
|
+
*/
|
|
21
|
+
type ParseFileError = ParseError | FileReadError
|
|
22
|
+
|
|
15
23
|
// ============================================================================
|
|
16
24
|
// Types
|
|
17
25
|
// ============================================================================
|
|
@@ -23,6 +31,8 @@ export interface SummarizeOptions {
|
|
|
23
31
|
readonly level?: CompressionLevel | undefined
|
|
24
32
|
/** Maximum tokens for output */
|
|
25
33
|
readonly maxTokens?: number | undefined
|
|
34
|
+
/** Section patterns to exclude from output */
|
|
35
|
+
readonly exclude?: readonly string[] | undefined
|
|
26
36
|
}
|
|
27
37
|
|
|
28
38
|
export interface SectionSummary {
|
|
@@ -56,6 +66,8 @@ export interface AssembleContextOptions {
|
|
|
56
66
|
readonly budget: number
|
|
57
67
|
/** Compression level for each source */
|
|
58
68
|
readonly level?: CompressionLevel | undefined
|
|
69
|
+
/** Section patterns to exclude from output */
|
|
70
|
+
readonly exclude?: readonly string[] | undefined
|
|
59
71
|
}
|
|
60
72
|
|
|
61
73
|
export interface AssembledContext {
|
|
@@ -375,14 +387,27 @@ export const summarizeDocument = (
|
|
|
375
387
|
return result
|
|
376
388
|
}
|
|
377
389
|
|
|
390
|
+
/**
|
|
391
|
+
* Summarize a markdown file
|
|
392
|
+
*
|
|
393
|
+
* @throws ParseError - File content cannot be parsed
|
|
394
|
+
* @throws FileReadError - File cannot be read from filesystem
|
|
395
|
+
*/
|
|
378
396
|
export const summarizeFile = (
|
|
379
397
|
filePath: string,
|
|
380
398
|
options: SummarizeOptions = {},
|
|
381
|
-
): Effect.Effect<DocumentSummary,
|
|
399
|
+
): Effect.Effect<DocumentSummary, ParseFileError> =>
|
|
382
400
|
Effect.gen(function* () {
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
401
|
+
let document = yield* parseFile(filePath)
|
|
402
|
+
|
|
403
|
+
// Apply exclusion filter if patterns provided
|
|
404
|
+
if (options.exclude && options.exclude.length > 0) {
|
|
405
|
+
const { document: filteredDoc } = filterDocumentSections(
|
|
406
|
+
document,
|
|
407
|
+
options.exclude,
|
|
408
|
+
)
|
|
409
|
+
document = filteredDoc
|
|
410
|
+
}
|
|
386
411
|
|
|
387
412
|
return summarizeDocument(document, options)
|
|
388
413
|
})
|
|
@@ -397,14 +422,21 @@ export { type FormatSummaryOptions, formatSummary } from './formatters.js'
|
|
|
397
422
|
// Multi-Document Context Assembly
|
|
398
423
|
// ============================================================================
|
|
399
424
|
|
|
425
|
+
/**
|
|
426
|
+
* Assemble context from multiple markdown files within a token budget
|
|
427
|
+
*
|
|
428
|
+
* @throws ParseError - File content cannot be parsed
|
|
429
|
+
* @throws FileReadError - File cannot be read from filesystem
|
|
430
|
+
*/
|
|
400
431
|
export const assembleContext = (
|
|
401
432
|
rootPath: string,
|
|
402
433
|
sourcePaths: readonly string[],
|
|
403
434
|
options: AssembleContextOptions,
|
|
404
|
-
): Effect.Effect<AssembledContext,
|
|
435
|
+
): Effect.Effect<AssembledContext, ParseFileError> =>
|
|
405
436
|
Effect.gen(function* () {
|
|
406
437
|
const budget = options.budget
|
|
407
438
|
const level = options.level ?? 'summary'
|
|
439
|
+
const excludePatterns = options.exclude ?? []
|
|
408
440
|
|
|
409
441
|
const sources: SourceContext[] = []
|
|
410
442
|
const overflow: string[] = []
|
|
@@ -418,33 +450,65 @@ export const assembleContext = (
|
|
|
418
450
|
? sourcePath
|
|
419
451
|
: path.join(rootPath, sourcePath)
|
|
420
452
|
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
453
|
+
// Use catchAll for graceful degradation - individual file failures
|
|
454
|
+
// shouldn't stop the entire context assembly operation
|
|
455
|
+
const summaryResult = yield* summarizeFile(resolvedPath, {
|
|
456
|
+
level,
|
|
457
|
+
maxTokens: perSourceBudget,
|
|
458
|
+
exclude: excludePatterns,
|
|
459
|
+
}).pipe(
|
|
460
|
+
Effect.map((s): DocumentSummary | null => s),
|
|
461
|
+
// Log error for observability before gracefully degrading
|
|
462
|
+
Effect.tapError((error) =>
|
|
463
|
+
Effect.logError(`Failed to summarize ${sourcePath}`, error),
|
|
464
|
+
),
|
|
465
|
+
// Note: catchAll intentional for batch processing - individual file
|
|
466
|
+
// failures add to overflow instead of stopping assembly
|
|
467
|
+
Effect.catchAll(() => Effect.succeed(null as DocumentSummary | null)),
|
|
468
|
+
)
|
|
469
|
+
|
|
470
|
+
if (!summaryResult) {
|
|
471
|
+
overflow.push(sourcePath)
|
|
472
|
+
continue
|
|
473
|
+
}
|
|
430
474
|
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
475
|
+
const summary = summaryResult
|
|
476
|
+
const content = formatSummaryImpl(summary)
|
|
477
|
+
// Count actual formatted output tokens, not pre-format summary tokens
|
|
478
|
+
const tokens = countTokensApprox(content)
|
|
479
|
+
|
|
480
|
+
if (totalTokens + tokens <= budget) {
|
|
481
|
+
sources.push({
|
|
482
|
+
path: path.relative(rootPath, resolvedPath),
|
|
483
|
+
title: summary.title,
|
|
484
|
+
tokens,
|
|
485
|
+
content,
|
|
486
|
+
})
|
|
487
|
+
totalTokens += tokens
|
|
488
|
+
} else {
|
|
489
|
+
// Over budget
|
|
490
|
+
const remaining = budget - totalTokens
|
|
491
|
+
if (remaining > MIN_PARTIAL_BUDGET) {
|
|
492
|
+
// Include partial if we have some room
|
|
493
|
+
const briefSummary = yield* summarizeFile(resolvedPath, {
|
|
494
|
+
level: 'brief',
|
|
495
|
+
maxTokens: remaining,
|
|
496
|
+
exclude: excludePatterns,
|
|
497
|
+
}).pipe(
|
|
498
|
+
Effect.map((s): DocumentSummary | null => s),
|
|
499
|
+
// Log error for observability before gracefully degrading
|
|
500
|
+
Effect.tapError((error) =>
|
|
501
|
+
Effect.logError(
|
|
502
|
+
`Failed to create brief summary for ${sourcePath}`,
|
|
503
|
+
error,
|
|
504
|
+
),
|
|
505
|
+
),
|
|
506
|
+
Effect.catchAll(() =>
|
|
507
|
+
Effect.succeed(null as DocumentSummary | null),
|
|
508
|
+
),
|
|
509
|
+
)
|
|
510
|
+
|
|
511
|
+
if (briefSummary) {
|
|
448
512
|
const briefContent = formatSummaryImpl(briefSummary)
|
|
449
513
|
// Count actual formatted output tokens, not pre-format summary tokens
|
|
450
514
|
const briefTokens = countTokensApprox(briefContent)
|
|
@@ -459,10 +523,9 @@ export const assembleContext = (
|
|
|
459
523
|
} else {
|
|
460
524
|
overflow.push(path.relative(rootPath, resolvedPath))
|
|
461
525
|
}
|
|
526
|
+
} else {
|
|
527
|
+
overflow.push(path.relative(rootPath, resolvedPath))
|
|
462
528
|
}
|
|
463
|
-
} catch (_e) {
|
|
464
|
-
// Skip files that can't be processed
|
|
465
|
-
overflow.push(sourcePath)
|
|
466
529
|
}
|
|
467
530
|
}
|
|
468
531
|
|
|
@@ -500,8 +563,14 @@ export const measureReduction = async (
|
|
|
500
563
|
const originalTokens = countTokensApprox(originalContent)
|
|
501
564
|
|
|
502
565
|
// Get summary
|
|
566
|
+
// Note: catchAll is intentional - measureReduction is a utility function
|
|
567
|
+
// where failures should return default values (no reduction) rather than throw
|
|
503
568
|
const result = await Effect.runPromise(
|
|
504
569
|
summarizeFile(filePath, { level }).pipe(
|
|
570
|
+
// Log error for observability before gracefully degrading
|
|
571
|
+
Effect.tapError((error) =>
|
|
572
|
+
Effect.logError(`Failed to measure reduction for ${filePath}`, error),
|
|
573
|
+
),
|
|
505
574
|
Effect.catchAll(() => Effect.succeed(null)),
|
|
506
575
|
),
|
|
507
576
|
)
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Type declarations for @huggingface/transformers (optional dependency)
|
|
3
|
+
*
|
|
4
|
+
* This package is an optional peer dependency used for cross-encoder re-ranking.
|
|
5
|
+
* Users who want re-ranking can install it with: npm install @huggingface/transformers
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
declare module '@huggingface/transformers' {
|
|
9
|
+
export interface ProgressCallbackData {
|
|
10
|
+
file?: string
|
|
11
|
+
progress?: number
|
|
12
|
+
}
|
|
13
|
+
|
|
14
|
+
export type ProgressCallback = (data: ProgressCallbackData) => void
|
|
15
|
+
|
|
16
|
+
export interface AutoModelOptions {
|
|
17
|
+
progress_callback?: ProgressCallback | undefined
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
export interface AutoTokenizerOptions {
|
|
21
|
+
progress_callback?: ProgressCallback | undefined
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
export interface TokenizerOutput {
|
|
25
|
+
input_ids: unknown
|
|
26
|
+
attention_mask: unknown
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
export interface ModelOutput {
|
|
30
|
+
logits: {
|
|
31
|
+
data: Float32Array
|
|
32
|
+
}
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
export const env: {
|
|
36
|
+
cacheDir: string
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
export type AutoTokenizerInstance = (
|
|
40
|
+
texts: string[],
|
|
41
|
+
options: {
|
|
42
|
+
text_pair?: string[]
|
|
43
|
+
padding?: boolean
|
|
44
|
+
truncation?: boolean
|
|
45
|
+
max_length?: number
|
|
46
|
+
},
|
|
47
|
+
) => TokenizerOutput
|
|
48
|
+
|
|
49
|
+
export type AutoModelInstance = (
|
|
50
|
+
input: TokenizerOutput,
|
|
51
|
+
) => Promise<ModelOutput>
|
|
52
|
+
|
|
53
|
+
export const AutoTokenizer: {
|
|
54
|
+
from_pretrained(
|
|
55
|
+
model: string,
|
|
56
|
+
options?: AutoTokenizerOptions,
|
|
57
|
+
): Promise<AutoTokenizerInstance>
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
export const AutoModelForSequenceClassification: {
|
|
61
|
+
from_pretrained(
|
|
62
|
+
model: string,
|
|
63
|
+
options?: AutoModelOptions,
|
|
64
|
+
): Promise<AutoModelInstance>
|
|
65
|
+
}
|
|
66
|
+
}
|
package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin
ADDED
|
Binary file
|
package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin
ADDED
|
Binary file
|
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
{
|
|
2
2
|
"version": 1,
|
|
3
|
-
"rootPath": "
|
|
3
|
+
"rootPath": "tests/fixtures/cli",
|
|
4
4
|
"documents": {
|
|
5
5
|
"README.md": {
|
|
6
6
|
"id": "04c6e90faac2",
|
|
7
7
|
"path": "README.md",
|
|
8
8
|
"title": "Test Project",
|
|
9
|
-
"mtime":
|
|
9
|
+
"mtime": 1769492431399,
|
|
10
10
|
"hash": "54872b0fdbf6858a",
|
|
11
11
|
"tokenCount": 76,
|
|
12
12
|
"sectionCount": 2
|
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
"id": "392e93c0f22d",
|
|
16
16
|
"path": "api-reference.md",
|
|
17
17
|
"title": "API Reference",
|
|
18
|
-
"mtime":
|
|
18
|
+
"mtime": 1769492431399,
|
|
19
19
|
"hash": "4a879da54a831235",
|
|
20
20
|
"tokenCount": 109,
|
|
21
21
|
"sectionCount": 2
|
|
@@ -24,7 +24,7 @@
|
|
|
24
24
|
"id": "b6885e1f8555",
|
|
25
25
|
"path": "getting-started.md",
|
|
26
26
|
"title": "Getting Started",
|
|
27
|
-
"mtime":
|
|
27
|
+
"mtime": 1769492431400,
|
|
28
28
|
"hash": "2d44a41d5d2579f2",
|
|
29
29
|
"tokenCount": 66,
|
|
30
30
|
"sectionCount": 3
|
|
@@ -110,6 +110,8 @@
|
|
|
110
110
|
"04c6e90faac2-test-project",
|
|
111
111
|
"04c6e90faac2-test-project",
|
|
112
112
|
"04c6e90faac2-test-project",
|
|
113
|
+
"04c6e90faac2-test-project",
|
|
114
|
+
"04c6e90faac2-test-project",
|
|
113
115
|
"04c6e90faac2-test-project"
|
|
114
116
|
],
|
|
115
117
|
"overview": [
|
|
@@ -128,6 +130,8 @@
|
|
|
128
130
|
"04c6e90faac2-overview",
|
|
129
131
|
"04c6e90faac2-overview",
|
|
130
132
|
"04c6e90faac2-overview",
|
|
133
|
+
"04c6e90faac2-overview",
|
|
134
|
+
"04c6e90faac2-overview",
|
|
131
135
|
"04c6e90faac2-overview"
|
|
132
136
|
],
|
|
133
137
|
"api reference": [
|
|
@@ -146,6 +150,8 @@
|
|
|
146
150
|
"392e93c0f22d-api-reference",
|
|
147
151
|
"392e93c0f22d-api-reference",
|
|
148
152
|
"392e93c0f22d-api-reference",
|
|
153
|
+
"392e93c0f22d-api-reference",
|
|
154
|
+
"392e93c0f22d-api-reference",
|
|
149
155
|
"392e93c0f22d-api-reference"
|
|
150
156
|
],
|
|
151
157
|
"endpoints": [
|
|
@@ -164,6 +170,8 @@
|
|
|
164
170
|
"392e93c0f22d-endpoints",
|
|
165
171
|
"392e93c0f22d-endpoints",
|
|
166
172
|
"392e93c0f22d-endpoints",
|
|
173
|
+
"392e93c0f22d-endpoints",
|
|
174
|
+
"392e93c0f22d-endpoints",
|
|
167
175
|
"392e93c0f22d-endpoints"
|
|
168
176
|
],
|
|
169
177
|
"getting started": [
|
|
@@ -182,6 +190,8 @@
|
|
|
182
190
|
"b6885e1f8555-getting-started",
|
|
183
191
|
"b6885e1f8555-getting-started",
|
|
184
192
|
"b6885e1f8555-getting-started",
|
|
193
|
+
"b6885e1f8555-getting-started",
|
|
194
|
+
"b6885e1f8555-getting-started",
|
|
185
195
|
"b6885e1f8555-getting-started"
|
|
186
196
|
],
|
|
187
197
|
"installation": [
|
|
@@ -200,6 +210,8 @@
|
|
|
200
210
|
"b6885e1f8555-installation",
|
|
201
211
|
"b6885e1f8555-installation",
|
|
202
212
|
"b6885e1f8555-installation",
|
|
213
|
+
"b6885e1f8555-installation",
|
|
214
|
+
"b6885e1f8555-installation",
|
|
203
215
|
"b6885e1f8555-installation"
|
|
204
216
|
],
|
|
205
217
|
"usage": [
|
|
@@ -218,6 +230,8 @@
|
|
|
218
230
|
"b6885e1f8555-usage",
|
|
219
231
|
"b6885e1f8555-usage",
|
|
220
232
|
"b6885e1f8555-usage",
|
|
233
|
+
"b6885e1f8555-usage",
|
|
234
|
+
"b6885e1f8555-usage",
|
|
221
235
|
"b6885e1f8555-usage"
|
|
222
236
|
]
|
|
223
237
|
},
|