mdcontext 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.changeset/config.json +9 -9
- package/.claude/settings.local.json +25 -0
- package/.github/workflows/claude-code-review.yml +44 -0
- package/.github/workflows/claude.yml +85 -0
- package/CONTRIBUTING.md +186 -0
- package/NOTES/NOTES +44 -0
- package/README.md +206 -3
- package/biome.json +1 -1
- package/dist/chunk-23UPXDNL.js +3044 -0
- package/dist/chunk-2W7MO2DL.js +1366 -0
- package/dist/chunk-3NUAZGMA.js +1689 -0
- package/dist/chunk-7TOWB2XB.js +366 -0
- package/dist/chunk-7XOTOADQ.js +3065 -0
- package/dist/chunk-AH2PDM2K.js +3042 -0
- package/dist/chunk-BNXWSZ63.js +3742 -0
- package/dist/chunk-BTL5DJVU.js +3222 -0
- package/dist/chunk-HDHYG7E4.js +104 -0
- package/dist/chunk-HLR4KZBP.js +3234 -0
- package/dist/chunk-IP3FRFEB.js +1045 -0
- package/dist/chunk-KHU56VDO.js +3042 -0
- package/dist/chunk-KRYIFLQR.js +85 -89
- package/dist/chunk-LBSDNLEM.js +287 -0
- package/dist/chunk-MNTQ7HCP.js +2643 -0
- package/dist/chunk-MUJELQQ6.js +1387 -0
- package/dist/chunk-MXJGMSLV.js +2199 -0
- package/dist/chunk-N6QJGC3Z.js +2636 -0
- package/dist/chunk-OBELGBPM.js +1713 -0
- package/dist/chunk-OT7R5XTA.js +3192 -0
- package/dist/chunk-P7X4RA2T.js +106 -0
- package/dist/chunk-PIDUQNC2.js +3185 -0
- package/dist/chunk-POGCDIH4.js +3187 -0
- package/dist/chunk-PSIEOQGZ.js +3043 -0
- package/dist/chunk-PVRT3IHA.js +3238 -0
- package/dist/chunk-QNN4TT23.js +1430 -0
- package/dist/chunk-RE3R45RJ.js +3042 -0
- package/dist/chunk-S7E6TFX6.js +718 -657
- package/dist/chunk-SG6GLU4U.js +1378 -0
- package/dist/chunk-SJCDV2ST.js +274 -0
- package/dist/chunk-SYE5XLF3.js +104 -0
- package/dist/chunk-T5VLYBZD.js +103 -0
- package/dist/chunk-TOQB7VWU.js +3238 -0
- package/dist/chunk-VFNMZ4ZQ.js +3228 -0
- package/dist/chunk-VVTGZNBT.js +1533 -1423
- package/dist/chunk-W7Q4RFEV.js +104 -0
- package/dist/chunk-XTYYVRLO.js +3190 -0
- package/dist/chunk-Y6MDYVJD.js +3063 -0
- package/dist/cli/main.js +4072 -629
- package/dist/index.d.ts +420 -33
- package/dist/index.js +8 -15
- package/dist/mcp/server.js +103 -7
- package/dist/schema-BAWSG7KY.js +22 -0
- package/dist/schema-E3QUPL26.js +20 -0
- package/dist/schema-EHL7WUT6.js +20 -0
- package/docs/019-USAGE.md +44 -5
- package/docs/020-current-implementation.md +8 -8
- package/docs/021-DOGFOODING-FINDINGS.md +1 -1
- package/docs/CONFIG.md +1123 -0
- package/docs/ERRORS.md +383 -0
- package/docs/summarization.md +320 -0
- package/justfile +40 -0
- package/package.json +39 -33
- package/research/INDEX.md +315 -0
- package/research/code-review/README.md +90 -0
- package/research/code-review/cli-error-handling-review.md +979 -0
- package/research/code-review/code-review-validation-report.md +464 -0
- package/research/code-review/main-ts-review.md +1128 -0
- package/research/config-docs/SUMMARY.md +357 -0
- package/research/config-docs/TEST-RESULTS.md +776 -0
- package/research/config-docs/TODO.md +542 -0
- package/research/config-docs/analysis.md +744 -0
- package/research/config-docs/fix-validation.md +502 -0
- package/research/config-docs/help-audit.md +264 -0
- package/research/config-docs/help-system-analysis.md +890 -0
- package/research/frontmatter/COMMENTS-ARE-SKIPPED.md +149 -0
- package/research/frontmatter/LLM-CODE-NAVIGATION.md +276 -0
- package/research/issue-review.md +603 -0
- package/research/llm-summarization/agent-cli-tools-2026.md +1082 -0
- package/research/llm-summarization/alternative-providers-2026.md +1428 -0
- package/research/llm-summarization/anthropic-2026.md +367 -0
- package/research/llm-summarization/claude-cli-integration.md +1706 -0
- package/research/llm-summarization/cli-integration-patterns.md +3155 -0
- package/research/llm-summarization/openai-2026.md +473 -0
- package/research/llm-summarization/openai-compatible-providers-2026.md +1022 -0
- package/research/llm-summarization/opencode-cli-integration.md +1552 -0
- package/research/llm-summarization/prompt-engineering-2026.md +1426 -0
- package/research/llm-summarization/prototype-results.md +56 -0
- package/research/llm-summarization/provider-switching-patterns-2026.md +2153 -0
- package/research/llm-summarization/typescript-llm-libraries-2026.md +2436 -0
- package/research/mdcontext-pudding/00-EXECUTIVE-SUMMARY.md +282 -0
- package/research/mdcontext-pudding/01-index-embed.md +956 -0
- package/research/mdcontext-pudding/02-search-COMMANDS.md +142 -0
- package/research/mdcontext-pudding/02-search-SUMMARY.md +146 -0
- package/research/mdcontext-pudding/02-search.md +970 -0
- package/research/mdcontext-pudding/03-context.md +779 -0
- package/research/mdcontext-pudding/04-navigation-and-analytics.md +803 -0
- package/research/mdcontext-pudding/04-tree.md +704 -0
- package/research/mdcontext-pudding/05-config.md +1038 -0
- package/research/mdcontext-pudding/06-links-summary.txt +87 -0
- package/research/mdcontext-pudding/06-links.md +679 -0
- package/research/mdcontext-pudding/07-stats.md +693 -0
- package/research/mdcontext-pudding/BUG-FIX-PLAN.md +388 -0
- package/research/mdcontext-pudding/P0-BUG-VALIDATION.md +167 -0
- package/research/mdcontext-pudding/README.md +168 -0
- package/research/mdcontext-pudding/TESTING-SUMMARY.md +128 -0
- package/research/research-quality-review.md +834 -0
- package/research/semantic-search/embedding-text-analysis.md +156 -0
- package/research/semantic-search/multi-word-failure-reproduction.md +171 -0
- package/research/semantic-search/query-processing-analysis.md +207 -0
- package/research/semantic-search/root-cause-and-solution.md +114 -0
- package/research/semantic-search/threshold-validation-report.md +69 -0
- package/research/semantic-search/vector-search-analysis.md +63 -0
- package/research/test-path-issues.md +276 -0
- package/review/ALP-76/1-error-type-design.md +962 -0
- package/review/ALP-76/2-error-handling-patterns.md +906 -0
- package/review/ALP-76/3-error-presentation.md +624 -0
- package/review/ALP-76/4-test-coverage.md +625 -0
- package/review/ALP-76/5-migration-completeness.md +440 -0
- package/review/ALP-76/6-effect-best-practices.md +755 -0
- package/scripts/apply-branch-protection.sh +47 -0
- package/scripts/branch-protection-templates.json +79 -0
- package/scripts/prototype-summarization.ts +346 -0
- package/scripts/rebuild-hnswlib.js +32 -37
- package/scripts/setup-branch-protection.sh +64 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/active-provider.json +7 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.json +541 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.meta.json +5 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/config.json +8 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/documents.json +60 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/links.json +13 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/sections.json +1197 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/configuration-management.md +99 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/distributed-systems.md +92 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/error-handling.md +78 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/failure-automation.md +55 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/job-context.md +69 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/process-orchestration.md +99 -0
- package/src/cli/argv-preprocessor.test.ts +2 -2
- package/src/cli/cli.test.ts +230 -33
- package/src/cli/commands/config-cmd.ts +642 -0
- package/src/cli/commands/context.ts +97 -9
- package/src/cli/commands/duplicates.ts +122 -0
- package/src/cli/commands/embeddings.ts +529 -0
- package/src/cli/commands/index-cmd.ts +210 -30
- package/src/cli/commands/index.ts +3 -0
- package/src/cli/commands/search.ts +894 -64
- package/src/cli/commands/stats.ts +3 -0
- package/src/cli/commands/tree.ts +26 -5
- package/src/cli/config-layer.ts +176 -0
- package/src/cli/error-handler.test.ts +235 -0
- package/src/cli/error-handler.ts +655 -0
- package/src/cli/flag-schemas.ts +66 -0
- package/src/cli/help.ts +209 -7
- package/src/cli/main.ts +348 -58
- package/src/cli/options.ts +10 -0
- package/src/cli/shared-error-handling.ts +199 -0
- package/src/cli/utils.ts +150 -17
- package/src/config/file-provider.test.ts +320 -0
- package/src/config/file-provider.ts +273 -0
- package/src/config/index.ts +72 -0
- package/src/config/integration.test.ts +667 -0
- package/src/config/precedence.test.ts +277 -0
- package/src/config/precedence.ts +451 -0
- package/src/config/schema.test.ts +414 -0
- package/src/config/schema.ts +603 -0
- package/src/config/service.test.ts +320 -0
- package/src/config/service.ts +243 -0
- package/src/config/testing.test.ts +264 -0
- package/src/config/testing.ts +110 -0
- package/src/core/types.ts +6 -33
- package/src/duplicates/detector.test.ts +183 -0
- package/src/duplicates/detector.ts +414 -0
- package/src/duplicates/index.ts +18 -0
- package/src/embeddings/embedding-namespace.test.ts +300 -0
- package/src/embeddings/embedding-namespace.ts +947 -0
- package/src/embeddings/heading-boost.test.ts +222 -0
- package/src/embeddings/hnsw-build-options.test.ts +198 -0
- package/src/embeddings/hyde.test.ts +272 -0
- package/src/embeddings/hyde.ts +264 -0
- package/src/embeddings/index.ts +2 -0
- package/src/embeddings/openai-provider.ts +332 -83
- package/src/embeddings/pricing.json +22 -0
- package/src/embeddings/provider-constants.ts +204 -0
- package/src/embeddings/provider-errors.test.ts +967 -0
- package/src/embeddings/provider-errors.ts +565 -0
- package/src/embeddings/provider-factory.test.ts +240 -0
- package/src/embeddings/provider-factory.ts +225 -0
- package/src/embeddings/provider-integration.test.ts +788 -0
- package/src/embeddings/query-preprocessing.test.ts +187 -0
- package/src/embeddings/semantic-search-threshold.test.ts +508 -0
- package/src/embeddings/semantic-search.ts +780 -93
- package/src/embeddings/types.ts +293 -16
- package/src/embeddings/vector-store.ts +486 -77
- package/src/embeddings/voyage-provider.ts +313 -0
- package/src/errors/errors.test.ts +845 -0
- package/src/errors/index.ts +533 -0
- package/src/index/ignore-patterns.test.ts +354 -0
- package/src/index/ignore-patterns.ts +305 -0
- package/src/index/indexer.ts +286 -48
- package/src/index/storage.ts +94 -30
- package/src/index/types.ts +40 -2
- package/src/index/watcher.ts +67 -9
- package/src/index.ts +22 -0
- package/src/integration/search-keyword.test.ts +678 -0
- package/src/mcp/server.ts +135 -6
- package/src/parser/parser.ts +18 -19
- package/src/parser/section-filter.test.ts +277 -0
- package/src/parser/section-filter.ts +125 -3
- package/src/search/__tests__/hybrid-search.test.ts +650 -0
- package/src/search/bm25-store.ts +366 -0
- package/src/search/cross-encoder.test.ts +253 -0
- package/src/search/cross-encoder.ts +406 -0
- package/src/search/fuzzy-search.test.ts +419 -0
- package/src/search/fuzzy-search.ts +273 -0
- package/src/search/hybrid-search.ts +448 -0
- package/src/search/path-matcher.test.ts +276 -0
- package/src/search/path-matcher.ts +33 -0
- package/src/search/searcher.test.ts +99 -1
- package/src/search/searcher.ts +189 -67
- package/src/search/wink-bm25.d.ts +30 -0
- package/src/summarization/cli-providers/claude.ts +202 -0
- package/src/summarization/cli-providers/detection.test.ts +273 -0
- package/src/summarization/cli-providers/detection.ts +118 -0
- package/src/summarization/cli-providers/index.ts +8 -0
- package/src/summarization/cost.test.ts +139 -0
- package/src/summarization/cost.ts +102 -0
- package/src/summarization/error-handler.test.ts +127 -0
- package/src/summarization/error-handler.ts +111 -0
- package/src/summarization/index.ts +102 -0
- package/src/summarization/pipeline.test.ts +498 -0
- package/src/summarization/pipeline.ts +231 -0
- package/src/summarization/prompts.test.ts +269 -0
- package/src/summarization/prompts.ts +133 -0
- package/src/summarization/provider-factory.test.ts +396 -0
- package/src/summarization/provider-factory.ts +178 -0
- package/src/summarization/types.ts +184 -0
- package/src/summarize/summarizer.ts +104 -35
- package/src/types/huggingface-transformers.d.ts +66 -0
- package/tests/fixtures/cli/.mdcontext/active-provider.json +7 -0
- package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
- package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
- package/tests/fixtures/cli/.mdcontext/indexes/documents.json +4 -4
- package/tests/fixtures/cli/.mdcontext/indexes/sections.json +14 -0
- package/tests/integration/embed-index.test.ts +712 -0
- package/tests/integration/search-context.test.ts +469 -0
- package/tests/integration/search-semantic.test.ts +522 -0
- package/vitest.config.ts +1 -6
- package/AGENTS.md +0 -46
- package/tests/fixtures/cli/.mdcontext/vectors.bin +0 -0
- package/tests/fixtures/cli/.mdcontext/vectors.meta.json +0 -1264
|
@@ -0,0 +1,313 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Voyage AI Embedding Provider
|
|
3
|
+
*
|
|
4
|
+
* Voyage AI offers high-quality embeddings with competitive pricing:
|
|
5
|
+
* - voyage-3.5-lite: Same price as OpenAI ($0.02/1M), better quality
|
|
6
|
+
* - voyage-3: Higher quality option ($0.06/1M)
|
|
7
|
+
* - 32K token context (4x OpenAI)
|
|
8
|
+
* - Top-tier retrieval performance in benchmarks
|
|
9
|
+
*
|
|
10
|
+
* API docs: https://docs.voyageai.com/reference/embeddings-api
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import { Effect, Redacted } from 'effect'
|
|
14
|
+
import {
|
|
15
|
+
ApiKeyInvalidError,
|
|
16
|
+
ApiKeyMissingError,
|
|
17
|
+
EmbeddingError,
|
|
18
|
+
} from '../errors/index.js'
|
|
19
|
+
import pricingData from './pricing.json' with { type: 'json' }
|
|
20
|
+
import type {
|
|
21
|
+
EmbeddingProvider,
|
|
22
|
+
EmbeddingProviderWithMetadata,
|
|
23
|
+
EmbeddingResult,
|
|
24
|
+
EmbedOptions,
|
|
25
|
+
} from './types.js'
|
|
26
|
+
|
|
27
|
+
// ============================================================================
|
|
28
|
+
// Types
|
|
29
|
+
// ============================================================================
|
|
30
|
+
|
|
31
|
+
/** A single entry of the `data` array in a Voyage embeddings response. */
interface VoyageEmbeddingItem {
  object: 'embedding'
  /** The embedding vector itself. */
  embedding: number[]
  /** Index reported by the API for this entry. */
  index: number
}

/**
 * Shape the provider expects when it parses the JSON body returned by the
 * Voyage `/embeddings` endpoint.
 */
interface VoyageEmbeddingResponse {
  object: 'list'
  data: VoyageEmbeddingItem[]
  model: string
  usage: { total_tokens: number }
}
|
|
43
|
+
|
|
44
|
+
// ============================================================================
|
|
45
|
+
// Constants
|
|
46
|
+
// ============================================================================
|
|
47
|
+
|
|
48
|
+
/**
 * Base URL of the Voyage AI HTTP API (v1). The provider posts embedding
 * requests to `${VOYAGE_API_BASE}/embeddings` and exposes this value as its
 * `baseURL`.
 */
const VOYAGE_API_BASE = 'https://api.voyageai.com/v1'
|
|
49
|
+
|
|
50
|
+
/** Per-model prices taken from the `voyage` section of pricing.json. */
const voyagePricing = pricingData.voyage as Record<string, number>

/**
 * Built-in model table: `[model id, embedding dimensions, fallback price]`.
 * The fallback price (per million tokens) is used only when pricing.json has
 * no entry for the model. The last three rows are the legacy models.
 */
const VOYAGE_MODEL_DEFAULTS: ReadonlyArray<readonly [string, number, number]> =
  [
    ['voyage-3.5-lite', 1024, 0.02],
    ['voyage-3', 1024, 0.06],
    ['voyage-code-3', 1024, 0.18],
    // Legacy models
    ['voyage-2', 1024, 0.1],
    ['voyage-large-2', 1536, 0.12],
    ['voyage-code-2', 1536, 0.12],
  ]

/**
 * Voyage AI model specifications, keyed by model id.
 * Each entry carries the embedding dimensions and the price per million
 * tokens, with the price resolved from pricing.json where available.
 */
export const VOYAGE_MODELS: Record<
  string,
  { dimensions: number; pricePerMillion: number }
> = Object.fromEntries(
  VOYAGE_MODEL_DEFAULTS.map(([modelId, dimensions, fallbackPrice]) => [
    modelId,
    {
      dimensions,
      pricePerMillion: voyagePricing[modelId] ?? fallbackPrice,
    },
  ]),
)
|
|
85
|
+
|
|
86
|
+
/** Model id the provider uses when `VoyageProviderOptions.model` is not supplied. */
export const DEFAULT_VOYAGE_MODEL = 'voyage-3.5-lite'
|
|
87
|
+
|
|
88
|
+
// ============================================================================
|
|
89
|
+
// Provider Options
|
|
90
|
+
// ============================================================================
|
|
91
|
+
|
|
92
|
+
/** Settings accepted when constructing a Voyage embedding provider. */
export interface VoyageProviderOptions {
  /**
   * Voyage API key, either as a plain string or already wrapped in
   * `Redacted<string>`. When omitted, the `VOYAGE_API_KEY` environment
   * variable is consulted instead.
   */
  readonly apiKey?: Redacted.Redacted<string> | string | undefined

  /** Embedding model id. Default: voyage-3.5-lite */
  readonly model?: string | undefined

  /** Number of texts sent per embedding request. Default: 128 */
  readonly batchSize?: number | undefined

  /** Per-request timeout in milliseconds. Default: 30000 (30 seconds) */
  readonly timeout?: number | undefined
}
|
|
108
|
+
|
|
109
|
+
// ============================================================================
|
|
110
|
+
// Voyage Provider Implementation
|
|
111
|
+
// ============================================================================
|
|
112
|
+
|
|
113
|
+
/**
 * Embedding provider backed by the Voyage AI `/embeddings` HTTP endpoint.
 *
 * Instances are obtained through {@link VoyageProvider.create}; the
 * constructor is private so that a provider can never exist without an API
 * key. The key is kept as `Redacted<string>` and only unwrapped when the
 * `Authorization` header is built.
 */
export class VoyageProvider implements EmbeddingProviderWithMetadata {
  /** Display name in the form `voyage:<model>`. */
  readonly name: string
  /** Embedding dimensions for the model (1024 when the model is not in VOYAGE_MODELS). */
  readonly dimensions: number
  /** Model id sent with every request. */
  readonly model: string
  /** Base URL requests are sent to. */
  readonly baseURL: string = VOYAGE_API_BASE
  readonly providerName = 'voyage'

  private readonly apiKey: Redacted.Redacted<string>
  private readonly batchSize: number
  private readonly timeout: number

  private constructor(
    apiKey: Redacted.Redacted<string>,
    options: VoyageProviderOptions = {},
  ) {
    this.apiKey = apiKey
    this.model = options.model ?? DEFAULT_VOYAGE_MODEL
    // A batch size of 0, a negative value or NaN would make the batching loop
    // in embed() never advance, so unusable values fall back to the default.
    this.batchSize = VoyageProvider.positiveIntegerOr(options.batchSize, 128)
    // A non-positive timeout would abort every request immediately.
    this.timeout = VoyageProvider.positiveIntegerOr(options.timeout, 30000)

    // Get dimensions for model
    const modelSpec = VOYAGE_MODELS[this.model]
    this.dimensions = modelSpec?.dimensions ?? 1024

    this.name = `voyage:${this.model}`
  }

  /**
   * Returns `value` floored to an integer when it is a finite number >= 1,
   * otherwise `fallback`.
   */
  private static positiveIntegerOr(
    value: number | undefined,
    fallback: number,
  ): number {
    if (value === undefined || !Number.isFinite(value) || value < 1) {
      return fallback
    }
    return Math.floor(value)
  }

  /**
   * Create a Voyage provider instance.
   *
   * The API key is taken from `options.apiKey`, falling back to the
   * `VOYAGE_API_KEY` environment variable. Plain-string keys are wrapped in
   * Effect's `Redacted` type to prevent accidental logging of the value.
   *
   * @param options - Provider settings; all fields are optional.
   * @returns An Effect that succeeds with the provider, or fails with
   *   `ApiKeyMissingError` if no API key is available.
   */
  static create(
    options: VoyageProviderOptions = {},
  ): Effect.Effect<VoyageProvider, ApiKeyMissingError> {
    const rawApiKey = options.apiKey ?? process.env.VOYAGE_API_KEY

    if (!rawApiKey) {
      return Effect.fail(
        new ApiKeyMissingError({
          provider: 'Voyage AI',
          envVar: 'VOYAGE_API_KEY',
        }),
      )
    }

    // Wrap in Redacted if it's a plain string
    const redactedApiKey = Redacted.isRedacted(rawApiKey)
      ? rawApiKey
      : Redacted.make(rawApiKey)

    return Effect.succeed(new VoyageProvider(redactedApiKey, options))
  }

  /**
   * Embed `texts`, sending them to the API sequentially in batches of
   * `batchSize`.
   *
   * @param texts - Texts to embed; an empty array returns an empty result
   *   without making a request.
   * @param options - Optional hooks; `onBatchProgress` is invoked after each
   *   successful batch with 1-based batch progress.
   * @returns The embeddings in the same order as `texts`, the total token
   *   count reported by the API, and the cost derived from the model's price
   *   per million tokens (0.02 when the model is not in VOYAGE_MODELS).
   * @throws ApiKeyInvalidError when the API answers with HTTP 401.
   * @throws EmbeddingError for any other HTTP error, a response whose
   *   embedding count does not match the batch, a timeout, or any other
   *   failure while performing the request.
   */
  async embed(
    texts: string[],
    options?: EmbedOptions,
  ): Promise<EmbeddingResult> {
    if (texts.length === 0) {
      return { embeddings: [], tokensUsed: 0, cost: 0 }
    }

    const allEmbeddings: number[][] = []
    let totalTokens = 0
    const totalBatches = Math.ceil(texts.length / this.batchSize)

    try {
      // Process in batches
      for (let i = 0; i < texts.length; i += this.batchSize) {
        const batch = texts.slice(i, i + this.batchSize)
        const batchIndex = Math.floor(i / this.batchSize)

        const { embeddings, tokens } = await this.requestBatch(batch)
        for (const embedding of embeddings) {
          allEmbeddings.push(embedding)
        }
        totalTokens += tokens

        // Report batch progress
        if (options?.onBatchProgress) {
          options.onBatchProgress({
            batchIndex: batchIndex + 1,
            totalBatches,
            processedTexts: Math.min(i + this.batchSize, texts.length),
            totalTexts: texts.length,
          })
        }
      }
    } catch (error) {
      // Errors already in the provider's error vocabulary pass through as-is.
      if (
        error instanceof ApiKeyInvalidError ||
        error instanceof EmbeddingError
      ) {
        throw error
      }
      throw new EmbeddingError({
        reason: this.classifyError(error),
        message: error instanceof Error ? error.message : String(error),
        provider: 'voyage',
        cause: error,
      })
    }

    // Calculate cost
    const pricePerMillion = VOYAGE_MODELS[this.model]?.pricePerMillion ?? 0.02
    const cost = (totalTokens / 1_000_000) * pricePerMillion

    return {
      embeddings: allEmbeddings,
      tokensUsed: totalTokens,
      cost,
    }
  }

  /**
   * Send one batch to the embeddings endpoint and return its vectors, ordered
   * to match `batch`, together with the token count reported for the batch.
   */
  private async requestBatch(
    batch: string[],
  ): Promise<{ embeddings: number[][]; tokens: number }> {
    // Use AbortController for timeout. The timer stays armed until the body
    // has been read so that a stalled response body is also bounded.
    const controller = new AbortController()
    const timeoutId = setTimeout(() => controller.abort(), this.timeout)

    try {
      const response = await fetch(`${this.baseURL}/embeddings`, {
        method: 'POST',
        headers: {
          Authorization: `Bearer ${Redacted.value(this.apiKey)}`,
          'Content-Type': 'application/json',
        },
        body: JSON.stringify({
          model: this.model,
          input: batch,
          input_type: 'document', // 'document' for indexing, 'query' for searching
        }),
        signal: controller.signal,
      })

      if (!response.ok) {
        const errorText = await response.text()
        if (response.status === 401) {
          throw new ApiKeyInvalidError({
            provider: 'Voyage AI',
            details: errorText,
          })
        }
        throw new EmbeddingError({
          reason: this.classifyHttpError(response.status, errorText),
          message: `Voyage API error: ${response.status} - ${errorText}`,
          provider: 'voyage',
        })
      }

      const data = (await response.json()) as VoyageEmbeddingResponse
      const items = Array.isArray(data?.data) ? data.data : []

      // A short or long response would silently pair vectors with the wrong
      // texts further down the pipeline, so reject it here.
      if (items.length !== batch.length) {
        throw new EmbeddingError({
          reason: 'Unknown',
          message: `Voyage API returned ${items.length} embeddings for ${batch.length} inputs`,
          provider: 'voyage',
        })
      }

      // Order by the API-reported index rather than trusting array order.
      // Entries without a usable index compare equal and keep their position.
      const ordered = [...items].sort((a, b) => a.index - b.index)

      return {
        embeddings: ordered.map((item) => item.embedding),
        tokens: data.usage?.total_tokens ?? 0,
      }
    } finally {
      clearTimeout(timeoutId)
    }
  }

  /** Map a non-OK HTTP status to an EmbeddingError reason. */
  private classifyHttpError(
    status: number,
    _message: string,
  ): 'RateLimit' | 'QuotaExceeded' | 'Network' | 'ModelError' | 'Unknown' {
    if (status === 429) return 'RateLimit'
    if (status === 402) return 'QuotaExceeded'
    if (status === 400) return 'ModelError'
    return 'Unknown'
  }

  /**
   * Map a thrown value to an EmbeddingError reason by inspecting the error
   * name and message text. Non-Error values are classified as 'Unknown'.
   */
  private classifyError(
    error: unknown,
  ): 'RateLimit' | 'QuotaExceeded' | 'Network' | 'ModelError' | 'Unknown' {
    if (!(error instanceof Error)) return 'Unknown'
    const msg = error.message.toLowerCase()

    // Check for abort errors (timeout)
    if (error.name === 'AbortError' || msg.includes('aborted')) return 'Network'

    if (msg.includes('rate limit') || msg.includes('429')) return 'RateLimit'
    if (msg.includes('quota') || msg.includes('billing')) return 'QuotaExceeded'
    if (
      msg.includes('econnrefused') ||
      msg.includes('timeout') ||
      msg.includes('network')
    )
      return 'Network'
    if (msg.includes('model') && msg.includes('not found')) return 'ModelError'

    return 'Unknown'
  }
}
|
|
301
|
+
|
|
302
|
+
// ============================================================================
|
|
303
|
+
// Factory Functions
|
|
304
|
+
// ============================================================================
|
|
305
|
+
|
|
306
|
+
/**
 * Factory for a Voyage AI embedding provider; delegates to
 * `VoyageProvider.create`.
 *
 * @param options - Optional provider settings, forwarded unchanged.
 * @returns An Effect yielding the provider typed as `EmbeddingProvider`, or
 *   failing with `ApiKeyMissingError` when no API key is available.
 */
export const createVoyageProvider = (
  options?: VoyageProviderOptions,
): Effect.Effect<EmbeddingProvider, ApiKeyMissingError> => {
  const provider = VoyageProvider.create(options)
  return provider
}
|