@lov3kaizen/agentsea-embeddings 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +475 -0
- package/dist/caching/index.d.mts +286 -0
- package/dist/caching/index.d.ts +286 -0
- package/dist/caching/index.js +1005 -0
- package/dist/caching/index.mjs +27 -0
- package/dist/chunk-3KM32UQK.mjs +207 -0
- package/dist/chunk-DJAURHAS.mjs +1117 -0
- package/dist/chunk-NBHIRTJT.mjs +895 -0
- package/dist/chunk-QAITLJ2E.mjs +259 -0
- package/dist/chunk-TER262ST.mjs +877 -0
- package/dist/chunk-VPSMDBHH.mjs +957 -0
- package/dist/chunking/index.d.mts +1 -0
- package/dist/chunking/index.d.ts +1 -0
- package/dist/chunking/index.js +1408 -0
- package/dist/chunking/index.mjs +37 -0
- package/dist/embedding.types-CCgPVxt1.d.mts +102 -0
- package/dist/embedding.types-CCgPVxt1.d.ts +102 -0
- package/dist/index-CeG6God2.d.mts +297 -0
- package/dist/index-DMaQRn2w.d.mts +172 -0
- package/dist/index-DMaQRn2w.d.ts +172 -0
- package/dist/index-DWddsKRi.d.ts +297 -0
- package/dist/index.d.mts +647 -0
- package/dist/index.d.ts +647 -0
- package/dist/index.js +5259 -0
- package/dist/index.mjs +1028 -0
- package/dist/providers/index.d.mts +2 -0
- package/dist/providers/index.d.ts +2 -0
- package/dist/providers/index.js +1235 -0
- package/dist/providers/index.mjs +32 -0
- package/dist/stores/index.d.mts +298 -0
- package/dist/stores/index.d.ts +298 -0
- package/dist/stores/index.js +1178 -0
- package/dist/stores/index.mjs +26 -0
- package/package.json +102 -0
|
@@ -0,0 +1,895 @@
|
|
|
1
|
+
import {
|
|
2
|
+
batch,
|
|
3
|
+
measureTime,
|
|
4
|
+
retry,
|
|
5
|
+
withConcurrency
|
|
6
|
+
} from "./chunk-3KM32UQK.mjs";
|
|
7
|
+
import {
|
|
8
|
+
EmbeddingModel
|
|
9
|
+
} from "./chunk-QAITLJ2E.mjs";
|
|
10
|
+
|
|
11
|
+
// src/providers/BaseProvider.ts
|
|
12
|
+
/**
 * Shared base for embedding providers: batching, retrying, latency tracking,
 * and metrics/health bookkeeping.
 *
 * Subclasses are expected to supply an `info` accessor (model metadata with at
 * least `name`, `dimensions` and `maxBatchSize`) and a `doEmbed(texts, options)`
 * method resolving to `{ vectors, tokenCount }`.
 */
var BaseProvider = class extends EmbeddingModel {
  /** Caller configuration merged over the defaults set in the constructor. */
  config;
  /** Running counters; see createInitialMetrics() for the fields. */
  metrics;
  /** Latest health snapshot: { healthy, latencyMs, lastCheck, error? }. */
  health;
  /** Durations (ms) of successful batch requests, oldest first. */
  latencies = [];
  /** Maximum number of latency samples kept; the oldest is dropped beyond it. */
  maxLatencySamples = 1e3;

  /**
   * @param {object} config Provider settings. `timeout` (30000), `maxRetries` (3)
   *   and `retryDelay` (1000) get defaults; any key in `config` overrides them.
   */
  constructor(config) {
    super();
    this.config = {
      timeout: 3e4,
      maxRetries: 3,
      retryDelay: 1e3,
      ...config
    };
    this.metrics = this.createInitialMetrics();
    this.health = {
      healthy: true,
      latencyMs: 0,
      lastCheck: Date.now()
    };
  }

  /**
   * Build a zeroed metrics record tagged with `config.type`.
   * @returns {object} Fresh metrics object.
   */
  createInitialMetrics() {
    return {
      provider: this.config.type,
      totalRequests: 0,
      successfulRequests: 0,
      failedRequests: 0,
      totalTokens: 0,
      avgLatencyMs: 0,
      p50LatencyMs: 0,
      p95LatencyMs: 0,
      p99LatencyMs: 0,
      errorRate: 0,
      rateLimitHits: 0,
      estimatedCostUSD: 0
    };
  }

  /**
   * Generate an embedding for a single text.
   * @param {string} text Text to embed.
   * @param {object} [options] Forwarded unchanged to embedBatch().
   * @returns {Promise<object|undefined>} First entry of the batch results;
   *   `undefined` when the batch failed and `options.continueOnError` was set.
   */
  async embed(text, options) {
    const result = await this.embedBatch([text], options);
    return result.results[0];
  }

  /**
   * Generate embeddings for multiple texts.
   *
   * Texts are split into chunks of `info.maxBatchSize`; each chunk is sent
   * through doEmbed() wrapped in the imported retry/measureTime helpers, with
   * chunks dispatched through withConcurrency().
   *
   * @param {string[]} texts Texts to embed.
   * @param {object} [options] `concurrency` (default 5) and `continueOnError`
   *   are read here; the whole object is also passed to doEmbed().
   * @returns {Promise<object>} `{ results, totalTokens, totalLatencyMs,
   *   cacheHits, cacheMisses, failures }`.
   * @throws Rethrows the batch error unless `options.continueOnError` is truthy.
   */
  async embedBatch(texts, options) {
    const startTime = performance.now();
    const maxBatchSize = this.info.maxBatchSize;
    const concurrency = options?.concurrency ?? 5;
    const results = [];
    let totalTokens = 0;
    let failures = 0;
    const batches = batch(texts, maxBatchSize);
    const processBatch = async (batchTexts) => {
      this.metrics.totalRequests++;
      try {
        const { result, durationMs } = await measureTime(
          () => retry(() => this.doEmbed(batchTexts, options), {
            maxRetries: this.config.maxRetries,
            initialDelay: this.config.retryDelay,
            retryCondition: (error) => this.isRetryable(error)
          })
        );
        this.recordLatency(durationMs);
        this.metrics.successfulRequests++;
        this.metrics.totalTokens += result.tokenCount;
        totalTokens += result.tokenCount;
        // Per-text token count and latency are even shares of the batch totals.
        return batchTexts.map((text, i) => ({
          vector: result.vectors[i],
          text,
          tokenCount: Math.ceil(result.tokenCount / batchTexts.length),
          cached: false,
          model: this.info.name,
          dimensions: this.info.dimensions,
          latencyMs: durationMs / batchTexts.length
        }));
      } catch (error) {
        this.metrics.failedRequests++;
        this.health.healthy = false;
        this.health.error = error.message;
        if (options?.continueOnError) {
          // The failed chunk contributes no results, only a failure count.
          failures += batchTexts.length;
          return [];
        }
        throw error;
      }
    };
    const batchResults = await withConcurrency(
      batches,
      processBatch,
      concurrency
    );
    for (const batchResult of batchResults) {
      results.push(...batchResult);
    }
    const totalLatencyMs = performance.now() - startTime;
    this.updateMetrics();
    return {
      results,
      totalTokens,
      totalLatencyMs,
      cacheHits: 0,
      cacheMisses: texts.length,
      failures
    };
  }

  /**
   * Decide whether a failed request is worth retrying, based on its message.
   *
   * Matches rate-limit, timeout and network wording plus the 429/502/503/504
   * status codes. 429 is matched as a whole number so that longer digit runs
   * (for example a token count) do not trigger a retry.
   *
   * @param {unknown} error Error (or any thrown value) from doEmbed().
   * @returns {boolean} True when the failure looks transient.
   */
  isRetryable(error) {
    // Tolerate thrown values that carry no string `message`.
    const message = String(error?.message ?? error ?? "").toLowerCase();
    if (/\b429\b/.test(message)) {
      return true;
    }
    const markers = ["rate limit", "timeout", "network", "econnreset", "502", "503", "504"];
    return markers.some((marker) => message.includes(marker));
  }

  /**
   * Record a latency sample, dropping the oldest once the cap is exceeded.
   * @param {number} latencyMs Duration of one successful batch request.
   */
  recordLatency(latencyMs) {
    this.latencies.push(latencyMs);
    if (this.latencies.length > this.maxLatencySamples) {
      this.latencies.shift();
    }
  }

  /**
   * Nearest-rank percentile over the recorded latency samples.
   * @param {number} p Percentile in the range 0-100.
   * @returns {number} Sample at that rank, or 0 when there are no samples.
   */
  calculatePercentile(p) {
    if (this.latencies.length === 0) return 0;
    const sorted = [...this.latencies].sort((a, b) => a - b);
    const index = Math.ceil(p / 100 * sorted.length) - 1;
    return sorted[Math.max(0, index)];
  }

  /**
   * Recompute the derived metrics (error rate, average and percentile latency).
   * Does nothing until at least one request has been counted.
   */
  updateMetrics() {
    const total = this.metrics.totalRequests;
    if (total > 0) {
      this.metrics.errorRate = this.metrics.failedRequests / total;
      // `|| 0` turns the NaN from 0/0 (no samples yet) into 0.
      this.metrics.avgLatencyMs = this.latencies.reduce((a, b) => a + b, 0) / this.latencies.length || 0;
      this.metrics.p50LatencyMs = this.calculatePercentile(50);
      this.metrics.p95LatencyMs = this.calculatePercentile(95);
      this.metrics.p99LatencyMs = this.calculatePercentile(99);
    }
  }

  /**
   * Get provider metrics.
   * @returns {object} Shallow copy of the current metrics.
   */
  getMetrics() {
    return { ...this.metrics };
  }

  /**
   * Get provider health.
   * @returns {object} Shallow copy of the last health snapshot.
   */
  getHealth() {
    return { ...this.health };
  }

  /**
   * Probe the provider with a single doEmbed() call and replace the health
   * snapshot with the outcome. Errors are captured, not thrown.
   * @returns {Promise<object>} The updated health object.
   */
  async checkHealth() {
    try {
      const { durationMs } = await measureTime(
        () => this.doEmbed(["health check"])
      );
      this.health = {
        healthy: true,
        latencyMs: durationMs,
        lastCheck: Date.now()
      };
    } catch (error) {
      this.health = {
        healthy: false,
        latencyMs: 0,
        lastCheck: Date.now(),
        error: error.message
      };
    }
    return this.health;
  }

  /**
   * Reset counters and discard all latency samples.
   */
  resetMetrics() {
    this.metrics = this.createInitialMetrics();
    this.latencies = [];
  }
};
|
|
201
|
+
|
|
202
|
+
// src/providers/OpenAIProvider.ts
|
|
203
|
+
// Catalogue of known OpenAI embedding models, keyed by model name.
// Each row is [name, dimensions, costPer1K, description]; every listed model
// shares the same maxTokens (8191) and maxBatchSize (2048).
var OPENAI_MODELS = Object.fromEntries(
  [
    ["text-embedding-3-small", 1536, 2e-5, "Smaller, faster, cheaper embedding model"],
    ["text-embedding-3-large", 3072, 13e-5, "Larger, more powerful embedding model"],
    ["text-embedding-ada-002", 1536, 1e-4, "Legacy embedding model"]
  ].map(([name, dimensions, costPer1K, description]) => [
    name,
    {
      name,
      provider: "openai",
      dimensions,
      maxTokens: 8191,
      maxBatchSize: 2048,
      costPer1K,
      description
    }
  ])
);
|
|
232
|
+
/**
 * Embedding provider that POSTs to `${baseUrl}/embeddings` using a bearer API key.
 */
var OpenAIProvider = class extends BaseProvider {
  /** Metadata for the configured model (catalogue entry or generic fallback). */
  modelInfo;
  apiKey;
  baseUrl;
  organization;

  /**
   * @param {object} config Requires `apiKey`. Optional: `baseUrl`, `organization`,
   *   `model` (default "text-embedding-3-small") and `dimensions`.
   * @throws {Error} When `config.apiKey` is missing.
   */
  constructor(config) {
    super({ ...config, type: "openai" });
    if (!config.apiKey) {
      throw new Error("OpenAI API key is required");
    }
    this.apiKey = config.apiKey;
    this.baseUrl = config.baseUrl ?? "https://api.openai.com/v1";
    this.organization = config.organization;
    const modelName = config.model ?? "text-embedding-3-small";
    const modelConfig = OPENAI_MODELS[modelName];
    if (!modelConfig) {
      // Model not in the catalogue: fall back to generic limits and pricing.
      this.modelInfo = {
        name: modelName,
        provider: "openai",
        dimensions: config.dimensions ?? 1536,
        maxTokens: 8191,
        maxBatchSize: 2048,
        costPer1K: 1e-4
      };
    } else {
      this.modelInfo = {
        ...modelConfig,
        // Allow dimension override for text-embedding-3 models
        dimensions: config.dimensions ?? modelConfig.dimensions
      };
    }
  }

  /** Model metadata read by the base class (name, dimensions, maxBatchSize). */
  get info() {
    return this.modelInfo;
  }

  /**
   * Send one embeddings request for `texts`.
   *
   * @param {string[]} texts Inputs for a single request.
   * @param {object} [options] `model` overrides the configured model for this
   *   call; `user` is forwarded in the request body.
   * @returns {Promise<{vectors: number[][], tokenCount: number}>} Vectors in
   *   input order and the token usage reported by the API (0 when absent).
   * @throws {Error} `OpenAI API error: <message> (<status>)` on a non-2xx response.
   */
  async doEmbed(texts, options) {
    const headers = {
      "Content-Type": "application/json",
      Authorization: `Bearer ${this.apiKey}`
    };
    if (this.organization) {
      headers["OpenAI-Organization"] = this.organization;
    }
    // The model actually requested, which may differ from the configured one.
    const model = options?.model ?? this.modelInfo.name;
    const body = {
      model,
      input: texts
    };
    const { dimensions, encodingFormat } = this.config;
    // Gate `dimensions` on the model being requested rather than the configured
    // one, so a per-call override to a non-v3 model does not receive the field.
    if (dimensions && String(model).startsWith("text-embedding-3")) {
      body.dimensions = dimensions;
    }
    if (encodingFormat) {
      body.encoding_format = encodingFormat;
    }
    if (options?.user) {
      body.user = options.user;
    }
    const response = await fetch(`${this.baseUrl}/embeddings`, {
      method: "POST",
      headers,
      body: JSON.stringify(body),
      signal: this.config.timeout ? AbortSignal.timeout(this.config.timeout) : void 0
    });
    if (!response.ok) {
      // Fall back to the HTTP status text when the error body is not JSON.
      const error = await response.json().catch(() => ({ error: { message: response.statusText } }));
      const errorMessage = error.error?.message ?? response.statusText;
      if (response.status === 429) {
        this.metrics.rateLimitHits++;
      }
      throw new Error(`OpenAI API error: ${errorMessage} (${response.status})`);
    }
    const data = await response.json();
    // Order by `index` so vectors line up with the input texts.
    const embeddings = data.data.sort((a, b) => a.index - b.index);
    const vectors = embeddings.map((e) => e.embedding);
    const tokenCount = data.usage?.total_tokens ?? 0;
    this.metrics.estimatedCostUSD += tokenCount / 1e3 * (this.modelInfo.costPer1K ?? 0);
    return { vectors, tokenCount };
  }

  /**
   * Rough token estimate: one token per whitespace-separated chunk plus one per
   * four characters of it. This is a heuristic, not a real tokenizer.
   * @param {string} text Text to measure.
   * @returns {number} Estimated token count, at least 1.
   */
  countTokens(text) {
    const words = text.split(/\s+/);
    let tokens = 0;
    for (const word of words) {
      tokens += Math.ceil(word.length / 4) + 1;
    }
    return Math.max(1, tokens);
  }
};
|
|
325
|
+
/**
 * Factory wrapper around the OpenAIProvider constructor.
 * @param {object} config Passed unchanged to `new OpenAIProvider(config)`.
 * @returns {OpenAIProvider} The newly constructed provider.
 */
function createOpenAIProvider(config) {
  const provider = new OpenAIProvider(config);
  return provider;
}
|
|
328
|
+
|
|
329
|
+
// src/providers/CohereProvider.ts
|
|
330
|
+
// Catalogue of known Cohere embedding models, keyed by model name.
// Each row is [name, dimensions, description]; every listed model shares the
// same maxTokens (512), maxBatchSize (96) and costPer1K (1e-4).
var COHERE_MODELS = Object.fromEntries(
  [
    ["embed-english-v3.0", 1024, "English embedding model v3"],
    ["embed-multilingual-v3.0", 1024, "Multilingual embedding model v3"],
    ["embed-english-light-v3.0", 384, "Lightweight English embedding model v3"],
    ["embed-multilingual-light-v3.0", 384, "Lightweight multilingual embedding model v3"],
    ["embed-english-v2.0", 4096, "Legacy English embedding model v2"]
  ].map(([name, dimensions, description]) => [
    name,
    {
      name,
      provider: "cohere",
      dimensions,
      maxTokens: 512,
      maxBatchSize: 96,
      costPer1K: 1e-4,
      description
    }
  ])
);
|
|
377
|
+
/**
 * Embedding provider that POSTs to `${baseUrl}/embed` using a bearer API key.
 */
var CohereProvider = class extends BaseProvider {
  /** Metadata for the configured model (catalogue entry or generic fallback). */
  modelInfo;
  apiKey;
  baseUrl;
  /** Sent as `input_type` on every request; changeable via setInputType(). */
  inputType;
  /** Sent as `truncate` on every request. */
  truncate;

  /**
   * @param {object} config Requires `apiKey`. Optional: `baseUrl`, `inputType`
   *   (default "search_document"), `truncate` (default "END") and `model`
   *   (default "embed-english-v3.0").
   * @throws {Error} When `config.apiKey` is missing.
   */
  constructor(config) {
    super({ ...config, type: "cohere" });
    if (!config.apiKey) {
      throw new Error("Cohere API key is required");
    }
    this.apiKey = config.apiKey;
    this.baseUrl = config.baseUrl ?? "https://api.cohere.ai/v1";
    this.inputType = config.inputType ?? "search_document";
    this.truncate = config.truncate ?? "END";
    const modelName = config.model ?? "embed-english-v3.0";
    const modelConfig = COHERE_MODELS[modelName];
    if (!modelConfig) {
      // Model not in the catalogue: fall back to generic limits and pricing.
      this.modelInfo = {
        name: modelName,
        provider: "cohere",
        dimensions: 1024,
        maxTokens: 512,
        maxBatchSize: 96,
        costPer1K: 1e-4
      };
    } else {
      this.modelInfo = { ...modelConfig };
    }
  }

  /** Model metadata read by the base class (name, dimensions, maxBatchSize). */
  get info() {
    return this.modelInfo;
  }

  /**
   * Send one embed request for `texts`.
   *
   * @param {string[]} texts Inputs for a single request.
   * @param {object} [options] `model` overrides the configured model for this call.
   * @returns {Promise<{vectors: number[][], tokenCount: number}>} The response's
   *   `embeddings` and a token count: the billed input tokens when the response
   *   reports them, otherwise the countTokens() estimate.
   * @throws {Error} `Cohere API error: <message> (<status>)` on a non-2xx response.
   */
  async doEmbed(texts, options) {
    const headers = {
      "Content-Type": "application/json",
      Authorization: `Bearer ${this.apiKey}`,
      "Request-Source": "agentsea-embeddings"
    };
    const body = {
      model: options?.model ?? this.modelInfo.name,
      texts,
      input_type: this.inputType,
      truncate: this.truncate
    };
    const response = await fetch(`${this.baseUrl}/embed`, {
      method: "POST",
      headers,
      body: JSON.stringify(body),
      signal: this.config.timeout ? AbortSignal.timeout(this.config.timeout) : void 0
    });
    if (!response.ok) {
      // Fall back to the HTTP status text when the error body is not JSON.
      const error = await response.json().catch(() => ({ message: response.statusText }));
      const errorMessage = error.message ?? response.statusText;
      if (response.status === 429) {
        this.metrics.rateLimitHits++;
      }
      throw new Error(`Cohere API error: ${errorMessage} (${response.status})`);
    }
    const data = await response.json();
    const vectors = data.embeddings;
    // Prefer the usage the API reports; estimate only when it is absent.
    const tokenCount = data.meta?.billed_units?.input_tokens ?? texts.reduce(
      (sum, text) => sum + this.countTokens(text),
      0
    );
    this.metrics.estimatedCostUSD += tokenCount / 1e3 * (this.modelInfo.costPer1K ?? 0);
    return { vectors, tokenCount };
  }

  /**
   * Set the `input_type` sent with subsequent requests.
   * @param {string} inputType New input type value.
   * @returns {this} The provider, for chaining.
   */
  setInputType(inputType) {
    this.inputType = inputType;
    return this;
  }

  /**
   * Rough token estimate: one token per four characters, rounded up.
   * @param {string} text Text to measure.
   * @returns {number} Estimated token count (0 for an empty string).
   */
  countTokens(text) {
    return Math.ceil(text.length / 4);
  }
};
|
|
459
|
+
/**
 * Factory wrapper around the CohereProvider constructor.
 * @param {object} config Passed unchanged to `new CohereProvider(config)`.
 * @returns {CohereProvider} The newly constructed provider.
 */
function createCohereProvider(config) {
  const provider = new CohereProvider(config);
  return provider;
}
|
|
462
|
+
|
|
463
|
+
// src/providers/VoyageProvider.ts
|
|
464
|
+
// Catalogue of known Voyage AI embedding models, keyed by model name.
// Each row is [name, dimensions, maxTokens, costPer1K, description]; every
// listed model shares the same maxBatchSize (128).
var VOYAGE_MODELS = Object.fromEntries(
  [
    ["voyage-3", 1024, 32e3, 6e-5, "Latest general-purpose embedding model"],
    ["voyage-3-lite", 512, 32e3, 2e-5, "Lightweight general-purpose model"],
    ["voyage-code-3", 1024, 32e3, 6e-5, "Optimized for code retrieval"],
    ["voyage-finance-2", 1024, 32e3, 12e-5, "Optimized for finance domain"],
    ["voyage-law-2", 1024, 32e3, 12e-5, "Optimized for legal domain"],
    ["voyage-multilingual-2", 1024, 32e3, 12e-5, "Multilingual embedding model"],
    ["voyage-2", 1024, 4e3, 1e-4, "Previous generation model"]
  ].map(([name, dimensions, maxTokens, costPer1K, description]) => [
    name,
    {
      name,
      provider: "voyage",
      dimensions,
      maxTokens,
      maxBatchSize: 128,
      costPer1K,
      description
    }
  ])
);
|
|
529
|
+
/**
 * Embedding provider that POSTs to `${baseUrl}/embeddings` using a bearer API key.
 */
var VoyageProvider = class extends BaseProvider {
  /** Metadata for the configured model (catalogue entry or generic fallback). */
  modelInfo;
  apiKey;
  baseUrl;
  /** Sent as `input_type` on every request; changeable via setInputType(). */
  inputType;
  /** Sent as `truncation` on every request. */
  truncation;

  /**
   * @param {object} config Requires `apiKey`. Optional: `baseUrl`, `inputType`
   *   (default "document"), `truncation` (default true) and `model`
   *   (default "voyage-3").
   * @throws {Error} When `config.apiKey` is missing.
   */
  constructor(config) {
    super({ ...config, type: "voyage" });
    if (!config.apiKey) {
      throw new Error("Voyage AI API key is required");
    }
    this.apiKey = config.apiKey;
    this.baseUrl = config.baseUrl ?? "https://api.voyageai.com/v1";
    this.inputType = config.inputType ?? "document";
    this.truncation = config.truncation ?? true;
    const modelName = config.model ?? "voyage-3";
    const modelConfig = VOYAGE_MODELS[modelName];
    if (!modelConfig) {
      // Model not in the catalogue: fall back to generic limits and pricing.
      this.modelInfo = {
        name: modelName,
        provider: "voyage",
        dimensions: 1024,
        maxTokens: 32e3,
        maxBatchSize: 128,
        costPer1K: 1e-4
      };
    } else {
      this.modelInfo = { ...modelConfig };
    }
  }

  /** Model metadata read by the base class (name, dimensions, maxBatchSize). */
  get info() {
    return this.modelInfo;
  }

  /**
   * Send one embeddings request for `texts`.
   *
   * @param {string[]} texts Inputs for a single request.
   * @param {object} [options] `model` overrides the configured model for this call.
   * @returns {Promise<{vectors: number[][], tokenCount: number}>} Vectors in
   *   input order and the API-reported token usage, or the countTokens()
   *   estimate when usage is absent.
   * @throws {Error} `Voyage AI API error: <detail> (<status>)` on a non-2xx response.
   */
  async doEmbed(texts, options) {
    const headers = {
      "Content-Type": "application/json",
      Authorization: `Bearer ${this.apiKey}`
    };
    const body = {
      model: options?.model ?? this.modelInfo.name,
      input: texts,
      input_type: this.inputType,
      truncation: this.truncation
    };
    const response = await fetch(`${this.baseUrl}/embeddings`, {
      method: "POST",
      headers,
      body: JSON.stringify(body),
      signal: this.config.timeout ? AbortSignal.timeout(this.config.timeout) : void 0
    });
    if (!response.ok) {
      // Fall back to the HTTP status text when the error body is not JSON.
      const error = await response.json().catch(() => ({ detail: response.statusText }));
      const errorMessage = error.detail ?? response.statusText;
      if (response.status === 429) {
        this.metrics.rateLimitHits++;
      }
      throw new Error(
        `Voyage AI API error: ${errorMessage} (${response.status})`
      );
    }
    const data = await response.json();
    // Order by the per-item `index` so vectors line up with the input texts.
    // Sorting is stable, so entries without an index keep their response order.
    const ordered = [...data.data].sort((a, b) => (a.index ?? 0) - (b.index ?? 0));
    const vectors = ordered.map((d) => d.embedding);
    const tokenCount = data.usage?.total_tokens ?? texts.reduce((sum, text) => sum + this.countTokens(text), 0);
    this.metrics.estimatedCostUSD += tokenCount / 1e3 * (this.modelInfo.costPer1K ?? 0);
    return { vectors, tokenCount };
  }

  /**
   * Set the `input_type` sent with subsequent requests.
   * @param {string} inputType New input type value.
   * @returns {this} The provider, for chaining.
   */
  setInputType(inputType) {
    this.inputType = inputType;
    return this;
  }

  /**
   * Rough token estimate: one token per four characters, rounded up.
   * @param {string} text Text to measure.
   * @returns {number} Estimated token count (0 for an empty string).
   */
  countTokens(text) {
    return Math.ceil(text.length / 4);
  }
};
|
|
609
|
+
/**
 * Factory wrapper around the VoyageProvider constructor.
 * @param {object} config Passed unchanged to `new VoyageProvider(config)`.
 * @returns {VoyageProvider} The newly constructed provider.
 */
function createVoyageProvider(config) {
  const provider = new VoyageProvider(config);
  return provider;
}
|
|
612
|
+
|
|
613
|
+
// src/providers/LocalProvider.ts
|
|
614
|
+
/**
 * Embedding provider backed by a caller-supplied function instead of a remote API.
 */
var LocalProvider = class extends BaseProvider {
  /** Metadata describing the local model; costPer1K is always 0. */
  modelInfo;
  /** `(texts, options) => vectors`; null until one is configured. */
  embedFn = null;
  /** When true, every vector is passed through EmbeddingModel.normalize(). */
  normalize;
  /** Preferred batch size; used as the fallback for `info.maxBatchSize`. */
  batchSize;

  /**
   * @param {object} config Requires `embedFn` or `modelPath`. Optional: `name`,
   *   `dimensions`, `normalize` (default true), `batchSize` (default 32),
   *   `maxTokens` (default 512) and `maxBatchSize` (defaults to `batchSize`).
   * @throws {Error} When neither `embedFn` nor `modelPath` is provided.
   */
  constructor(config) {
    super({ ...config, type: "local" });
    if (!config.embedFn && !config.modelPath) {
      throw new Error(
        "Either embedFn or modelPath is required for local provider"
      );
    }
    this.embedFn = config.embedFn ?? null;
    this.normalize = config.normalize ?? true;
    this.batchSize = config.batchSize ?? 32;
    this.modelInfo = {
      name: config.name ?? config.modelPath ?? "local-model",
      provider: "local",
      dimensions: config.dimensions,
      maxTokens: config.maxTokens ?? 512,
      // Honour `batchSize` when `maxBatchSize` is not given; previously the
      // stored batchSize was never read and so had no effect on batching.
      maxBatchSize: config.maxBatchSize ?? this.batchSize,
      costPer1K: 0,
      // Local models have no API cost
      description: "Local embedding model"
    };
  }

  /** Model metadata read by the base class (name, dimensions, maxBatchSize). */
  get info() {
    return this.modelInfo;
  }

  /**
   * Embed `texts` with the configured function.
   *
   * @param {string[]} texts Inputs for a single call.
   * @param {object} [options] Forwarded unchanged to the embedding function.
   * @returns {Promise<{vectors: number[][], tokenCount: number}>} Vectors
   *   (normalized when `normalize` is set) and an estimated token count.
   * @throws {Error} When no embedding function has been configured.
   */
  async doEmbed(texts, options) {
    if (!this.embedFn) {
      throw new Error("No embedding function configured");
    }
    let vectors = await this.embedFn(texts, options);
    if (this.normalize) {
      vectors = vectors.map((v) => EmbeddingModel.normalize(v));
    }
    const tokenCount = texts.reduce(
      (sum, text) => sum + this.countTokens(text),
      0
    );
    return { vectors, tokenCount };
  }

  /**
   * Set the embedding function.
   * @param {Function} fn Replacement for the current embedding function.
   * @returns {this} The provider, for chaining.
   */
  setEmbedFunction(fn) {
    this.embedFn = fn;
    return this;
  }

  /**
   * Count tokens as whitespace-separated words (simple approximation).
   *
   * Runs of non-whitespace are counted directly, so leading or trailing
   * whitespace no longer adds phantom tokens.
   *
   * @param {string} text Text to measure.
   * @returns {number} Word count, at least 1.
   */
  countTokens(text) {
    const words = text.match(/\S+/g);
    return Math.max(1, words?.length ?? 0);
  }
};
|
|
671
|
+
/**
 * Factory wrapper around the LocalProvider constructor.
 * @param {object} config Passed unchanged to `new LocalProvider(config)`.
 * @returns {LocalProvider} The newly constructed provider.
 */
function createLocalProvider(config) {
  const provider = new LocalProvider(config);
  return provider;
}
|
|
674
|
+
/**
 * Build a LocalProvider whose vectors are derived deterministically from each
 * text: a string hash seeds a linear congruential sequence, and the resulting
 * vector is scaled to unit length. Every call first waits `config.delay` ms.
 *
 * @param {object} config `dimensions` (vector length), optional `name`
 *   (default "mock-model") and optional `delay` in ms (default 10).
 * @returns {LocalProvider} Provider producing repeatable pseudo-embeddings.
 */
function createMockProvider(config) {
  const delay = config.delay ?? 10;

  // Shift/subtract rolling hash over the text's UTF-16 code units.
  const hashOf = (text) => {
    let acc = 0;
    for (const char of text.split("")) {
      acc = (acc << 5) - acc + char.charCodeAt(0);
    }
    return acc;
  };

  // Expand a hash into a unit-length vector using an LCG masked to 31 bits.
  const vectorFor = (text) => {
    const components = [];
    let seed = Math.abs(hashOf(text));
    let i = 0;
    while (i < config.dimensions) {
      seed = (seed * 1103515245 + 12345) & 2147483647;
      // Map the 31-bit state onto the interval [-1, 1].
      components.push(seed / 2147483647 * 2 - 1);
      i++;
    }
    let sumOfSquares = 0;
    for (const value of components) {
      sumOfSquares = sumOfSquares + value * value;
    }
    const norm = Math.sqrt(sumOfSquares);
    return components.map((value) => value / norm);
  };

  return new LocalProvider({
    type: "local",
    dimensions: config.dimensions,
    name: config.name ?? "mock-model",
    embedFn: async (texts) => {
      await new Promise((resolve) => setTimeout(resolve, delay));
      return texts.map((text) => vectorFor(text));
    }
  });
}
|
|
699
|
+
/**
 * Build a LocalProvider that ignores the text content and returns a fresh
 * random unit-length vector (components drawn via Math.random) per input.
 *
 * @param {object} config `dimensions` (vector length) and optional `name`
 *   (default "random-model").
 * @returns {LocalProvider} Provider producing non-repeatable random embeddings.
 */
function createRandomProvider(config) {
  // One random vector with components in [-1, 1), scaled to unit length.
  const randomUnitVector = () => {
    const components = [];
    let i = 0;
    while (i < config.dimensions) {
      components.push(Math.random() * 2 - 1);
      i++;
    }
    let sumOfSquares = 0;
    for (const value of components) {
      sumOfSquares = sumOfSquares + value * value;
    }
    const norm = Math.sqrt(sumOfSquares);
    return components.map((value) => value / norm);
  };

  return new LocalProvider({
    type: "local",
    dimensions: config.dimensions,
    name: config.name ?? "random-model",
    embedFn: (texts) => {
      const vectors = texts.map(() => randomUnitVector());
      return Promise.resolve(vectors);
    }
  });
}
|
|
718
|
+
|
|
719
|
+
// src/providers/HuggingFaceProvider.ts
|
|
720
|
+
// Catalogue of known HuggingFace embedding models, keyed by repository id.
// Each row is [model id, dimensions, maxTokens, description].
var HUGGINGFACE_MODELS = Object.fromEntries(
  [
    ["sentence-transformers/all-MiniLM-L6-v2", 384, 256, "Lightweight sentence transformer"],
    ["sentence-transformers/all-mpnet-base-v2", 768, 384, "High quality sentence transformer"],
    ["sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2", 384, 128, "Multilingual sentence transformer"],
    ["BAAI/bge-small-en-v1.5", 384, 512, "BGE small English model"],
    ["BAAI/bge-base-en-v1.5", 768, 512, "BGE base English model"],
    ["BAAI/bge-large-en-v1.5", 1024, 512, "BGE large English model"],
    ["thenlper/gte-small", 384, 512, "GTE small model"],
    ["thenlper/gte-base", 768, 512, "GTE base model"],
    ["thenlper/gte-large", 1024, 512, "GTE large model"],
    ["intfloat/e5-small-v2", 384, 512, "E5 small v2 model"],
    ["intfloat/e5-base-v2", 768, 512, "E5 base v2 model"],
    ["intfloat/e5-large-v2", 1024, 512, "E5 large v2 model"]
  ].map(([modelId, dimensions, maxTokens, description]) => [
    modelId,
    { dimensions, maxTokens, description }
  ])
);
|
|
782
|
+
/**
 * Embedding provider that POSTs texts to a HuggingFace feature-extraction
 * endpoint (the model id is part of the default URL) using a bearer API key.
 */
var HuggingFaceProvider = class extends BaseProvider {
  /** Metadata for the configured model; unknown models get generic defaults. */
  modelInfo;
  apiKey;
  baseUrl;
  /** Sent as `options.wait_for_model` on every request. */
  waitForModel;

  /**
   * @param {object} config Requires `apiKey`. Optional: `waitForModel`
   *   (default true), `model` (default "sentence-transformers/all-MiniLM-L6-v2")
   *   and `baseUrl` (defaults to the inference URL for that model).
   * @throws {Error} When `config.apiKey` is missing.
   */
  constructor(config) {
    super({ ...config, type: "huggingface" });
    if (!config.apiKey) {
      throw new Error("HuggingFace API key is required");
    }
    this.apiKey = config.apiKey;
    this.waitForModel = config.waitForModel ?? true;
    const modelName = config.model ?? "sentence-transformers/all-MiniLM-L6-v2";
    const known = HUGGINGFACE_MODELS[modelName];
    this.baseUrl = config.baseUrl ?? `https://api-inference.huggingface.co/pipeline/feature-extraction/${modelName}`;
    this.modelInfo = {
      name: modelName,
      provider: "huggingface",
      dimensions: known?.dimensions ?? 768,
      maxTokens: known?.maxTokens ?? 512,
      // HF inference API handles batching
      maxBatchSize: 32,
      // Free tier available
      costPer1K: 0,
      description: known?.description ?? "HuggingFace model"
    };
  }

  /** Model metadata read by the base class (name, dimensions, maxBatchSize). */
  get info() {
    return this.modelInfo;
  }

  /**
   * Send one feature-extraction request for `texts`.
   *
   * The response is handled in three shapes: an array of numeric vectors is
   * used as-is; an array of per-token vector lists is averaged per input; and
   * anything else is wrapped as a single vector.
   *
   * @param {string[]} texts Inputs for a single request.
   * @param {object} [_options] Unused.
   * @returns {Promise<{vectors: Array, tokenCount: number}>} Vectors and an
   *   estimated token count from countTokens().
   * @throws {Error} `HuggingFace API error: <error> (<status>)` on a non-2xx response.
   */
  async doEmbed(texts, _options) {
    const response = await fetch(this.baseUrl, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Bearer ${this.apiKey}`
      },
      body: JSON.stringify({
        inputs: texts,
        options: {
          wait_for_model: this.waitForModel
        }
      }),
      signal: this.config.timeout ? AbortSignal.timeout(this.config.timeout) : void 0
    });
    if (!response.ok) {
      // Fall back to the HTTP status text when the error body is not JSON.
      const payload = await response.json().catch(() => ({ error: response.statusText }));
      const detail = payload.error ?? response.statusText;
      if (response.status === 429) {
        this.metrics.rateLimitHits++;
      }
      throw new Error(
        `HuggingFace API error: ${detail} (${response.status})`
      );
    }
    const data = await response.json();
    const isNested = Array.isArray(data) && Array.isArray(data[0]);
    let vectors;
    if (!isNested) {
      // Not an array of arrays: treat the payload as one vector.
      vectors = [data];
    } else if (typeof data[0][0] === "number") {
      // Already one numeric vector per input.
      vectors = data;
    } else {
      // One list of token vectors per input: average them component-wise.
      vectors = data.map((tokenEmbeddings) => {
        const width = tokenEmbeddings[0]?.length ?? this.modelInfo.dimensions;
        const sums = new Array(width).fill(0);
        for (const tokenVector of tokenEmbeddings) {
          for (let d = 0; d < width; d++) {
            sums[d] += tokenVector[d];
          }
        }
        return sums.map((total) => total / tokenEmbeddings.length);
      });
    }
    const tokenCount = texts.reduce(
      (running, text) => running + this.countTokens(text),
      0
    );
    return { vectors, tokenCount };
  }

  /**
   * Rough token estimate: one token per whitespace-separated chunk plus one per
   * five characters of it. This is a heuristic, not a real tokenizer.
   * @param {string} text Text to measure.
   * @returns {number} Estimated token count, at least 1.
   */
  countTokens(text) {
    const estimate = text.split(/\s+/).reduce(
      (total, word) => total + Math.ceil(word.length / 5) + 1,
      0
    );
    return Math.max(1, estimate);
  }
};
|
|
877
|
+
/**
 * Factory wrapper around the HuggingFaceProvider constructor.
 * @param {object} config Passed unchanged to `new HuggingFaceProvider(config)`.
 * @returns {HuggingFaceProvider} The newly constructed provider.
 */
function createHuggingFaceProvider(config) {
  const provider = new HuggingFaceProvider(config);
  return provider;
}
|
|
880
|
+
|
|
881
|
+
export {
|
|
882
|
+
BaseProvider,
|
|
883
|
+
OpenAIProvider,
|
|
884
|
+
createOpenAIProvider,
|
|
885
|
+
CohereProvider,
|
|
886
|
+
createCohereProvider,
|
|
887
|
+
VoyageProvider,
|
|
888
|
+
createVoyageProvider,
|
|
889
|
+
LocalProvider,
|
|
890
|
+
createLocalProvider,
|
|
891
|
+
createMockProvider,
|
|
892
|
+
createRandomProvider,
|
|
893
|
+
HuggingFaceProvider,
|
|
894
|
+
createHuggingFaceProvider
|
|
895
|
+
};
|