@lov3kaizen/agentsea-embeddings 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +475 -0
- package/dist/caching/index.d.mts +286 -0
- package/dist/caching/index.d.ts +286 -0
- package/dist/caching/index.js +1005 -0
- package/dist/caching/index.mjs +27 -0
- package/dist/chunk-3KM32UQK.mjs +207 -0
- package/dist/chunk-DJAURHAS.mjs +1117 -0
- package/dist/chunk-NBHIRTJT.mjs +895 -0
- package/dist/chunk-QAITLJ2E.mjs +259 -0
- package/dist/chunk-TER262ST.mjs +877 -0
- package/dist/chunk-VPSMDBHH.mjs +957 -0
- package/dist/chunking/index.d.mts +1 -0
- package/dist/chunking/index.d.ts +1 -0
- package/dist/chunking/index.js +1408 -0
- package/dist/chunking/index.mjs +37 -0
- package/dist/embedding.types-CCgPVxt1.d.mts +102 -0
- package/dist/embedding.types-CCgPVxt1.d.ts +102 -0
- package/dist/index-CeG6God2.d.mts +297 -0
- package/dist/index-DMaQRn2w.d.mts +172 -0
- package/dist/index-DMaQRn2w.d.ts +172 -0
- package/dist/index-DWddsKRi.d.ts +297 -0
- package/dist/index.d.mts +647 -0
- package/dist/index.d.ts +647 -0
- package/dist/index.js +5259 -0
- package/dist/index.mjs +1028 -0
- package/dist/providers/index.d.mts +2 -0
- package/dist/providers/index.d.ts +2 -0
- package/dist/providers/index.js +1235 -0
- package/dist/providers/index.mjs +32 -0
- package/dist/stores/index.d.mts +298 -0
- package/dist/stores/index.d.ts +298 -0
- package/dist/stores/index.js +1178 -0
- package/dist/stores/index.mjs +26 -0
- package/package.json +102 -0
package/dist/index.mjs
ADDED
|
@@ -0,0 +1,1028 @@
|
|
|
1
|
+
import {
|
|
2
|
+
BaseCache,
|
|
3
|
+
MemoryCache,
|
|
4
|
+
RedisCache,
|
|
5
|
+
SQLiteCache,
|
|
6
|
+
TieredCache,
|
|
7
|
+
createCache,
|
|
8
|
+
createMemoryCache,
|
|
9
|
+
createRedisCache,
|
|
10
|
+
createSQLiteCache,
|
|
11
|
+
createStandardTieredCache,
|
|
12
|
+
createTieredCache
|
|
13
|
+
} from "./chunk-VPSMDBHH.mjs";
|
|
14
|
+
import {
|
|
15
|
+
BaseChunker,
|
|
16
|
+
CodeChunker,
|
|
17
|
+
FixedChunker,
|
|
18
|
+
MarkdownChunker,
|
|
19
|
+
RecursiveChunker,
|
|
20
|
+
SemanticChunker,
|
|
21
|
+
chunk,
|
|
22
|
+
createChunker,
|
|
23
|
+
createCodeChunker,
|
|
24
|
+
createFixedChunker,
|
|
25
|
+
createMarkdownChunker,
|
|
26
|
+
createRecursiveChunker,
|
|
27
|
+
createSemanticChunker,
|
|
28
|
+
defaultTokenCounter,
|
|
29
|
+
mergeSmallChunks,
|
|
30
|
+
splitLargeChunks
|
|
31
|
+
} from "./chunk-DJAURHAS.mjs";
|
|
32
|
+
import {
|
|
33
|
+
BaseProvider,
|
|
34
|
+
CohereProvider,
|
|
35
|
+
HuggingFaceProvider,
|
|
36
|
+
LocalProvider,
|
|
37
|
+
OpenAIProvider,
|
|
38
|
+
VoyageProvider,
|
|
39
|
+
createCohereProvider,
|
|
40
|
+
createHuggingFaceProvider,
|
|
41
|
+
createLocalProvider,
|
|
42
|
+
createMockProvider,
|
|
43
|
+
createOpenAIProvider,
|
|
44
|
+
createRandomProvider,
|
|
45
|
+
createVoyageProvider
|
|
46
|
+
} from "./chunk-NBHIRTJT.mjs";
|
|
47
|
+
import {
|
|
48
|
+
BaseStore,
|
|
49
|
+
ChromaStore,
|
|
50
|
+
MemoryStore,
|
|
51
|
+
PineconeStore,
|
|
52
|
+
QdrantStore,
|
|
53
|
+
createChromaStore,
|
|
54
|
+
createMemoryStore,
|
|
55
|
+
createPineconeStore,
|
|
56
|
+
createQdrantStore,
|
|
57
|
+
createStore
|
|
58
|
+
} from "./chunk-TER262ST.mjs";
|
|
59
|
+
import {
|
|
60
|
+
batch,
|
|
61
|
+
cacheKey,
|
|
62
|
+
clamp,
|
|
63
|
+
contentHash,
|
|
64
|
+
createEventEmitter,
|
|
65
|
+
deepClone,
|
|
66
|
+
deferred,
|
|
67
|
+
estimateTokens,
|
|
68
|
+
formatBytes,
|
|
69
|
+
formatDuration,
|
|
70
|
+
generateId,
|
|
71
|
+
mean,
|
|
72
|
+
measureTime,
|
|
73
|
+
normalize,
|
|
74
|
+
percentile,
|
|
75
|
+
retry,
|
|
76
|
+
sleep,
|
|
77
|
+
splitByChars,
|
|
78
|
+
splitBySeparator,
|
|
79
|
+
stdDev,
|
|
80
|
+
variance,
|
|
81
|
+
withConcurrency
|
|
82
|
+
} from "./chunk-3KM32UQK.mjs";
|
|
83
|
+
import {
|
|
84
|
+
EmbeddingModel,
|
|
85
|
+
ModelRegistry,
|
|
86
|
+
modelRegistry
|
|
87
|
+
} from "./chunk-QAITLJ2E.mjs";
|
|
88
|
+
|
|
89
|
+
// src/core/EmbeddingManager.ts
|
|
90
|
+
import EventEmitter from "eventemitter3";
|
|
91
|
+
var EmbeddingManager = class extends EventEmitter {
  /** Effective configuration: caller-supplied values merged over the defaults. */
  config;
  /** Registry of the embedding models this manager can use. */
  modelRegistry;
  /** Optional embedding cache; only consulted while `config.caching` is true. */
  cache = null;
  /** Optional chunker; required by `embedDocument`. */
  chunker = null;
  /** Optional vector store; required by `search`. */
  store = null;
  /** Public aggregate statistics (see `createInitialStats` for the shape). */
  stats;
  // Raw counters behind the derived values in `stats`. They are kept separately
  // because a ratio/mean cannot be updated correctly from the previous ratio
  // alone once updates arrive in groups (batches) or before any embedding exists.
  #cacheLookups = 0;
  #cacheHitCount = 0;
  #latencySamples = 0;
  #latencyTotalMs = 0;
  /**
   * @param {object} [config] - Optional overrides for default model/provider,
   *   caching, batch size, concurrency and retry settings.
   */
  constructor(config = {}) {
    super();
    this.config = {
      defaultModel: config.defaultModel ?? "text-embedding-3-small",
      defaultProvider: config.defaultProvider ?? "openai",
      caching: config.caching ?? true,
      batchSize: config.batchSize ?? 100,
      concurrency: config.concurrency ?? 5,
      retry: {
        maxRetries: config.retry?.maxRetries ?? 3,
        initialDelay: config.retry?.initialDelay ?? 1e3,
        maxDelay: config.retry?.maxDelay ?? 3e4
      }
    };
    this.modelRegistry = new ModelRegistry();
    this.stats = this.createInitialStats();
  }
  /**
   * Build a zeroed statistics object.
   * @returns {object} Fresh stats with every counter at 0.
   */
  createInitialStats() {
    return {
      totalEmbeddings: 0,
      totalTokens: 0,
      avgLatencyMs: 0,
      cacheHitRate: 0,
      apiCalls: 0,
      errors: 0,
      estimatedCostUSD: 0
    };
  }
  /**
   * Register an embedding model.
   * @param {object} model - Model exposing at least `name` and `provider`.
   * @param {boolean} [isDefault=false] - Also make it the manager's default.
   * @returns {this} The manager, for chaining.
   */
  registerModel(model, isDefault = false) {
    this.modelRegistry.register(model, isDefault);
    if (isDefault) {
      this.config.defaultModel = model.name;
      this.config.defaultProvider = model.provider;
    }
    return this;
  }
  /**
   * Set the cache implementation.
   * @returns {this} The manager, for chaining.
   */
  setCache(cache) {
    this.cache = cache;
    return this;
  }
  /**
   * Set the chunker implementation.
   * @returns {this} The manager, for chaining.
   */
  setChunker(chunker) {
    this.chunker = chunker;
    return this;
  }
  /**
   * Set the store implementation.
   * @returns {this} The manager, for chaining.
   */
  setStore(store) {
    this.store = store;
    return this;
  }
  /**
   * Resolve the model to use for an embedding call.
   *
   * Lookup order: `<defaultProvider>:<name>` key, then (only when a model was
   * explicitly requested) any registered model with that name regardless of
   * provider, then the registry default.
   * @param {object} [options] - May carry `model`, the requested model name.
   * @returns {object} The resolved model.
   * @throws {Error} When no model can be resolved.
   */
  getModel(options) {
    const modelName = options?.model ?? this.config.defaultModel;
    let model = this.modelRegistry.getByKey(
      `${this.config.defaultProvider}:${modelName}`
    );
    if (!model && options?.model) {
      // An explicitly requested model may live under a non-default provider;
      // honour the request instead of silently substituting the default model.
      model = this.modelRegistry.list().find((m) => m.name === options.model);
    }
    model = model ?? this.modelRegistry.getDefault();
    if (!model) {
      throw new Error(`No embedding model found. Register a model first.`);
    }
    return model;
  }
  /**
   * Return the cache to use for this call, or null when caching is disabled,
   * no cache is configured, or the caller passed `skipCache`.
   */
  #activeCache(options) {
    return this.config.caching && this.cache && !options?.skipCache ? this.cache : null;
  }
  /**
   * Generate the embedding for a single text.
   *
   * Emits `embed:start`, `cache:hit` / `cache:miss`, `embed:complete` and
   * `embed:error`.
   * @param {string} text - Text to embed.
   * @param {object} [options] - Per-call options (`model`, `skipCache`, ...),
   *   forwarded to the model.
   * @returns {Promise<object>} The model result plus `cached` and, on a cache
   *   miss, `latencyMs`.
   * @throws Whatever the model throws (after counting it in `stats.errors`).
   */
  async embed(text, options) {
    this.emit("embed:start", text, options);
    const model = this.getModel(options);
    const cache = this.#activeCache(options);
    const key = cache ? cacheKey(text, model.name) : null;
    if (cache) {
      const cached = await cache.get(key);
      if (cached) {
        this.emit("cache:hit", key);
        this.updateStats({ cacheHits: 1 });
        this.emit("embed:complete", { ...cached, cached: true });
        return { ...cached, cached: true };
      }
      this.emit("cache:miss", key);
      this.updateStats({ cacheMisses: 1 });
    }
    try {
      const { result, durationMs } = await measureTime(
        () => model.embed(text, options)
      );
      const finalResult = {
        ...result,
        latencyMs: durationMs,
        cached: false
      };
      if (cache) {
        await cache.set(key, finalResult);
      }
      this.updateStats({
        embeddings: 1,
        tokens: finalResult.tokenCount,
        latency: durationMs,
        apiCalls: 1,
        cost: this.estimateCost(model, finalResult.tokenCount)
      });
      this.emit("embed:complete", finalResult);
      return finalResult;
    } catch (error) {
      this.stats.errors++;
      this.emit("embed:error", error, text);
      throw error;
    }
  }
  /**
   * Generate embeddings for multiple texts.
   *
   * Emits `batch:start`, `batch:progress` and `batch:complete`.
   * @param {string[]} texts - Texts to embed.
   * @param {object} [options] - `batchSize` (falls back to the legacy
   *   `concurrency` value, then `config.batchSize`), `skipCache`,
   *   `continueOnError`, `onProgress`, `model`.
   * @returns {Promise<object>} `{ results, totalTokens, totalLatencyMs,
   *   cacheHits, cacheMisses, failures }`. `results` holds only the texts that
   *   were embedded, in input order; when `failures > 0` its positions no
   *   longer line up one-to-one with `texts`.
   * @throws The first batch error unless `options.continueOnError` is set.
   */
  async embedBatch(texts, options) {
    const { batchResult } = await this.#runBatch(texts, options);
    return batchResult;
  }
  /**
   * Shared batch engine behind `embedBatch` and `embedDocument`.
   * @returns {Promise<{batchResult: object, byIndex: Map<number, object>}>}
   *   The public batch result plus every embedding keyed by its index in
   *   `texts`, so callers can stay aligned with the input when items failed.
   */
  async #runBatch(texts, options) {
    this.emit("batch:start", texts, options);
    const model = this.getModel(options);
    // `concurrency` used to be (mis)read as the batch size; keep honouring it
    // so existing callers see no change, but prefer an explicit `batchSize`.
    const batchSize = options?.batchSize ?? options?.concurrency ?? this.config.batchSize;
    const cache = this.#activeCache(options);
    const byIndex = /* @__PURE__ */ new Map();
    const pending = [];
    let cacheHits = 0;
    let cacheMisses = 0;
    let failures = 0;
    let totalTokens = 0;
    const startTime = performance.now();
    if (cache) {
      for (let i = 0; i < texts.length; i++) {
        const cached = await cache.get(cacheKey(texts[i], model.name));
        if (cached) {
          byIndex.set(i, { ...cached, cached: true });
          cacheHits++;
        } else {
          pending.push({ index: i, text: texts[i] });
          cacheMisses++;
        }
      }
    } else {
      texts.forEach((text, index) => pending.push({ index, text }));
      cacheMisses = texts.length;
    }
    const batches = batch(pending, batchSize);
    let processedCount = byIndex.size;
    for (const batchItems of batches) {
      const batchTexts = batchItems.map((item) => item.text);
      try {
        const { result: response } = await measureTime(
          () => model.embedBatch(batchTexts, options)
        );
        // Never read past the submitted items if the model over-delivers.
        const received = Math.min(response.results.length, batchItems.length);
        for (let i = 0; i < received; i++) {
          const raw = response.results[i];
          if (!raw) continue;
          const { index, text } = batchItems[i];
          // Same flag `embed` puts on freshly computed results.
          const embedding = { ...raw, cached: false };
          byIndex.set(index, embedding);
          if (cache) {
            await cache.set(cacheKey(text, model.name), embedding);
          }
        }
        totalTokens += response.totalTokens ?? 0;
        processedCount += batchItems.length;
      } catch (error) {
        // Count the failure whether or not the caller tolerates it, as `embed` does.
        this.stats.errors++;
        if (!options?.continueOnError) {
          throw error;
        }
        failures += batchItems.length;
        processedCount += batchItems.length;
      }
      this.emit("batch:progress", {
        completed: processedCount,
        total: texts.length
      });
      options?.onProgress?.({
        percent: processedCount / texts.length * 100,
        processed: processedCount,
        total: texts.length,
        elapsedMs: performance.now() - startTime
      });
    }
    const results = [];
    for (let i = 0; i < texts.length; i++) {
      const result = byIndex.get(i);
      if (result) {
        results.push(result);
      }
    }
    const totalLatencyMs = performance.now() - startTime;
    const batchResult = {
      results,
      totalTokens,
      totalLatencyMs,
      cacheHits,
      cacheMisses,
      failures
    };
    this.updateStats({
      embeddings: results.length,
      tokens: totalTokens,
      // No per-item latency exists when nothing was embedded (empty input or
      // every batch failed); dividing by zero would poison the average.
      latency: results.length > 0 ? totalLatencyMs / results.length : void 0,
      apiCalls: batches.length,
      // Only real cache lookups may influence the hit rate.
      cacheHits: cache ? cacheHits : void 0,
      cacheMisses: cache ? cacheMisses : void 0,
      cost: this.estimateCost(model, totalTokens)
    });
    this.emit("batch:complete", batchResult);
    return { batchResult, byIndex };
  }
  /**
   * Chunk a document, embed every chunk and, when a store is configured,
   * upsert the embedded chunks into it.
   * @param {string} text - Full document text.
   * @param {object} [options] - Passed to both the chunker and the batch
   *   embedder; `documentId`, `source`, `type` and `chunkMetadata` are copied
   *   into each chunk's metadata.
   * @returns {Promise<object[]>} Embedded chunks; chunks whose embedding failed
   *   (possible with `continueOnError`) are omitted.
   * @throws {Error} When no chunker is configured.
   */
  async embedDocument(text, options) {
    if (!this.chunker) {
      throw new Error("No chunker configured. Use setChunker() first.");
    }
    const chunks = await this.chunker.chunk(
      text,
      options
    );
    const chunkTexts = chunks.map((c) => c.text);
    // Use the index-keyed embeddings: the compacted `results` array shifts
    // left after a failed batch and would attach vectors to the wrong chunks.
    const { byIndex } = await this.#runBatch(chunkTexts, options);
    const embeddedChunks = [];
    // NOTE(review): positions are cumulative chunk lengths, which presumably
    // only equal document offsets when chunks are contiguous and
    // non-overlapping; confirm against the chunkers.
    let position = 0;
    for (let i = 0; i < chunks.length; i++) {
      const chunk2 = chunks[i];
      const result = byIndex.get(i);
      if (result) {
        embeddedChunks.push({
          id: generateId("chunk"),
          text: chunk2.text,
          vector: result.vector,
          index: i,
          startPosition: position,
          endPosition: position + chunk2.text.length,
          tokenCount: result.tokenCount,
          metadata: {
            documentId: options?.documentId,
            source: options?.source,
            type: options?.type,
            ...chunk2.metadata,
            ...options?.chunkMetadata
          }
        });
      }
      position += chunk2.text.length;
    }
    if (this.store) {
      await this.store.upsert(embeddedChunks, options?.documentId);
    }
    return embeddedChunks;
  }
  /**
   * Embed `query` and run it against the configured store.
   * @param {string} query - Query text.
   * @param {object} [options] - Forwarded to `store.query`.
   * @throws {Error} When no store is configured.
   */
  async search(query, options) {
    if (!this.store) {
      throw new Error("No store configured. Use setStore() first.");
    }
    const queryResult = await this.embed(query);
    return this.store.query(queryResult.vector, options);
  }
  /**
   * Cosine similarity between the embeddings of two texts.
   * @returns {Promise<number>} Value from `EmbeddingModel.cosineSimilarity`.
   */
  async similarity(text1, text2) {
    const [result1, result2] = await Promise.all([
      this.embed(text1),
      this.embed(text2)
    ]);
    return EmbeddingModel.cosineSimilarity(result1.vector, result2.vector);
  }
  /**
   * Get a snapshot (shallow copy) of the embedding statistics.
   */
  getStats() {
    return { ...this.stats };
  }
  /**
   * Reset statistics, including the raw counters behind the averages.
   */
  resetStats() {
    this.stats = this.createInitialStats();
    this.#cacheLookups = 0;
    this.#cacheHitCount = 0;
    this.#latencySamples = 0;
    this.#latencyTotalMs = 0;
  }
  /**
   * List registered models as `{ provider, name, dimensions }` records.
   */
  getModels() {
    return this.modelRegistry.list().map((m) => ({
      provider: m.provider,
      name: m.name,
      dimensions: m.dimensions
    }));
  }
  /**
   * Fold one update into the statistics.
   * @param {object} update - Any of `embeddings`, `tokens`, `latency`
   *   (per-embedding milliseconds, weighted by `embeddings`), `apiCalls`,
   *   `cacheHits`, `cacheMisses`, `cost`.
   */
  updateStats(update) {
    const added = update.embeddings ?? 0;
    if (added) {
      this.stats.totalEmbeddings += added;
    }
    if (update.tokens) {
      this.stats.totalTokens += update.tokens;
    }
    if (Number.isFinite(update.latency)) {
      // Weighted mean: a batch contributes one sample per embedding it produced.
      const weight = added > 0 ? added : 1;
      this.#latencyTotalMs += update.latency * weight;
      this.#latencySamples += weight;
      this.stats.avgLatencyMs = this.#latencyTotalMs / this.#latencySamples;
    }
    if (update.apiCalls) {
      this.stats.apiCalls += update.apiCalls;
    }
    const hits = update.cacheHits ?? 0;
    const misses = update.cacheMisses ?? 0;
    if (hits > 0 || misses > 0) {
      // Hit rate = hits / lookups, so it is defined even before the first
      // embedding has been generated.
      this.#cacheHitCount += hits;
      this.#cacheLookups += hits + misses;
      this.stats.cacheHitRate = this.#cacheHitCount / this.#cacheLookups;
    }
    if (update.cost) {
      this.stats.estimatedCostUSD += update.cost;
    }
  }
  /**
   * Estimate the cost of embedding `tokens` tokens with `model`.
   * @returns {number} `tokens / 1000 * model.info.costPer1K`; 0 when the model
   *   exposes no price or the token count is unknown.
   */
  estimateCost(model, tokens) {
    const costPer1K = model.info?.costPer1K ?? 0;
    return (tokens ?? 0) / 1e3 * costPer1K;
  }
};
|
|
430
|
+
/**
 * Factory for `EmbeddingManager`.
 * @param {object} [config] - Passed through unchanged to the constructor.
 * @returns {EmbeddingManager} A newly constructed manager.
 */
function createEmbeddingManager(config) {
  const manager = new EmbeddingManager(config);
  return manager;
}
|
|
433
|
+
|
|
434
|
+
// src/versioning/VersionRegistry.ts
|
|
435
|
+
import { nanoid } from "nanoid";
|
|
436
|
+
import EventEmitter2 from "eventemitter3";
|
|
437
|
+
var VersionRegistry = class extends EventEmitter2 {
  /** Registry entries keyed by version id: `{ version, documentCount, chunkCount, firstUsed, lastUsed }`. */
  versions = /* @__PURE__ */ new Map();
  /** Id of the active version, or null when none is active. */
  activeVersion = null;
  /** Effective options (caller values merged over defaults). */
  options;
  /**
   * @param {object} [options] - `autoRegister` (default true), `trackUsage`
   *   (default true), `maxVersions` (default 100); other keys are kept as-is.
   */
  constructor(options = {}) {
    super();
    // Spread first: a key passed explicitly as `undefined` must not wipe out
    // its default (it used to disable usage tracking, for example).
    this.options = {
      ...options,
      autoRegister: options.autoRegister ?? true,
      trackUsage: options.trackUsage ?? true,
      maxVersions: options.maxVersions ?? 100
    };
  }
  /**
   * Register a new version. The first version registered becomes active.
   * Emits `version:created` and a `change` event of type "created".
   * @param {object} version - Version fields; `id`, `createdAt`, `active` and
   *   `deprecated` are assigned by the registry.
   * @returns {object} The stored version record.
   */
  register(version) {
    const newVersion = {
      ...version,
      id: nanoid(),
      createdAt: Date.now(),
      active: false,
      deprecated: false
    };
    const entry = {
      version: newVersion,
      documentCount: 0,
      chunkCount: 0,
      firstUsed: 0,
      lastUsed: 0
    };
    this.versions.set(newVersion.id, entry);
    if (this.versions.size === 1) {
      this.activate(newVersion.id);
    }
    if (this.versions.size > (this.options.maxVersions ?? 100)) {
      // Protect the version being registered: it has never been used, so a
      // plain least-recently-used ordering would evict it immediately.
      this.pruneOldVersions(newVersion.id);
    }
    this.emit("version:created", newVersion);
    this.emitChange("created", newVersion.id);
    return newVersion;
  }
  /**
   * Get a version by ID.
   * @returns {object|undefined} The version record, if registered.
   */
  get(id) {
    return this.versions.get(id)?.version;
  }
  /**
   * Get the active version.
   * @returns {object|undefined} The active version record, if any.
   */
  getActive() {
    if (!this.activeVersion) return void 0;
    return this.versions.get(this.activeVersion)?.version;
  }
  /**
   * Activate a version, deactivating the previously active one.
   * Emits `version:activated` and a `change` event of type "activated".
   * @throws {Error} When `id` is not registered.
   */
  activate(id) {
    const entry = this.versions.get(id);
    if (!entry) {
      throw new Error(`Version ${id} not found`);
    }
    const previousEntry = this.activeVersion ? this.versions.get(this.activeVersion) : void 0;
    const previousVersion = previousEntry?.version;
    if (previousEntry && this.activeVersion !== id) {
      previousEntry.version.active = false;
    }
    entry.version.active = true;
    this.activeVersion = id;
    this.emit("version:activated", entry.version, previousVersion);
    this.emitChange("activated", id, previousVersion?.id);
  }
  /**
   * Mark a version as deprecated.
   * Emits `version:deprecated` and a `change` event of type "deprecated".
   * @param {string} id - Version to deprecate.
   * @param {string} [reason] - Stored as `deprecationReason`.
   * @param {string} [replacement] - Stored as `replacement`.
   * @throws {Error} When `id` is not registered.
   */
  deprecate(id, reason, replacement) {
    const entry = this.versions.get(id);
    if (!entry) {
      throw new Error(`Version ${id} not found`);
    }
    entry.version.deprecated = true;
    entry.version.deprecationReason = reason;
    entry.version.replacement = replacement;
    this.emit("version:deprecated", entry.version, reason);
    this.emitChange("deprecated", id);
  }
  /**
   * Delete a version.
   * @returns {boolean} True when a version was removed.
   * @throws {Error} When `id` is the active version.
   */
  delete(id) {
    if (this.activeVersion === id) {
      throw new Error("Cannot delete active version");
    }
    const deleted = this.versions.delete(id);
    if (deleted) {
      this.emit("version:deleted", id);
      this.emitChange("deleted", id);
    }
    return deleted;
  }
  /**
   * List all version records in registration order.
   */
  list() {
    return Array.from(this.versions.values(), (e) => e.version);
  }
  /**
   * Find versions by provider.
   */
  findByProvider(provider) {
    return this.list().filter((v) => v.provider === provider);
  }
  /**
   * Find versions by model.
   */
  findByModel(model) {
    return this.list().filter((v) => v.model === model);
  }
  /**
   * Compare two versions.
   * @returns {object} `{ source, target, compatible, dimensionChange,
   *   providerChanged, migrationRequired, migrationComplexity, notes }`.
   *   Complexity is "high" when dimensions differ, "medium" when only the
   *   provider differs, otherwise "low".
   * @throws {Error} When either id is not registered.
   */
  compare(sourceId, targetId) {
    const source = this.get(sourceId);
    const target = this.get(targetId);
    if (!source || !target) {
      throw new Error("One or both versions not found");
    }
    const dimensionChange = target.dimensions - source.dimensions;
    const providerChanged = source.provider !== target.provider;
    const modelChanged = source.model !== target.model;
    const migrationRequired = dimensionChange !== 0 || providerChanged || modelChanged;
    let migrationComplexity = "low";
    if (dimensionChange !== 0) {
      migrationComplexity = "high";
    } else if (providerChanged) {
      migrationComplexity = "medium";
    }
    const notes = [];
    if (dimensionChange > 0) {
      notes.push(`Dimensions increase by ${dimensionChange}`);
    } else if (dimensionChange < 0) {
      notes.push(`Dimensions decrease by ${Math.abs(dimensionChange)}`);
    }
    if (providerChanged) {
      notes.push(
        `Provider changes from ${source.provider} to ${target.provider}`
      );
    }
    if (modelChanged) {
      notes.push(`Model changes from ${source.model} to ${target.model}`);
    }
    return {
      source,
      target,
      compatible: dimensionChange === 0,
      dimensionChange,
      providerChanged,
      migrationRequired,
      migrationComplexity,
      notes
    };
  }
  /**
   * Describe the upgrade path between two versions, derived from `compare`.
   * @throws {Error} When either id is not registered.
   */
  getUpgradePath(fromId, toId) {
    const comparison = this.compare(fromId, toId);
    return {
      from: fromId,
      to: toId,
      steps: comparison.migrationRequired ? ["backup", "re-embed", "verify", "switch"] : ["switch"],
      direct: !comparison.migrationRequired,
      complexity: comparison.migrationComplexity,
      breakingChanges: comparison.dimensionChange !== 0 ? [`Dimension change: ${comparison.dimensionChange}`] : []
    };
  }
  /**
   * Record usage of a version. No-op when the `trackUsage` option is off or
   * the id is unknown.
   * @param {string} id - Version id.
   * @param {number} [documents=0] - Documents to add to the running count.
   * @param {number} [chunks=0] - Chunks to add to the running count.
   */
  trackUsage(id, documents = 0, chunks = 0) {
    if (!this.options.trackUsage) return;
    const entry = this.versions.get(id);
    if (!entry) return;
    const now = Date.now();
    if (entry.firstUsed === 0) {
      entry.firstUsed = now;
    }
    entry.lastUsed = now;
    entry.documentCount += documents;
    entry.chunkCount += chunks;
  }
  /**
   * Get the registry entry (version plus usage counters) for a version.
   */
  getUsageStats(id) {
    return this.versions.get(id);
  }
  /**
   * Evict the least recently used versions until the registry is back at
   * `maxVersions`. The active version and `protectedId` are never evicted.
   * Does nothing when the registry is within the limit.
   * Emits `version:deleted` and a `change` event of type "deleted" per eviction.
   * @param {string} [protectedId] - Additional id to keep (the version that is
   *   currently being registered).
   */
  pruneOldVersions(protectedId) {
    const excess = this.versions.size - (this.options.maxVersions ?? 100);
    if (excess <= 0) return;
    // Never-used versions are ranked by creation time instead of lastUsed = 0.
    const recency = (entry) => entry.lastUsed || entry.version.createdAt || 0;
    const candidates = Array.from(this.versions.entries())
      .filter(([id]) => id !== this.activeVersion && id !== protectedId)
      .sort((a, b) => recency(a[1]) - recency(b[1]));
    for (const [id] of candidates.slice(0, excess)) {
      this.versions.delete(id);
      this.emit("version:deleted", id);
      this.emitChange("deleted", id);
    }
  }
  /**
   * Emit a `change` event carrying the change type, the affected version id,
   * the previous version id (if any) and a timestamp.
   */
  emitChange(type, versionId, previousVersion) {
    this.emit("change", {
      type,
      versionId,
      previousVersion,
      timestamp: Date.now()
    });
  }
  /**
   * Export registry state.
   * @returns {{versions: object[], activeVersion: (string|null)}} Copies of
   *   the entries, so mutating the export cannot corrupt the registry.
   */
  export() {
    return {
      versions: Array.from(
        this.versions.values(),
        (entry) => ({ ...entry, version: { ...entry.version } })
      ),
      activeVersion: this.activeVersion
    };
  }
  /**
   * Replace the registry state with previously exported data.
   * An `activeVersion` that is not among the imported versions is dropped, and
   * every version's `active` flag is re-derived from the active id.
   * @param {{versions: object[], activeVersion: (string|null)}} data
   */
  import(data) {
    this.versions.clear();
    for (const entry of data.versions ?? []) {
      // Copy so later caller-side mutation of `data` cannot leak in.
      this.versions.set(entry.version.id, { ...entry, version: { ...entry.version } });
    }
    const activeId = data.activeVersion ?? null;
    this.activeVersion = activeId !== null && this.versions.has(activeId) ? activeId : null;
    for (const [id, entry] of this.versions) {
      entry.version.active = id === this.activeVersion;
    }
  }
};
|
|
681
|
+
/**
 * Factory for `VersionRegistry`.
 * @param {object} [options] - Passed through unchanged to the constructor.
 * @returns {VersionRegistry} A newly constructed registry.
 */
function createVersionRegistry(options) {
  const registry = new VersionRegistry(options);
  return registry;
}
|
|
684
|
+
|
|
685
|
+
// src/quality/DriftDetector.ts
|
|
686
|
+
import EventEmitter3 from "eventemitter3";
|
|
687
|
+
import { nanoid as nanoid2 } from "nanoid";
|
|
688
|
+
var DriftDetector = class extends EventEmitter3 {
|
|
689
|
+
reference = null;
|
|
690
|
+
config;
|
|
691
|
+
monitorInterval;
|
|
692
|
+
sampleBuffer = [];
|
|
693
|
+
constructor(config = {}) {
|
|
694
|
+
super();
|
|
695
|
+
this.config = {
|
|
696
|
+
checkInterval: config.checkInterval ?? 36e5,
|
|
697
|
+
// 1 hour
|
|
698
|
+
sampleSize: config.sampleSize ?? 1e3,
|
|
699
|
+
driftThreshold: config.driftThreshold ?? 0.1,
|
|
700
|
+
alertSeverity: config.alertSeverity ?? "medium",
|
|
701
|
+
autoUpdateBaseline: config.autoUpdateBaseline ?? false,
|
|
702
|
+
baselineUpdateInterval: config.baselineUpdateInterval ?? 864e5,
|
|
703
|
+
// 24 hours
|
|
704
|
+
...config
|
|
705
|
+
};
|
|
706
|
+
}
|
|
707
|
+
/**
|
|
708
|
+
* Set reference distribution from embeddings
|
|
709
|
+
*/
|
|
710
|
+
setReference(embeddings, model, version) {
|
|
711
|
+
if (embeddings.length === 0) {
|
|
712
|
+
throw new Error("Cannot create reference from empty embeddings");
|
|
713
|
+
}
|
|
714
|
+
const dimensions = embeddings[0].length;
|
|
715
|
+
const meanVector = EmbeddingModel.average(embeddings);
|
|
716
|
+
const varianceVector = [];
|
|
717
|
+
for (let d = 0; d < dimensions; d++) {
|
|
718
|
+
const values = embeddings.map((e) => e[d]);
|
|
719
|
+
varianceVector.push(variance(values));
|
|
720
|
+
}
|
|
721
|
+
this.reference = {
|
|
722
|
+
id: nanoid2(),
|
|
723
|
+
model,
|
|
724
|
+
version,
|
|
725
|
+
sampleCount: embeddings.length,
|
|
726
|
+
mean: meanVector,
|
|
727
|
+
variance: varianceVector,
|
|
728
|
+
createdAt: Date.now()
|
|
729
|
+
};
|
|
730
|
+
this.emit("baseline:updated", this.reference);
|
|
731
|
+
return this.reference;
|
|
732
|
+
}
|
|
733
|
+
/**
|
|
734
|
+
* Get current reference distribution
|
|
735
|
+
*/
|
|
736
|
+
getReference() {
|
|
737
|
+
return this.reference;
|
|
738
|
+
}
|
|
739
|
+
/**
|
|
740
|
+
* Detect drift from reference distribution
|
|
741
|
+
*/
|
|
742
|
+
detect(currentEmbeddings) {
|
|
743
|
+
if (!this.reference) {
|
|
744
|
+
throw new Error("No reference distribution set");
|
|
745
|
+
}
|
|
746
|
+
if (currentEmbeddings.length === 0) {
|
|
747
|
+
throw new Error("Cannot detect drift from empty embeddings");
|
|
748
|
+
}
|
|
749
|
+
const dimensions = this.reference.mean.length;
|
|
750
|
+
const currentMean = EmbeddingModel.average(currentEmbeddings);
|
|
751
|
+
const currentVariance = [];
|
|
752
|
+
for (let d = 0; d < dimensions; d++) {
|
|
753
|
+
const values = currentEmbeddings.map((e) => e[d]);
|
|
754
|
+
currentVariance.push(variance(values));
|
|
755
|
+
}
|
|
756
|
+
const comparison = this.compareDistributions(
|
|
757
|
+
this.reference.mean,
|
|
758
|
+
this.reference.variance,
|
|
759
|
+
currentMean,
|
|
760
|
+
currentVariance
|
|
761
|
+
);
|
|
762
|
+
const driftScore = this.calculateDriftScore(comparison);
|
|
763
|
+
const severity = this.determineSeverity(driftScore);
|
|
764
|
+
const affectedDimensions = comparison.dimensionStats?.filter((s) => s.significantChange).length ?? 0;
|
|
765
|
+
const result = {
|
|
766
|
+
driftDetected: driftScore >= (this.config.driftThreshold ?? 0.1),
|
|
767
|
+
severity,
|
|
768
|
+
driftScore,
|
|
769
|
+
affectedDimensionsPercent: affectedDimensions / dimensions * 100,
|
|
770
|
+
meanShift: comparison.meanCosineSimilarity,
|
|
771
|
+
varianceChange: mean(
|
|
772
|
+
currentVariance.map(
|
|
773
|
+
(v, i) => Math.abs(v - this.reference.variance[i]) / (this.reference.variance[i] || 1)
|
|
774
|
+
)
|
|
775
|
+
),
|
|
776
|
+
distributionComparison: comparison,
|
|
777
|
+
detectedAt: Date.now(),
|
|
778
|
+
referenceTimestamp: this.reference.createdAt,
|
|
779
|
+
currentTimestamp: Date.now(),
|
|
780
|
+
recommendations: this.generateRecommendations(driftScore, severity)
|
|
781
|
+
};
|
|
782
|
+
if (result.driftDetected) {
|
|
783
|
+
this.emit("drift:detected", result);
|
|
784
|
+
if (this.shouldAlert(severity)) {
|
|
785
|
+
this.emitAlert(result);
|
|
786
|
+
}
|
|
787
|
+
}
|
|
788
|
+
return result;
|
|
789
|
+
}
|
|
790
|
+
/**
|
|
791
|
+
* Compare two distributions
|
|
792
|
+
*/
|
|
793
|
+
compareDistributions(refMean, refVariance, curMean, curVariance) {
|
|
794
|
+
const dimensions = refMean.length;
|
|
795
|
+
const meanCosineSimilarity = EmbeddingModel.cosineSimilarity(
|
|
796
|
+
refMean,
|
|
797
|
+
curMean
|
|
798
|
+
);
|
|
799
|
+
let klDivergence = 0;
|
|
800
|
+
for (let d = 0; d < dimensions; d++) {
|
|
801
|
+
const refVar = refVariance[d] || 1e-4;
|
|
802
|
+
const curVar = curVariance[d] || 1e-4;
|
|
803
|
+
const meanDiff = curMean[d] - refMean[d];
|
|
804
|
+
klDivergence += Math.log(Math.sqrt(curVar / refVar)) + (refVar + meanDiff * meanDiff) / (2 * curVar) - 0.5;
|
|
805
|
+
}
|
|
806
|
+
klDivergence = Math.max(0, klDivergence / dimensions);
|
|
807
|
+
const jsDivergence = klDivergence / 2;
|
|
808
|
+
let wassersteinDistance = 0;
|
|
809
|
+
for (let d = 0; d < dimensions; d++) {
|
|
810
|
+
const meanDiff = Math.abs(curMean[d] - refMean[d]);
|
|
811
|
+
const stdDiff = Math.abs(
|
|
812
|
+
Math.sqrt(curVariance[d]) - Math.sqrt(refVariance[d])
|
|
813
|
+
);
|
|
814
|
+
wassersteinDistance += meanDiff + stdDiff;
|
|
815
|
+
}
|
|
816
|
+
wassersteinDistance /= dimensions;
|
|
817
|
+
const dimensionStats = [];
|
|
818
|
+
for (let d = 0; d < dimensions; d++) {
|
|
819
|
+
const meanChange = curMean[d] - refMean[d];
|
|
820
|
+
const varChange = curVariance[d] - refVariance[d];
|
|
821
|
+
const refStd = Math.sqrt(refVariance[d] || 1e-4);
|
|
822
|
+
dimensionStats.push({
|
|
823
|
+
dimension: d,
|
|
824
|
+
referenceMean: refMean[d],
|
|
825
|
+
currentMean: curMean[d],
|
|
826
|
+
meanChange,
|
|
827
|
+
referenceVariance: refVariance[d],
|
|
828
|
+
currentVariance: curVariance[d],
|
|
829
|
+
varianceChange: varChange,
|
|
830
|
+
significantChange: Math.abs(meanChange) > 2 * refStd
|
|
831
|
+
});
|
|
832
|
+
}
|
|
833
|
+
return {
|
|
834
|
+
klDivergence,
|
|
835
|
+
jsDivergence,
|
|
836
|
+
wassersteinDistance,
|
|
837
|
+
meanCosineSimilarity,
|
|
838
|
+
dimensionStats
|
|
839
|
+
};
|
|
840
|
+
}
|
|
841
|
+
/**
|
|
842
|
+
* Calculate overall drift score
|
|
843
|
+
*/
|
|
844
|
+
calculateDriftScore(comparison) {
|
|
845
|
+
const cosineDistance = 1 - comparison.meanCosineSimilarity;
|
|
846
|
+
const klScore = Math.min(1, comparison.klDivergence / 10);
|
|
847
|
+
const wasserstein = Math.min(1, comparison.wassersteinDistance);
|
|
848
|
+
return cosineDistance * 0.4 + klScore * 0.3 + wasserstein * 0.3;
|
|
849
|
+
}
|
|
850
|
+
/**
|
|
851
|
+
* Determine severity based on drift score
|
|
852
|
+
*/
|
|
853
|
+
determineSeverity(driftScore) {
|
|
854
|
+
if (driftScore < 0.05) return "none";
|
|
855
|
+
if (driftScore < 0.1) return "low";
|
|
856
|
+
if (driftScore < 0.2) return "medium";
|
|
857
|
+
if (driftScore < 0.4) return "high";
|
|
858
|
+
return "critical";
|
|
859
|
+
}
|
|
860
|
+
/**
|
|
861
|
+
* Generate recommendations
|
|
862
|
+
*/
|
|
863
|
+
generateRecommendations(driftScore, severity) {
|
|
864
|
+
const recommendations = [];
|
|
865
|
+
if (severity === "none" || severity === "low") {
|
|
866
|
+
recommendations.push("Continue monitoring");
|
|
867
|
+
}
|
|
868
|
+
if (severity === "medium") {
|
|
869
|
+
recommendations.push("Consider updating the baseline distribution");
|
|
870
|
+
recommendations.push("Review recent changes to input data");
|
|
871
|
+
}
|
|
872
|
+
if (severity === "high") {
|
|
873
|
+
recommendations.push("Re-embed affected documents");
|
|
874
|
+
recommendations.push("Update baseline distribution immediately");
|
|
875
|
+
recommendations.push("Investigate root cause of drift");
|
|
876
|
+
}
|
|
877
|
+
if (severity === "critical") {
|
|
878
|
+
recommendations.push("URGENT: Stop accepting new embeddings");
|
|
879
|
+
recommendations.push("Full re-embedding required");
|
|
880
|
+
recommendations.push("Review embedding model for issues");
|
|
881
|
+
}
|
|
882
|
+
return recommendations;
|
|
883
|
+
}
|
|
884
|
+
/**
|
|
885
|
+
* Check if should alert
|
|
886
|
+
*/
|
|
887
|
+
shouldAlert(severity) {
|
|
888
|
+
const severityOrder = ["none", "low", "medium", "high", "critical"];
|
|
889
|
+
const alertLevel = this.config.alertSeverity ?? "medium";
|
|
890
|
+
return severityOrder.indexOf(severity) >= severityOrder.indexOf(alertLevel);
|
|
891
|
+
}
|
|
892
|
+
/**
|
|
893
|
+
* Emit quality alert
|
|
894
|
+
*/
|
|
895
|
+
emitAlert(result) {
|
|
896
|
+
const alert = {
|
|
897
|
+
id: nanoid2(),
|
|
898
|
+
type: "drift_detected",
|
|
899
|
+
severity: result.severity,
|
|
900
|
+
message: `Embedding drift detected with score ${result.driftScore.toFixed(3)}`,
|
|
901
|
+
currentValue: result.driftScore,
|
|
902
|
+
thresholdValue: this.config.driftThreshold,
|
|
903
|
+
createdAt: Date.now(),
|
|
904
|
+
acknowledged: false
|
|
905
|
+
};
|
|
906
|
+
this.emit("drift:alert", alert);
|
|
907
|
+
this.config.onAlert?.(result);
|
|
908
|
+
}
|
|
909
|
+
/**
|
|
910
|
+
* Add sample to buffer for monitoring
|
|
911
|
+
*/
|
|
912
|
+
addSample(embedding) {
|
|
913
|
+
this.sampleBuffer.push(embedding);
|
|
914
|
+
if (this.sampleBuffer.length >= (this.config.sampleSize ?? 1e3)) {
|
|
915
|
+
if (this.reference) {
|
|
916
|
+
this.detect(this.sampleBuffer);
|
|
917
|
+
}
|
|
918
|
+
this.sampleBuffer = [];
|
|
919
|
+
}
|
|
920
|
+
}
|
|
921
|
+
/**
|
|
922
|
+
* Start continuous monitoring
|
|
923
|
+
*/
|
|
924
|
+
startMonitoring() {
|
|
925
|
+
if (this.monitorInterval) return;
|
|
926
|
+
this.monitorInterval = setInterval(() => {
|
|
927
|
+
if (this.reference && this.sampleBuffer.length >= 100) {
|
|
928
|
+
this.detect(this.sampleBuffer);
|
|
929
|
+
this.sampleBuffer = [];
|
|
930
|
+
}
|
|
931
|
+
}, this.config.checkInterval);
|
|
932
|
+
}
|
|
933
|
+
/**
|
|
934
|
+
* Stop monitoring
|
|
935
|
+
*/
|
|
936
|
+
stopMonitoring() {
|
|
937
|
+
if (this.monitorInterval) {
|
|
938
|
+
clearInterval(this.monitorInterval);
|
|
939
|
+
this.monitorInterval = void 0;
|
|
940
|
+
}
|
|
941
|
+
}
|
|
942
|
+
};
|
|
943
|
+
/**
 * Factory wrapper around the DriftDetector constructor.
 *
 * @param {*} config - Passed through unchanged to `new DriftDetector(...)`.
 * @returns {DriftDetector} The newly constructed detector.
 */
function createDriftDetector(config) {
  const detector = new DriftDetector(config);
  return detector;
}
|
|
946
|
+
export {
|
|
947
|
+
BaseCache,
|
|
948
|
+
BaseChunker,
|
|
949
|
+
BaseProvider,
|
|
950
|
+
BaseStore,
|
|
951
|
+
ChromaStore,
|
|
952
|
+
CodeChunker,
|
|
953
|
+
CohereProvider,
|
|
954
|
+
DriftDetector,
|
|
955
|
+
EmbeddingManager,
|
|
956
|
+
EmbeddingModel,
|
|
957
|
+
FixedChunker,
|
|
958
|
+
HuggingFaceProvider,
|
|
959
|
+
LocalProvider,
|
|
960
|
+
MarkdownChunker,
|
|
961
|
+
MemoryCache,
|
|
962
|
+
MemoryStore,
|
|
963
|
+
ModelRegistry,
|
|
964
|
+
OpenAIProvider,
|
|
965
|
+
PineconeStore,
|
|
966
|
+
QdrantStore,
|
|
967
|
+
RecursiveChunker,
|
|
968
|
+
RedisCache,
|
|
969
|
+
SQLiteCache,
|
|
970
|
+
SemanticChunker,
|
|
971
|
+
TieredCache,
|
|
972
|
+
VersionRegistry,
|
|
973
|
+
VoyageProvider,
|
|
974
|
+
batch,
|
|
975
|
+
cacheKey,
|
|
976
|
+
chunk,
|
|
977
|
+
clamp,
|
|
978
|
+
contentHash,
|
|
979
|
+
createCache,
|
|
980
|
+
createChromaStore,
|
|
981
|
+
createChunker,
|
|
982
|
+
createCodeChunker,
|
|
983
|
+
createCohereProvider,
|
|
984
|
+
createDriftDetector,
|
|
985
|
+
createEmbeddingManager,
|
|
986
|
+
createEventEmitter,
|
|
987
|
+
createFixedChunker,
|
|
988
|
+
createHuggingFaceProvider,
|
|
989
|
+
createLocalProvider,
|
|
990
|
+
createMarkdownChunker,
|
|
991
|
+
createMemoryCache,
|
|
992
|
+
createMemoryStore,
|
|
993
|
+
createMockProvider,
|
|
994
|
+
createOpenAIProvider,
|
|
995
|
+
createPineconeStore,
|
|
996
|
+
createQdrantStore,
|
|
997
|
+
createRandomProvider,
|
|
998
|
+
createRecursiveChunker,
|
|
999
|
+
createRedisCache,
|
|
1000
|
+
createSQLiteCache,
|
|
1001
|
+
createSemanticChunker,
|
|
1002
|
+
createStandardTieredCache,
|
|
1003
|
+
createStore,
|
|
1004
|
+
createTieredCache,
|
|
1005
|
+
createVersionRegistry,
|
|
1006
|
+
createVoyageProvider,
|
|
1007
|
+
deepClone,
|
|
1008
|
+
defaultTokenCounter,
|
|
1009
|
+
deferred,
|
|
1010
|
+
estimateTokens,
|
|
1011
|
+
formatBytes,
|
|
1012
|
+
formatDuration,
|
|
1013
|
+
generateId,
|
|
1014
|
+
mean,
|
|
1015
|
+
measureTime,
|
|
1016
|
+
mergeSmallChunks,
|
|
1017
|
+
modelRegistry,
|
|
1018
|
+
normalize,
|
|
1019
|
+
percentile,
|
|
1020
|
+
retry,
|
|
1021
|
+
sleep,
|
|
1022
|
+
splitByChars,
|
|
1023
|
+
splitBySeparator,
|
|
1024
|
+
splitLargeChunks,
|
|
1025
|
+
stdDev,
|
|
1026
|
+
variance,
|
|
1027
|
+
withConcurrency
|
|
1028
|
+
};
|