@lov3kaizen/agentsea-embeddings 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs ADDED
@@ -0,0 +1,1028 @@
1
+ import {
2
+ BaseCache,
3
+ MemoryCache,
4
+ RedisCache,
5
+ SQLiteCache,
6
+ TieredCache,
7
+ createCache,
8
+ createMemoryCache,
9
+ createRedisCache,
10
+ createSQLiteCache,
11
+ createStandardTieredCache,
12
+ createTieredCache
13
+ } from "./chunk-VPSMDBHH.mjs";
14
+ import {
15
+ BaseChunker,
16
+ CodeChunker,
17
+ FixedChunker,
18
+ MarkdownChunker,
19
+ RecursiveChunker,
20
+ SemanticChunker,
21
+ chunk,
22
+ createChunker,
23
+ createCodeChunker,
24
+ createFixedChunker,
25
+ createMarkdownChunker,
26
+ createRecursiveChunker,
27
+ createSemanticChunker,
28
+ defaultTokenCounter,
29
+ mergeSmallChunks,
30
+ splitLargeChunks
31
+ } from "./chunk-DJAURHAS.mjs";
32
+ import {
33
+ BaseProvider,
34
+ CohereProvider,
35
+ HuggingFaceProvider,
36
+ LocalProvider,
37
+ OpenAIProvider,
38
+ VoyageProvider,
39
+ createCohereProvider,
40
+ createHuggingFaceProvider,
41
+ createLocalProvider,
42
+ createMockProvider,
43
+ createOpenAIProvider,
44
+ createRandomProvider,
45
+ createVoyageProvider
46
+ } from "./chunk-NBHIRTJT.mjs";
47
+ import {
48
+ BaseStore,
49
+ ChromaStore,
50
+ MemoryStore,
51
+ PineconeStore,
52
+ QdrantStore,
53
+ createChromaStore,
54
+ createMemoryStore,
55
+ createPineconeStore,
56
+ createQdrantStore,
57
+ createStore
58
+ } from "./chunk-TER262ST.mjs";
59
+ import {
60
+ batch,
61
+ cacheKey,
62
+ clamp,
63
+ contentHash,
64
+ createEventEmitter,
65
+ deepClone,
66
+ deferred,
67
+ estimateTokens,
68
+ formatBytes,
69
+ formatDuration,
70
+ generateId,
71
+ mean,
72
+ measureTime,
73
+ normalize,
74
+ percentile,
75
+ retry,
76
+ sleep,
77
+ splitByChars,
78
+ splitBySeparator,
79
+ stdDev,
80
+ variance,
81
+ withConcurrency
82
+ } from "./chunk-3KM32UQK.mjs";
83
+ import {
84
+ EmbeddingModel,
85
+ ModelRegistry,
86
+ modelRegistry
87
+ } from "./chunk-QAITLJ2E.mjs";
88
+
89
+ // src/core/EmbeddingManager.ts
90
+ import EventEmitter from "eventemitter3";
91
/**
 * Coordinates embedding generation across a registered model, an optional
 * cache, an optional chunker and an optional vector store, while collecting
 * usage statistics and emitting lifecycle events.
 *
 * Events emitted by this class: "embed:start", "embed:complete",
 * "embed:error", "cache:hit", "cache:miss", "batch:start", "batch:progress"
 * and "batch:complete".
 */
var EmbeddingManager = class extends EventEmitter {
  config;
  modelRegistry;
  cache = null;
  chunker = null;
  store = null;
  stats;
  // Raw counters behind stats.cacheHitRate. Keeping them separate from
  // totalEmbeddings means the rate is always hits / lookups and never divides
  // by zero when a cache hit happens before any embedding was generated.
  #cacheLookups = 0;
  #cacheHits = 0;

  /**
   * @param {object} [config] - Optional overrides for defaultModel,
   *   defaultProvider, caching, batchSize, concurrency and retry settings.
   */
  constructor(config = {}) {
    super();
    this.config = {
      defaultModel: config.defaultModel ?? "text-embedding-3-small",
      defaultProvider: config.defaultProvider ?? "openai",
      caching: config.caching ?? true,
      batchSize: config.batchSize ?? 100,
      concurrency: config.concurrency ?? 5,
      retry: {
        maxRetries: config.retry?.maxRetries ?? 3,
        initialDelay: config.retry?.initialDelay ?? 1e3,
        maxDelay: config.retry?.maxDelay ?? 3e4
      }
    };
    this.modelRegistry = new ModelRegistry();
    this.stats = this.createInitialStats();
  }

  /**
   * Build a zeroed statistics record.
   * @returns {object} Fresh stats object.
   */
  createInitialStats() {
    return {
      totalEmbeddings: 0,
      totalTokens: 0,
      avgLatencyMs: 0,
      cacheHitRate: 0,
      apiCalls: 0,
      errors: 0,
      estimatedCostUSD: 0
    };
  }

  /**
   * Register an embedding model
   * @param {object} model - Model to add to the registry.
   * @param {boolean} [isDefault=false] - When true, the model's name and
   *   provider also become the configured defaults.
   * @returns {this} The manager, for chaining.
   */
  registerModel(model, isDefault = false) {
    this.modelRegistry.register(model, isDefault);
    if (isDefault) {
      this.config.defaultModel = model.name;
      this.config.defaultProvider = model.provider;
    }
    return this;
  }

  /**
   * Set the cache implementation
   * @returns {this} The manager, for chaining.
   */
  setCache(cache) {
    this.cache = cache;
    return this;
  }

  /**
   * Set the chunker implementation
   * @returns {this} The manager, for chaining.
   */
  setChunker(chunker) {
    this.chunker = chunker;
    return this;
  }

  /**
   * Set the store implementation
   * @returns {this} The manager, for chaining.
   */
  setStore(store) {
    this.store = store;
    return this;
  }

  /**
   * Get the model to use for embedding
   *
   * Looks up "<defaultProvider>:<model>" in the registry and falls back to
   * the registry default when that key is not registered.
   * @param {object} [options] - May carry a `model` name override.
   * @returns {object} The resolved model.
   * @throws {Error} When neither lookup yields a model.
   */
  getModel(options) {
    const modelName = options?.model ?? this.config.defaultModel;
    const model = this.modelRegistry.getByKey(
      `${this.config.defaultProvider}:${modelName}`
    ) ?? this.modelRegistry.getDefault();
    if (!model) {
      throw new Error(`No embedding model found. Register a model first.`);
    }
    return model;
  }

  /**
   * Generate embedding for a single text
   * @param {string} text - Text to embed.
   * @param {object} [options] - Forwarded to the model; `skipCache` bypasses
   *   the cache for both lookup and write-back.
   * @returns {Promise<object>} The embedding result with a `cached` flag.
   * @throws Re-throws any error raised while embedding or caching, after
   *   counting it and emitting "embed:error".
   */
  async embed(text, options) {
    this.emit("embed:start", text, options);
    const model = this.getModel(options);
    const cache = this.config.caching && !options?.skipCache ? this.cache : null;
    if (cache) {
      const key = cacheKey(text, model.name);
      const cached = await cache.get(key);
      if (cached) {
        this.emit("cache:hit", key);
        this.updateStats({ cacheHits: 1, cacheLookups: 1 });
        this.emit("embed:complete", { ...cached, cached: true });
        return { ...cached, cached: true };
      }
      // Record the miss too, otherwise the hit rate could only ever grow.
      this.updateStats({ cacheHits: 0, cacheLookups: 1 });
      this.emit("cache:miss", key);
    }
    try {
      const { result, durationMs } = await measureTime(
        () => model.embed(text, options)
      );
      const finalResult = {
        ...result,
        latencyMs: durationMs,
        cached: false
      };
      if (cache) {
        await cache.set(cacheKey(text, model.name), finalResult);
      }
      this.updateStats({
        embeddings: 1,
        tokens: finalResult.tokenCount,
        latency: durationMs,
        apiCalls: 1,
        cost: this.estimateCost(model, finalResult.tokenCount)
      });
      this.emit("embed:complete", finalResult);
      return finalResult;
    } catch (error) {
      this.stats.errors++;
      this.emit("embed:error", error, text);
      throw error;
    }
  }

  /**
   * Generate embeddings for multiple texts
   *
   * Failed batches are skipped when `options.continueOnError` is set, so
   * `results` may be shorter than `texts`; `failures` reports how many inputs
   * were dropped.
   * @param {string[]} texts - Texts to embed.
   * @param {object} [options] - Forwarded to the model; also read for
   *   `skipCache`, `continueOnError`, `onProgress` and `concurrency`.
   * @returns {Promise<object>} Batch summary: results, totalTokens,
   *   totalLatencyMs, cacheHits, cacheMisses, failures.
   */
  async embedBatch(texts, options) {
    const { batchResult } = await this.#embedBatchIndexed(texts, options);
    return batchResult;
  }

  /**
   * Shared implementation of batch embedding. Besides the public batch
   * summary it returns a Map from input index to result, so callers that
   * must keep inputs and outputs aligned are not misled by skipped items.
   */
  async #embedBatchIndexed(texts, options) {
    this.emit("batch:start", texts, options);
    const model = this.getModel(options);
    // NOTE(review): the batch size is read from `options.concurrency`; this
    // looks like it may have been meant to be a batch-size option. Behaviour
    // is kept as-is - confirm against the options type.
    const batchSize = options?.concurrency ?? this.config.batchSize;
    const cache = this.config.caching && !options?.skipCache ? this.cache : null;
    const byIndex = /* @__PURE__ */ new Map();
    const textsToEmbed = [];
    let cacheHits = 0;
    let cacheMisses = 0;
    let failures = 0;
    let totalTokens = 0;
    const startTime = performance.now();

    if (cache) {
      for (let i = 0; i < texts.length; i++) {
        const cached = await cache.get(cacheKey(texts[i], model.name));
        if (cached) {
          byIndex.set(i, { ...cached, cached: true });
          cacheHits++;
        } else {
          textsToEmbed.push({ index: i, text: texts[i] });
          cacheMisses++;
        }
      }
    } else {
      textsToEmbed.push(...texts.map((text, index) => ({ index, text })));
      cacheMisses = texts.length;
    }

    const batches = batch(textsToEmbed, batchSize);
    let processedCount = byIndex.size;
    for (const batchItems of batches) {
      const batchTexts = batchItems.map((item) => item.text);
      try {
        const { result: providerResult } = await measureTime(
          () => model.embedBatch(batchTexts, options)
        );
        // Guard against a provider returning more results than inputs.
        const usable = Math.min(providerResult.results.length, batchItems.length);
        for (let i = 0; i < usable; i++) {
          const item = batchItems[i];
          const embeddingResult = providerResult.results[i];
          byIndex.set(item.index, embeddingResult);
          if (cache) {
            await cache.set(cacheKey(item.text, model.name), embeddingResult);
          }
        }
        totalTokens += providerResult.totalTokens;
        processedCount += batchItems.length;
      } catch (error) {
        // Count the failure in both modes, matching what embed() does.
        this.stats.errors++;
        if (!options?.continueOnError) {
          throw error;
        }
        failures += batchItems.length;
        processedCount += batchItems.length;
      }
      this.emit("batch:progress", {
        completed: processedCount,
        total: texts.length
      });
      options?.onProgress?.({
        percent: processedCount / texts.length * 100,
        processed: processedCount,
        total: texts.length,
        elapsedMs: performance.now() - startTime
      });
    }

    // Results are returned in input order; inputs without a result are skipped.
    const results = [];
    for (let i = 0; i < texts.length; i++) {
      const result = byIndex.get(i);
      if (result) {
        results.push(result);
      }
    }
    const totalLatencyMs = performance.now() - startTime;
    const batchResult = {
      results,
      totalTokens,
      totalLatencyMs,
      cacheHits,
      cacheMisses,
      failures
    };
    this.updateStats({
      embeddings: results.length,
      tokens: totalTokens,
      // No per-item latency exists for an empty result set.
      latency: results.length > 0 ? totalLatencyMs / results.length : void 0,
      apiCalls: batches.length,
      cacheHits,
      cacheLookups: cache ? texts.length : 0,
      cost: this.estimateCost(model, totalTokens)
    });
    this.emit("batch:complete", batchResult);
    return { batchResult, byIndex };
  }

  /**
   * Embed a document with chunking
   *
   * Chunks whose embedding is missing (for example because their batch
   * failed under `continueOnError`) are omitted; every returned chunk carries
   * the vector produced for its own text.
   * @param {string} text - Document text.
   * @param {object} [options] - Passed to the chunker and the batch embedder;
   *   `documentId`, `source`, `type` and `chunkMetadata` are copied into the
   *   chunk metadata.
   * @returns {Promise<object[]>} Embedded chunks (also upserted into the
   *   store when one is configured).
   * @throws {Error} When no chunker is configured.
   */
  async embedDocument(text, options) {
    if (!this.chunker) {
      throw new Error("No chunker configured. Use setChunker() first.");
    }
    const chunks = await this.chunker.chunk(text, options);
    const chunkTexts = chunks.map((c) => c.text);
    // Use the index-keyed view: the compact `results` array shifts left when
    // items are skipped and would pair chunks with the wrong vectors.
    const { byIndex } = await this.#embedBatchIndexed(chunkTexts, options);
    const embeddedChunks = [];
    // Offsets are the running sum of chunk text lengths.
    let position = 0;
    for (let i = 0; i < chunks.length; i++) {
      const chunk2 = chunks[i];
      const result = byIndex.get(i);
      if (result) {
        embeddedChunks.push({
          id: generateId("chunk"),
          text: chunk2.text,
          vector: result.vector,
          index: i,
          startPosition: position,
          endPosition: position + chunk2.text.length,
          tokenCount: result.tokenCount,
          metadata: {
            documentId: options?.documentId,
            source: options?.source,
            type: options?.type,
            ...chunk2.metadata,
            ...options?.chunkMetadata
          }
        });
      }
      position += chunk2.text.length;
    }
    if (this.store) {
      await this.store.upsert(embeddedChunks, options?.documentId);
    }
    return embeddedChunks;
  }

  /**
   * Search for similar content
   * @param {string} query - Query text; embedded with the default options.
   * @param {object} [options] - Passed through to the store query.
   * @throws {Error} When no store is configured.
   */
  async search(query, options) {
    if (!this.store) {
      throw new Error("No store configured. Use setStore() first.");
    }
    const queryResult = await this.embed(query);
    return this.store.query(queryResult.vector, options);
  }

  /**
   * Calculate similarity between two texts
   * @returns {Promise<number>} Cosine similarity of the two embeddings.
   */
  async similarity(text1, text2) {
    const [result1, result2] = await Promise.all([
      this.embed(text1),
      this.embed(text2)
    ]);
    return EmbeddingModel.cosineSimilarity(result1.vector, result2.vector);
  }

  /**
   * Get embedding statistics
   * @returns {object} A shallow copy of the current stats.
   */
  getStats() {
    return { ...this.stats };
  }

  /**
   * Reset statistics
   */
  resetStats() {
    this.stats = this.createInitialStats();
    this.#cacheLookups = 0;
    this.#cacheHits = 0;
  }

  /**
   * Get registered models
   * @returns {object[]} provider / name / dimensions of each listed model.
   */
  getModels() {
    return this.modelRegistry.list().map((m) => ({
      provider: m.provider,
      name: m.name,
      dimensions: m.dimensions
    }));
  }

  /**
   * Update statistics
   *
   * @param {object} update
   * @param {number} [update.embeddings] - Number of embeddings to add.
   * @param {number} [update.tokens] - Tokens to add.
   * @param {number} [update.latency] - Average latency (ms) of the
   *   embeddings being added; weighted by `update.embeddings`.
   * @param {number} [update.apiCalls] - Provider calls to add.
   * @param {number} [update.cacheHits] - Cache hits to add.
   * @param {number} [update.cacheLookups] - Cache lookups to add; defaults
   *   to `cacheHits` when omitted.
   * @param {number} [update.cost] - Estimated cost (USD) to add.
   */
  updateStats(update) {
    const added = update.embeddings ?? 0;
    if (added > 0) {
      this.stats.totalEmbeddings += added;
    }
    if (update.tokens) {
      this.stats.totalTokens += update.tokens;
    }
    if (added > 0 && Number.isFinite(update.latency)) {
      // Weighted running mean: the previous average covers (n - added) items.
      const n = this.stats.totalEmbeddings;
      this.stats.avgLatencyMs = (this.stats.avgLatencyMs * (n - added) + update.latency * added) / n;
    }
    if (update.apiCalls) {
      this.stats.apiCalls += update.apiCalls;
    }
    const lookups = update.cacheLookups ?? update.cacheHits ?? 0;
    if (lookups > 0) {
      this.#cacheLookups += lookups;
      this.#cacheHits += update.cacheHits ?? 0;
      this.stats.cacheHitRate = this.#cacheHits / this.#cacheLookups;
    }
    if (update.cost) {
      this.stats.estimatedCostUSD += update.cost;
    }
  }

  /**
   * Estimate cost for embedding tokens
   * @returns {number} tokens / 1000 * model.info.costPer1K (0 when the model
   *   exposes no price).
   */
  estimateCost(model, tokens) {
    const costPer1K = model.info?.costPer1K ?? 0;
    return tokens / 1e3 * costPer1K;
  }
};
430
/**
 * Factory for EmbeddingManager.
 * @param {object} [config] - Passed unchanged to the EmbeddingManager
 *   constructor.
 * @returns {EmbeddingManager} A new manager instance.
 */
function createEmbeddingManager(config) {
  const manager = new EmbeddingManager(config);
  return manager;
}
433
+
434
+ // src/versioning/VersionRegistry.ts
435
+ import { nanoid } from "nanoid";
436
+ import EventEmitter2 from "eventemitter3";
437
/**
 * In-memory registry of embedding versions. Tracks which version is active,
 * per-version usage counters, and emits events on every change.
 *
 * Events emitted by this class: "version:created", "version:activated",
 * "version:deprecated", "version:deleted" and "change".
 */
var VersionRegistry = class extends EventEmitter2 {
  versions = /* @__PURE__ */ new Map();
  activeVersion = null;
  options;

  /**
   * @param {object} [options] - autoRegister (default true), trackUsage
   *   (default true), maxVersions (default 100); other keys are kept as-is.
   */
  constructor(options = {}) {
    super();
    // Spread first so that an explicit `undefined` in the caller's options
    // cannot overwrite the resolved defaults below.
    this.options = {
      ...options,
      autoRegister: options.autoRegister ?? true,
      trackUsage: options.trackUsage ?? true,
      maxVersions: options.maxVersions ?? 100
    };
  }

  /**
   * Register a new version
   *
   * The first version registered is activated automatically. When the
   * registry grows past `maxVersions`, older inactive versions are pruned;
   * the version being registered is never pruned by its own registration.
   * @param {object} version - Version fields; id, createdAt, active and
   *   deprecated are assigned here and override any supplied values.
   * @returns {object} The stored version record.
   */
  register(version) {
    const newVersion = {
      ...version,
      id: nanoid(),
      createdAt: Date.now(),
      active: false,
      deprecated: false
    };
    this.versions.set(newVersion.id, {
      version: newVersion,
      documentCount: 0,
      chunkCount: 0,
      firstUsed: 0,
      lastUsed: 0
    });
    if (this.versions.size === 1) {
      this.activate(newVersion.id);
    }
    if (this.versions.size > (this.options.maxVersions ?? 100)) {
      this.pruneOldVersions(newVersion.id);
    }
    this.emit("version:created", newVersion);
    this.emitChange("created", newVersion.id);
    return newVersion;
  }

  /**
   * Get a version by ID
   * @returns {object|undefined} The version, or undefined when unknown.
   */
  get(id) {
    return this.versions.get(id)?.version;
  }

  /**
   * Get the active version
   * @returns {object|undefined} The active version, if any.
   */
  getActive() {
    if (!this.activeVersion) return void 0;
    return this.versions.get(this.activeVersion)?.version;
  }

  /**
   * Activate a version
   * @throws {Error} When the id is not registered.
   */
  activate(id) {
    const entry = this.versions.get(id);
    if (!entry) {
      throw new Error(`Version ${id} not found`);
    }
    const previousVersion = this.activeVersion ? this.versions.get(this.activeVersion)?.version : void 0;
    if (previousVersion && this.activeVersion !== id) {
      previousVersion.active = false;
    }
    entry.version.active = true;
    this.activeVersion = id;
    this.emit("version:activated", entry.version, previousVersion);
    this.emitChange("activated", id, previousVersion?.id);
  }

  /**
   * Deprecate a version
   * @param {string} id - Version to mark deprecated.
   * @param {string} [reason] - Stored as deprecationReason.
   * @param {string} [replacement] - Stored as replacement.
   * @throws {Error} When the id is not registered.
   */
  deprecate(id, reason, replacement) {
    const entry = this.versions.get(id);
    if (!entry) {
      throw new Error(`Version ${id} not found`);
    }
    entry.version.deprecated = true;
    entry.version.deprecationReason = reason;
    entry.version.replacement = replacement;
    this.emit("version:deprecated", entry.version, reason);
    this.emitChange("deprecated", id);
  }

  /**
   * Delete a version
   * @returns {boolean} True when a version was removed.
   * @throws {Error} When asked to delete the active version.
   */
  delete(id) {
    if (this.activeVersion === id) {
      throw new Error("Cannot delete active version");
    }
    const deleted = this.versions.delete(id);
    if (deleted) {
      this.emit("version:deleted", id);
      this.emitChange("deleted", id);
    }
    return deleted;
  }

  /**
   * List all versions
   */
  list() {
    return Array.from(this.versions.values(), (e) => e.version);
  }

  /**
   * Find versions by provider
   */
  findByProvider(provider) {
    return this.list().filter((v) => v.provider === provider);
  }

  /**
   * Find versions by model
   */
  findByModel(model) {
    return this.list().filter((v) => v.model === model);
  }

  /**
   * Compare two versions
   *
   * Complexity is "high" when dimensions differ, "medium" when only the
   * provider differs, otherwise "low".
   * @throws {Error} When either id is not registered.
   */
  compare(sourceId, targetId) {
    const source = this.get(sourceId);
    const target = this.get(targetId);
    if (!source || !target) {
      throw new Error("One or both versions not found");
    }
    const dimensionChange = target.dimensions - source.dimensions;
    const providerChanged = source.provider !== target.provider;
    const modelChanged = source.model !== target.model;
    const migrationRequired = dimensionChange !== 0 || providerChanged || modelChanged;
    let migrationComplexity = "low";
    if (dimensionChange !== 0) {
      migrationComplexity = "high";
    } else if (providerChanged) {
      migrationComplexity = "medium";
    }
    const notes = [];
    if (dimensionChange > 0) {
      notes.push(`Dimensions increase by ${dimensionChange}`);
    } else if (dimensionChange < 0) {
      notes.push(`Dimensions decrease by ${Math.abs(dimensionChange)}`);
    }
    if (providerChanged) {
      notes.push(
        `Provider changes from ${source.provider} to ${target.provider}`
      );
    }
    if (modelChanged) {
      notes.push(`Model changes from ${source.model} to ${target.model}`);
    }
    return {
      source,
      target,
      compatible: dimensionChange === 0,
      dimensionChange,
      providerChanged,
      migrationRequired,
      migrationComplexity,
      notes
    };
  }

  /**
   * Get upgrade path between versions
   * @throws {Error} When either id is not registered (via compare()).
   */
  getUpgradePath(fromId, toId) {
    const comparison = this.compare(fromId, toId);
    return {
      from: fromId,
      to: toId,
      steps: comparison.migrationRequired ? ["backup", "re-embed", "verify", "switch"] : ["switch"],
      direct: !comparison.migrationRequired,
      complexity: comparison.migrationComplexity,
      breakingChanges: comparison.dimensionChange !== 0 ? [`Dimension change: ${comparison.dimensionChange}`] : []
    };
  }

  /**
   * Track usage of a version
   *
   * No-op when usage tracking is disabled or the id is unknown.
   */
  trackUsage(id, documents = 0, chunks = 0) {
    if (!this.options.trackUsage) return;
    const entry = this.versions.get(id);
    if (!entry) return;
    const now = Date.now();
    // 0 is the "never used" sentinel set by register().
    if (entry.firstUsed === 0) {
      entry.firstUsed = now;
    }
    entry.lastUsed = now;
    entry.documentCount += documents;
    entry.chunkCount += chunks;
  }

  /**
   * Get usage stats for a version
   * @returns {object|undefined} The registry entry (version plus counters).
   */
  getUsageStats(id) {
    return this.versions.get(id);
  }

  /**
   * Prune old inactive versions
   *
   * Removes the least recently used versions until the registry is back at
   * `maxVersions`. The active version and `protectedId` are never removed,
   * so the registry may stay above the limit when nothing else is eligible.
   * @param {string} [protectedId] - Id that must survive this prune (the
   *   version currently being registered).
   */
  pruneOldVersions(protectedId) {
    const excess = this.versions.size - (this.options.maxVersions ?? 100);
    if (excess <= 0) return;
    const candidates = Array.from(this.versions.entries())
      .filter(([id]) => id !== this.activeVersion && id !== protectedId)
      .sort((a, b) => a[1].lastUsed - b[1].lastUsed);
    for (const [id] of candidates.slice(0, excess)) {
      this.versions.delete(id);
      // Same notifications as delete(), so listeners see pruned versions too.
      this.emit("version:deleted", id);
      this.emitChange("deleted", id);
    }
  }

  /**
   * Emit change event
   */
  emitChange(type, versionId, previousVersion) {
    this.emit("change", {
      type,
      versionId,
      previousVersion,
      timestamp: Date.now()
    });
  }

  /**
   * Export registry state
   * @returns {object} `{ versions, activeVersion }`; entries are the live
   *   registry entries, not copies.
   */
  export() {
    return {
      versions: Array.from(this.versions.values()),
      activeVersion: this.activeVersion
    };
  }

  /**
   * Import registry state
   *
   * Replaces all current entries. An `activeVersion` that does not match an
   * imported entry is reset to null.
   */
  import(data) {
    this.versions.clear();
    for (const entry of data.versions) {
      this.versions.set(entry.version.id, entry);
    }
    const active = data.activeVersion;
    this.activeVersion = active != null && this.versions.has(active) ? active : null;
  }
};
681
/**
 * Factory for VersionRegistry.
 * @param {object} [options] - Passed unchanged to the VersionRegistry
 *   constructor.
 * @returns {VersionRegistry} A new registry instance.
 */
function createVersionRegistry(options) {
  const registry = new VersionRegistry(options);
  return registry;
}
684
+
685
+ // src/quality/DriftDetector.ts
686
+ import EventEmitter3 from "eventemitter3";
687
+ import { nanoid as nanoid2 } from "nanoid";
688
/**
 * Detects drift between a reference set of embeddings and newer samples by
 * comparing per-dimension means and variances.
 *
 * Events emitted by this class: "baseline:updated", "drift:detected" and
 * "drift:alert".
 */
var DriftDetector = class extends EventEmitter3 {
  reference = null;
  config;
  monitorInterval;
  sampleBuffer = [];

  /**
   * @param {object} [config] - checkInterval (ms, default 1 hour),
   *   sampleSize (default 1000), driftThreshold (default 0.1), alertSeverity
   *   (default "medium"), autoUpdateBaseline (default false),
   *   baselineUpdateInterval (ms, default 24 hours); other keys such as
   *   `onAlert` are kept as-is.
   */
  constructor(config = {}) {
    super();
    // Spread first so that an explicit `undefined` in the caller's config
    // cannot overwrite the resolved defaults (an undefined checkInterval
    // would otherwise be handed straight to setInterval).
    this.config = {
      ...config,
      checkInterval: config.checkInterval ?? 36e5,
      // 1 hour
      sampleSize: config.sampleSize ?? 1e3,
      driftThreshold: config.driftThreshold ?? 0.1,
      alertSeverity: config.alertSeverity ?? "medium",
      autoUpdateBaseline: config.autoUpdateBaseline ?? false,
      baselineUpdateInterval: config.baselineUpdateInterval ?? 864e5
      // 24 hours
    };
  }

  /**
   * Set reference distribution from embeddings
   * @param {number[][]} embeddings - Non-empty list of vectors; the
   *   dimensionality is taken from the first one.
   * @param {*} model - Stored on the reference as `model`.
   * @param {*} version - Stored on the reference as `version`.
   * @returns {object} The new reference distribution.
   * @throws {Error} When `embeddings` is empty.
   */
  setReference(embeddings, model, version) {
    if (embeddings.length === 0) {
      throw new Error("Cannot create reference from empty embeddings");
    }
    const dimensions = embeddings[0].length;
    const meanVector = EmbeddingModel.average(embeddings);
    const varianceVector = [];
    for (let d = 0; d < dimensions; d++) {
      varianceVector.push(variance(embeddings.map((e) => e[d])));
    }
    this.reference = {
      id: nanoid2(),
      model,
      version,
      sampleCount: embeddings.length,
      mean: meanVector,
      variance: varianceVector,
      createdAt: Date.now()
    };
    this.emit("baseline:updated", this.reference);
    return this.reference;
  }

  /**
   * Get current reference distribution
   * @returns {object|null} The reference, or null when none is set.
   */
  getReference() {
    return this.reference;
  }

  /**
   * Detect drift from reference distribution
   *
   * Emits "drift:detected" when the score reaches the threshold and raises
   * an alert when the severity reaches the configured alert level.
   * @param {number[][]} currentEmbeddings - Non-empty list of vectors.
   * @returns {object} Drift result (score, severity, comparison details,
   *   recommendations).
   * @throws {Error} When no reference is set or the input is empty.
   */
  detect(currentEmbeddings) {
    if (!this.reference) {
      throw new Error("No reference distribution set");
    }
    if (currentEmbeddings.length === 0) {
      throw new Error("Cannot detect drift from empty embeddings");
    }
    const reference = this.reference;
    const dimensions = reference.mean.length;
    const currentMean = EmbeddingModel.average(currentEmbeddings);
    const currentVariance = [];
    for (let d = 0; d < dimensions; d++) {
      currentVariance.push(variance(currentEmbeddings.map((e) => e[d])));
    }
    const comparison = this.compareDistributions(
      reference.mean,
      reference.variance,
      currentMean,
      currentVariance
    );
    const driftScore = this.calculateDriftScore(comparison);
    const severity = this.determineSeverity(driftScore);
    const affectedDimensions = comparison.dimensionStats?.filter((s) => s.significantChange).length ?? 0;
    // One timestamp for the whole result so its fields cannot disagree.
    const now = Date.now();
    const result = {
      driftDetected: driftScore >= (this.config.driftThreshold ?? 0.1),
      severity,
      driftScore,
      affectedDimensionsPercent: affectedDimensions / dimensions * 100,
      // NOTE(review): this is the cosine *similarity* of the two mean
      // vectors (1 means no shift), despite the field name - confirm intent.
      meanShift: comparison.meanCosineSimilarity,
      varianceChange: mean(
        currentVariance.map(
          (v, i) => Math.abs(v - reference.variance[i]) / (reference.variance[i] || 1)
        )
      ),
      distributionComparison: comparison,
      detectedAt: now,
      referenceTimestamp: reference.createdAt,
      currentTimestamp: now,
      recommendations: this.generateRecommendations(driftScore, severity)
    };
    if (result.driftDetected) {
      this.emit("drift:detected", result);
      if (this.shouldAlert(severity)) {
        this.emitAlert(result);
      }
    }
    return result;
  }

  /**
   * Compare two distributions
   *
   * Each dimension is summarised by its mean and variance. Zero variances
   * are floored at 1e-4 where they are used as a divisor or under a log.
   * `jsDivergence` is simply half of `klDivergence`.
   * @returns {object} klDivergence, jsDivergence, wassersteinDistance,
   *   meanCosineSimilarity and per-dimension stats.
   */
  compareDistributions(refMean, refVariance, curMean, curVariance) {
    const dimensions = refMean.length;
    const meanCosineSimilarity = EmbeddingModel.cosineSimilarity(
      refMean,
      curMean
    );
    let klSum = 0;
    let wassersteinSum = 0;
    const dimensionStats = [];
    for (let d = 0; d < dimensions; d++) {
      const refVar = refVariance[d] || 1e-4;
      const curVar = curVariance[d] || 1e-4;
      const meanChange = curMean[d] - refMean[d];
      klSum += Math.log(Math.sqrt(curVar / refVar)) + (refVar + meanChange * meanChange) / (2 * curVar) - 0.5;
      wassersteinSum += Math.abs(meanChange) + Math.abs(
        Math.sqrt(curVariance[d]) - Math.sqrt(refVariance[d])
      );
      dimensionStats.push({
        dimension: d,
        referenceMean: refMean[d],
        currentMean: curMean[d],
        meanChange,
        referenceVariance: refVariance[d],
        currentVariance: curVariance[d],
        varianceChange: curVariance[d] - refVariance[d],
        // Flag a mean that moved by more than two reference std deviations.
        significantChange: Math.abs(meanChange) > 2 * Math.sqrt(refVar)
      });
    }
    const klDivergence = Math.max(0, klSum / dimensions);
    return {
      klDivergence,
      jsDivergence: klDivergence / 2,
      wassersteinDistance: wassersteinSum / dimensions,
      meanCosineSimilarity,
      dimensionStats
    };
  }

  /**
   * Calculate overall drift score
   * @returns {number} 0.4 * cosine distance + 0.3 * min(1, KL / 10)
   *   + 0.3 * min(1, Wasserstein distance).
   */
  calculateDriftScore(comparison) {
    const cosineDistance = 1 - comparison.meanCosineSimilarity;
    const klScore = Math.min(1, comparison.klDivergence / 10);
    const wasserstein = Math.min(1, comparison.wassersteinDistance);
    return cosineDistance * 0.4 + klScore * 0.3 + wasserstein * 0.3;
  }

  /**
   * Determine severity based on drift score
   * @returns {"none"|"low"|"medium"|"high"|"critical"}
   */
  determineSeverity(driftScore) {
    if (driftScore < 0.05) return "none";
    if (driftScore < 0.1) return "low";
    if (driftScore < 0.2) return "medium";
    if (driftScore < 0.4) return "high";
    return "critical";
  }

  /**
   * Generate recommendations
   * @returns {string[]} Advice lines for the given severity.
   */
  generateRecommendations(driftScore, severity) {
    const recommendations = [];
    if (severity === "none" || severity === "low") {
      recommendations.push("Continue monitoring");
    }
    if (severity === "medium") {
      recommendations.push("Consider updating the baseline distribution");
      recommendations.push("Review recent changes to input data");
    }
    if (severity === "high") {
      recommendations.push("Re-embed affected documents");
      recommendations.push("Update baseline distribution immediately");
      recommendations.push("Investigate root cause of drift");
    }
    if (severity === "critical") {
      recommendations.push("URGENT: Stop accepting new embeddings");
      recommendations.push("Full re-embedding required");
      recommendations.push("Review embedding model for issues");
    }
    return recommendations;
  }

  /**
   * Check if should alert
   * @returns {boolean} True when `severity` is at or above the configured
   *   alert level.
   */
  shouldAlert(severity) {
    const severityOrder = ["none", "low", "medium", "high", "critical"];
    const alertLevel = this.config.alertSeverity ?? "medium";
    return severityOrder.indexOf(severity) >= severityOrder.indexOf(alertLevel);
  }

  /**
   * Emit quality alert
   *
   * Emits "drift:alert" with an alert record, then calls `config.onAlert`
   * (when provided) with the drift result.
   */
  emitAlert(result) {
    const alert = {
      id: nanoid2(),
      type: "drift_detected",
      severity: result.severity,
      message: `Embedding drift detected with score ${result.driftScore.toFixed(3)}`,
      currentValue: result.driftScore,
      thresholdValue: this.config.driftThreshold,
      createdAt: Date.now(),
      acknowledged: false
    };
    this.emit("drift:alert", alert);
    this.config.onAlert?.(result);
  }

  /**
   * Add sample to buffer for monitoring
   *
   * Once the buffer holds `sampleSize` samples it is checked against the
   * reference (if one is set) and then cleared.
   */
  addSample(embedding) {
    this.sampleBuffer.push(embedding);
    if (this.sampleBuffer.length >= (this.config.sampleSize ?? 1e3)) {
      if (this.reference) {
        this.detect(this.sampleBuffer);
      }
      this.sampleBuffer = [];
    }
  }

  /**
   * Start continuous monitoring
   *
   * Every `checkInterval` ms, runs detection on the buffered samples when a
   * reference is set and at least 100 samples are buffered. Calling this
   * while already monitoring does nothing.
   */
  startMonitoring() {
    if (this.monitorInterval) return;
    this.monitorInterval = setInterval(() => {
      if (this.reference && this.sampleBuffer.length >= 100) {
        this.detect(this.sampleBuffer);
        this.sampleBuffer = [];
      }
    }, this.config.checkInterval);
  }

  /**
   * Stop monitoring
   */
  stopMonitoring() {
    if (this.monitorInterval) {
      clearInterval(this.monitorInterval);
      this.monitorInterval = void 0;
    }
  }
};
943
/**
 * Factory for DriftDetector.
 * @param {object} [config] - Passed unchanged to the DriftDetector
 *   constructor.
 * @returns {DriftDetector} A new detector instance.
 */
function createDriftDetector(config) {
  const detector = new DriftDetector(config);
  return detector;
}
946
+ export {
947
+ BaseCache,
948
+ BaseChunker,
949
+ BaseProvider,
950
+ BaseStore,
951
+ ChromaStore,
952
+ CodeChunker,
953
+ CohereProvider,
954
+ DriftDetector,
955
+ EmbeddingManager,
956
+ EmbeddingModel,
957
+ FixedChunker,
958
+ HuggingFaceProvider,
959
+ LocalProvider,
960
+ MarkdownChunker,
961
+ MemoryCache,
962
+ MemoryStore,
963
+ ModelRegistry,
964
+ OpenAIProvider,
965
+ PineconeStore,
966
+ QdrantStore,
967
+ RecursiveChunker,
968
+ RedisCache,
969
+ SQLiteCache,
970
+ SemanticChunker,
971
+ TieredCache,
972
+ VersionRegistry,
973
+ VoyageProvider,
974
+ batch,
975
+ cacheKey,
976
+ chunk,
977
+ clamp,
978
+ contentHash,
979
+ createCache,
980
+ createChromaStore,
981
+ createChunker,
982
+ createCodeChunker,
983
+ createCohereProvider,
984
+ createDriftDetector,
985
+ createEmbeddingManager,
986
+ createEventEmitter,
987
+ createFixedChunker,
988
+ createHuggingFaceProvider,
989
+ createLocalProvider,
990
+ createMarkdownChunker,
991
+ createMemoryCache,
992
+ createMemoryStore,
993
+ createMockProvider,
994
+ createOpenAIProvider,
995
+ createPineconeStore,
996
+ createQdrantStore,
997
+ createRandomProvider,
998
+ createRecursiveChunker,
999
+ createRedisCache,
1000
+ createSQLiteCache,
1001
+ createSemanticChunker,
1002
+ createStandardTieredCache,
1003
+ createStore,
1004
+ createTieredCache,
1005
+ createVersionRegistry,
1006
+ createVoyageProvider,
1007
+ deepClone,
1008
+ defaultTokenCounter,
1009
+ deferred,
1010
+ estimateTokens,
1011
+ formatBytes,
1012
+ formatDuration,
1013
+ generateId,
1014
+ mean,
1015
+ measureTime,
1016
+ mergeSmallChunks,
1017
+ modelRegistry,
1018
+ normalize,
1019
+ percentile,
1020
+ retry,
1021
+ sleep,
1022
+ splitByChars,
1023
+ splitBySeparator,
1024
+ splitLargeChunks,
1025
+ stdDev,
1026
+ variance,
1027
+ withConcurrency
1028
+ };