@snap-agent/rag-ecommerce 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs ADDED
@@ -0,0 +1,999 @@
1
+ // src/EcommerceRAGPlugin.ts
2
+ import { MongoClient } from "mongodb";
3
+ import OpenAI from "openai";
4
+ var EcommerceRAGPlugin = class {
5
+ constructor(config) {
6
+ this.name = "ecommerce-rag";
7
+ this.type = "rag";
8
+ this.db = null;
9
+ // Caching layers
10
+ this.embeddingCache = /* @__PURE__ */ new Map();
11
+ this.attributeCache = /* @__PURE__ */ new Map();
12
+ // Cache statistics
13
+ this.cacheStats = {
14
+ embeddings: { hits: 0, misses: 0 },
15
+ attributes: { hits: 0, misses: 0 }
16
+ };
17
+ this.config = {
18
+ dbName: "agentStudio",
19
+ collection: "products",
20
+ embeddingModel: "voyage-multilingual-2",
21
+ attributeList: [
22
+ "category",
23
+ "color",
24
+ "gender",
25
+ "brand",
26
+ "material",
27
+ "size",
28
+ "season",
29
+ "priceMin",
30
+ "priceMax"
31
+ ],
32
+ enableAttributeExtraction: true,
33
+ numCandidates: 200,
34
+ limit: 50,
35
+ vectorIndexName: "product_vector_index",
36
+ rescoringWeights: {
37
+ color: 0.15,
38
+ size: 0.1,
39
+ material: 0.1,
40
+ category: 0.12,
41
+ brand: 0.08,
42
+ popularity: 0.05,
43
+ ctr: 0.1,
44
+ sales: 0.1
45
+ },
46
+ enableReranking: false,
47
+ rerankTopK: 10,
48
+ contextProductCount: 8,
49
+ language: "es",
50
+ includeOutOfStock: false,
51
+ priority: 10,
52
+ ...config,
53
+ cache: {
54
+ embeddings: {
55
+ enabled: config.cache?.embeddings?.enabled ?? true,
56
+ ttl: config.cache?.embeddings?.ttl ?? 36e5,
57
+ // 1 hour
58
+ maxSize: config.cache?.embeddings?.maxSize ?? 1e3
59
+ },
60
+ attributes: {
61
+ enabled: config.cache?.attributes?.enabled ?? true,
62
+ ttl: config.cache?.attributes?.ttl ?? 18e5,
63
+ // 30 minutes
64
+ maxSize: config.cache?.attributes?.maxSize ?? 500
65
+ }
66
+ }
67
+ };
68
+ this.priority = this.config.priority;
69
+ this.client = new MongoClient(this.config.mongoUri);
70
+ this.openai = new OpenAI({ apiKey: this.config.openaiApiKey });
71
+ this.startCacheCleanup();
72
+ }
73
+ async ensureConnection() {
74
+ if (!this.db) {
75
+ await this.client.connect();
76
+ this.db = this.client.db(this.config.dbName);
77
+ }
78
+ return this.db;
79
+ }
80
+ /**
81
+ * Main retrieval method - called by the SDK
82
+ */
83
+ async retrieveContext(message, options) {
84
+ const queryVector = await this.embedText(message);
85
+ let attributes = {};
86
+ if (this.config.enableAttributeExtraction) {
87
+ attributes = await this.extractAttributes(message);
88
+ }
89
+ const searchResults = await this.vectorSearch({
90
+ queryVector,
91
+ agentId: options.agentId,
92
+ hardFilters: options.filters || {}
93
+ });
94
+ const rescored = this.softRescore(searchResults, attributes);
95
+ let final = rescored;
96
+ if (this.config.enableReranking) {
97
+ final = await this.rerank(message, rescored);
98
+ }
99
+ if (!this.config.includeOutOfStock) {
100
+ final = final.filter((p) => p.inStock !== false);
101
+ }
102
+ return {
103
+ content: this.buildContextString(final),
104
+ sources: final.slice(0, this.config.contextProductCount).map((p) => ({
105
+ id: p.sku,
106
+ title: p.title,
107
+ score: p.vectorSearchScore,
108
+ type: "product",
109
+ attributes: p.attributes,
110
+ inStock: p.inStock
111
+ })),
112
+ metadata: {
113
+ productCount: final.length,
114
+ extractedAttributes: attributes,
115
+ topProducts: final.slice(0, 3).map((p) => ({
116
+ sku: p.sku,
117
+ title: p.title,
118
+ score: p.vectorSearchScore
119
+ }))
120
+ }
121
+ };
122
+ }
123
+ /**
124
+ * Format context for LLM
125
+ */
126
+ formatContext(context) {
127
+ return context.content;
128
+ }
129
+ // ============================================================================
130
+ // Private Methods
131
+ // ============================================================================
132
+ /**
133
+ * Embed text using Voyage with caching
134
+ */
135
+ async embedText(text) {
136
+ if (this.config.cache?.embeddings?.enabled) {
137
+ const cacheKey = `${this.config.embeddingModel}:${text}`;
138
+ const cached = this.embeddingCache.get(cacheKey);
139
+ if (cached) {
140
+ const age = Date.now() - cached.timestamp;
141
+ if (age < (this.config.cache?.embeddings?.ttl ?? 36e5)) {
142
+ this.cacheStats.embeddings.hits++;
143
+ return cached.value;
144
+ } else {
145
+ this.embeddingCache.delete(cacheKey);
146
+ }
147
+ }
148
+ this.cacheStats.embeddings.misses++;
149
+ }
150
+ const response = await fetch("https://api.voyageai.com/v1/embeddings", {
151
+ method: "POST",
152
+ headers: {
153
+ "Content-Type": "application/json",
154
+ "Authorization": `Bearer ${this.config.voyageApiKey}`
155
+ },
156
+ body: JSON.stringify({
157
+ input: text,
158
+ model: this.config.embeddingModel
159
+ })
160
+ });
161
+ if (!response.ok) {
162
+ throw new Error(`Voyage API error: ${response.statusText}`);
163
+ }
164
+ const data = await response.json();
165
+ const embedding = data.data[0].embedding;
166
+ if (this.config.cache?.embeddings?.enabled) {
167
+ const cacheKey = `${this.config.embeddingModel}:${text}`;
168
+ const maxSize = this.config.cache?.embeddings?.maxSize ?? 1e3;
169
+ if (this.embeddingCache.size >= maxSize) {
170
+ const firstKey = this.embeddingCache.keys().next().value;
171
+ if (firstKey) {
172
+ this.embeddingCache.delete(firstKey);
173
+ }
174
+ }
175
+ this.embeddingCache.set(cacheKey, {
176
+ value: embedding,
177
+ timestamp: Date.now()
178
+ });
179
+ }
180
+ return embedding;
181
+ }
182
+ /**
183
+ * Extract attributes from user message using OpenAI with caching
184
+ */
185
+ async extractAttributes(message) {
186
+ if (this.config.cache?.attributes?.enabled) {
187
+ const cacheKey = message.toLowerCase().trim();
188
+ const cached = this.attributeCache.get(cacheKey);
189
+ if (cached) {
190
+ const age = Date.now() - cached.timestamp;
191
+ if (age < (this.config.cache?.attributes?.ttl ?? 18e5)) {
192
+ this.cacheStats.attributes.hits++;
193
+ return cached.value;
194
+ } else {
195
+ this.attributeCache.delete(cacheKey);
196
+ }
197
+ }
198
+ this.cacheStats.attributes.misses++;
199
+ }
200
+ let attrs = {};
201
+ try {
202
+ const completion = await this.openai.chat.completions.create({
203
+ model: "gpt-4o-mini",
204
+ messages: [
205
+ {
206
+ role: "system",
207
+ content: `Extract product attributes from the user message. Return a JSON object with only the attributes you can identify from this list: ${this.config.attributeList.join(", ")}. If an attribute is not mentioned, omit it from the response.`
208
+ },
209
+ {
210
+ role: "user",
211
+ content: message
212
+ }
213
+ ],
214
+ response_format: { type: "json_object" },
215
+ temperature: 0.1
216
+ });
217
+ const extracted = JSON.parse(completion.choices[0]?.message?.content || "{}");
218
+ if (extracted.category) attrs.category = String(extracted.category);
219
+ if (extracted.color) attrs.color = String(extracted.color);
220
+ if (extracted.gender) {
221
+ const g = String(extracted.gender).toLowerCase();
222
+ if (g.includes("hombre") || g.includes("male") || g.includes("man") || g === "m") {
223
+ attrs.gender = "M";
224
+ } else if (g.includes("mujer") || g.includes("female") || g.includes("woman") || g === "f") {
225
+ attrs.gender = "F";
226
+ } else if (g.includes("unisex")) {
227
+ attrs.gender = "Unisex";
228
+ }
229
+ }
230
+ if (extracted.brand) attrs.brand = String(extracted.brand);
231
+ if (extracted.material) attrs.material = String(extracted.material);
232
+ if (extracted.size) attrs.size = String(extracted.size);
233
+ if (extracted.season) attrs.season = String(extracted.season);
234
+ if (extracted.priceMin) attrs.priceMin = Number(extracted.priceMin);
235
+ if (extracted.priceMax) attrs.priceMax = Number(extracted.priceMax);
236
+ } catch (error) {
237
+ console.error("Attribute extraction failed:", error);
238
+ return {};
239
+ }
240
+ if (this.config.cache?.attributes?.enabled) {
241
+ const cacheKey = message.toLowerCase().trim();
242
+ const maxSize = this.config.cache?.attributes?.maxSize ?? 500;
243
+ if (this.attributeCache.size >= maxSize) {
244
+ const firstKey = this.attributeCache.keys().next().value;
245
+ if (firstKey) {
246
+ this.attributeCache.delete(firstKey);
247
+ }
248
+ }
249
+ this.attributeCache.set(cacheKey, {
250
+ value: attrs,
251
+ timestamp: Date.now()
252
+ });
253
+ }
254
+ return attrs;
255
+ }
256
+ /**
257
+ * MongoDB Atlas Vector Search
258
+ */
259
+ async vectorSearch(options) {
260
+ const db = await this.ensureConnection();
261
+ const collection = db.collection(this.config.collection);
262
+ const filter = { tenantId: this.config.tenantId };
263
+ if (options.agentId) {
264
+ filter.agentId = options.agentId;
265
+ }
266
+ Object.entries(options.hardFilters).forEach(([key, value]) => {
267
+ if (value !== void 0 && value !== null) {
268
+ filter[key] = value;
269
+ }
270
+ });
271
+ const pipeline = [
272
+ {
273
+ $vectorSearch: {
274
+ index: this.config.vectorIndexName,
275
+ path: "embedding",
276
+ queryVector: options.queryVector,
277
+ numCandidates: this.config.numCandidates,
278
+ limit: this.config.limit,
279
+ filter
280
+ }
281
+ },
282
+ {
283
+ $addFields: {
284
+ vectorSearchScore: { $meta: "vectorSearchScore" }
285
+ }
286
+ }
287
+ ];
288
+ const results = await collection.aggregate(pipeline).toArray();
289
+ return results;
290
+ }
291
+ /**
292
+ * Soft rescore based on attributes and metrics
293
+ */
294
+ softRescore(results, attrs) {
295
+ const weights = this.config.rescoringWeights;
296
+ return results.map((product) => {
297
+ let boost = product.vectorSearchScore || 0;
298
+ if (attrs.color && product.attributes.color) {
299
+ const match = product.attributes.color.toLowerCase() === attrs.color.toLowerCase();
300
+ if (match) boost += weights.color;
301
+ }
302
+ if (attrs.size && product.attributes.size) {
303
+ const match = product.attributes.size.some(
304
+ (s) => s.toLowerCase() === attrs.size?.toLowerCase()
305
+ );
306
+ if (match) boost += weights.size;
307
+ }
308
+ if (attrs.material && product.attributes.material) {
309
+ const match = product.attributes.material.toLowerCase() === attrs.material.toLowerCase();
310
+ if (match) boost += weights.material;
311
+ }
312
+ if (attrs.category && product.attributes.category) {
313
+ const match = product.attributes.category.toLowerCase() === attrs.category.toLowerCase();
314
+ if (match) boost += weights.category;
315
+ }
316
+ if (attrs.brand && product.attributes.brand) {
317
+ const match = product.attributes.brand.toLowerCase() === attrs.brand.toLowerCase();
318
+ if (match) boost += weights.brand;
319
+ }
320
+ if (attrs.priceMax && product.attributes.price) {
321
+ const withinBudget = product.attributes.price <= attrs.priceMax;
322
+ if (withinBudget) {
323
+ const proximity = 1 - product.attributes.price / attrs.priceMax;
324
+ boost += Math.max(0, proximity * 0.1);
325
+ }
326
+ }
327
+ if (product.metrics?.popularity) {
328
+ boost += Math.min(product.metrics.popularity * weights.popularity, 0.2);
329
+ }
330
+ if (product.metrics?.ctr) {
331
+ boost += Math.min(product.metrics.ctr * weights.ctr, 0.15);
332
+ }
333
+ if (product.metrics?.sales) {
334
+ const normalizedSales = Math.log10(product.metrics.sales + 1) / 10;
335
+ boost += Math.min(normalizedSales * weights.sales, 0.1);
336
+ }
337
+ return { ...product, vectorSearchScore: boost };
338
+ }).sort((a, b) => (b.vectorSearchScore || 0) - (a.vectorSearchScore || 0));
339
+ }
340
+ /**
341
+ * Optional Voyage reranking
342
+ */
343
+ async rerank(query, products) {
344
+ if (products.length === 0) return products;
345
+ try {
346
+ const docTexts = products.map(
347
+ (p) => `${p.title}. ${p.description || ""}. ${Object.entries(p.attributes).filter(([_, v]) => v).map(([k, v]) => `${k}: ${v}`).join(", ")}`
348
+ );
349
+ const response = await fetch("https://api.voyageai.com/v1/rerank", {
350
+ method: "POST",
351
+ headers: {
352
+ "Content-Type": "application/json",
353
+ "Authorization": `Bearer ${this.config.voyageApiKey}`
354
+ },
355
+ body: JSON.stringify({
356
+ query,
357
+ documents: docTexts,
358
+ model: "rerank-2",
359
+ top_k: this.config.rerankTopK
360
+ })
361
+ });
362
+ if (!response.ok) {
363
+ throw new Error(`Voyage rerank error: ${response.statusText}`);
364
+ }
365
+ const data = await response.json();
366
+ const scores = data.data.map((item) => item.relevance_score);
367
+ const reranked = products.map((doc, idx) => ({
368
+ ...doc,
369
+ vectorSearchScore: (doc.vectorSearchScore || 0) * 0.5 + (scores[idx] || 0) * 0.5
370
+ }));
371
+ return reranked.sort((a, b) => (b.vectorSearchScore || 0) - (a.vectorSearchScore || 0)).slice(0, this.config.rerankTopK);
372
+ } catch (error) {
373
+ console.error("Reranking failed:", error);
374
+ return products;
375
+ }
376
+ }
377
+ /**
378
+ * Build context string for LLM
379
+ */
380
+ buildContextString(products) {
381
+ const limited = products.slice(0, this.config.contextProductCount);
382
+ if (limited.length === 0) {
383
+ return this.config.language === "es" ? "No se encontraron productos en el cat\xE1logo." : "No products found in the catalog.";
384
+ }
385
+ const productBlocks = limited.map((product, idx) => {
386
+ const attrs = [];
387
+ if (product.attributes.category) attrs.push(`Category: ${product.attributes.category}`);
388
+ if (product.attributes.brand) attrs.push(`Brand: ${product.attributes.brand}`);
389
+ if (product.attributes.color) attrs.push(`Color: ${product.attributes.color}`);
390
+ if (product.attributes.material) attrs.push(`Material: ${product.attributes.material}`);
391
+ if (product.attributes.size?.length) {
392
+ attrs.push(`Sizes: ${product.attributes.size.join(", ")}`);
393
+ }
394
+ if (product.attributes.price !== void 0) {
395
+ attrs.push(`Price: $${product.attributes.price.toFixed(2)}`);
396
+ }
397
+ if (product.inStock !== void 0) {
398
+ attrs.push(product.inStock ? "In Stock" : "Out of Stock");
399
+ }
400
+ return `${idx + 1}. ${product.title}
401
+ SKU: ${product.sku}
402
+ ${product.description || ""}
403
+ ${attrs.join(" | ")}`;
404
+ });
405
+ const header = this.config.language === "es" ? "PRODUCTOS DISPONIBLES EN EL CAT\xC1LOGO:" : "AVAILABLE PRODUCTS IN CATALOG:";
406
+ return `${header}
407
+
408
+ ${productBlocks.join("\n\n")}`;
409
+ }
410
+ // ============================================================================
411
+ // Cache Management
412
+ // ============================================================================
413
+ /**
414
+ * Start periodic cache cleanup (remove expired entries)
415
+ */
416
+ startCacheCleanup() {
417
+ this.cleanupInterval = setInterval(() => {
418
+ this.cleanupExpiredCache();
419
+ }, 3e5);
420
+ }
421
+ /**
422
+ * Clean up expired cache entries
423
+ */
424
+ cleanupExpiredCache() {
425
+ const now = Date.now();
426
+ if (this.config.cache?.embeddings?.enabled) {
427
+ const ttl = this.config.cache?.embeddings?.ttl ?? 36e5;
428
+ for (const [key, entry] of this.embeddingCache.entries()) {
429
+ if (now - entry.timestamp >= ttl) {
430
+ this.embeddingCache.delete(key);
431
+ }
432
+ }
433
+ }
434
+ if (this.config.cache?.attributes?.enabled) {
435
+ const ttl = this.config.cache?.attributes?.ttl ?? 18e5;
436
+ for (const [key, entry] of this.attributeCache.entries()) {
437
+ if (now - entry.timestamp >= ttl) {
438
+ this.attributeCache.delete(key);
439
+ }
440
+ }
441
+ }
442
+ }
443
+ /**
444
+ * Get cache statistics
445
+ */
446
+ getCacheStats() {
447
+ return {
448
+ embeddings: {
449
+ size: this.embeddingCache.size,
450
+ maxSize: this.config.cache?.embeddings?.maxSize ?? 1e3,
451
+ hits: this.cacheStats.embeddings.hits,
452
+ misses: this.cacheStats.embeddings.misses,
453
+ hitRate: this.cacheStats.embeddings.hits + this.cacheStats.embeddings.misses > 0 ? (this.cacheStats.embeddings.hits / (this.cacheStats.embeddings.hits + this.cacheStats.embeddings.misses)).toFixed(2) : "0.00"
454
+ },
455
+ attributes: {
456
+ size: this.attributeCache.size,
457
+ maxSize: this.config.cache?.attributes?.maxSize ?? 500,
458
+ hits: this.cacheStats.attributes.hits,
459
+ misses: this.cacheStats.attributes.misses,
460
+ hitRate: this.cacheStats.attributes.hits + this.cacheStats.attributes.misses > 0 ? (this.cacheStats.attributes.hits / (this.cacheStats.attributes.hits + this.cacheStats.attributes.misses)).toFixed(2) : "0.00"
461
+ }
462
+ };
463
+ }
464
+ /**
465
+ * Clear all caches
466
+ */
467
+ clearCache() {
468
+ this.embeddingCache.clear();
469
+ this.attributeCache.clear();
470
+ this.cacheStats = {
471
+ embeddings: { hits: 0, misses: 0 },
472
+ attributes: { hits: 0, misses: 0 }
473
+ };
474
+ }
475
+ /**
476
+ * Get MongoDB collection
477
+ */
478
+ async getCollection() {
479
+ const db = await this.ensureConnection();
480
+ return db.collection(this.config.collection);
481
+ }
482
+ /**
483
+ * Generate embedding for a single text
484
+ */
485
+ async generateEmbedding(text) {
486
+ return this.embedText(text);
487
+ }
488
+ /**
489
+ * Ingest products into the RAG system
490
+ * Converts RAGDocuments to ProductDocs and indexes them with embeddings
491
+ */
492
+ async ingest(documents, options) {
493
+ const collection = await this.getCollection();
494
+ let indexed = 0;
495
+ let failed = 0;
496
+ const errors = [];
497
+ try {
498
+ const batchSize = options?.batchSize || 10;
499
+ for (let i = 0; i < documents.length; i += batchSize) {
500
+ const batch = documents.slice(i, i + batchSize);
501
+ const textsForEmbedding = batch.map((doc) => {
502
+ const metadata = doc.metadata || {};
503
+ const attributeParts = [];
504
+ const attributeFields = ["category", "brand", "color", "material", "size", "gender", "season"];
505
+ for (const field of attributeFields) {
506
+ if (metadata[field]) {
507
+ const value = Array.isArray(metadata[field]) ? metadata[field].join(", ") : metadata[field];
508
+ attributeParts.push(`${field}: ${value}`);
509
+ }
510
+ }
511
+ return attributeParts.length > 0 ? `${doc.content}. ${attributeParts.join(", ")}` : doc.content;
512
+ });
513
+ const embeddings = await this.generateEmbeddingsBatch(textsForEmbedding);
514
+ const productDocs = batch.map((doc, idx) => {
515
+ const metadata = doc.metadata || {};
516
+ return {
517
+ tenantId: this.config.tenantId,
518
+ agentId: options?.agentId,
519
+ sku: doc.id,
520
+ title: metadata.title || doc.content.substring(0, 100),
521
+ description: metadata.description || doc.content,
522
+ embedding: embeddings[idx],
523
+ attributes: {
524
+ category: metadata.category,
525
+ brand: metadata.brand,
526
+ color: metadata.color,
527
+ material: metadata.material,
528
+ size: metadata.size,
529
+ gender: metadata.gender,
530
+ season: metadata.season,
531
+ price: metadata.price,
532
+ ...metadata.attributes
533
+ },
534
+ inStock: metadata.inStock !== false,
535
+ metrics: metadata.metrics || {}
536
+ };
537
+ });
538
+ try {
539
+ if (options?.overwrite) {
540
+ const bulkOps = productDocs.map((doc) => ({
541
+ replaceOne: {
542
+ filter: {
543
+ tenantId: this.config.tenantId,
544
+ sku: doc.sku,
545
+ ...options.agentId ? { agentId: options.agentId } : {}
546
+ },
547
+ replacement: doc,
548
+ upsert: true
549
+ }
550
+ }));
551
+ const result = await collection.bulkWrite(bulkOps);
552
+ indexed += result.upsertedCount + result.modifiedCount;
553
+ } else if (options?.skipExisting) {
554
+ const existingSkus = await collection.find({
555
+ tenantId: this.config.tenantId,
556
+ sku: { $in: productDocs.map((d) => d.sku) },
557
+ ...options.agentId ? { agentId: options.agentId } : {}
558
+ }).project({ sku: 1 }).toArray();
559
+ const existingSet = new Set(existingSkus.map((d) => d.sku));
560
+ const newDocs = productDocs.filter((d) => !existingSet.has(d.sku));
561
+ if (newDocs.length > 0) {
562
+ const result = await collection.insertMany(newDocs);
563
+ indexed += result.insertedCount;
564
+ }
565
+ failed += productDocs.length - newDocs.length;
566
+ } else {
567
+ const bulkOps = productDocs.map((doc) => ({
568
+ updateOne: {
569
+ filter: {
570
+ tenantId: this.config.tenantId,
571
+ sku: doc.sku,
572
+ ...options?.agentId ? { agentId: options.agentId } : {}
573
+ },
574
+ update: { $set: doc },
575
+ upsert: true
576
+ }
577
+ }));
578
+ const result = await collection.bulkWrite(bulkOps);
579
+ indexed += result.upsertedCount + result.modifiedCount;
580
+ }
581
+ } catch (error) {
582
+ batch.forEach((doc) => {
583
+ failed++;
584
+ errors.push({
585
+ id: doc.id,
586
+ error: error.message || "Unknown error during insertion"
587
+ });
588
+ });
589
+ }
590
+ }
591
+ return {
592
+ success: failed === 0,
593
+ indexed,
594
+ failed,
595
+ errors: errors.length > 0 ? errors : void 0,
596
+ metadata: {
597
+ batchSize: options?.batchSize || 10,
598
+ totalDocuments: documents.length
599
+ }
600
+ };
601
+ } catch (error) {
602
+ return {
603
+ success: false,
604
+ indexed,
605
+ failed: documents.length - indexed,
606
+ errors: [{
607
+ id: "batch",
608
+ error: error.message || "Unknown error during ingestion"
609
+ }]
610
+ };
611
+ }
612
+ }
613
+ /**
614
+ * Update a single product
615
+ */
616
+ async update(id, document, options) {
617
+ const collection = await this.getCollection();
618
+ const update = {};
619
+ const metadata = document.metadata || {};
620
+ if (document.content) {
621
+ const attributeParts = [];
622
+ const attributeFields = ["category", "brand", "color", "material", "size", "gender", "season"];
623
+ for (const field of attributeFields) {
624
+ if (metadata[field]) {
625
+ const value = Array.isArray(metadata[field]) ? metadata[field].join(", ") : metadata[field];
626
+ attributeParts.push(`${field}: ${value}`);
627
+ }
628
+ }
629
+ const textForEmbedding = attributeParts.length > 0 ? `${document.content}. ${attributeParts.join(", ")}` : document.content;
630
+ const embedding = await this.generateEmbedding(textForEmbedding);
631
+ update.embedding = embedding;
632
+ update.description = document.content;
633
+ }
634
+ if (document.metadata) {
635
+ if (metadata.title) update.title = metadata.title;
636
+ if (metadata.inStock !== void 0) update.inStock = metadata.inStock;
637
+ const attributeUpdates = {};
638
+ const metricUpdates = {};
639
+ const attributeFields = ["category", "brand", "color", "material", "size", "gender", "season", "price"];
640
+ attributeFields.forEach((field) => {
641
+ if (metadata[field] !== void 0) {
642
+ attributeUpdates[`attributes.${field}`] = metadata[field];
643
+ }
644
+ });
645
+ if (metadata.metrics) {
646
+ Object.entries(metadata.metrics).forEach(([key, value]) => {
647
+ metricUpdates[`metrics.${key}`] = value;
648
+ });
649
+ }
650
+ Object.assign(update, attributeUpdates, metricUpdates);
651
+ if (metadata.attributes) {
652
+ Object.entries(metadata.attributes).forEach(([key, value]) => {
653
+ update[`attributes.${key}`] = value;
654
+ });
655
+ }
656
+ }
657
+ await collection.updateOne(
658
+ {
659
+ tenantId: this.config.tenantId,
660
+ sku: id,
661
+ ...options?.agentId ? { agentId: options.agentId } : {}
662
+ },
663
+ { $set: update }
664
+ );
665
+ }
666
+ /**
667
+ * Delete product(s) by SKU
668
+ */
669
+ async delete(ids, options) {
670
+ const collection = await this.getCollection();
671
+ const skuArray = Array.isArray(ids) ? ids : [ids];
672
+ const result = await collection.deleteMany({
673
+ tenantId: this.config.tenantId,
674
+ sku: { $in: skuArray },
675
+ ...options?.agentId ? { agentId: options.agentId } : {}
676
+ });
677
+ return result.deletedCount;
678
+ }
679
+ /**
680
+ * Bulk operations for efficient batch processing
681
+ */
682
+ async bulk(operations, options) {
683
+ let inserted = 0;
684
+ let updated = 0;
685
+ let deleted = 0;
686
+ let failed = 0;
687
+ const errors = [];
688
+ for (const op of operations) {
689
+ try {
690
+ switch (op.type) {
691
+ case "insert":
692
+ if (op.document) {
693
+ await this.ingest([op.document], options);
694
+ inserted++;
695
+ }
696
+ break;
697
+ case "update":
698
+ if (op.document) {
699
+ await this.update(op.id, op.document, options);
700
+ updated++;
701
+ }
702
+ break;
703
+ case "delete":
704
+ const count = await this.delete(op.id, options);
705
+ deleted += count;
706
+ break;
707
+ }
708
+ } catch (error) {
709
+ failed++;
710
+ errors.push({
711
+ id: op.id,
712
+ operation: op.type,
713
+ error: error.message || "Unknown error"
714
+ });
715
+ }
716
+ }
717
+ return {
718
+ success: failed === 0,
719
+ inserted,
720
+ updated,
721
+ deleted,
722
+ failed,
723
+ errors: errors.length > 0 ? errors : void 0
724
+ };
725
+ }
726
+ /**
727
+ * Generate embeddings for a batch of texts
728
+ */
729
+ async generateEmbeddingsBatch(texts) {
730
+ const embeddings = [];
731
+ const cacheConfig = this.config.cache?.embeddings;
732
+ for (const text of texts) {
733
+ if (cacheConfig?.enabled) {
734
+ const cached = this.embeddingCache.get(text);
735
+ const ttl = cacheConfig.ttl ?? 36e5;
736
+ if (cached && Date.now() - cached.timestamp < ttl) {
737
+ embeddings.push(cached.value);
738
+ this.cacheStats.embeddings.hits++;
739
+ continue;
740
+ }
741
+ }
742
+ this.cacheStats.embeddings.misses++;
743
+ const embedding = await this.generateEmbedding(text);
744
+ embeddings.push(embedding);
745
+ if (cacheConfig?.enabled) {
746
+ this.embeddingCache.set(text, {
747
+ value: embedding,
748
+ timestamp: Date.now()
749
+ });
750
+ }
751
+ }
752
+ return embeddings;
753
+ }
754
+ /**
755
+ * Ingest documents from URL source (CSV, JSON, XML, API)
756
+ */
757
+ async ingestFromUrl(source, options) {
758
+ try {
759
+ const controller = new AbortController();
760
+ const timeoutId = setTimeout(() => controller.abort(), source.timeout || 3e4);
761
+ const response = await fetch(source.url, {
762
+ headers: {
763
+ ...source.headers,
764
+ ...source.auth && this.buildAuthHeaders(source.auth)
765
+ },
766
+ signal: controller.signal
767
+ });
768
+ clearTimeout(timeoutId);
769
+ if (!response.ok) {
770
+ throw new Error(`HTTP error: ${response.status} ${response.statusText}`);
771
+ }
772
+ let documents;
773
+ if (source.type === "json" || source.type === "api") {
774
+ const data = await response.json();
775
+ documents = this.transformJsonToDocuments(data, source.transform);
776
+ } else if (source.type === "csv") {
777
+ const data = await response.text();
778
+ documents = await this.transformCsvToDocuments(data, source.transform);
779
+ } else if (source.type === "xml") {
780
+ const data = await response.text();
781
+ documents = await this.transformXmlToDocuments(data, source.transform);
782
+ } else {
783
+ throw new Error(`Unsupported source type: ${source.type}`);
784
+ }
785
+ documents = documents.map((doc) => ({
786
+ ...doc,
787
+ metadata: {
788
+ ...doc.metadata,
789
+ ...source.metadata,
790
+ sourceUrl: source.url,
791
+ fetchedAt: (/* @__PURE__ */ new Date()).toISOString()
792
+ }
793
+ }));
794
+ const ingestResult = await this.ingest(documents, options);
795
+ return {
796
+ ...ingestResult,
797
+ sourceUrl: source.url,
798
+ fetchedAt: /* @__PURE__ */ new Date(),
799
+ documentsFetched: documents.length
800
+ };
801
+ } catch (error) {
802
+ console.error("URL ingestion failed:", error);
803
+ return {
804
+ success: false,
805
+ indexed: 0,
806
+ failed: 0,
807
+ sourceUrl: source.url,
808
+ fetchedAt: /* @__PURE__ */ new Date(),
809
+ documentsFetched: 0,
810
+ errors: [{
811
+ id: "fetch",
812
+ error: error instanceof Error ? error.message : "Unknown error"
813
+ }]
814
+ };
815
+ }
816
+ }
817
+ /**
818
+ * Handle webhook payload for real-time updates
819
+ */
820
+ async handleWebhook(payload, source, options) {
821
+ try {
822
+ let documents = [];
823
+ if (source === "shopify") {
824
+ documents = this.parseShopifyWebhook(payload);
825
+ } else if (source === "woocommerce") {
826
+ documents = this.parseWooCommerceWebhook(payload);
827
+ } else if (source === "custom") {
828
+ documents = Array.isArray(payload) ? payload : [payload];
829
+ } else {
830
+ throw new Error(`Unsupported webhook source: ${source}`);
831
+ }
832
+ documents = documents.map((doc) => ({
833
+ ...doc,
834
+ metadata: {
835
+ ...doc.metadata,
836
+ webhookSource: source,
837
+ receivedAt: (/* @__PURE__ */ new Date()).toISOString()
838
+ }
839
+ }));
840
+ return await this.ingest(documents, { ...options, overwrite: true });
841
+ } catch (error) {
842
+ console.error("Webhook handling failed:", error);
843
+ return {
844
+ success: false,
845
+ indexed: 0,
846
+ failed: 1,
847
+ errors: [{
848
+ id: "webhook",
849
+ error: error instanceof Error ? error.message : "Unknown error"
850
+ }]
851
+ };
852
+ }
853
+ }
854
+ // ============================================================================
855
+ // Private Helper Methods for URL Ingestion
856
+ // ============================================================================
857
+ buildAuthHeaders(auth) {
858
+ if (!auth) return {};
859
+ if (auth.type === "bearer" && auth.token) {
860
+ return { Authorization: `Bearer ${auth.token}` };
861
+ } else if (auth.type === "basic" && auth.username && auth.password) {
862
+ const encoded = Buffer.from(`${auth.username}:${auth.password}`).toString("base64");
863
+ return { Authorization: `Basic ${encoded}` };
864
+ } else if (auth.type === "api-key" && auth.header && auth.key) {
865
+ return { [auth.header]: auth.key };
866
+ } else if (auth.type === "custom" && auth.headers) {
867
+ return auth.headers;
868
+ }
869
+ return {};
870
+ }
871
+ transformJsonToDocuments(data, transform) {
872
+ let items = data;
873
+ if (transform?.documentPath) {
874
+ items = this.extractByPath(data, transform.documentPath);
875
+ }
876
+ if (!Array.isArray(items)) {
877
+ items = [items];
878
+ }
879
+ return items.map((item, index) => {
880
+ const fieldMapping = transform?.fieldMapping || {};
881
+ const metadata = {};
882
+ for (const [key, path] of Object.entries(fieldMapping)) {
883
+ if (key !== "id" && key !== "content" && path && typeof path === "string") {
884
+ metadata[key] = this.extractField(item, path);
885
+ }
886
+ }
887
+ return {
888
+ id: this.extractField(item, fieldMapping.id || "id") || `doc-${index}`,
889
+ content: this.extractField(item, fieldMapping.content || "content") || JSON.stringify(item),
890
+ metadata
891
+ };
892
+ });
893
+ }
894
+ async transformCsvToDocuments(csvData, transform) {
895
+ const lines = csvData.trim().split("\n");
896
+ const headers = lines[0].split(",").map((h) => h.trim());
897
+ return lines.slice(1).map((line, index) => {
898
+ const values = line.split(",").map((v) => v.trim());
899
+ const item = headers.reduce((acc, header, i) => {
900
+ acc[header] = values[i];
901
+ return acc;
902
+ }, {});
903
+ const fieldMapping = transform?.fieldMapping || {};
904
+ const metadata = {};
905
+ for (const [key, path] of Object.entries(fieldMapping)) {
906
+ if (key !== "id" && key !== "content" && path && typeof path === "string") {
907
+ metadata[key] = this.extractField(item, path);
908
+ }
909
+ }
910
+ return {
911
+ id: this.extractField(item, fieldMapping.id || "id") || `doc-${index}`,
912
+ content: this.extractField(item, fieldMapping.content || "content") || JSON.stringify(item),
913
+ metadata
914
+ };
915
+ });
916
+ }
917
+ async transformXmlToDocuments(_xmlData, _transform) {
918
+ throw new Error("XML parsing not yet implemented. Please use JSON or CSV format.");
919
+ }
920
+ extractByPath(data, path) {
921
+ if (path.startsWith("$.")) {
922
+ const parts = path.slice(2).split(".");
923
+ let current = data;
924
+ for (const part of parts) {
925
+ if (part.endsWith("[*]")) {
926
+ const key = part.slice(0, -3);
927
+ current = current[key];
928
+ if (!Array.isArray(current)) {
929
+ throw new Error(`Path ${path} does not resolve to an array`);
930
+ }
931
+ return current;
932
+ } else {
933
+ current = current[part];
934
+ }
935
+ }
936
+ return current;
937
+ }
938
+ return data;
939
+ }
940
+ extractField(item, path) {
941
+ const parts = path.split(".");
942
+ let current = item;
943
+ for (const part of parts) {
944
+ if (part.includes("[")) {
945
+ const [key, index] = part.split("[");
946
+ const idx = parseInt(index.replace("]", ""));
947
+ current = current[key]?.[idx];
948
+ } else {
949
+ current = current[part];
950
+ }
951
+ if (current === void 0) return void 0;
952
+ }
953
+ return current;
954
+ }
955
+ parseShopifyWebhook(payload) {
956
+ return [{
957
+ id: payload.id?.toString() || payload.handle,
958
+ content: `${payload.title}
959
+ ${payload.body_html || ""}`,
960
+ metadata: {
961
+ title: payload.title,
962
+ price: payload.variants?.[0]?.price,
963
+ sku: payload.variants?.[0]?.sku,
964
+ inStock: (payload.variants?.[0]?.inventory_quantity || 0) > 0,
965
+ vendor: payload.vendor,
966
+ product_type: payload.product_type,
967
+ tags: payload.tags
968
+ }
969
+ }];
970
+ }
971
+ parseWooCommerceWebhook(payload) {
972
+ return [{
973
+ id: payload.id?.toString() || payload.sku,
974
+ content: `${payload.name}
975
+ ${payload.description || ""}`,
976
+ metadata: {
977
+ title: payload.name,
978
+ price: payload.price,
979
+ sku: payload.sku,
980
+ inStock: payload.stock_status === "instock",
981
+ categories: payload.categories?.map((c) => c.name)
982
+ }
983
+ }];
984
+ }
985
+ /**
986
+ * Cleanup resources and close connections
987
+ */
988
+ async disconnect() {
989
+ if (this.cleanupInterval) {
990
+ clearInterval(this.cleanupInterval);
991
+ this.cleanupInterval = void 0;
992
+ }
993
+ await this.client.close();
994
+ this.db = null;
995
+ }
996
+ };
997
+ export {
998
+ EcommerceRAGPlugin
999
+ };