@snap-agent/rag-ecommerce 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js ADDED
@@ -0,0 +1,1036 @@
1
+ "use strict";
2
// ---------------------------------------------------------------------------
// Bundler runtime helpers (CommonJS <-> ES module interop).
// ---------------------------------------------------------------------------
var __create = Object.create;
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __getProtoOf = Object.getPrototypeOf;
var __hasOwnProp = Object.prototype.hasOwnProperty;

/**
 * Define every entry of `all` on `target` as an enumerable getter, so the
 * exported binding is resolved lazily at access time.
 */
var __export = (target, all) => {
  for (var name in all) {
    __defProp(target, name, { get: all[name], enumerable: true });
  }
};

/**
 * Copy the own properties of `from` onto `to` as live getters, skipping
 * `except` and anything `to` already owns. Returns `to`.
 */
var __copyProps = (to, from, except, desc) => {
  const copyable = (from && typeof from === "object") || typeof from === "function";
  if (!copyable) {
    return to;
  }
  for (const key of __getOwnPropNames(from)) {
    if (key === except || __hasOwnProp.call(to, key)) {
      continue;
    }
    desc = __getOwnPropDesc(from, key);
    __defProp(to, key, {
      get: () => from[key],
      enumerable: !desc || desc.enumerable
    });
  }
  return to;
};

/**
 * Wrap a `require()` result so it can be consumed like an ES module namespace.
 */
var __toESM = (mod, isNodeMode, target) => {
  target = mod != null ? __create(__getProtoOf(mod)) : {};
  // If the importer is in node compatibility mode or this is not an ESM
  // file that has been converted to a CommonJS file using a Babel-
  // compatible transform (i.e. "__esModule" has not been set), then set
  // "default" to the CommonJS "module.exports" for node compatibility.
  const needsDefault = isNodeMode || !mod || !mod.__esModule;
  if (needsDefault) {
    __defProp(target, "default", { value: mod, enumerable: true });
  }
  return __copyProps(target, mod);
};

/**
 * Produce a CommonJS exports object (flagged with `__esModule`) from an
 * ES-module-style namespace object.
 */
var __toCommonJS = (mod) => {
  const flagged = __defProp({}, "__esModule", { value: true });
  return __copyProps(flagged, mod);
};
29
+
30
+ // src/index.ts
31
+ var index_exports = {};
32
+ __export(index_exports, {
33
+ EcommerceRAGPlugin: () => EcommerceRAGPlugin
34
+ });
35
+ module.exports = __toCommonJS(index_exports);
36
+
37
+ // src/EcommerceRAGPlugin.ts
38
+ var import_mongodb = require("mongodb");
39
+ var import_openai = __toESM(require("openai"));
40
/**
 * RAG plugin that retrieves product context for an agent.
 *
 * Pipeline: embed the user message (Voyage) and extract product attributes
 * (OpenAI) -> MongoDB Atlas `$vectorSearch` -> attribute/metric based soft
 * rescoring -> optional Voyage reranking -> context string for the LLM.
 * It also exposes ingestion helpers (documents, URL sources, webhooks).
 */
var EcommerceRAGPlugin = class {
  /**
   * @param {object} config Plugin configuration. Reads `mongoUri`,
   *   `openaiApiKey`, `voyageApiKey`, `tenantId` plus the optional settings
   *   whose defaults are listed below. `rescoringWeights` and `cache` are
   *   merged with their defaults key by key, so partial overrides are safe.
   */
  constructor(config) {
    this.name = "ecommerce-rag";
    this.type = "rag";
    this.db = null;
    // Caching layers: key -> { value, timestamp }
    this.embeddingCache = new Map();
    this.attributeCache = new Map();
    // Cache statistics
    this.cacheStats = {
      embeddings: { hits: 0, misses: 0 },
      attributes: { hits: 0, misses: 0 }
    };
    const defaultRescoringWeights = {
      color: 0.15,
      size: 0.1,
      material: 0.1,
      category: 0.12,
      brand: 0.08,
      popularity: 0.05,
      ctr: 0.1,
      sales: 0.1
    };
    this.config = {
      dbName: "agentStudio",
      collection: "products",
      embeddingModel: "voyage-multilingual-2",
      attributeList: [
        "category",
        "color",
        "gender",
        "brand",
        "material",
        "size",
        "season",
        "priceMin",
        "priceMax"
      ],
      enableAttributeExtraction: true,
      numCandidates: 200,
      limit: 50,
      vectorIndexName: "product_vector_index",
      enableReranking: false,
      rerankTopK: 10,
      contextProductCount: 8,
      language: "es",
      includeOutOfStock: false,
      priority: 10,
      ...config,
      // Merge instead of replace: a partial override must not leave the other
      // weights undefined (undefined weights turn every score into NaN).
      rescoringWeights: { ...defaultRescoringWeights, ...config.rescoringWeights },
      cache: {
        embeddings: {
          enabled: config.cache?.embeddings?.enabled ?? true,
          ttl: config.cache?.embeddings?.ttl ?? 36e5, // 1 hour
          maxSize: config.cache?.embeddings?.maxSize ?? 1e3
        },
        attributes: {
          enabled: config.cache?.attributes?.enabled ?? true,
          ttl: config.cache?.attributes?.ttl ?? 18e5, // 30 minutes
          maxSize: config.cache?.attributes?.maxSize ?? 500
        }
      }
    };
    this.priority = this.config.priority;
    this.client = new import_mongodb.MongoClient(this.config.mongoUri);
    this.openai = new import_openai.default({ apiKey: this.config.openaiApiKey });
    this.startCacheCleanup();
  }

  /**
   * Connect the MongoDB client on first use and return the configured database.
   */
  async ensureConnection() {
    if (this.db) {
      return this.db;
    }
    await this.client.connect();
    this.db = this.client.db(this.config.dbName);
    return this.db;
  }

  /**
   * Main retrieval method - called by the SDK.
   *
   * @param {string} message User message to search for.
   * @param {object} [options] Reads `agentId` and `filters` (hard filters).
   * @returns {Promise<object>} `{ content, sources, metadata }`.
   */
  async retrieveContext(message, options) {
    const opts = options ?? {};
    // The embedding and the attribute extraction are independent remote
    // calls, so they run concurrently.
    const [queryVector, attributes] = await Promise.all([
      this.embedText(message),
      this.config.enableAttributeExtraction ? this.extractAttributes(message) : {}
    ]);
    const searchResults = await this.vectorSearch({
      queryVector,
      agentId: opts.agentId,
      hardFilters: opts.filters || {}
    });
    let final = this.softRescore(searchResults, attributes);
    // Filter stock before reranking: rerank() truncates to rerankTopK, so
    // out-of-stock products would otherwise use up top-K slots and then vanish.
    if (!this.config.includeOutOfStock) {
      final = final.filter((p) => p.inStock !== false);
    }
    if (this.config.enableReranking) {
      final = await this.rerank(message, final);
    }
    return {
      content: this.buildContextString(final),
      sources: final.slice(0, this.config.contextProductCount).map((p) => ({
        id: p.sku,
        title: p.title,
        score: p.vectorSearchScore,
        type: "product",
        attributes: p.attributes,
        inStock: p.inStock
      })),
      metadata: {
        productCount: final.length,
        extractedAttributes: attributes,
        topProducts: final.slice(0, 3).map((p) => ({
          sku: p.sku,
          title: p.title,
          score: p.vectorSearchScore
        }))
      }
    };
  }

  /**
   * Format context for LLM: returns the prebuilt `content` string.
   */
  formatContext(context) {
    return context.content;
  }

  // ============================================================================
  // Private Methods
  // ============================================================================

  /**
   * Look up a live cache entry and update the hit/miss counters.
   * Expired entries are removed. Returns the entry or `undefined`.
   */
  readCache(cache, key, ttl, stats) {
    const entry = cache.get(key);
    if (entry) {
      if (Date.now() - entry.timestamp < ttl) {
        stats.hits++;
        return entry;
      }
      cache.delete(key);
    }
    stats.misses++;
    return undefined;
  }

  /**
   * Store a value, evicting the oldest inserted entry when the cache is full.
   */
  writeCache(cache, key, value, maxSize) {
    if (!cache.has(key) && cache.size >= maxSize) {
      const oldest = cache.keys().next();
      if (!oldest.done) {
        cache.delete(oldest.value);
      }
    }
    // Delete first so a refreshed key moves to the end of the insertion order.
    cache.delete(key);
    cache.set(key, { value, timestamp: Date.now() });
  }

  /**
   * Embed text using Voyage with caching.
   *
   * @param {string} text Text to embed.
   * @returns {Promise<number[]>} Embedding vector.
   * @throws {Error} When the API call fails or returns no embedding.
   */
  async embedText(text) {
    const cacheConfig = this.config.cache?.embeddings;
    const cacheKey = `${this.config.embeddingModel}:${text}`;
    if (cacheConfig?.enabled) {
      const hit = this.readCache(
        this.embeddingCache,
        cacheKey,
        cacheConfig.ttl ?? 36e5,
        this.cacheStats.embeddings
      );
      if (hit) {
        return hit.value;
      }
    }
    const response = await fetch("https://api.voyageai.com/v1/embeddings", {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        "Authorization": `Bearer ${this.config.voyageApiKey}`
      },
      body: JSON.stringify({
        input: text,
        model: this.config.embeddingModel
      })
    });
    if (!response.ok) {
      throw new Error(`Voyage API error: ${response.status} ${response.statusText}`);
    }
    const data = await response.json();
    const embedding = data?.data?.[0]?.embedding;
    if (!Array.isArray(embedding)) {
      throw new Error("Voyage API error: response did not contain an embedding");
    }
    if (cacheConfig?.enabled) {
      this.writeCache(this.embeddingCache, cacheKey, embedding, cacheConfig.maxSize ?? 1e3);
    }
    return embedding;
  }

  /**
   * Map a free-form gender string to "M", "F" or "Unisex".
   * Female terms are tested before male ones because "female" contains
   * "male" and "woman" contains "man".
   *
   * @returns {string|undefined} Normalized gender, or undefined if unknown.
   */
  normalizeGender(raw) {
    const g = String(raw).toLowerCase().trim();
    if (g.includes("unisex")) {
      return "Unisex";
    }
    const mentions = (terms) => terms.some((term) => g.includes(term));
    if (g === "f" || mentions(["mujer", "female", "woman", "women", "femenin"])) {
      return "F";
    }
    if (g === "m" || mentions(["hombre", "male", "man", "men", "masculin"])) {
      return "M";
    }
    return undefined;
  }

  /**
   * Extract attributes from user message using OpenAI with caching.
   * Best effort: on any failure the error is logged and `{}` is returned
   * (and not cached).
   *
   * @param {string} message User message.
   * @returns {Promise<object>} Subset of category, color, gender, brand,
   *   material, size, season, priceMin, priceMax.
   */
  async extractAttributes(message) {
    const cacheConfig = this.config.cache?.attributes;
    const cacheKey = cacheConfig?.enabled ? message.toLowerCase().trim() : null;
    if (cacheConfig?.enabled) {
      const hit = this.readCache(
        this.attributeCache,
        cacheKey,
        cacheConfig.ttl ?? 18e5,
        this.cacheStats.attributes
      );
      if (hit) {
        return hit.value;
      }
    }
    const attrs = {};
    try {
      const completion = await this.openai.chat.completions.create({
        model: "gpt-4o-mini",
        messages: [
          {
            role: "system",
            content: `Extract product attributes from the user message. Return a JSON object with only the attributes you can identify from this list: ${this.config.attributeList.join(", ")}. If an attribute is not mentioned, omit it from the response.`
          },
          {
            role: "user",
            content: message
          }
        ],
        response_format: { type: "json_object" },
        temperature: 0.1
      });
      const extracted = JSON.parse(completion.choices[0]?.message?.content || "{}");
      const copyText = (field) => {
        if (extracted[field]) {
          attrs[field] = String(extracted[field]);
        }
      };
      const copyPrice = (field) => {
        const amount = Number(extracted[field]);
        // Skip non-numeric answers instead of storing NaN.
        if (extracted[field] && Number.isFinite(amount)) {
          attrs[field] = amount;
        }
      };
      copyText("category");
      copyText("color");
      if (extracted.gender) {
        const gender = this.normalizeGender(extracted.gender);
        if (gender) {
          attrs.gender = gender;
        }
      }
      copyText("brand");
      copyText("material");
      copyText("size");
      copyText("season");
      copyPrice("priceMin");
      copyPrice("priceMax");
    } catch (error) {
      console.error("Attribute extraction failed:", error);
      return {};
    }
    if (cacheConfig?.enabled) {
      this.writeCache(this.attributeCache, cacheKey, attrs, cacheConfig.maxSize ?? 500);
    }
    return attrs;
  }

  /**
   * Build the `$vectorSearch` pre-filter. Caller supplied hard filters are
   * applied first so they can never override the tenant (or the explicit
   * agent) scope.
   */
  buildSearchFilter(options) {
    const filter = {};
    for (const [key, value] of Object.entries(options.hardFilters || {})) {
      if (value !== undefined && value !== null) {
        filter[key] = value;
      }
    }
    filter.tenantId = this.config.tenantId;
    if (options.agentId) {
      filter.agentId = options.agentId;
    }
    return filter;
  }

  /**
   * MongoDB Atlas Vector Search.
   *
   * @param {object} options Reads `queryVector`, `agentId`, `hardFilters`.
   * @returns {Promise<object[]>} Matching documents with `vectorSearchScore`.
   */
  async vectorSearch(options) {
    const db = await this.ensureConnection();
    const collection = db.collection(this.config.collection);
    const pipeline = [
      {
        $vectorSearch: {
          index: this.config.vectorIndexName,
          path: "embedding",
          queryVector: options.queryVector,
          numCandidates: this.config.numCandidates,
          limit: this.config.limit,
          filter: this.buildSearchFilter(options)
        }
      },
      {
        $addFields: {
          vectorSearchScore: { $meta: "vectorSearchScore" }
        }
      }
    ];
    return collection.aggregate(pipeline).toArray();
  }

  /**
   * Soft rescore based on attributes and metrics.
   * Adds the configured weight for every attribute that matches the extracted
   * attributes (case-insensitive), a budget-proximity bonus, and capped metric
   * bonuses. Tolerates products without `attributes`, a scalar `size`, and
   * prices stored as strings.
   *
   * @returns {object[]} Copies of the products sorted by descending score.
   */
  softRescore(results, attrs) {
    const weights = this.config.rescoringWeights || {};
    const weightOf = (name) => Number(weights[name]) || 0;
    const wanted = attrs || {};
    const sameText = (a, b) => String(a).toLowerCase() === String(b).toLowerCase();
    const matches = (attributes, field) =>
      Boolean(wanted[field] && attributes[field] && sameText(attributes[field], wanted[field]));

    const rescored = results.map((product) => {
      const attributes = product.attributes || {};
      let boost = product.vectorSearchScore || 0;
      if (matches(attributes, "color")) {
        boost += weightOf("color");
      }
      if (wanted.size && attributes.size) {
        const sizes = Array.isArray(attributes.size) ? attributes.size : [attributes.size];
        if (sizes.some((s) => sameText(s, wanted.size))) {
          boost += weightOf("size");
        }
      }
      for (const field of ["material", "category", "brand"]) {
        if (matches(attributes, field)) {
          boost += weightOf(field);
        }
      }
      const price = Number(attributes.price);
      if (wanted.priceMax && price > 0 && price <= wanted.priceMax) {
        // Proximity as computed here is larger for cheaper products.
        const proximity = 1 - price / wanted.priceMax;
        boost += Math.max(0, proximity * 0.1);
      }
      if (product.metrics?.popularity) {
        boost += Math.min(product.metrics.popularity * weightOf("popularity"), 0.2);
      }
      if (product.metrics?.ctr) {
        boost += Math.min(product.metrics.ctr * weightOf("ctr"), 0.15);
      }
      if (product.metrics?.sales) {
        const normalizedSales = Math.log10(product.metrics.sales + 1) / 10;
        boost += Math.min(normalizedSales * weightOf("sales"), 0.1);
      }
      return { ...product, vectorSearchScore: boost };
    });
    return rescored.sort((a, b) => (b.vectorSearchScore || 0) - (a.vectorSearchScore || 0));
  }

  /**
   * Blend rerank scores into the products they belong to.
   * Each result is matched through its `index` (position of the document in
   * the request); the array position is only a fallback when `index` is
   * absent. Products missing from the response are dropped.
   *
   * @returns {object[]} At most `rerankTopK` products, best first.
   */
  applyRerankScores(products, results) {
    const blended = [];
    results.forEach((item, position) => {
      const sourceIndex = Number.isInteger(item?.index) ? item.index : position;
      const product = products[sourceIndex];
      if (!product) {
        return;
      }
      blended.push({
        ...product,
        vectorSearchScore: (product.vectorSearchScore || 0) * 0.5 + (item?.relevance_score || 0) * 0.5
      });
    });
    return blended
      .sort((a, b) => (b.vectorSearchScore || 0) - (a.vectorSearchScore || 0))
      .slice(0, this.config.rerankTopK);
  }

  /**
   * Optional Voyage reranking. Best effort: on failure the error is logged
   * and the input list is returned unchanged.
   */
  async rerank(query, products) {
    if (products.length === 0) {
      return products;
    }
    try {
      const docTexts = products.map((p) => {
        const attributeText = Object.entries(p.attributes || {})
          .filter(([, v]) => v)
          .map(([k, v]) => `${k}: ${v}`)
          .join(", ");
        return `${p.title}. ${p.description || ""}. ${attributeText}`;
      });
      const response = await fetch("https://api.voyageai.com/v1/rerank", {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
          "Authorization": `Bearer ${this.config.voyageApiKey}`
        },
        body: JSON.stringify({
          query,
          documents: docTexts,
          model: "rerank-2",
          top_k: this.config.rerankTopK
        })
      });
      if (!response.ok) {
        throw new Error(`Voyage rerank error: ${response.status} ${response.statusText}`);
      }
      const data = await response.json();
      if (!Array.isArray(data?.data)) {
        throw new Error("Voyage rerank error: response did not contain results");
      }
      return this.applyRerankScores(products, data.data);
    } catch (error) {
      console.error("Reranking failed:", error);
      return products;
    }
  }

  /**
   * Build context string for LLM from the first `contextProductCount`
   * products. Header and empty-result message follow `config.language`.
   */
  buildContextString(products) {
    const limited = products.slice(0, this.config.contextProductCount);
    const spanish = this.config.language === "es";
    if (limited.length === 0) {
      return spanish ? "No se encontraron productos en el cat\xE1logo." : "No products found in the catalog.";
    }
    const productBlocks = limited.map((product, idx) => {
      const attributes = product.attributes || {};
      const facts = [];
      if (attributes.category) facts.push(`Category: ${attributes.category}`);
      if (attributes.brand) facts.push(`Brand: ${attributes.brand}`);
      if (attributes.color) facts.push(`Color: ${attributes.color}`);
      if (attributes.material) facts.push(`Material: ${attributes.material}`);
      // `size` may be stored as an array or as a single value.
      const sizes = Array.isArray(attributes.size)
        ? attributes.size
        : attributes.size ? [attributes.size] : [];
      if (sizes.length > 0) {
        facts.push(`Sizes: ${sizes.join(", ")}`);
      }
      // Prices can arrive as strings (e.g. from webhook payloads).
      const hasPrice = attributes.price !== undefined && attributes.price !== null && attributes.price !== "";
      const price = hasPrice ? Number(attributes.price) : NaN;
      if (Number.isFinite(price)) {
        facts.push(`Price: $${price.toFixed(2)}`);
      }
      if (product.inStock !== undefined) {
        facts.push(product.inStock ? "In Stock" : "Out of Stock");
      }
      return [
        `${idx + 1}. ${product.title}`,
        `   SKU: ${product.sku}`,
        `   ${product.description || ""}`,
        `   ${facts.join(" | ")}`
      ].join("\n");
    });
    const header = spanish ? "PRODUCTOS DISPONIBLES EN EL CAT\xC1LOGO:" : "AVAILABLE PRODUCTS IN CATALOG:";
    return `${header}\n\n${productBlocks.join("\n\n")}`;
  }

  // ============================================================================
  // Cache Management
  // ============================================================================

  /**
   * Start periodic cache cleanup (remove expired entries) every 5 minutes.
   * The timer is unref'ed so it does not keep the process alive by itself.
   */
  startCacheCleanup() {
    if (this.cleanupInterval) {
      clearInterval(this.cleanupInterval);
    }
    this.cleanupInterval = setInterval(() => {
      this.cleanupExpiredCache();
    }, 3e5);
    this.cleanupInterval.unref?.();
  }

  /**
   * Clean up expired cache entries in every enabled cache.
   */
  cleanupExpiredCache() {
    const now = Date.now();
    const sweep = (cache, settings, fallbackTtl) => {
      if (!settings?.enabled) {
        return;
      }
      const ttl = settings.ttl ?? fallbackTtl;
      for (const [key, entry] of cache.entries()) {
        if (now - entry.timestamp >= ttl) {
          cache.delete(key);
        }
      }
    };
    sweep(this.embeddingCache, this.config.cache?.embeddings, 36e5);
    sweep(this.attributeCache, this.config.cache?.attributes, 18e5);
  }

  /**
   * Get cache statistics (size, maxSize, hits, misses, hitRate as a
   * two-decimal string) for both caches.
   */
  getCacheStats() {
    const summarize = (cache, stats, maxSize) => {
      const lookups = stats.hits + stats.misses;
      return {
        size: cache.size,
        maxSize,
        hits: stats.hits,
        misses: stats.misses,
        hitRate: lookups > 0 ? (stats.hits / lookups).toFixed(2) : "0.00"
      };
    };
    return {
      embeddings: summarize(
        this.embeddingCache,
        this.cacheStats.embeddings,
        this.config.cache?.embeddings?.maxSize ?? 1e3
      ),
      attributes: summarize(
        this.attributeCache,
        this.cacheStats.attributes,
        this.config.cache?.attributes?.maxSize ?? 500
      )
    };
  }

  /**
   * Clear all caches and reset the statistics.
   */
  clearCache() {
    this.embeddingCache.clear();
    this.attributeCache.clear();
    this.cacheStats = {
      embeddings: { hits: 0, misses: 0 },
      attributes: { hits: 0, misses: 0 }
    };
  }

  /**
   * Get MongoDB collection.
   */
  async getCollection() {
    const db = await this.ensureConnection();
    return db.collection(this.config.collection);
  }

  /**
   * Generate embedding for a single text.
   */
  async generateEmbedding(text) {
    return this.embedText(text);
  }

  /**
   * Build the text that gets embedded for a product: the content followed by
   * the attribute fields that are present in the metadata.
   */
  composeEmbeddingText(content, metadata) {
    const attributeParts = [];
    for (const field of ["category", "brand", "color", "material", "size", "gender", "season"]) {
      if (metadata[field]) {
        const value = Array.isArray(metadata[field]) ? metadata[field].join(", ") : metadata[field];
        attributeParts.push(`${field}: ${value}`);
      }
    }
    return attributeParts.length > 0 ? `${content}. ${attributeParts.join(", ")}` : content;
  }

  /**
   * Convert a RAG document plus its embedding into the stored product shape.
   */
  toProductDoc(doc, embedding, options) {
    const metadata = doc.metadata || {};
    return {
      tenantId: this.config.tenantId,
      agentId: options?.agentId,
      sku: doc.id,
      title: metadata.title || doc.content.substring(0, 100),
      description: metadata.description || doc.content,
      embedding,
      attributes: {
        category: metadata.category,
        brand: metadata.brand,
        color: metadata.color,
        material: metadata.material,
        size: metadata.size,
        gender: metadata.gender,
        season: metadata.season,
        price: metadata.price,
        ...metadata.attributes
      },
      inStock: metadata.inStock !== false,
      metrics: metadata.metrics || {}
    };
  }

  /**
   * Persist one batch of product docs according to the ingest options
   * (`overwrite` -> replace, `skipExisting` -> insert only new SKUs,
   * otherwise `$set` upsert).
   *
   * @returns {Promise<{indexed: number, skipped: number}>}
   */
  async writeProductDocs(collection, productDocs, options) {
    const agentScope = options?.agentId ? { agentId: options.agentId } : {};
    const scopeFor = (sku) => ({ tenantId: this.config.tenantId, sku, ...agentScope });

    if (!options?.overwrite && options?.skipExisting) {
      const existing = await collection.find({
        tenantId: this.config.tenantId,
        sku: { $in: productDocs.map((d) => d.sku) },
        ...agentScope
      }).project({ sku: 1 }).toArray();
      const knownSkus = new Set(existing.map((d) => d.sku));
      const newDocs = productDocs.filter((d) => !knownSkus.has(d.sku));
      let indexed = 0;
      if (newDocs.length > 0) {
        const result = await collection.insertMany(newDocs);
        indexed = result.insertedCount;
      }
      return { indexed, skipped: productDocs.length - newDocs.length };
    }

    const bulkOps = productDocs.map((doc) =>
      options?.overwrite
        ? { replaceOne: { filter: scopeFor(doc.sku), replacement: doc, upsert: true } }
        : { updateOne: { filter: scopeFor(doc.sku), update: { $set: doc }, upsert: true } }
    );
    const result = await collection.bulkWrite(bulkOps);
    // matchedCount (not modifiedCount): a matched document whose content did
    // not change is still successfully indexed.
    return { indexed: (result.upsertedCount || 0) + (result.matchedCount || 0), skipped: 0 };
  }

  /**
   * Ingest products into the RAG system.
   * Converts RAGDocuments to ProductDocs and indexes them with embeddings.
   *
   * @param {object[]} documents Documents with `id`, `content`, `metadata`.
   * @param {object} [options] Reads `batchSize` (default 10; non-positive or
   *   non-numeric values fall back to 10), `agentId`, `overwrite`,
   *   `skipExisting`.
   * @returns {Promise<object>} `{ success, indexed, failed, errors, metadata }`.
   *   Documents skipped by `skipExisting` are counted in `failed`.
   */
  async ingest(documents, options) {
    const collection = await this.getCollection();
    // A non-positive batch size would never advance the loop below.
    const requestedSize = Math.floor(Number(options?.batchSize));
    const batchSize = requestedSize > 0 ? requestedSize : 10;
    let indexed = 0;
    let failed = 0;
    const errors = [];
    try {
      for (let start = 0; start < documents.length; start += batchSize) {
        const batch = documents.slice(start, start + batchSize);
        const texts = batch.map((doc) => this.composeEmbeddingText(doc.content, doc.metadata || {}));
        const embeddings = await this.generateEmbeddingsBatch(texts);
        const productDocs = batch.map((doc, idx) => this.toProductDoc(doc, embeddings[idx], options));
        try {
          const outcome = await this.writeProductDocs(collection, productDocs, options);
          indexed += outcome.indexed;
          failed += outcome.skipped;
        } catch (error) {
          for (const doc of batch) {
            failed++;
            errors.push({
              id: doc.id,
              error: error?.message || "Unknown error during insertion"
            });
          }
        }
      }
      return {
        success: failed === 0,
        indexed,
        failed,
        errors: errors.length > 0 ? errors : undefined,
        metadata: {
          batchSize,
          totalDocuments: documents.length
        }
      };
    } catch (error) {
      return {
        success: false,
        indexed,
        failed: documents.length - indexed,
        errors: [{
          id: "batch",
          error: error?.message || "Unknown error during ingestion"
        }]
      };
    }
  }

  /**
   * Update a single product identified by SKU.
   * Re-embeds when `document.content` is given and `$set`s only the fields
   * present in `document.metadata`. Does nothing when there is nothing to set.
   */
  async update(id, document, options) {
    const collection = await this.getCollection();
    const changes = {};
    const metadata = document.metadata || {};
    if (document.content) {
      const textForEmbedding = this.composeEmbeddingText(document.content, metadata);
      changes.embedding = await this.generateEmbedding(textForEmbedding);
      changes.description = document.content;
    }
    if (document.metadata) {
      if (metadata.title) changes.title = metadata.title;
      if (metadata.inStock !== undefined) changes.inStock = metadata.inStock;
      for (const field of ["category", "brand", "color", "material", "size", "gender", "season", "price"]) {
        if (metadata[field] !== undefined) {
          changes[`attributes.${field}`] = metadata[field];
        }
      }
      for (const [key, value] of Object.entries(metadata.metrics || {})) {
        changes[`metrics.${key}`] = value;
      }
      for (const [key, value] of Object.entries(metadata.attributes || {})) {
        changes[`attributes.${key}`] = value;
      }
    }
    // An empty `$set` is rejected by some MongoDB server versions.
    if (Object.keys(changes).length === 0) {
      return;
    }
    await collection.updateOne(
      {
        tenantId: this.config.tenantId,
        sku: id,
        ...(options?.agentId ? { agentId: options.agentId } : {})
      },
      { $set: changes }
    );
  }

  /**
   * Delete product(s) by SKU.
   *
   * @returns {Promise<number>} Number of deleted documents.
   */
  async delete(ids, options) {
    const collection = await this.getCollection();
    const skus = Array.isArray(ids) ? ids : [ids];
    const { deletedCount } = await collection.deleteMany({
      tenantId: this.config.tenantId,
      sku: { $in: skus },
      ...(options?.agentId ? { agentId: options.agentId } : {})
    });
    return deletedCount;
  }

  /**
   * Bulk operations: runs insert/update/delete operations one by one and
   * collects per-operation failures instead of aborting.
   *
   * @returns {Promise<object>} `{ success, inserted, updated, deleted, failed, errors }`.
   */
  async bulk(operations, options) {
    let inserted = 0;
    let updated = 0;
    let deleted = 0;
    let failed = 0;
    const errors = [];
    for (const op of operations) {
      try {
        switch (op.type) {
          case "insert": {
            if (op.document) {
              // ingest() reports failures in its result instead of throwing.
              const result = await this.ingest([op.document], options);
              if (result.errors?.length) {
                throw new Error(result.errors[0].error);
              }
              inserted += result.indexed > 0 ? 1 : 0;
            }
            break;
          }
          case "update": {
            if (op.document) {
              await this.update(op.id, op.document, options);
              updated++;
            }
            break;
          }
          case "delete": {
            deleted += await this.delete(op.id, options);
            break;
          }
          default:
            throw new Error(`Unsupported operation type: ${op.type}`);
        }
      } catch (error) {
        failed++;
        errors.push({
          id: op.id,
          operation: op.type,
          error: error?.message || "Unknown error"
        });
      }
    }
    return {
      success: failed === 0,
      inserted,
      updated,
      deleted,
      failed,
      errors: errors.length > 0 ? errors : undefined
    };
  }

  /**
   * Generate embeddings for a batch of texts, one request at a time.
   * Caching and its statistics are handled entirely by embedText(), so both
   * entry points share the same cache keys and size limit.
   */
  async generateEmbeddingsBatch(texts) {
    const embeddings = [];
    for (const text of texts) {
      embeddings.push(await this.embedText(text));
    }
    return embeddings;
  }

  /**
   * Ingest documents from URL source (JSON, API, CSV; XML is not implemented).
   * Never throws: failures are logged and returned as an unsuccessful result.
   *
   * @param {object} source Reads `url`, `type`, `headers`, `auth`, `timeout`
   *   (ms, default 30000), `transform`, `metadata`.
   */
  async ingestFromUrl(source, options) {
    try {
      const controller = new AbortController();
      const timeoutId = setTimeout(() => controller.abort(), source.timeout || 3e4);
      let documents;
      try {
        const response = await fetch(source.url, {
          headers: {
            ...source.headers,
            ...(source.auth && this.buildAuthHeaders(source.auth))
          },
          signal: controller.signal
        });
        if (!response.ok) {
          throw new Error(`HTTP error: ${response.status} ${response.statusText}`);
        }
        if (source.type === "json" || source.type === "api") {
          documents = this.transformJsonToDocuments(await response.json(), source.transform);
        } else if (source.type === "csv") {
          documents = await this.transformCsvToDocuments(await response.text(), source.transform);
        } else if (source.type === "xml") {
          documents = await this.transformXmlToDocuments(await response.text(), source.transform);
        } else {
          throw new Error(`Unsupported source type: ${source.type}`);
        }
      } finally {
        // Always release the timer; it stays armed until the body is read.
        clearTimeout(timeoutId);
      }
      documents = documents.map((doc) => ({
        ...doc,
        metadata: {
          ...doc.metadata,
          ...source.metadata,
          sourceUrl: source.url,
          fetchedAt: new Date().toISOString()
        }
      }));
      const ingestResult = await this.ingest(documents, options);
      return {
        ...ingestResult,
        sourceUrl: source.url,
        fetchedAt: new Date(),
        documentsFetched: documents.length
      };
    } catch (error) {
      console.error("URL ingestion failed:", error);
      return {
        success: false,
        indexed: 0,
        failed: 0,
        sourceUrl: source.url,
        fetchedAt: new Date(),
        documentsFetched: 0,
        errors: [{
          id: "fetch",
          error: error instanceof Error ? error.message : "Unknown error"
        }]
      };
    }
  }

  /**
   * Handle webhook payload for real-time updates. Supported sources:
   * "shopify", "woocommerce", "custom" (payload is already document shaped).
   * Documents are ingested with `overwrite: true`.
   */
  async handleWebhook(payload, source, options) {
    try {
      let documents;
      switch (source) {
        case "shopify":
          documents = this.parseShopifyWebhook(payload);
          break;
        case "woocommerce":
          documents = this.parseWooCommerceWebhook(payload);
          break;
        case "custom":
          documents = Array.isArray(payload) ? payload : [payload];
          break;
        default:
          throw new Error(`Unsupported webhook source: ${source}`);
      }
      const stamped = documents.map((doc) => ({
        ...doc,
        metadata: {
          ...doc.metadata,
          webhookSource: source,
          receivedAt: new Date().toISOString()
        }
      }));
      return await this.ingest(stamped, { ...options, overwrite: true });
    } catch (error) {
      console.error("Webhook handling failed:", error);
      return {
        success: false,
        indexed: 0,
        failed: 1,
        errors: [{
          id: "webhook",
          error: error instanceof Error ? error.message : "Unknown error"
        }]
      };
    }
  }

  // ============================================================================
  // Private Helper Methods for URL Ingestion
  // ============================================================================

  /**
   * Translate an auth descriptor (`bearer`, `basic`, `api-key`, `custom`)
   * into request headers. Incomplete or unknown descriptors yield `{}`.
   */
  buildAuthHeaders(auth) {
    if (!auth) {
      return {};
    }
    switch (auth.type) {
      case "bearer":
        return auth.token ? { Authorization: `Bearer ${auth.token}` } : {};
      case "basic": {
        if (!auth.username || !auth.password) {
          return {};
        }
        const encoded = Buffer.from(`${auth.username}:${auth.password}`).toString("base64");
        return { Authorization: `Basic ${encoded}` };
      }
      case "api-key":
        return auth.header && auth.key ? { [auth.header]: auth.key } : {};
      case "custom":
        return auth.headers ? auth.headers : {};
      default:
        return {};
    }
  }

  /**
   * Turn one raw record into `{ id, content, metadata }` using the field
   * mapping (keys other than `id`/`content` become metadata entries).
   */
  mapItemToDocument(item, index, fieldMapping) {
    const metadata = {};
    for (const [key, path] of Object.entries(fieldMapping)) {
      if (key !== "id" && key !== "content" && path && typeof path === "string") {
        metadata[key] = this.extractField(item, path);
      }
    }
    return {
      id: this.extractField(item, fieldMapping.id || "id") || `doc-${index}`,
      content: this.extractField(item, fieldMapping.content || "content") || JSON.stringify(item),
      metadata
    };
  }

  /**
   * Convert parsed JSON into documents. `transform.documentPath` selects the
   * record list; a single record is wrapped, a missing one yields `[]`.
   */
  transformJsonToDocuments(data, transform) {
    let items = data;
    if (transform?.documentPath) {
      items = this.extractByPath(data, transform.documentPath);
    }
    if (items === undefined || items === null) {
      return [];
    }
    if (!Array.isArray(items)) {
      items = [items];
    }
    const fieldMapping = transform?.fieldMapping || {};
    return items.map((item, index) => this.mapItemToDocument(item, index, fieldMapping));
  }

  /**
   * Split CSV text into rows of fields. Supports double-quoted fields
   * (embedded commas, line breaks and `""` escapes), CRLF line endings, and
   * skips blank lines. Unquoted fields are trimmed.
   *
   * @returns {string[][]}
   */
  parseCsvRows(text) {
    const source = String(text ?? "");
    const rows = [];
    let row = [];
    let field = "";
    let insideQuotes = false;
    let fieldWasQuoted = false;

    const endField = () => {
      row.push(fieldWasQuoted ? field : field.trim());
      field = "";
      fieldWasQuoted = false;
    };
    const endRow = () => {
      const blank = row.length === 0 && !fieldWasQuoted && field.trim() === "";
      if (blank) {
        field = "";
        return;
      }
      endField();
      rows.push(row);
      row = [];
    };

    for (let i = 0; i < source.length; i++) {
      const ch = source[i];
      if (insideQuotes) {
        if (ch !== '"') {
          field += ch;
        } else if (source[i + 1] === '"') {
          field += '"';
          i++;
        } else {
          insideQuotes = false;
        }
      } else if (ch === '"' && field.trim() === "") {
        insideQuotes = true;
        fieldWasQuoted = true;
        field = "";
      } else if (ch === ",") {
        endField();
      } else if (ch === "\n") {
        endRow();
      } else if (ch !== "\r") {
        field += ch;
      }
    }
    endRow();
    return rows;
  }

  /**
   * Convert CSV text (first row = headers) into documents.
   */
  async transformCsvToDocuments(csvData, transform) {
    const rows = this.parseCsvRows(csvData);
    if (rows.length === 0) {
      return [];
    }
    const [headers, ...records] = rows;
    const fieldMapping = transform?.fieldMapping || {};
    return records.map((values, index) => {
      const item = {};
      headers.forEach((header, column) => {
        item[header] = values[column];
      });
      return this.mapItemToDocument(item, index, fieldMapping);
    });
  }

  /**
   * XML sources are not supported yet.
   * @throws {Error} Always.
   */
  async transformXmlToDocuments(_xmlData, _transform) {
    throw new Error("XML parsing not yet implemented. Please use JSON or CSV format.");
  }

  /**
   * Resolve a `$.a.b` / `$.a.items[*]` style path. Paths that do not start
   * with `$.` return `data` unchanged; a missing intermediate yields
   * `undefined`.
   *
   * @throws {Error} When a `[*]` segment does not resolve to an array.
   */
  extractByPath(data, path) {
    if (!path.startsWith("$.")) {
      return data;
    }
    let current = data;
    for (const part of path.slice(2).split(".")) {
      if (part.endsWith("[*]")) {
        const list = current?.[part.slice(0, -3)];
        if (!Array.isArray(list)) {
          throw new Error(`Path ${path} does not resolve to an array`);
        }
        return list;
      }
      if (current === undefined || current === null) {
        return undefined;
      }
      current = current[part];
    }
    return current;
  }

  /**
   * Read a dotted path such as `variants[0].price` from a record.
   * Returns `undefined` as soon as a segment is missing or null.
   */
  extractField(item, path) {
    let current = item;
    for (const part of path.split(".")) {
      if (current === undefined || current === null) {
        return undefined;
      }
      const indexed = /^([^[\]]*)\[(\d+)\]$/.exec(part);
      if (indexed) {
        const [, key, position] = indexed;
        const holder = key === "" ? current : current[key];
        current = holder?.[Number.parseInt(position, 10)];
      } else {
        current = current[part];
      }
      if (current === undefined) {
        return undefined;
      }
    }
    return current;
  }

  /**
   * Map a Shopify product webhook payload to a single document.
   * `vendor` / `product_type` are also exposed as `brand` / `category`,
   * the attribute names ingest() stores.
   */
  parseShopifyWebhook(payload) {
    const firstVariant = payload.variants?.[0];
    return [{
      id: payload.id?.toString() || payload.handle,
      content: `${payload.title}\n${payload.body_html || ""}`,
      metadata: {
        title: payload.title,
        price: firstVariant?.price,
        sku: firstVariant?.sku,
        inStock: (firstVariant?.inventory_quantity || 0) > 0,
        vendor: payload.vendor,
        product_type: payload.product_type,
        tags: payload.tags,
        brand: payload.vendor,
        category: payload.product_type
      }
    }];
  }

  /**
   * Map a WooCommerce product webhook payload to a single document.
   * The first category name is also exposed as `category`, the attribute
   * name ingest() stores.
   */
  parseWooCommerceWebhook(payload) {
    const categoryNames = payload.categories?.map((c) => c.name);
    return [{
      id: payload.id?.toString() || payload.sku,
      content: `${payload.name}\n${payload.description || ""}`,
      metadata: {
        title: payload.name,
        price: payload.price,
        sku: payload.sku,
        inStock: payload.stock_status === "instock",
        categories: categoryNames,
        category: categoryNames?.[0]
      }
    }];
  }

  /**
   * Cleanup resources and close connections.
   */
  async disconnect() {
    if (this.cleanupInterval) {
      clearInterval(this.cleanupInterval);
      this.cleanupInterval = undefined;
    }
    await this.client.close();
    this.db = null;
  }
};
1033
+ // Annotate the CommonJS export names for ESM import in node:
1034
+ 0 && (module.exports = {
1035
+ EcommerceRAGPlugin
1036
+ });