@snap-agent/rag-ecommerce 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 ViloTech
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
22
+
package/README.md ADDED
@@ -0,0 +1,279 @@
1
+ # @snap-agent/rag-ecommerce
2
+
3
+ E-commerce RAG plugin for SnapAgent SDK - Sophisticated product search and recommendations with vector embeddings.
4
+
5
+ ## Features
6
+
7
+ - **Vector Search** - MongoDB Atlas with Voyage AI embeddings
8
+ - **Smart Attribute Extraction** - AI-powered query understanding
9
+ - **Soft Rescoring** - Attribute matching + business metrics
10
+ - **Optional Reranking** - Voyage reranker for precision
11
+ - **Built-in Caching** - 50-80% cost & latency reduction
12
+ - **Multilingual** - Spanish and English support
13
+ - **Fully Configurable** - Tune every aspect
14
+
15
+ ## Installation
16
+
17
+ ```bash
18
+ npm install @snap-agent/rag-ecommerce @snap-agent/core
19
+ ```
20
+
21
+ ## Quick Start
22
+
23
+ ```typescript
24
+ import { createClient, MongoDBStorage } from '@snap-agent/core';
25
+ import { EcommerceRAGPlugin } from '@snap-agent/rag-ecommerce';
26
+
27
+ const client = createClient({
28
+ storage: new MongoDBStorage(process.env.MONGODB_URI!),
29
+ providers: {
30
+ openai: { apiKey: process.env.OPENAI_API_KEY! },
31
+ },
32
+ });
33
+
34
+ const agent = await client.createAgent({
35
+ name: 'Shopping Assistant',
36
+ instructions: 'You are a helpful shopping assistant.',
37
+ model: 'gpt-4o',
38
+ userId: 'user-123',
39
+ plugins: [
40
+ new EcommerceRAGPlugin({
41
+ mongoUri: process.env.MONGODB_URI!,
42
+ openaiApiKey: process.env.OPENAI_API_KEY!,
43
+ voyageApiKey: process.env.VOYAGE_API_KEY!,
44
+ tenantId: 'my-store',
45
+ }),
46
+ ],
47
+ });
48
+
49
+ const thread = await client.createThread({
50
+ agentId: agent.id,
51
+ userId: 'user-123',
52
+ });
53
+
54
+ const response = await client.chat({
55
+ threadId: thread.id,
56
+ message: 'I want red sneakers under $100',
57
+ useRAG: true,
58
+ });
59
+ ```
60
+
61
+ ## How It Works
62
+
63
+ ### 1. Query Understanding
64
+ ```
65
+ User: "red nike running shoes under $100"
66
+
67
+ Attribute Extraction (OpenAI):
68
+ {
69
+ color: "red",
70
+ brand: "nike",
71
+ category: "running shoes",
72
+ priceMax: 100
73
+ }
74
+ ```
75
+
76
+ ### 2. Vector Search
77
+ ```
78
+ Query → Embedding (Voyage) → MongoDB Atlas Vector Search
79
+
80
+ Returns: Top 50 semantically similar products
81
+ ```
82
+
83
+ ### 3. Soft Rescoring
84
+ ```
85
+ Base score + Attribute matches + Business metrics
86
+ - Color match: +0.15
87
+ - Brand match: +0.08
88
+ - Popularity: +0.05
89
+ - CTR: +0.10
90
+
91
+ Re-ranked by combined score
92
+ ```
93
+
94
+ ### 4. Optional Reranking
95
+ ```
96
+ Top 50 → Voyage Reranker → Top 10 most relevant
97
+ ```
98
+
99
+ ### 5. Context Injection
100
+ ```
101
+ Top 8 products → Formatted context → Injected into LLM prompt
102
+ ```
103
+
104
+ ## Configuration
105
+
106
+ ### Basic
107
+ ```typescript
108
+ new EcommerceRAGPlugin({
109
+ mongoUri: process.env.MONGODB_URI!,
110
+ openaiApiKey: process.env.OPENAI_API_KEY!,
111
+ voyageApiKey: process.env.VOYAGE_API_KEY!,
112
+ tenantId: 'my-store',
113
+ })
114
+ ```
115
+
116
+ ### Advanced
117
+ ```typescript
118
+ new EcommerceRAGPlugin({
119
+ // Required
120
+ mongoUri: process.env.MONGODB_URI!,
121
+ openaiApiKey: process.env.OPENAI_API_KEY!,
122
+ voyageApiKey: process.env.VOYAGE_API_KEY!,
123
+ tenantId: 'my-store',
124
+
125
+ // Customize attributes
126
+ attributeList: ['category', 'color', 'brand', 'price', 'size', 'style'],
127
+
128
+ // Tune scoring weights
129
+ rescoringWeights: {
130
+ color: 0.20,
131
+ brand: 0.15,
132
+ category: 0.10,
133
+ popularity: 0.15,
134
+ },
135
+
136
+ // Enable reranking
137
+ enableReranking: true,
138
+ rerankTopK: 5,
139
+
140
+ // Cache configuration
141
+ cache: {
142
+ embeddings: { enabled: true, ttl: 3600000, maxSize: 2000 },
143
+ attributes: { enabled: true, ttl: 1800000, maxSize: 1000 },
144
+ },
145
+
146
+ // Context
147
+ contextProductCount: 10,
148
+ language: 'en',
149
+ includeOutOfStock: false,
150
+ })
151
+ ```
152
+
153
+ ## Caching
154
+
155
+ Built-in intelligent caching for dramatic performance improvements:
156
+
157
+ ```typescript
158
+ const stats = plugin.getCacheStats();
159
+ console.log(stats);
160
+ // {
161
+ // embeddings: { size: 320, maxSize: 2000, hits: 1250, misses: 320, hitRate: '0.80' },
162
+ // attributes: { size: 210, maxSize: 1000, hits: 890, misses: 210, hitRate: '0.81' }
163
+ // }
164
+ ```
165
+
166
+ **Benefits:**
167
+ - 50-80% cost reduction
168
+ - 5-10x faster for repeat queries
169
+ - Automatic cleanup
170
+ - Zero configuration required
171
+
172
+ See [CACHING.md](./CACHING.md) for details.
173
+
174
+ ## Database Schema
175
+
176
+ ### Products Collection
177
+ ```typescript
178
+ {
179
+ tenantId: string,
180
+ agentId?: string,
181
+ sku: string,
182
+ title: string,
183
+ description?: string,
184
+ embedding: number[], // 1024-dim vector
185
+ attributes: {
186
+ category?: string,
187
+ brand?: string,
188
+ color?: string,
189
+ material?: string,
190
+ size?: string[],
191
+ price?: number,
192
+ gender?: 'M' | 'F' | 'Unisex',
193
+ },
194
+ inStock?: boolean,
195
+ metrics?: {
196
+ popularity?: number,
197
+ ctr?: number,
198
+ sales?: number,
199
+ }
200
+ }
201
+ ```
202
+
203
+ ### Required Indexes
204
+ ```javascript
205
+ // Vector search index
206
+ db.products.createSearchIndex({
207
+ name: "product_vector_index",
208
+ type: "vectorSearch",
209
+ definition: {
210
+ fields: [{
211
+ type: "vector",
212
+ path: "embedding",
213
+ numDimensions: 1024,
214
+ similarity: "cosine"
215
+ }]
216
+ }
217
+ });
218
+ ```
219
+
220
+ ## Environment Variables
221
+
222
+ ```bash
223
+ MONGODB_URI=mongodb+srv://...
224
+ OPENAI_API_KEY=sk-...
225
+ VOYAGE_API_KEY=pa-...
226
+ ```
227
+
228
+ ## Performance
229
+
230
+ | Metric | Without RAG | With RAG | With RAG + Cache |
231
+ |--------|------------|----------|------------------|
232
+ | Latency | 200ms | 600ms | 180ms |
233
+ | Cost/query | $0.0005 | $0.0008 | $0.0003 |
234
+ | Relevance | Low | High | High |
235
+
236
+ ## Examples
237
+
238
+ See [example-cache.ts](./example-cache.ts) for a complete working example with cache monitoring.
239
+
240
+ ## API Reference
241
+
242
+ ### Methods
243
+
244
+ #### `retrieveContext(message, options)`
245
+ Main retrieval method (called automatically by the SDK).
246
+
247
+ **Returns:**
248
+ ```typescript
249
+ {
250
+ content: string, // Formatted product list
251
+ sources: [...], // Top products with scores
252
+ metadata: {
253
+ productCount: number,
254
+ extractedAttributes: {...},
255
+ topProducts: [...]
256
+ }
257
+ }
258
+ ```
259
+
260
+ #### `getCacheStats()`
261
+ Get cache performance statistics
262
+
263
+ #### `clearCache()`
264
+ Clear all caches
265
+
266
+ #### `disconnect()`
267
+ Clean up resources and close the MongoDB connection.
268
+
269
+ ## License
270
+
271
+ MIT © ViloTech
272
+
273
+ ## Support
274
+
275
+ - [ViloTech](https://vilotech.co)
276
+ - [Documentation](../../sdk/README.md)
277
+ - [GitHub Issues](https://github.com/vilotech/snap-agent/issues)
278
+ - [SnapAgent SDK](https://github.com/vilotech/snap-agent)
279
+
@@ -0,0 +1,246 @@
1
+ import { RAGPlugin, RAGContext, RAGDocument, IngestOptions, IngestResult, BulkOperation, BulkResult } from '@snap-agent/core';
2
+
3
+ interface URLSource { // Describes a remote data source accepted by EcommerceRAGPlugin.ingestFromUrl.
4
+ url: string; // Location of the source to fetch.
5
+ type: 'json' | 'csv' | 'xml' | 'api'; // Declared format/kind of the source.
6
+ auth?: { // Optional credentials; how each scheme consumes the fields below is not visible in this declaration.
7
+ type: 'bearer' | 'basic' | 'api-key' | 'custom'; // Authentication scheme selector.
8
+ token?: string; // presumably used with 'bearer' — TODO confirm against the implementation
9
+ username?: string; // presumably used with 'basic' — TODO confirm
10
+ password?: string; // presumably used with 'basic' — TODO confirm
11
+ header?: string; // presumably the header name for 'api-key' — TODO confirm
12
+ key?: string; // presumably the key value for 'api-key' — TODO confirm
13
+ headers?: Record<string, string>; // presumably raw headers for 'custom' — TODO confirm
14
+ };
15
+ transform?: { // Optional instructions for turning the fetched payload into documents.
16
+ documentPath?: string; // presumably a path to the document list inside the payload; path syntax not shown here
17
+ fieldMapping?: { // Maps document fields to source field names; values are strings.
18
+ id?: string; // Source field to use as the document id.
19
+ content?: string; // Source field to use as the document content.
20
+ [key: string]: string | undefined; // Any additional mappings are allowed.
21
+ };
22
+ };
23
+ headers?: Record<string, string>; // Extra request headers (separate from auth.headers).
24
+ timeout?: number; // NOTE(review): unit (ms vs s) and default are not visible here — confirm
25
+ metadata?: Record<string, any>; // Free-form metadata; how it is applied to ingested documents is not visible here.
26
+ }
27
+ interface URLIngestResult extends IngestResult { // Result of EcommerceRAGPlugin.ingestFromUrl: the core IngestResult plus fetch details.
28
+ sourceUrl: string; // URL the documents came from (presumably URLSource.url — confirm).
29
+ fetchedAt: Date; // Timestamp associated with the fetch.
30
+ documentsFetched: number; // Count of documents fetched; may differ from what IngestResult reports as indexed — confirm
31
+ }
32
+ interface EcommerceRAGConfig { // Constructor options for EcommerceRAGPlugin. Defaults for optional fields are not visible in this declaration.
33
+ mongoUri: string; // Required. MongoDB connection string (README: MONGODB_URI).
34
+ dbName?: string; // Optional database name.
35
+ collection?: string; // Optional products collection name.
36
+ openaiApiKey: string; // Required. README attributes query attribute extraction to OpenAI.
37
+ voyageApiKey: string; // Required. README attributes embeddings and optional reranking to Voyage.
38
+ embeddingModel?: string; // Optional embedding model identifier.
39
+ tenantId: string; // Required. Tenant identifier (ProductDoc carries a matching tenantId field).
40
+ attributeList?: string[]; // Attribute names to work with, e.g. ['category', 'color', 'brand', ...] per the README.
41
+ enableAttributeExtraction?: boolean; // Toggles attribute extraction from the user message.
42
+ numCandidates?: number; // presumably the vector-search candidate pool size — TODO confirm
43
+ limit?: number; // presumably the number of vector-search results returned — TODO confirm
44
+ vectorIndexName?: string; // Name of the vector search index (README example index: "product_vector_index").
45
+ rescoringWeights?: { // Per-signal weights for soft rescoring; README shows additive boosts such as color +0.15.
46
+ color?: number;
47
+ size?: number;
48
+ material?: number;
49
+ category?: number;
50
+ brand?: number;
51
+ popularity?: number; // Business-metric weight (see ProductDoc.metrics).
52
+ ctr?: number; // Business-metric weight (see ProductDoc.metrics).
53
+ sales?: number; // Business-metric weight (see ProductDoc.metrics).
54
+ };
55
+ enableReranking?: boolean; // Turns on the optional reranking step.
56
+ rerankTopK?: number; // Number of products kept after reranking (README example: 5).
57
+ contextProductCount?: number; // Number of products placed into the LLM context (README example: 10).
58
+ language?: 'es' | 'en'; // Spanish or English.
59
+ includeOutOfStock?: boolean; // Whether out-of-stock products may be returned.
60
+ cache?: { // Independent settings for the embedding cache and the attribute cache.
61
+ embeddings?: {
62
+ enabled?: boolean;
63
+ ttl?: number; // presumably milliseconds (README example uses 3600000) — TODO confirm
64
+ maxSize?: number; // Maximum number of cached entries.
65
+ };
66
+ attributes?: {
67
+ enabled?: boolean;
68
+ ttl?: number; // presumably milliseconds (README example uses 1800000) — TODO confirm
69
+ maxSize?: number; // Maximum number of cached entries.
70
+ };
71
+ };
72
+ priority?: number; // presumably sets the plugin's `priority` property; ordering semantics are defined by the SDK — confirm
73
+ }
74
+ interface ProductDoc { // Shape of a product document as stored in the products collection (mirrors the README "Database Schema").
75
+ _id?: any; // Database-assigned identifier; untyped in this declaration.
76
+ tenantId: string; // Tenant that owns the product.
77
+ agentId?: string; // Optional agent association.
78
+ sku: string; // Product SKU; the plugin's delete() is documented as deleting by SKU.
79
+ title: string;
80
+ description?: string;
81
+ embedding: number[]; // Embedding vector; the README documents a 1024-dimension vector and index.
82
+ attributes: { // Known product attributes; the index signature below permits arbitrary extra keys.
83
+ category?: string;
84
+ brand?: string;
85
+ color?: string;
86
+ material?: string;
87
+ size?: string[]; // Note: a list here, whereas QueryAttrs.size is a single string.
88
+ gender?: 'M' | 'F' | 'Unisex';
89
+ season?: string;
90
+ price?: number; // NOTE(review): currency/unit is not specified anywhere visible — confirm
91
+ [key: string]: any;
92
+ };
93
+ inStock?: boolean; // Stock flag (see EcommerceRAGConfig.includeOutOfStock).
94
+ metrics?: { // Business metrics; EcommerceRAGConfig.rescoringWeights has matching keys.
95
+ popularity?: number;
96
+ ctr?: number;
97
+ sales?: number;
98
+ };
99
+ vectorSearchScore?: number; // presumably populated on search results rather than stored — TODO confirm
100
+ }
101
+ interface QueryAttrs { // Attributes derived from a user query (README example: color, brand, category, priceMax). All fields are optional.
102
+ category?: string;
103
+ color?: string;
104
+ gender?: string; // Plain string here; ProductDoc.attributes.gender is restricted to 'M' | 'F' | 'Unisex'.
105
+ brand?: string;
106
+ material?: string;
107
+ size?: string; // Single value here; ProductDoc.attributes.size is a string[].
108
+ season?: string;
109
+ priceMin?: number; // Lower price bound; inclusivity is not visible here.
110
+ priceMax?: number; // Upper price bound (README: "under $100" -> priceMax: 100); inclusivity is not visible here.
111
+ [key: string]: any; // Additional attributes are allowed.
112
+ }
113
+ declare class EcommerceRAGPlugin implements RAGPlugin { // E-commerce RAG plugin for the SnapAgent SDK. Declaration only: private members are untyped and their bodies are not visible.
114
+ name: string; // Plugin name (required by RAGPlugin; value not visible here).
115
+ type: "rag"; // Literal plugin-kind tag.
116
+ priority: number; // Plugin priority; presumably taken from EcommerceRAGConfig.priority — confirm
117
+ private config; // presumably the EcommerceRAGConfig given to the constructor — confirm
118
+ private client; // presumably the MongoDB client — confirm
119
+ private db; // presumably the MongoDB database handle — confirm
120
+ private openai; // presumably the OpenAI client used for attribute extraction — confirm
121
+ private embeddingCache; // Cache backing embedText (see getCacheStats().embeddings).
122
+ private attributeCache; // Cache backing extractAttributes (see getCacheStats().attributes).
123
+ private cacheStats; // presumably the hit/miss counters reported by getCacheStats — confirm
124
+ private cleanupInterval?; // presumably the timer handle created by startCacheCleanup — confirm
125
+ constructor(config: EcommerceRAGConfig); // mongoUri, openaiApiKey, voyageApiKey and tenantId are required by the config type.
126
+ private ensureConnection;
127
+ /**
128
+ * Main retrieval method - called by the SDK
129
+ */
130
+ retrieveContext(message: string, options: { // Resolves to a RAGContext (type defined in @snap-agent/core).
131
+ agentId: string; // Required.
132
+ threadId?: string;
133
+ filters?: Record<string, any>; // Free-form; how filters are applied is not visible here.
134
+ metadata?: Record<string, any>;
135
+ }): Promise<RAGContext>;
136
+ /**
137
+ * Format context for LLM
138
+ */
139
+ formatContext(context: RAGContext): string;
140
+ /**
141
+ * Embed text using Voyage with caching
142
+ */
143
+ private embedText;
144
+ /**
145
+ * Extract attributes from user message using OpenAI with caching
146
+ */
147
+ private extractAttributes;
148
+ /**
149
+ * MongoDB Atlas Vector Search
150
+ */
151
+ private vectorSearch;
152
+ /**
153
+ * Soft rescore based on attributes and metrics
154
+ */
155
+ private softRescore;
156
+ /**
157
+ * Optional Voyage reranking
158
+ */
159
+ private rerank;
160
+ /**
161
+ * Build context string for LLM
162
+ */
163
+ private buildContextString;
164
+ /**
165
+ * Start periodic cache cleanup (remove expired entries)
166
+ */
167
+ private startCacheCleanup;
168
+ /**
169
+ * Clean up expired cache entries
170
+ */
171
+ private cleanupExpiredCache;
172
+ /**
173
+ * Get cache statistics
174
+ */
175
+ getCacheStats(): { // Synchronous; returns one stats object per cache.
176
+ embeddings: {
177
+ size: number; // Current number of entries.
178
+ maxSize: number; // Configured capacity.
179
+ hits: number;
180
+ misses: number;
181
+ hitRate: string; // Returned as a string, not a number (README example: '0.80').
182
+ };
183
+ attributes: {
184
+ size: number; // Current number of entries.
185
+ maxSize: number; // Configured capacity.
186
+ hits: number;
187
+ misses: number;
188
+ hitRate: string; // Returned as a string, not a number (README example: '0.81').
189
+ };
190
+ };
191
+ /**
192
+ * Clear all caches
193
+ */
194
+ clearCache(): void;
195
+ /**
196
+ * Get MongoDB collection
197
+ */
198
+ private getCollection;
199
+ /**
200
+ * Generate embedding for a single text
201
+ */
202
+ private generateEmbedding;
203
+ /**
204
+ * Ingest products into the RAG system
205
+ * Converts RAGDocuments to ProductDocs and indexes them with embeddings
206
+ */
207
+ ingest(documents: RAGDocument[], options?: IngestOptions): Promise<IngestResult>;
208
+ /**
209
+ * Update a single product
210
+ */
211
+ update(id: string, document: Partial<RAGDocument>, options?: IngestOptions): Promise<void>; // NOTE(review): `id` is presumably the SKU, as for delete() — confirm
212
+ /**
213
+ * Delete product(s) by SKU
214
+ */
215
+ delete(ids: string | string[], options?: IngestOptions): Promise<number>; // Accepts one SKU or a list; the resolved number is presumably the deleted count — confirm
216
+ /**
217
+ * Bulk operations for efficient batch processing
218
+ */
219
+ bulk(operations: BulkOperation[], options?: IngestOptions): Promise<BulkResult>;
220
+ /**
221
+ * Generate embeddings for a batch of texts
222
+ */
223
+ private generateEmbeddingsBatch;
224
+ /**
225
+ * Ingest documents from URL source (CSV, JSON, XML, API)
226
+ */
227
+ ingestFromUrl(source: URLSource, options?: IngestOptions): Promise<URLIngestResult>;
228
+ /**
229
+ * Handle webhook payload for real-time updates
230
+ */
231
+ handleWebhook(payload: any, source: string, options?: IngestOptions): Promise<IngestResult>; // NOTE(review): accepted `source` values are not declared; the private Shopify/WooCommerce parsers below suggest those two — confirm
232
+ private buildAuthHeaders; // presumably builds request headers from URLSource.auth — confirm
233
+ private transformJsonToDocuments;
234
+ private transformCsvToDocuments;
235
+ private transformXmlToDocuments;
236
+ private extractByPath; // presumably resolves URLSource.transform.documentPath — confirm
237
+ private extractField; // presumably applies URLSource.transform.fieldMapping — confirm
238
+ private parseShopifyWebhook;
239
+ private parseWooCommerceWebhook;
240
+ /**
241
+ * Cleanup resources and close connections
242
+ */
243
+ disconnect(): Promise<void>;
244
+ }
245
+
246
+ export { type EcommerceRAGConfig, EcommerceRAGPlugin, type ProductDoc, type QueryAttrs, type URLIngestResult, type URLSource };