rust-kgdb 0.4.0 → 0.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,376 @@
1
+ /**
2
+ * Embeddings Example for rust-kgdb TypeScript SDK
3
+ *
4
+ * Demonstrates vector embedding capabilities including:
5
+ * - Storing and retrieving embeddings
6
+ * - Similarity search with HNSW
7
+ * - Composite multi-provider embeddings
8
+ * - Aggregation strategies (RRF, voting, max)
9
+ * - 1-hop ARCADE neighbor cache
10
+ */
11
+
12
+ import { EmbeddingService, AggregationStrategy } from 'rust-kgdb';
13
+
14
+ // =============================================================================
15
+ // Example 1: Basic Embedding Storage
16
+ // =============================================================================
17
+
18
/**
 * Example 1: store one mock embedding per company entity, then read the
 * Apple entry back and print a short preview of it.
 */
async function basicEmbeddingExample() {
  console.log('=== Basic Embedding Storage ===\n');

  const service = new EmbeddingService();

  // Size of every mock vector produced below.
  const dims = 384;

  // Entities to index; `id` is the storage key, `name` is only for display.
  const companies = [
    { id: 'http://example.org/apple', name: 'Apple Inc' },
    { id: 'http://example.org/microsoft', name: 'Microsoft Corp' },
    { id: 'http://example.org/google', name: 'Google LLC' },
    { id: 'http://example.org/amazon', name: 'Amazon.com' },
    { id: 'http://example.org/tesla', name: 'Tesla Inc' }
  ];

  // Mock vectors stand in for a real embedding provider.
  companies.forEach(({ id, name }) => {
    const vector = generateMockEmbedding(dims, id);
    service.storeEmbedding(id, vector);
    console.log(`Stored embedding for ${name} (${vector.length} dims)`);
  });

  // Read one vector back; only the first five components are printed.
  const stored = service.getEmbedding('http://example.org/apple');
  if (stored) {
    const preview = stored.slice(0, 5).join(', ');
    console.log(`\nRetrieved Apple embedding: [${preview}...]`);
  }
  console.log();
}
47
+
48
+ // =============================================================================
49
+ // Example 2: Similarity Search with HNSW
50
+ // =============================================================================
51
+
52
/**
 * Example 2: index a small product catalog with category-biased mock
 * vectors, then ask the service for the entries most similar to one product.
 *
 * `findSimilar` is treated as returning a JSON string that parses to a list
 * of `{ entity, similarity }` records, since that is how the result is used.
 */
async function similaritySearchExample() {
  console.log('=== Similarity Search with HNSW ===\n');

  const service = new EmbeddingService();

  const catalog = [
    { id: 'product/laptop-1', category: 'electronics', name: 'MacBook Pro' },
    { id: 'product/laptop-2', category: 'electronics', name: 'ThinkPad X1' },
    { id: 'product/phone-1', category: 'electronics', name: 'iPhone 15' },
    { id: 'product/phone-2', category: 'electronics', name: 'Galaxy S24' },
    { id: 'product/tablet-1', category: 'electronics', name: 'iPad Pro' },
    { id: 'product/shirt-1', category: 'clothing', name: 'Cotton T-Shirt' },
    { id: 'product/jeans-1', category: 'clothing', name: 'Denim Jeans' },
    { id: 'product/book-1', category: 'books', name: 'AI Handbook' },
    { id: 'product/book-2', category: 'books', name: 'ML Guide' }
  ];

  // One vector per product, derived from its category and name.
  catalog.forEach(item => {
    const vector = generateCategoryEmbedding(384, item.category, item.name);
    service.storeEmbedding(item.id, vector);
  });

  console.log(`Indexed ${catalog.length} products\n`);

  // Query: neighbours of the MacBook Pro entry.
  const queryId = 'product/laptop-1';
  const topK = 5;
  const minSimilarity = 0.5;

  console.log(`Finding top ${topK} products similar to MacBook Pro:`);
  const hits = JSON.parse(service.findSimilar(queryId, topK, minSimilarity));

  for (const hit of hits) {
    // Map the returned entity id back to a catalog row for its display name;
    // an unknown id prints as "undefined" rather than throwing.
    const match = catalog.find(row => row.id === hit.entity);
    console.log(` ${match?.name}: similarity=${hit.similarity.toFixed(4)}`);
  }
  console.log();
}
93
+
94
+ // =============================================================================
95
+ // Example 3: Composite Multi-Provider Embeddings
96
+ // =============================================================================
97
+
98
/**
 * Example 3: store several provider-specific vectors for one entity as a
 * single composite record, then read the composite back.
 *
 * The composite is handed to the service as a JSON string mapping provider
 * name to vector, and the value returned by `getComposite` is parsed as JSON.
 */
async function compositeEmbeddingExample() {
  console.log('=== Composite Multi-Provider Embeddings ===\n');

  const embeddingService = new EmbeddingService();

  // Entity with multiple embedding representations
  const entityId = 'http://example.org/apple-inc';

  // All vectors are 384-dimension mocks; the real providers named in the
  // comments below emit other sizes.
  const compositeEmbeddings = {
    // OpenAI text-embedding-3-small (1536 dims, but we simulate with 384)
    openai: generateProviderEmbedding(384, 'openai', 'Apple Inc technology company'),

    // Voyage AI voyage-2 (1024 dims, simulated)
    voyage: generateProviderEmbedding(384, 'voyage', 'Apple Inc technology company'),

    // Cohere embed-v3 (1024 dims, simulated)
    cohere: generateProviderEmbedding(384, 'cohere', 'Apple Inc technology company'),

    // Local RDF2Vec structural embedding
    rdf2vec: generateProviderEmbedding(384, 'rdf2vec', 'http://example.org/apple-inc')
  };

  // Store as composite embedding
  embeddingService.storeComposite(entityId, JSON.stringify(compositeEmbeddings));

  console.log('Stored composite embedding with providers:');
  // Iterate entries rather than indexing by key: `Object.keys` yields plain
  // `string`s, and indexing the literal's type with a `string` is an
  // implicit-any error under `strict`.
  for (const [provider, vector] of Object.entries(compositeEmbeddings)) {
    console.log(` - ${provider}: ${vector.length} dimensions`);
  }

  // Retrieve composite embedding
  const retrieved = embeddingService.getComposite(entityId);
  if (retrieved) {
    // NOTE(review): assumes the stored composite comes back wrapped under an
    // `embeddings` key — confirm against the SDK's return shape.
    const composite = JSON.parse(retrieved);
    console.log(`\nRetrieved composite with ${Object.keys(composite.embeddings).length} providers`);
  }
  console.log();
}
137
+
138
+ // =============================================================================
139
+ // Example 4: Multi-Provider Similarity with Aggregation
140
+ // =============================================================================
141
+
142
/**
 * Example 4: store three-provider composite vectors for several entities,
 * then run the same composite similarity query once per aggregation strategy
 * and print each strategy's ranked results.
 *
 * `findSimilarComposite` is treated as returning a JSON string that parses to
 * a list of `{ entity, similarity }` records, since that is how it is used.
 */
async function aggregationExample() {
  console.log('=== Multi-Provider Similarity with Aggregation ===\n');

  const service = new EmbeddingService();

  const companyIris = [
    'http://example.org/apple',
    'http://example.org/google',
    'http://example.org/microsoft',
    'http://example.org/amazon',
    'http://example.org/meta'
  ];

  // Each entity gets one mock vector per provider, stored as a JSON composite.
  companyIris.forEach(iri => {
    const perProvider = {
      openai: generateProviderEmbedding(384, 'openai', iri),
      voyage: generateProviderEmbedding(384, 'voyage', iri),
      cohere: generateProviderEmbedding(384, 'cohere', iri)
    };
    service.storeComposite(iri, JSON.stringify(perProvider));
  });

  console.log(`Stored composite embeddings for ${companyIris.length} entities\n`);

  // Query parameters shared by every strategy run.
  const queryEntity = 'http://example.org/apple';
  const topK = 3;
  const minScore = 0.3;

  const strategies: AggregationStrategy[] = ['rrf', 'max', 'voting'];

  for (const strategy of strategies) {
    console.log(`Aggregation Strategy: ${strategy.toUpperCase()}`);

    const ranked = JSON.parse(
      service.findSimilarComposite(queryEntity, topK, minScore, strategy)
    );

    for (const hit of ranked) {
      console.log(` ${hit.entity}: score=${hit.similarity.toFixed(4)}`);
    }
    console.log();
  }
}
192
+
193
+ // =============================================================================
194
+ // Example 5: 1-Hop ARCADE Neighbor Cache
195
+ // =============================================================================
196
+
197
/**
 * Example 5: register a handful of graph edges with the service, list the
 * neighbours of a few nodes, then ask for Alice's similar neighbours.
 *
 * Only the source and destination of each triple are passed to `addEdge`;
 * the relation label in the middle is not used.
 */
async function arcadeNeighborExample() {
  console.log('=== 1-Hop ARCADE Neighbor Cache ===\n');

  const service = new EmbeddingService();

  // [source, relation, destination]
  const triples = [
    ['alice', 'knows', 'bob'],
    ['alice', 'knows', 'charlie'],
    ['bob', 'knows', 'david'],
    ['charlie', 'knows', 'eve'],
    ['david', 'works_with', 'eve']
  ];

  triples.forEach(triple => {
    service.addEdge(triple[0], triple[2]);
  });

  console.log(`Added ${triples.length} edges to ARCADE cache\n`);

  // 'both' is presumably the edge direction selector — confirm against the SDK.
  const probes = ['alice', 'bob', 'charlie', 'eve'];
  probes.forEach(node => {
    const adjacent = JSON.parse(service.getNeighbors(node, 'both'));
    console.log(`${node}'s neighbors: [${adjacent.join(', ')}]`);
  });

  // NOTE(review): no embeddings are stored on this service instance before
  // this call — confirm the query is expected to return results here.
  console.log('\nFinding similar 1-hop neighbors for Alice:');
  const ranked = JSON.parse(service.findSimilarNeighbors('alice', 10, 0.3));
  for (const hit of ranked) {
    console.log(` ${hit.entity}: similarity=${hit.similarity.toFixed(4)}`);
  }
  console.log();
}
236
+
237
+ // =============================================================================
238
+ // Example 6: Embedding Statistics and Metrics
239
+ // =============================================================================
240
+
241
/**
 * Example 6: load 100 mock embeddings into a fresh service, then print the
 * values reported by `getMetrics` and `getCacheStats`.
 *
 * Both calls are treated as returning JSON strings; the field names read
 * below are the ones this example expects to find in the parsed objects.
 */
async function metricsExample() {
  console.log('=== Embedding Statistics and Metrics ===\n');

  const service = new EmbeddingService();

  // Test data: entity-0 .. entity-99, one 384-dimension mock vector each.
  const ids = Array.from({ length: 100 }, (_, n) => `entity-${n}`);
  ids.forEach(id => {
    service.storeEmbedding(id, generateMockEmbedding(384, id));
  });

  const metrics = JSON.parse(service.getMetrics());

  console.log('Embedding Service Metrics:');
  console.log(` Total embeddings: ${metrics.embedding_count}`);
  console.log(` HNSW index size: ${metrics.hnsw_size}`);
  console.log(` Storage size (bytes): ${metrics.storage_bytes}`);

  const cacheStats = JSON.parse(service.getCacheStats());
  // hit_rate is scaled by 100 for display as a percentage.
  const hitRatePercent = (cacheStats.hit_rate * 100).toFixed(1);

  console.log('\nARCADE Cache Statistics:');
  console.log(` Cache entries: ${cacheStats.entries}`);
  console.log(` Hit rate: ${hitRatePercent}%`);
  console.log(` Memory usage: ${cacheStats.memory_bytes} bytes`);
  console.log();
}
272
+
273
+ // =============================================================================
274
+ // Helper Functions
275
+ // =============================================================================
276
+
277
/**
 * Scale a vector to unit Euclidean length.
 *
 * A vector whose norm is 0 (empty, or every component is 0) cannot be
 * normalized; it is returned as an unchanged copy instead of being divided
 * by zero, which would turn every component into NaN.
 *
 * @param vector - Components to normalize; not mutated.
 * @returns A new array holding the normalized components.
 */
function normalizeToUnitLength(vector: readonly number[]): number[] {
  const norm = Math.sqrt(vector.reduce((sum, v) => sum + v * v, 0));
  if (norm === 0) {
    return [...vector];
  }
  return vector.map(v => v / norm);
}

/**
 * Generate a deterministic mock embedding based on entity ID.
 *
 * Component `i` is `sin(hash + 0.1 * i)` rescaled into [0, 1], where `hash`
 * is `simpleHash(entityId)`; the result is then scaled to unit length.
 *
 * @param dimensions - Number of components to generate.
 * @param entityId - Seed string; equal IDs yield equal vectors.
 * @returns The unit-length mock vector (empty when `dimensions` is 0).
 */
function generateMockEmbedding(dimensions: number, entityId: string): number[] {
  const hash = simpleHash(entityId);
  const embedding: number[] = [];

  for (let i = 0; i < dimensions; i++) {
    // Pseudo-random value in [0, 1] derived from the hash and the index.
    embedding.push(Math.sin(hash + i * 0.1) * 0.5 + 0.5);
  }

  return normalizeToUnitLength(embedding);
}

/**
 * Generate a deterministic mock embedding with a category bias.
 *
 * Each component is 0.5 plus a category term (amplitude 0.3) and a name term
 * (amplitude 0.5), clamped to [0, 1], so vectors that share a category share
 * the category term. The result is scaled to unit length; if clamping leaves
 * every component at 0, the zero vector is returned as-is.
 *
 * @param dimensions - Number of components to generate.
 * @param category - Seed for the shared category term.
 * @param name - Seed for the item-specific term.
 * @returns The unit-length (or all-zero) mock vector.
 */
function generateCategoryEmbedding(dimensions: number, category: string, name: string): number[] {
  const categoryHash = simpleHash(category);
  const nameHash = simpleHash(name);
  const embedding: number[] = [];

  for (let i = 0; i < dimensions; i++) {
    // Combine category and name influence.
    const categoryInfluence = Math.sin(categoryHash + i * 0.05) * 0.3;
    const nameInfluence = Math.sin(nameHash + i * 0.1) * 0.5;
    const value = 0.5 + categoryInfluence + nameInfluence;
    embedding.push(Math.max(0, Math.min(1, value)));
  }

  return normalizeToUnitLength(embedding);
}

/**
 * Generate a deterministic mock embedding that varies by provider.
 *
 * Each component is 0.5 plus a provider term (amplitude 0.2) and a text term
 * (amplitude 0.6), clamped to [0, 1]. The result is scaled to unit length;
 * if clamping leaves every component at 0, the zero vector is returned as-is.
 *
 * @param dimensions - Number of components to generate.
 * @param provider - Seed for the provider-specific term.
 * @param text - Seed for the content term.
 * @returns The unit-length (or all-zero) mock vector.
 */
function generateProviderEmbedding(dimensions: number, provider: string, text: string): number[] {
  const providerHash = simpleHash(provider);
  const textHash = simpleHash(text);
  const embedding: number[] = [];

  for (let i = 0; i < dimensions; i++) {
    // Each provider contributes a slightly different bias per component.
    const providerBias = Math.cos(providerHash + i * 0.02) * 0.2;
    const textValue = Math.sin(textHash + i * 0.08) * 0.6;
    const value = 0.5 + providerBias + textValue;
    embedding.push(Math.max(0, Math.min(1, value)));
  }

  return normalizeToUnitLength(embedding);
}

/**
 * Simple string hash function.
 *
 * Folds the string's UTF-16 code units with `hash * 31 + unit`, wrapping to a
 * signed 32-bit integer after every step, and returns the absolute value.
 *
 * @param str - String to hash.
 * @returns A non-negative integer; 0 for the empty string.
 */
function simpleHash(str: string): number {
  let hash = 0;
  for (let i = 0; i < str.length; i++) {
    // (hash << 5) - hash is hash * 31; `| 0` wraps to a 32-bit integer.
    hash = ((hash << 5) - hash + str.charCodeAt(i)) | 0;
  }
  return Math.abs(hash);
}
349
+
350
+ // =============================================================================
351
+ // Run All Examples
352
+ // =============================================================================
353
+
354
/**
 * Run every example in order, one after another, between a start and a
 * completion banner. If any example throws, the error is logged and the
 * process exits with status 1.
 */
async function main() {
  const rule = '========================================';

  console.log(rule);
  console.log(' Embeddings SDK Examples');
  console.log(`${rule}\n`);

  // Executed sequentially, in this order.
  const examples = [
    basicEmbeddingExample,
    similaritySearchExample,
    compositeEmbeddingExample,
    aggregationExample,
    arcadeNeighborExample,
    metricsExample
  ];

  try {
    for (const runExample of examples) {
      await runExample();
    }

    console.log(rule);
    console.log(' All examples completed successfully!');
    console.log(rule);
  } catch (error) {
    console.error('Error running examples:', error);
    process.exit(1);
  }
}

// main() catches its own failures, so the returned promise is not awaited.
void main();