rust-kgdb 0.4.1 → 0.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1236 -1971
- package/examples/business-assertions.test.ts +1196 -0
- package/examples/core-concepts-demo.ts +502 -0
- package/examples/datalog-example.ts +478 -0
- package/examples/embeddings-example.ts +376 -0
- package/examples/graphframes-example.ts +367 -0
- package/examples/hypermind-fraud-underwriter.ts +669 -0
- package/examples/pregel-example.ts +399 -0
- package/package.json +3 -2
|
@@ -0,0 +1,376 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Embeddings Example for rust-kgdb TypeScript SDK
|
|
3
|
+
*
|
|
4
|
+
* Demonstrates vector embedding capabilities including:
|
|
5
|
+
* - Storing and retrieving embeddings
|
|
6
|
+
* - Similarity search with HNSW
|
|
7
|
+
* - Composite multi-provider embeddings
|
|
8
|
+
* - Aggregation strategies (RRF, voting, max)
|
|
9
|
+
* - 1-hop ARCADE neighbor cache
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
import { EmbeddingService, AggregationStrategy } from 'rust-kgdb';
|
|
13
|
+
|
|
14
|
+
// =============================================================================
|
|
15
|
+
// Example 1: Basic Embedding Storage
|
|
16
|
+
// =============================================================================
|
|
17
|
+
|
|
18
|
+
/**
 * Example 1: stores a mock 384-component embedding for each of five company
 * entities, then reads the Apple embedding back and prints its first five
 * components.
 */
async function basicEmbeddingExample() {
  console.log('=== Basic Embedding Storage ===\n');

  const service = new EmbeddingService();

  // Entities to index, keyed by IRI.
  const companies = [
    { id: 'http://example.org/apple', name: 'Apple Inc' },
    { id: 'http://example.org/microsoft', name: 'Microsoft Corp' },
    { id: 'http://example.org/google', name: 'Google LLC' },
    { id: 'http://example.org/amazon', name: 'Amazon.com' },
    { id: 'http://example.org/tesla', name: 'Tesla Inc' }
  ];

  // Mock vectors stand in for a real embedding provider here.
  companies.forEach(({ id, name }) => {
    const vector = generateMockEmbedding(384, id);
    service.storeEmbedding(id, vector);
    console.log(`Stored embedding for ${name} (${vector.length} dims)`);
  });

  // Read one embedding back; only print when the lookup returned something.
  const stored = service.getEmbedding('http://example.org/apple');
  if (stored) {
    const preview = stored.slice(0, 5).join(', ');
    console.log(`\nRetrieved Apple embedding: [${preview}...]`);
  }
  console.log();
}
|
|
47
|
+
|
|
48
|
+
// =============================================================================
|
|
49
|
+
// Example 2: Similarity Search with HNSW
|
|
50
|
+
// =============================================================================
|
|
51
|
+
|
|
52
|
+
/**
 * Example 2: indexes a small product catalog with category-biased mock
 * embeddings and prints the entries reported as most similar to the
 * MacBook Pro product.
 *
 * `findSimilar` is called with (entity id, k, threshold) and its return value
 * is parsed as JSON; each parsed item is read as `{ entity, similarity }`.
 * NOTE(review): that result shape is taken from how this example consumes it —
 * confirm against the SDK typings.
 */
async function similaritySearchExample() {
  console.log('=== Similarity Search with HNSW ===\n');

  const embeddingService = new EmbeddingService();

  // Create a product catalog with embeddings
  const products = [
    { id: 'product/laptop-1', category: 'electronics', name: 'MacBook Pro' },
    { id: 'product/laptop-2', category: 'electronics', name: 'ThinkPad X1' },
    { id: 'product/phone-1', category: 'electronics', name: 'iPhone 15' },
    { id: 'product/phone-2', category: 'electronics', name: 'Galaxy S24' },
    { id: 'product/tablet-1', category: 'electronics', name: 'iPad Pro' },
    { id: 'product/shirt-1', category: 'clothing', name: 'Cotton T-Shirt' },
    { id: 'product/jeans-1', category: 'clothing', name: 'Denim Jeans' },
    { id: 'product/book-1', category: 'books', name: 'AI Handbook' },
    { id: 'product/book-2', category: 'books', name: 'ML Guide' }
  ];

  // Store embeddings with category-aware vectors
  for (const product of products) {
    const embedding = generateCategoryEmbedding(384, product.category, product.name);
    embeddingService.storeEmbedding(product.id, embedding);
  }

  console.log(`Indexed ${products.length} products\n`);

  // Find similar products to MacBook Pro
  const queryId = 'product/laptop-1';
  const k = 5;
  const threshold = 0.5;

  console.log(`Finding top ${k} products similar to MacBook Pro:`);
  const similar = embeddingService.findSimilar(queryId, k, threshold);
  const results = JSON.parse(similar);

  for (const result of results) {
    const product = products.find(p => p.id === result.entity);
    // Fall back to the raw entity id so an unmatched result never prints "undefined".
    const label = product?.name ?? result.entity;
    console.log(` ${label}: similarity=${result.similarity.toFixed(4)}`);
  }
  console.log();
}
|
|
93
|
+
|
|
94
|
+
// =============================================================================
|
|
95
|
+
// Example 3: Composite Multi-Provider Embeddings
|
|
96
|
+
// =============================================================================
|
|
97
|
+
|
|
98
|
+
/**
 * Example 3: stores four provider-specific mock embeddings for one entity as a
 * single composite (serialized with `JSON.stringify`), lists the stored
 * providers, and reads the composite back.
 */
async function compositeEmbeddingExample() {
  console.log('=== Composite Multi-Provider Embeddings ===\n');

  const embeddingService = new EmbeddingService();

  // Entity with multiple embedding representations
  const entityId = 'http://example.org/apple-inc';
  const description = 'Apple Inc technology company';

  // Store embeddings from multiple providers (all simulated at 384 dimensions)
  const compositeEmbeddings = {
    // OpenAI text-embedding-3-small (1536 dims, but we simulate with 384)
    openai: generateProviderEmbedding(384, 'openai', description),

    // Voyage AI voyage-2 (1024 dims, simulated)
    voyage: generateProviderEmbedding(384, 'voyage', description),

    // Cohere embed-v3 (1024 dims, simulated)
    cohere: generateProviderEmbedding(384, 'cohere', description),

    // Local RDF2Vec structural embedding, seeded from the entity IRI
    rdf2vec: generateProviderEmbedding(384, 'rdf2vec', entityId)
  };

  // Store as composite embedding
  embeddingService.storeComposite(entityId, JSON.stringify(compositeEmbeddings));

  console.log('Stored composite embedding with providers:');
  // Object.entries keeps each vector typed as number[]; indexing the literal
  // with a plain `string` key from Object.keys is an implicit-any error under `strict`.
  for (const [provider, vector] of Object.entries(compositeEmbeddings)) {
    console.log(` - ${provider}: ${vector.length} dimensions`);
  }

  // Retrieve composite embedding
  const retrieved = embeddingService.getComposite(entityId);
  if (retrieved) {
    const composite = JSON.parse(retrieved);
    // NOTE(review): assumes the stored composite comes back wrapped as
    // `{ embeddings: { ... } }`; the payload written above has no such wrapper —
    // confirm against the SDK, otherwise Object.keys(undefined) throws here.
    console.log(`\nRetrieved composite with ${Object.keys(composite.embeddings).length} providers`);
  }
  console.log();
}
|
|
137
|
+
|
|
138
|
+
// =============================================================================
|
|
139
|
+
// Example 4: Multi-Provider Similarity with Aggregation
|
|
140
|
+
// =============================================================================
|
|
141
|
+
|
|
142
|
+
/**
 * Example 4: stores three-provider composite embeddings for five entities and
 * queries neighbors of the Apple entity once per aggregation strategy
 * ('rrf', 'max', 'voting'), printing each returned entity with its score.
 */
async function aggregationExample() {
  console.log('=== Multi-Provider Similarity with Aggregation ===\n');

  const service = new EmbeddingService();

  const entities = [
    'http://example.org/apple',
    'http://example.org/google',
    'http://example.org/microsoft',
    'http://example.org/amazon',
    'http://example.org/meta'
  ];

  // One mock vector per provider, all seeded from the entity id.
  const buildComposite = (id: string) => ({
    openai: generateProviderEmbedding(384, 'openai', id),
    voyage: generateProviderEmbedding(384, 'voyage', id),
    cohere: generateProviderEmbedding(384, 'cohere', id)
  });

  entities.forEach(id => {
    service.storeComposite(id, JSON.stringify(buildComposite(id)));
  });

  console.log(`Stored composite embeddings for ${entities.length} entities\n`);

  const queryEntity = 'http://example.org/apple';
  const k = 3;
  const threshold = 0.3;

  // Run the same query under each strategy so the outputs can be compared.
  const strategies: AggregationStrategy[] = ['rrf', 'max', 'voting'];

  strategies.forEach(strategy => {
    console.log(`Aggregation Strategy: ${strategy.toUpperCase()}`);

    const rawResults = service.findSimilarComposite(queryEntity, k, threshold, strategy);

    // The call's return value is parsed as a JSON array of { entity, similarity } items.
    const hits = JSON.parse(rawResults);
    for (const hit of hits) {
      console.log(` ${hit.entity}: score=${hit.similarity.toFixed(4)}`);
    }
    console.log();
  });
}
|
|
192
|
+
|
|
193
|
+
// =============================================================================
|
|
194
|
+
// Example 5: 1-Hop ARCADE Neighbor Cache
|
|
195
|
+
// =============================================================================
|
|
196
|
+
|
|
197
|
+
/**
 * Example 5: registers five (source, predicate, target) edges with the
 * service, prints the neighbors reported for four entities, then prints the
 * results of a similar-neighbor query for 'alice'.
 *
 * Only the source and target of each triple are passed to `addEdge`; the
 * predicate label is not used.
 */
async function arcadeNeighborExample() {
  console.log('=== 1-Hop ARCADE Neighbor Cache ===\n');

  const service = new EmbeddingService();

  // Small knowledge graph expressed as triples.
  const edges = [
    ['alice', 'knows', 'bob'],
    ['alice', 'knows', 'charlie'],
    ['bob', 'knows', 'david'],
    ['charlie', 'knows', 'eve'],
    ['david', 'works_with', 'eve']
  ];

  edges.forEach(triple => {
    const source = triple[0];
    const target = triple[2];
    service.addEdge(source, target);
  });

  console.log(`Added ${edges.length} edges to ARCADE cache\n`);

  // Neighbor lookups are requested with the 'both' direction argument.
  const testEntities = ['alice', 'bob', 'charlie', 'eve'];
  testEntities.forEach(entity => {
    const neighborList = JSON.parse(service.getNeighbors(entity, 'both'));
    console.log(`${entity}'s neighbors: [${neighborList.join(', ')}]`);
  });

  // Similar-neighbor query for alice with arguments (10, 0.3).
  console.log('\nFinding similar 1-hop neighbors for Alice:');
  const rawSimilar = service.findSimilarNeighbors('alice', 10, 0.3);
  const hits = JSON.parse(rawSimilar);
  for (const hit of hits) {
    console.log(` ${hit.entity}: similarity=${hit.similarity.toFixed(4)}`);
  }
  console.log();
}
|
|
236
|
+
|
|
237
|
+
// =============================================================================
|
|
238
|
+
// Example 6: Embedding Statistics and Metrics
|
|
239
|
+
// =============================================================================
|
|
240
|
+
|
|
241
|
+
/**
 * Example 6: stores 100 mock embeddings, then prints the fields this example
 * reads from the JSON returned by `getMetrics()` and `getCacheStats()`.
 */
async function metricsExample() {
  console.log('=== Embedding Statistics and Metrics ===\n');

  const service = new EmbeddingService();

  // Populate with entity-0 … entity-99.
  let index = 0;
  while (index < 100) {
    const entityId = `entity-${index}`;
    service.storeEmbedding(entityId, generateMockEmbedding(384, entityId));
    index += 1;
  }

  // Service-level metrics arrive as a JSON string.
  const metrics = JSON.parse(service.getMetrics());

  console.log('Embedding Service Metrics:');
  console.log(` Total embeddings: ${metrics.embedding_count}`);
  console.log(` HNSW index size: ${metrics.hnsw_size}`);
  console.log(` Storage size (bytes): ${metrics.storage_bytes}`);

  // Cache statistics arrive as a JSON string too; hit_rate is scaled by 100 for display.
  const cacheStats = JSON.parse(service.getCacheStats());
  const hitRatePercent = (cacheStats.hit_rate * 100).toFixed(1);

  console.log('\nARCADE Cache Statistics:');
  console.log(` Cache entries: ${cacheStats.entries}`);
  console.log(` Hit rate: ${hitRatePercent}%`);
  console.log(` Memory usage: ${cacheStats.memory_bytes} bytes`);
  console.log();
}
|
|
272
|
+
|
|
273
|
+
// =============================================================================
|
|
274
|
+
// Helper Functions
|
|
275
|
+
// =============================================================================
|
|
276
|
+
|
|
277
|
+
/**
 * Builds a deterministic mock embedding from an entity id.
 *
 * Component i is `sin(hash + i * 0.1) * 0.5 + 0.5` (so it lies in [0, 1]),
 * where `hash` is `simpleHash(entityId)`; the vector is then divided by its
 * Euclidean norm.
 *
 * @param dimensions number of components to generate
 * @param entityId   string whose hash seeds the vector
 * @returns the normalized vector
 */
function generateMockEmbedding(dimensions: number, entityId: string): number[] {
  const seed = simpleHash(entityId);
  const raw: number[] = [];

  let index = 0;
  while (index < dimensions) {
    raw.push(Math.sin(seed + index * 0.1) * 0.5 + 0.5);
    index += 1;
  }

  // Euclidean length, accumulated left to right.
  let sumOfSquares = 0;
  for (const component of raw) {
    sumOfSquares += component * component;
  }
  const length = Math.sqrt(sumOfSquares);

  return raw.map(component => component / length);
}
|
|
294
|
+
|
|
295
|
+
/**
 * Builds a deterministic mock embedding whose components mix a category
 * signal with a name signal.
 *
 * Component i is `0.5 + 0.3 * sin(categoryHash + i * 0.05) +
 * 0.5 * sin(nameHash + i * 0.1)`, clamped to [0, 1]. The vector is then
 * divided by its Euclidean norm.
 *
 * @param dimensions number of components to generate
 * @param category   string whose hash drives the category term
 * @param name       string whose hash drives the name term
 * @returns the normalized vector; if every component clamps to 0 (possible
 *          for small `dimensions`) the all-zero vector is returned as-is
 *          instead of dividing by zero and yielding NaN
 */
function generateCategoryEmbedding(dimensions: number, category: string, name: string): number[] {
  const categoryHash = simpleHash(category);
  const nameHash = simpleHash(name);
  const embedding: number[] = [];

  for (let i = 0; i < dimensions; i++) {
    // Combine category and name influence
    const categoryInfluence = Math.sin(categoryHash + i * 0.05) * 0.3;
    const nameInfluence = Math.sin(nameHash + i * 0.1) * 0.5;
    const value = 0.5 + categoryInfluence + nameInfluence;
    // The raw sum ranges over [-0.3, 1.3]; clamp it into [0, 1].
    embedding.push(Math.max(0, Math.min(1, value)));
  }

  // Normalize, guarding the degenerate all-zero case (0 / 0 would be NaN).
  const norm = Math.sqrt(embedding.reduce((sum, v) => sum + v * v, 0));
  if (norm === 0) {
    return embedding;
  }
  return embedding.map(v => v / norm);
}
|
|
315
|
+
|
|
316
|
+
/**
 * Builds a deterministic mock embedding that varies by provider name and text.
 *
 * Component i is `0.5 + 0.2 * cos(providerHash + i * 0.02) +
 * 0.6 * sin(textHash + i * 0.08)`, clamped to [0, 1]. The vector is then
 * divided by its Euclidean norm.
 *
 * @param dimensions number of components to generate
 * @param provider   provider label whose hash drives the bias term
 * @param text       text whose hash drives the main term
 * @returns the normalized vector; if every component clamps to 0 (possible
 *          for small `dimensions`) the all-zero vector is returned as-is
 *          instead of dividing by zero and yielding NaN
 */
function generateProviderEmbedding(dimensions: number, provider: string, text: string): number[] {
  const providerHash = simpleHash(provider);
  const textHash = simpleHash(text);
  const embedding: number[] = [];

  for (let i = 0; i < dimensions; i++) {
    // Each provider contributes a slowly varying bias on top of the text term
    const providerBias = Math.cos(providerHash + i * 0.02) * 0.2;
    const textValue = Math.sin(textHash + i * 0.08) * 0.6;
    const value = 0.5 + providerBias + textValue;
    // The raw sum ranges over [-0.3, 1.3]; clamp it into [0, 1].
    embedding.push(Math.max(0, Math.min(1, value)));
  }

  // Normalize, guarding the degenerate all-zero case (0 / 0 would be NaN).
  const norm = Math.sqrt(embedding.reduce((sum, v) => sum + v * v, 0));
  if (norm === 0) {
    return embedding;
  }
  return embedding.map(v => v / norm);
}
|
|
336
|
+
|
|
337
|
+
/**
 * Simple, deterministic string hash.
 *
 * Folds each UTF-16 code unit into a running value as `hash * 31 + unit`,
 * truncated to a signed 32-bit integer after every step, and returns the
 * absolute value of the final result.
 *
 * @param str input string (the empty string hashes to 0)
 * @returns a non-negative integer
 */
function simpleHash(str: string): number {
  let hash = 0;
  for (let i = 0; i < str.length; i++) {
    // (hash << 5) - hash === hash * 31; `| 0` truncates to a signed 32-bit integer.
    hash = ((hash << 5) - hash + str.charCodeAt(i)) | 0;
  }
  return Math.abs(hash);
}
|
|
349
|
+
|
|
350
|
+
// =============================================================================
|
|
351
|
+
// Run All Examples
|
|
352
|
+
// =============================================================================
|
|
353
|
+
|
|
354
|
+
/**
 * Runs every example in order, awaiting each one before starting the next.
 * Any thrown error is logged and the process exits with status 1.
 */
async function main() {
  const banner = '========================================';

  console.log(banner);
  console.log(' Embeddings SDK Examples');
  console.log(`${banner}\n`);

  // Executed strictly in this order.
  const examples = [
    basicEmbeddingExample,
    similaritySearchExample,
    compositeEmbeddingExample,
    aggregationExample,
    arcadeNeighborExample,
    metricsExample
  ];

  try {
    for (const runExample of examples) {
      await runExample();
    }

    console.log(banner);
    console.log(' All examples completed successfully!');
    console.log(banner);
  } catch (error) {
    console.error('Error running examples:', error);
    process.exit(1);
  }
}

main();
|