rust-kgdb 0.3.1 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +92 -0
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -16,6 +16,8 @@
|
|
|
16
16
|
| **GraphDB** | Core RDF/SPARQL database with 100% W3C compliance |
|
|
17
17
|
| **GraphFrames** | Spark-compatible graph analytics (PageRank, triangles, motifs) |
|
|
18
18
|
| **EmbeddingService** | Vector similarity search, text search, multi-provider embeddings |
|
|
19
|
+
| **Embedding Triggers** | Automatic embedding generation on INSERT/UPDATE/DELETE |
|
|
20
|
+
| **Embedding Providers** | OpenAI, Voyage, Cohere, Anthropic, Mistral, Jina, Ollama, HF-TEI |
|
|
19
21
|
| **DatalogProgram** | Rule-based reasoning with transitive closure |
|
|
20
22
|
| **Pregel** | Bulk Synchronous Parallel graph processing |
|
|
21
23
|
| **Hypergraph** | Native hyperedge support beyond RDF triples |
|
|
@@ -178,6 +180,11 @@ console.log('Composite embedding:', composite ? 'stored' : 'not found')
|
|
|
178
180
|
// Count composite embeddings
|
|
179
181
|
console.log('Total composites:', service.countComposites())
|
|
180
182
|
|
|
183
|
+
// === Composite Similarity Search (RRF Aggregation) ===
|
|
184
|
+
// Find similar entities by fusing each provider's ranking with Reciprocal Rank Fusion (RRF)
|
|
185
|
+
const compositeSimilar = JSON.parse(service.findSimilarComposite('product_123', 10, 0.5, 'rrf'))
|
|
186
|
+
console.log('Similar (composite RRF):', compositeSimilar)
|
|
187
|
+
|
|
181
188
|
// === Use Case: Semantic Product Search ===
|
|
182
189
|
// Store product embeddings
|
|
183
190
|
const products = ['laptop', 'phone', 'tablet', 'keyboard', 'mouse']
|
|
@@ -192,6 +199,91 @@ const relatedToLaptop = JSON.parse(service.findSimilar('laptop', 5, 0.0))
|
|
|
192
199
|
console.log('Products similar to laptop:', relatedToLaptop)
|
|
193
200
|
```
|
|
194
201
|
|
|
202
|
+
### 3b. Embedding Triggers (Automatic Embedding Generation)
|
|
203
|
+
|
|
204
|
+
```javascript
|
|
205
|
+
// Triggers automatically generate embeddings when data changes
|
|
206
|
+
// Configure triggers to fire on INSERT/UPDATE/DELETE events
|
|
207
|
+
|
|
208
|
+
// Example: Auto-embed new entities on insert
|
|
209
|
+
const triggerConfig = {
|
|
210
|
+
name: 'auto_embed_on_insert',
|
|
211
|
+
event: 'AfterInsert',
|
|
212
|
+
action: {
|
|
213
|
+
type: 'GenerateEmbedding',
|
|
214
|
+
source: 'Subject', // Embed the subject of the triple
|
|
215
|
+
provider: 'openai' // Use OpenAI provider
|
|
216
|
+
}
|
|
217
|
+
}
|
|
218
|
+
|
|
219
|
+
// Multiple triggers for different providers
|
|
220
|
+
const triggers = [
|
|
221
|
+
{ name: 'embed_openai', provider: 'openai' },
|
|
222
|
+
{ name: 'embed_voyage', provider: 'voyage' },
|
|
223
|
+
{ name: 'embed_cohere', provider: 'cohere' }
|
|
224
|
+
]
|
|
225
|
+
|
|
226
|
+
// Each trigger fires independently; the per-provider embeddings they produce together form a composite embedding
|
|
227
|
+
```
|
|
228
|
+
|
|
229
|
+
### 3c. Embedding Providers (Multi-Provider Architecture)
|
|
230
|
+
|
|
231
|
+
```javascript
|
|
232
|
+
// rust-kgdb supports multiple embedding providers:
|
|
233
|
+
//
|
|
234
|
+
// Built-in Providers:
|
|
235
|
+
// - 'openai' → text-embedding-3-small (1536 dimensions by default; reducible, e.g. to 384)
|
|
236
|
+
// - 'voyage' → voyage-2, voyage-lite-02-instruct
|
|
237
|
+
// - 'cohere' → embed-v3 family (e.g. embed-english-v3.0)
|
|
238
|
+
// - 'anthropic' → Voyage models (served via Anthropic's Voyage partnership)
|
|
239
|
+
// - 'mistral' → mistral-embed
|
|
240
|
+
// - 'jina' → jina-embeddings-v2
|
|
241
|
+
// - 'ollama' → Local models (llama, mistral, etc.)
|
|
242
|
+
// - 'hf-tei' → HuggingFace Text Embedding Inference
|
|
243
|
+
//
|
|
244
|
+
// Provider Configuration (Rust-side):
|
|
245
|
+
|
|
246
|
+
const providerConfig = {
|
|
247
|
+
providers: {
|
|
248
|
+
openai: {
|
|
249
|
+
api_key: process.env.OPENAI_API_KEY,
|
|
250
|
+
model: 'text-embedding-3-small',
|
|
251
|
+
dimensions: 384
|
|
252
|
+
},
|
|
253
|
+
voyage: {
|
|
254
|
+
api_key: process.env.VOYAGE_API_KEY,
|
|
255
|
+
model: 'voyage-2',
|
|
256
|
+
dimensions: 1024
|
|
257
|
+
},
|
|
258
|
+
cohere: {
|
|
259
|
+
api_key: process.env.COHERE_API_KEY,
|
|
260
|
+
model: 'embed-english-v3.0',
|
|
261
|
+
dimensions: 384
|
|
262
|
+
},
|
|
263
|
+
ollama: {
|
|
264
|
+
base_url: 'http://localhost:11434',
|
|
265
|
+
model: 'nomic-embed-text',
|
|
266
|
+
dimensions: 768
|
|
267
|
+
}
|
|
268
|
+
},
|
|
269
|
+
default_provider: 'openai'
|
|
270
|
+
}
|
|
271
|
+
|
|
272
|
+
// Why Multi-Provider?
|
|
273
|
+
// Google Research (arxiv.org/abs/2508.21038) shows single-vector embeddings hit
|
|
274
|
+
// a "recall ceiling" - different providers capture different semantic aspects:
|
|
275
|
+
// - OpenAI: General semantic understanding
|
|
276
|
+
// - Voyage: Domain-specific (legal, financial, code)
|
|
277
|
+
// - Cohere: Multilingual support
|
|
278
|
+
// - Ollama: Privacy-preserving local inference
|
|
279
|
+
|
|
280
|
+
// Aggregation Strategies for composite search:
|
|
281
|
+
// - 'rrf' → Reciprocal Rank Fusion (recommended)
|
|
282
|
+
// - 'max' → Maximum score across providers
|
|
283
|
+
// - 'avg' → Weighted average
|
|
284
|
+
// - 'voting' → Consensus (entity must appear in N providers)
|
|
285
|
+
```
|
|
286
|
+
|
|
195
287
|
### 4. DatalogProgram (Rule-Based Reasoning)
|
|
196
288
|
|
|
197
289
|
```javascript
|
package/package.json
CHANGED