rust-kgdb 0.3.1 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/README.md +92 -0
  2. package/package.json +1 -1
package/README.md CHANGED
@@ -16,6 +16,8 @@
16
16
  | **GraphDB** | Core RDF/SPARQL database with 100% W3C compliance |
17
17
  | **GraphFrames** | Spark-compatible graph analytics (PageRank, triangles, motifs) |
18
18
  | **EmbeddingService** | Vector similarity search, text search, multi-provider embeddings |
19
+ | **Embedding Triggers** | Automatic embedding generation on INSERT/UPDATE/DELETE |
20
+ | **Embedding Providers** | OpenAI, Voyage, Cohere, Anthropic, Mistral, Jina, Ollama, HF-TEI |
19
21
  | **DatalogProgram** | Rule-based reasoning with transitive closure |
20
22
  | **Pregel** | Bulk Synchronous Parallel graph processing |
21
23
  | **Hypergraph** | Native hyperedge support beyond RDF triples |
@@ -178,6 +180,11 @@ console.log('Composite embedding:', composite ? 'stored' : 'not found')
178
180
  // Count composite embeddings
179
181
  console.log('Total composites:', service.countComposites())
180
182
 
183
+ // === Composite Similarity Search (RRF Aggregation) ===
184
+ // Find similar using Reciprocal Rank Fusion across multiple providers
185
+ const compositeSimilar = JSON.parse(service.findSimilarComposite('product_123', 10, 0.5, 'rrf'))
186
+ console.log('Similar (composite RRF):', compositeSimilar)
187
+
181
188
  // === Use Case: Semantic Product Search ===
182
189
  // Store product embeddings
183
190
  const products = ['laptop', 'phone', 'tablet', 'keyboard', 'mouse']
@@ -192,6 +199,91 @@ const relatedToLaptop = JSON.parse(service.findSimilar('laptop', 5, 0.0))
192
199
  console.log('Products similar to laptop:', relatedToLaptop)
193
200
  ```
194
201
 
202
+ ### 3b. Embedding Triggers (Automatic Embedding Generation)
203
+
204
+ ```javascript
205
+ // Triggers automatically generate embeddings when data changes
206
+ // Configure triggers to fire on INSERT/UPDATE/DELETE events
207
+
208
+ // Example: Auto-embed new entities on insert
209
+ const triggerConfig = {
210
+ name: 'auto_embed_on_insert',
211
+ event: 'AfterInsert',
212
+ action: {
213
+ type: 'GenerateEmbedding',
214
+ source: 'Subject', // Embed the subject of the triple
215
+ provider: 'openai' // Use OpenAI provider
216
+ }
217
+ }
218
+
219
+ // Multiple triggers for different providers
220
+ const triggers = [
221
+ { name: 'embed_openai', provider: 'openai' },
222
+ { name: 'embed_voyage', provider: 'voyage' },
223
+ { name: 'embed_cohere', provider: 'cohere' }
224
+ ]
225
+
226
+ // Each trigger fires independently, creating composite embeddings
227
+ ```
228
+
229
+ ### 3c. Embedding Providers (Multi-Provider Architecture)
230
+
231
+ ```javascript
232
+ // rust-kgdb supports multiple embedding providers:
233
+ //
234
+ // Built-in Providers:
235
+ // - 'openai' → text-embedding-3-small (1536 dim by default; can be reduced, e.g. to 384, via `dimensions`)
236
+ // - 'voyage' → voyage-2, voyage-lite-02-instruct
237
+ // - 'cohere' → embed-english-v3.0 (Embed v3 family)
238
+ // - 'anthropic' → Via Voyage partnership
239
+ // - 'mistral' → mistral-embed
240
+ // - 'jina' → jina-embeddings-v2
241
+ // - 'ollama' → Local models (llama, mistral, etc.)
242
+ // - 'hf-tei' → HuggingFace Text Embedding Inference
243
+ //
244
+ // Provider Configuration (consumed on the Rust side; shown here as a JavaScript object):
245
+
246
+ const providerConfig = {
247
+ providers: {
248
+ openai: {
249
+ api_key: process.env.OPENAI_API_KEY,
250
+ model: 'text-embedding-3-small',
251
+ dimensions: 384
252
+ },
253
+ voyage: {
254
+ api_key: process.env.VOYAGE_API_KEY,
255
+ model: 'voyage-2',
256
+ dimensions: 1024
257
+ },
258
+ cohere: {
259
+ api_key: process.env.COHERE_API_KEY,
260
+ model: 'embed-english-v3.0',
261
+ dimensions: 1024
262
+ },
263
+ ollama: {
264
+ base_url: 'http://localhost:11434',
265
+ model: 'nomic-embed-text',
266
+ dimensions: 768
267
+ }
268
+ },
269
+ default_provider: 'openai'
270
+ }
271
+
272
+ // Why Multi-Provider?
273
+ // Google Research (arxiv.org/abs/2508.21038) shows single embeddings hit
274
+ // a "recall ceiling" - different providers capture different semantic aspects:
275
+ // - OpenAI: General semantic understanding
276
+ // - Voyage: Domain-specific (legal, financial, code)
277
+ // - Cohere: Multilingual support
278
+ // - Ollama: Privacy-preserving local inference
279
+
280
+ // Aggregation Strategies for composite search:
281
+ // - 'rrf' → Reciprocal Rank Fusion (recommended)
282
+ // - 'max' → Maximum score across providers
283
+ // - 'avg' → Weighted average
284
+ // - 'voting' → Consensus (entity must appear in N providers)
285
+ ```
286
+
195
287
  ### 4. DatalogProgram (Rule-Based Reasoning)
196
288
 
197
289
  ```javascript
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "rust-kgdb",
3
- "version": "0.3.1",
3
+ "version": "0.3.2",
4
4
  "description": "High-performance RDF/SPARQL database with GraphFrames analytics, vector embeddings, Datalog reasoning, and Pregel BSP processing",
5
5
  "main": "index.js",
6
6
  "types": "index.d.ts",