rust-kgdb 0.3.1 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/README.md +209 -1
  2. package/package.json +1 -1
package/README.md CHANGED
@@ -14,8 +14,11 @@
14
14
  | Feature | Description |
15
15
  |---------|-------------|
16
16
  | **GraphDB** | Core RDF/SPARQL database with 100% W3C compliance |
17
- | **GraphFrames** | Spark-compatible graph analytics (PageRank, triangles, motifs) |
17
+ | **GraphFrames** | Spark-compatible graph analytics (PageRank, triangles, components) |
18
+ | **Motif Finding** | Graph pattern DSL for structural queries (fraud rings, recommendations) |
18
19
  | **EmbeddingService** | Vector similarity search, text search, multi-provider embeddings |
20
+ | **Embedding Triggers** | Automatic embedding generation on INSERT/UPDATE/DELETE |
21
+ | **Embedding Providers** | OpenAI, Voyage, Cohere, Anthropic, Mistral, Jina, Ollama, HF-TEI |
19
22
  | **DatalogProgram** | Rule-based reasoning with transitive closure |
20
23
  | **Pregel** | Bulk Synchronous Parallel graph processing |
21
24
  | **Hypergraph** | Native hyperedge support beyond RDF triples |
@@ -138,6 +141,121 @@ console.log('Star graph:', star.vertexCount(), 'vertices,', star.edgeCount(), 'e
138
141
  console.log('Cycle graph:', cycle.vertexCount(), 'vertices,', cycle.edgeCount(), 'edges')
139
142
  ```
140
143
 
144
+ ### 2b. Motif Pattern Matching (Graph Pattern DSL)
145
+
146
+ Motifs are recurring structural patterns in graphs. rust-kgdb supports a powerful DSL for finding motifs:
147
+
148
+ ```javascript
149
+ const { GraphFrame, completeGraph, chainGraph, cycleGraph, friendsGraph } = require('rust-kgdb')
150
+
151
+ // === Basic Motif Syntax ===
152
+ // (a)-[]->(b) Single edge from a to b
153
+ // (a)-[e]->(b) Named edge 'e' from a to b
154
+ // (a)-[]->(b); (b)-[]->(c) Two-hop path (chain pattern)
155
+ // !(a)-[]->(b) Negation (edge does NOT exist)
156
+
157
+ // === Find Single Edges ===
158
+ const chain = chainGraph(5) // v0 -> v1 -> v2 -> v3 -> v4
159
+ const edges = JSON.parse(chain.find("(a)-[]->(b)"))
160
+ console.log('All edges:', edges.length) // 4
161
+
162
+ // === Two-Hop Paths (Friend-of-Friend Pattern) ===
163
+ const twoHop = JSON.parse(chain.find("(a)-[]->(b); (b)-[]->(c)"))
164
+ console.log('Two-hop paths:', twoHop.length) // 3
165
+ // v0->v1->v2, v1->v2->v3, v2->v3->v4
166
+
167
+ // === Three-Hop Paths ===
168
+ const threeHop = JSON.parse(chain.find("(a)-[]->(b); (b)-[]->(c); (c)-[]->(d)"))
169
+ console.log('Three-hop paths:', threeHop.length) // 2
170
+
171
+ // === Triangle Pattern (Cycle of Length 3) ===
172
+ const k4 = completeGraph(4) // K4 has triangles
173
+ const triangles = JSON.parse(k4.find("(a)-[]->(b); (b)-[]->(c); (c)-[]->(a)"))
174
+ // Filter to avoid counting the same triangle multiple times
175
+ const uniqueTriangles = triangles.filter(t => t.a < t.b && t.b < t.c)
176
+ console.log('Triangles in K4:', uniqueTriangles.length) // 4
177
+
178
+ // === Star Pattern (Hub with Multiple Spokes) ===
179
+ const social = new GraphFrame(
180
+ JSON.stringify([
181
+ {id: "influencer"},
182
+ {id: "follower1"}, {id: "follower2"}, {id: "follower3"}
183
+ ]),
184
+ JSON.stringify([
185
+ {src: "influencer", dst: "follower1"},
186
+ {src: "influencer", dst: "follower2"},
187
+ {src: "influencer", dst: "follower3"}
188
+ ])
189
+ )
190
+ // Find hub pattern: someone with 2+ outgoing edges
191
+ const hubPattern = JSON.parse(social.find("(hub)-[]->(f1); (hub)-[]->(f2)"))
192
+ console.log('Hub patterns (2+ followers):', hubPattern.length)
193
+
194
+ // === Reciprocal Relationship (Mutual Friends) ===
195
+ const mutual = new GraphFrame(
196
+ JSON.stringify([{id: "alice"}, {id: "bob"}, {id: "carol"}]),
197
+ JSON.stringify([
198
+ {src: "alice", dst: "bob"},
199
+ {src: "bob", dst: "alice"}, // Reciprocal
200
+ {src: "bob", dst: "carol"} // One-way
201
+ ])
202
+ )
203
+ const reciprocal = JSON.parse(mutual.find("(a)-[]->(b); (b)-[]->(a)"))
204
+ console.log('Mutual relationships:', reciprocal.length) // 2 (alice<->bob counted twice)
205
+
206
+ // === Diamond Pattern (Common in Fraud Detection) ===
207
+ // A -> B, A -> C, B -> D, C -> D (convergence point D)
208
+ const diamond = new GraphFrame(
209
+ JSON.stringify([{id: "A"}, {id: "B"}, {id: "C"}, {id: "D"}]),
210
+ JSON.stringify([
211
+ {src: "A", dst: "B"},
212
+ {src: "A", dst: "C"},
213
+ {src: "B", dst: "D"},
214
+ {src: "C", dst: "D"}
215
+ ])
216
+ )
217
+ const diamondPattern = JSON.parse(diamond.find(
218
+ "(a)-[]->(b); (a)-[]->(c); (b)-[]->(d); (c)-[]->(d)"
219
+ ))
220
+ console.log('Diamond patterns:', diamondPattern.length) // 1
221
+
222
+ // === Use Case: Fraud Ring Detection ===
223
+ // Find circular money transfers: A -> B -> C -> A
224
+ const transactions = new GraphFrame(
225
+ JSON.stringify([
226
+ {id: "acc001"}, {id: "acc002"}, {id: "acc003"}, {id: "acc004"}
227
+ ]),
228
+ JSON.stringify([
229
+ {src: "acc001", dst: "acc002", amount: 10000},
230
+ {src: "acc002", dst: "acc003", amount: 9900},
231
+ {src: "acc003", dst: "acc001", amount: 9800}, // Suspicious cycle!
232
+ {src: "acc003", dst: "acc004", amount: 5000} // Normal transfer
233
+ ])
234
+ )
235
+ const cycles = JSON.parse(transactions.find(
236
+ "(a)-[]->(b); (b)-[]->(c); (c)-[]->(a)"
237
+ ))
238
+ console.log('Circular transfer patterns:', cycles.length) // Found fraud ring!
239
+
240
+ // === Use Case: Recommendation (Friends-of-Friends not yet connected) ===
241
+ const network = friendsGraph()
242
+ const fofPattern = JSON.parse(network.find("(a)-[]->(b); (b)-[]->(c)"))
243
+ // To turn these into recommendations, filter the results to a != c with no direct edge a->c (filter not shown here)
244
+ console.log('Friend-of-friend patterns for recommendations:', fofPattern.length)
245
+ ```
246
+
247
+ ### Motif Pattern Reference
248
+
249
+ | Pattern | DSL Syntax | Description |
250
+ |---------|------------|-------------|
251
+ | **Edge** | `(a)-[]->(b)` | Single directed edge |
252
+ | **Named Edge** | `(a)-[e]->(b)` | Edge with binding name |
253
+ | **Two-hop** | `(a)-[]->(b); (b)-[]->(c)` | Path of length 2 |
254
+ | **Triangle** | `(a)-[]->(b); (b)-[]->(c); (c)-[]->(a)` | 3-cycle |
255
+ | **Star** | `(h)-[]->(a); (h)-[]->(b); (h)-[]->(c)` | Hub pattern |
256
+ | **Diamond** | `(a)-[]->(b); (a)-[]->(c); (b)-[]->(d); (c)-[]->(d)` | Convergence |
257
+ | **Negation** | `!(a)-[]->(b)` | Edge must NOT exist |
258
+
141
259
  ### 3. EmbeddingService (Vector Similarity & Text Search)
142
260
 
143
261
  ```javascript
@@ -178,6 +296,11 @@ console.log('Composite embedding:', composite ? 'stored' : 'not found')
178
296
  // Count composite embeddings
179
297
  console.log('Total composites:', service.countComposites())
180
298
 
299
+ // === Composite Similarity Search (RRF Aggregation) ===
300
+ // Find similar using Reciprocal Rank Fusion across multiple providers
301
+ const compositeSimilar = JSON.parse(service.findSimilarComposite('product_123', 10, 0.5, 'rrf'))
302
+ console.log('Similar (composite RRF):', compositeSimilar)
303
+
181
304
  // === Use Case: Semantic Product Search ===
182
305
  // Store product embeddings
183
306
  const products = ['laptop', 'phone', 'tablet', 'keyboard', 'mouse']
@@ -192,6 +315,91 @@ const relatedToLaptop = JSON.parse(service.findSimilar('laptop', 5, 0.0))
192
315
  console.log('Products similar to laptop:', relatedToLaptop)
193
316
  ```
194
317
 
318
+ ### 3b. Embedding Triggers (Automatic Embedding Generation)
319
+
320
+ ```javascript
321
+ // Triggers automatically generate embeddings when data changes
322
+ // Configure triggers to fire on INSERT/UPDATE/DELETE events
323
+
324
+ // Example: Auto-embed new entities on insert
325
+ const triggerConfig = {
326
+ name: 'auto_embed_on_insert',
327
+ event: 'AfterInsert',
328
+ action: {
329
+ type: 'GenerateEmbedding',
330
+ source: 'Subject', // Embed the subject of the triple
331
+ provider: 'openai' // Use OpenAI provider
332
+ }
333
+ }
334
+
335
+ // Multiple triggers for different providers
336
+ const triggers = [
337
+ { name: 'embed_openai', provider: 'openai' },
338
+ { name: 'embed_voyage', provider: 'voyage' },
339
+ { name: 'embed_cohere', provider: 'cohere' }
340
+ ]
341
+
342
+ // Each trigger fires independently, creating composite embeddings
343
+ ```
344
+
345
+ ### 3c. Embedding Providers (Multi-Provider Architecture)
346
+
347
+ ```javascript
348
+ // rust-kgdb supports multiple embedding providers:
349
+ //
350
+ // Built-in Providers:
351
+ // - 'openai' → text-embedding-3-small (1536 or 384 dim)
352
+ // - 'voyage' → voyage-2, voyage-lite-02-instruct
353
+ // - 'cohere' → embed-v3
354
+ // - 'anthropic' → Via Voyage partnership
355
+ // - 'mistral' → mistral-embed
356
+ // - 'jina' → jina-embeddings-v2
357
+ // - 'ollama' → Local models (llama, mistral, etc.)
358
+ // - 'hf-tei' → HuggingFace Text Embedding Inference
359
+ //
360
+ // Provider Configuration (Rust-side):
361
+
362
+ const providerConfig = {
363
+ providers: {
364
+ openai: {
365
+ api_key: process.env.OPENAI_API_KEY,
366
+ model: 'text-embedding-3-small',
367
+ dimensions: 384
368
+ },
369
+ voyage: {
370
+ api_key: process.env.VOYAGE_API_KEY,
371
+ model: 'voyage-2',
372
+ dimensions: 1024
373
+ },
374
+ cohere: {
375
+ api_key: process.env.COHERE_API_KEY,
376
+ model: 'embed-english-v3.0',
377
+ dimensions: 384
378
+ },
379
+ ollama: {
380
+ base_url: 'http://localhost:11434',
381
+ model: 'nomic-embed-text',
382
+ dimensions: 768
383
+ }
384
+ },
385
+ default_provider: 'openai'
386
+ }
387
+
388
+ // Why Multi-Provider?
389
+ // Google Research (arxiv.org/abs/2508.21038) shows single embeddings hit
390
+ // a "recall ceiling" - different providers capture different semantic aspects:
391
+ // - OpenAI: General semantic understanding
392
+ // - Voyage: Domain-specific (legal, financial, code)
393
+ // - Cohere: Multilingual support
394
+ // - Ollama: Privacy-preserving local inference
395
+
396
+ // Aggregation Strategies for composite search:
397
+ // - 'rrf' → Reciprocal Rank Fusion (recommended)
398
+ // - 'max' → Maximum score across providers
399
+ // - 'avg' → Weighted average
400
+ // - 'voting' → Consensus (entity must appear in N providers)
401
+ ```
402
+
195
403
  ### 4. DatalogProgram (Rule-Based Reasoning)
196
404
 
197
405
  ```javascript
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "rust-kgdb",
3
- "version": "0.3.1",
3
+ "version": "0.3.3",
4
4
  "description": "High-performance RDF/SPARQL database with GraphFrames analytics, vector embeddings, Datalog reasoning, and Pregel BSP processing",
5
5
  "main": "index.js",
6
6
  "types": "index.d.ts",