rust-kgdb 0.3.1 → 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +209 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -14,8 +14,11 @@
|
|
|
14
14
|
| Feature | Description |
|
|
15
15
|
|---------|-------------|
|
|
16
16
|
| **GraphDB** | Core RDF/SPARQL database with 100% W3C compliance |
|
|
17
|
-
| **GraphFrames** | Spark-compatible graph analytics (PageRank, triangles,
|
|
17
|
+
| **GraphFrames** | Spark-compatible graph analytics (PageRank, triangles, components) |
|
|
18
|
+
| **Motif Finding** | Graph pattern DSL for structural queries (fraud rings, recommendations) |
|
|
18
19
|
| **EmbeddingService** | Vector similarity search, text search, multi-provider embeddings |
|
|
20
|
+
| **Embedding Triggers** | Automatic embedding generation on INSERT/UPDATE/DELETE |
|
|
21
|
+
| **Embedding Providers** | OpenAI, Voyage, Cohere, Anthropic, Mistral, Jina, Ollama, HF-TEI |
|
|
19
22
|
| **DatalogProgram** | Rule-based reasoning with transitive closure |
|
|
20
23
|
| **Pregel** | Bulk Synchronous Parallel graph processing |
|
|
21
24
|
| **Hypergraph** | Native hyperedge support beyond RDF triples |
|
|
@@ -138,6 +141,121 @@ console.log('Star graph:', star.vertexCount(), 'vertices,', star.edgeCount(), 'e
|
|
|
138
141
|
console.log('Cycle graph:', cycle.vertexCount(), 'vertices,', cycle.edgeCount(), 'edges')
|
|
139
142
|
```
|
|
140
143
|
|
|
144
|
+
### 2b. Motif Pattern Matching (Graph Pattern DSL)
|
|
145
|
+
|
|
146
|
+
Motifs are recurring structural patterns in graphs. rust-kgdb supports a powerful DSL for finding motifs:
|
|
147
|
+
|
|
148
|
+
```javascript
|
|
149
|
+
const { GraphFrame, completeGraph, chainGraph, cycleGraph, friendsGraph } = require('rust-kgdb')
|
|
150
|
+
|
|
151
|
+
// === Basic Motif Syntax ===
|
|
152
|
+
// (a)-[]->(b) Single edge from a to b
|
|
153
|
+
// (a)-[e]->(b) Named edge 'e' from a to b
|
|
154
|
+
// (a)-[]->(b); (b)-[]->(c) Two-hop path (chain pattern)
|
|
155
|
+
// !(a)-[]->(b) Negation (edge does NOT exist)
|
|
156
|
+
|
|
157
|
+
// === Find Single Edges ===
|
|
158
|
+
const chain = chainGraph(5) // v0 -> v1 -> v2 -> v3 -> v4
|
|
159
|
+
const edges = JSON.parse(chain.find("(a)-[]->(b)"))
|
|
160
|
+
console.log('All edges:', edges.length) // 4
|
|
161
|
+
|
|
162
|
+
// === Two-Hop Paths (Friend-of-Friend Pattern) ===
|
|
163
|
+
const twoHop = JSON.parse(chain.find("(a)-[]->(b); (b)-[]->(c)"))
|
|
164
|
+
console.log('Two-hop paths:', twoHop.length) // 3
|
|
165
|
+
// v0->v1->v2, v1->v2->v3, v2->v3->v4
|
|
166
|
+
|
|
167
|
+
// === Three-Hop Paths ===
|
|
168
|
+
const threeHop = JSON.parse(chain.find("(a)-[]->(b); (b)-[]->(c); (c)-[]->(d)"))
|
|
169
|
+
console.log('Three-hop paths:', threeHop.length) // 2
|
|
170
|
+
|
|
171
|
+
// === Triangle Pattern (Cycle of Length 3) ===
|
|
172
|
+
const k4 = completeGraph(4) // K4 has triangles
|
|
173
|
+
const triangles = JSON.parse(k4.find("(a)-[]->(b); (b)-[]->(c); (c)-[]->(a)"))
|
|
174
|
+
// Filter to avoid counting the same triangle multiple times
|
|
175
|
+
const uniqueTriangles = triangles.filter(t => t.a < t.b && t.b < t.c)
|
|
176
|
+
console.log('Triangles in K4:', uniqueTriangles.length) // 4
|
|
177
|
+
|
|
178
|
+
// === Star Pattern (Hub with Multiple Spokes) ===
|
|
179
|
+
const social = new GraphFrame(
|
|
180
|
+
JSON.stringify([
|
|
181
|
+
{id: "influencer"},
|
|
182
|
+
{id: "follower1"}, {id: "follower2"}, {id: "follower3"}
|
|
183
|
+
]),
|
|
184
|
+
JSON.stringify([
|
|
185
|
+
{src: "influencer", dst: "follower1"},
|
|
186
|
+
{src: "influencer", dst: "follower2"},
|
|
187
|
+
{src: "influencer", dst: "follower3"}
|
|
188
|
+
])
|
|
189
|
+
)
|
|
190
|
+
// Find hub pattern: someone with 2+ outgoing edges
|
|
191
|
+
const hubPattern = JSON.parse(social.find("(hub)-[]->(f1); (hub)-[]->(f2)"))
|
|
192
|
+
console.log('Hub patterns (2+ followers):', hubPattern.length)
|
|
193
|
+
|
|
194
|
+
// === Reciprocal Relationship (Mutual Friends) ===
|
|
195
|
+
const mutual = new GraphFrame(
|
|
196
|
+
JSON.stringify([{id: "alice"}, {id: "bob"}, {id: "carol"}]),
|
|
197
|
+
JSON.stringify([
|
|
198
|
+
{src: "alice", dst: "bob"},
|
|
199
|
+
{src: "bob", dst: "alice"}, // Reciprocal
|
|
200
|
+
{src: "bob", dst: "carol"} // One-way
|
|
201
|
+
])
|
|
202
|
+
)
|
|
203
|
+
const reciprocal = JSON.parse(mutual.find("(a)-[]->(b); (b)-[]->(a)"))
|
|
204
|
+
console.log('Mutual relationships:', reciprocal.length) // 2 (alice<->bob counted twice)
|
|
205
|
+
|
|
206
|
+
// === Diamond Pattern (Common in Fraud Detection) ===
|
|
207
|
+
// A -> B, A -> C, B -> D, C -> D (convergence point D)
|
|
208
|
+
const diamond = new GraphFrame(
|
|
209
|
+
JSON.stringify([{id: "A"}, {id: "B"}, {id: "C"}, {id: "D"}]),
|
|
210
|
+
JSON.stringify([
|
|
211
|
+
{src: "A", dst: "B"},
|
|
212
|
+
{src: "A", dst: "C"},
|
|
213
|
+
{src: "B", dst: "D"},
|
|
214
|
+
{src: "C", dst: "D"}
|
|
215
|
+
])
|
|
216
|
+
)
|
|
217
|
+
const diamondPattern = JSON.parse(diamond.find(
|
|
218
|
+
"(a)-[]->(b); (a)-[]->(c); (b)-[]->(d); (c)-[]->(d)"
|
|
219
|
+
))
|
|
220
|
+
console.log('Diamond patterns:', diamondPattern.length) // 1
|
|
221
|
+
|
|
222
|
+
// === Use Case: Fraud Ring Detection ===
|
|
223
|
+
// Find circular money transfers: A -> B -> C -> A
|
|
224
|
+
const transactions = new GraphFrame(
|
|
225
|
+
JSON.stringify([
|
|
226
|
+
{id: "acc001"}, {id: "acc002"}, {id: "acc003"}, {id: "acc004"}
|
|
227
|
+
]),
|
|
228
|
+
JSON.stringify([
|
|
229
|
+
{src: "acc001", dst: "acc002", amount: 10000},
|
|
230
|
+
{src: "acc002", dst: "acc003", amount: 9900},
|
|
231
|
+
{src: "acc003", dst: "acc001", amount: 9800}, // Suspicious cycle!
|
|
232
|
+
{src: "acc003", dst: "acc004", amount: 5000} // Normal transfer
|
|
233
|
+
])
|
|
234
|
+
)
|
|
235
|
+
const cycles = JSON.parse(transactions.find(
|
|
236
|
+
"(a)-[]->(b); (b)-[]->(c); (c)-[]->(a)"
|
|
237
|
+
))
|
|
238
|
+
console.log('Circular transfer patterns:', cycles.length) // Found fraud ring!
|
|
239
|
+
|
|
240
|
+
// === Use Case: Recommendation (Friends-of-Friends not yet connected) ===
|
|
241
|
+
const network = friendsGraph()
|
|
242
|
+
const fofPattern = JSON.parse(network.find("(a)-[]->(b); (b)-[]->(c)"))
|
|
243
|
+
// Next step (not shown here): keep only matches where a != c and no direct edge a->c exists (potential recommendations)
|
|
244
|
+
console.log('Friend-of-friend patterns for recommendations:', fofPattern.length)
|
|
245
|
+
```
|
|
246
|
+
|
|
247
|
+
### Motif Pattern Reference
|
|
248
|
+
|
|
249
|
+
| Pattern | DSL Syntax | Description |
|
|
250
|
+
|---------|------------|-------------|
|
|
251
|
+
| **Edge** | `(a)-[]->(b)` | Single directed edge |
|
|
252
|
+
| **Named Edge** | `(a)-[e]->(b)` | Edge with binding name |
|
|
253
|
+
| **Two-hop** | `(a)-[]->(b); (b)-[]->(c)` | Path of length 2 |
|
|
254
|
+
| **Triangle** | `(a)-[]->(b); (b)-[]->(c); (c)-[]->(a)` | 3-cycle |
|
|
255
|
+
| **Star** | `(h)-[]->(a); (h)-[]->(b); (h)-[]->(c)` | Hub pattern |
|
|
256
|
+
| **Diamond** | `(a)-[]->(b); (a)-[]->(c); (b)-[]->(d); (c)-[]->(d)` | Convergence |
|
|
257
|
+
| **Negation** | `!(a)-[]->(b)` | Edge must NOT exist |
|
|
258
|
+
|
|
141
259
|
### 3. EmbeddingService (Vector Similarity & Text Search)
|
|
142
260
|
|
|
143
261
|
```javascript
|
|
@@ -178,6 +296,11 @@ console.log('Composite embedding:', composite ? 'stored' : 'not found')
|
|
|
178
296
|
// Count composite embeddings
|
|
179
297
|
console.log('Total composites:', service.countComposites())
|
|
180
298
|
|
|
299
|
+
// === Composite Similarity Search (RRF Aggregation) ===
|
|
300
|
+
// Find similar using Reciprocal Rank Fusion across multiple providers
|
|
301
|
+
const compositeSimilar = JSON.parse(service.findSimilarComposite('product_123', 10, 0.5, 'rrf'))
|
|
302
|
+
console.log('Similar (composite RRF):', compositeSimilar)
|
|
303
|
+
|
|
181
304
|
// === Use Case: Semantic Product Search ===
|
|
182
305
|
// Store product embeddings
|
|
183
306
|
const products = ['laptop', 'phone', 'tablet', 'keyboard', 'mouse']
|
|
@@ -192,6 +315,91 @@ const relatedToLaptop = JSON.parse(service.findSimilar('laptop', 5, 0.0))
|
|
|
192
315
|
console.log('Products similar to laptop:', relatedToLaptop)
|
|
193
316
|
```
|
|
194
317
|
|
|
318
|
+
### 3b. Embedding Triggers (Automatic Embedding Generation)
|
|
319
|
+
|
|
320
|
+
```javascript
|
|
321
|
+
// Triggers automatically generate embeddings when data changes
|
|
322
|
+
// Configure triggers to fire on INSERT/UPDATE/DELETE events
|
|
323
|
+
|
|
324
|
+
// Example: Auto-embed new entities on insert
|
|
325
|
+
const triggerConfig = {
|
|
326
|
+
name: 'auto_embed_on_insert',
|
|
327
|
+
event: 'AfterInsert',
|
|
328
|
+
action: {
|
|
329
|
+
type: 'GenerateEmbedding',
|
|
330
|
+
source: 'Subject', // Embed the subject of the triple
|
|
331
|
+
provider: 'openai' // Use OpenAI provider
|
|
332
|
+
}
|
|
333
|
+
}
|
|
334
|
+
|
|
335
|
+
// Multiple triggers for different providers
|
|
336
|
+
const triggers = [
|
|
337
|
+
{ name: 'embed_openai', provider: 'openai' },
|
|
338
|
+
{ name: 'embed_voyage', provider: 'voyage' },
|
|
339
|
+
{ name: 'embed_cohere', provider: 'cohere' }
|
|
340
|
+
]
|
|
341
|
+
|
|
342
|
+
// Each trigger fires independently, creating composite embeddings
|
|
343
|
+
```
|
|
344
|
+
|
|
345
|
+
### 3c. Embedding Providers (Multi-Provider Architecture)
|
|
346
|
+
|
|
347
|
+
```javascript
|
|
348
|
+
// rust-kgdb supports multiple embedding providers:
|
|
349
|
+
//
|
|
350
|
+
// Built-in Providers:
|
|
351
|
+
// - 'openai' → text-embedding-3-small (1536 or 384 dim)
|
|
352
|
+
// - 'voyage' → voyage-2, voyage-lite-02-instruct
|
|
353
|
+
// - 'cohere' → embed-v3
|
|
354
|
+
// - 'anthropic' → Via Voyage partnership
|
|
355
|
+
// - 'mistral' → mistral-embed
|
|
356
|
+
// - 'jina' → jina-embeddings-v2
|
|
357
|
+
// - 'ollama' → Local models (llama, mistral, etc.)
|
|
358
|
+
// - 'hf-tei' → Hugging Face Text Embeddings Inference (TEI)
|
|
359
|
+
//
|
|
360
|
+
// Provider Configuration (Rust-side):
|
|
361
|
+
|
|
362
|
+
const providerConfig = {
|
|
363
|
+
providers: {
|
|
364
|
+
openai: {
|
|
365
|
+
api_key: process.env.OPENAI_API_KEY,
|
|
366
|
+
model: 'text-embedding-3-small',
|
|
367
|
+
dimensions: 384
|
|
368
|
+
},
|
|
369
|
+
voyage: {
|
|
370
|
+
api_key: process.env.VOYAGE_API_KEY,
|
|
371
|
+
model: 'voyage-2',
|
|
372
|
+
dimensions: 1024
|
|
373
|
+
},
|
|
374
|
+
cohere: {
|
|
375
|
+
api_key: process.env.COHERE_API_KEY,
|
|
376
|
+
model: 'embed-english-v3.0',
|
|
377
|
+
dimensions: 384
|
|
378
|
+
},
|
|
379
|
+
ollama: {
|
|
380
|
+
base_url: 'http://localhost:11434',
|
|
381
|
+
model: 'nomic-embed-text',
|
|
382
|
+
dimensions: 768
|
|
383
|
+
}
|
|
384
|
+
},
|
|
385
|
+
default_provider: 'openai'
|
|
386
|
+
}
|
|
387
|
+
|
|
388
|
+
// Why Multi-Provider?
|
|
389
|
+
// Google Research (arxiv.org/abs/2508.21038) shows single embeddings hit
|
|
390
|
+
// a "recall ceiling" - different providers capture different semantic aspects:
|
|
391
|
+
// - OpenAI: General semantic understanding
|
|
392
|
+
// - Voyage: Domain-specific (legal, financial, code)
|
|
393
|
+
// - Cohere: Multilingual support
|
|
394
|
+
// - Ollama: Privacy-preserving local inference
|
|
395
|
+
|
|
396
|
+
// Aggregation Strategies for composite search:
|
|
397
|
+
// - 'rrf' → Reciprocal Rank Fusion (recommended)
|
|
398
|
+
// - 'max' → Maximum score across providers
|
|
399
|
+
// - 'avg' → Weighted average
|
|
400
|
+
// - 'voting' → Consensus (entity must appear in N providers)
|
|
401
|
+
```
|
|
402
|
+
|
|
195
403
|
### 4. DatalogProgram (Rule-Based Reasoning)
|
|
196
404
|
|
|
197
405
|
```javascript
|
package/package.json
CHANGED