rust-kgdb 0.2.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +203 -0
- package/README.md +101 -23
- package/index.d.ts +347 -0
- package/index.js +39 -1
- package/package.json +13 -13
- package/rust-kgdb-napi.darwin-x64.node +0 -0
package/CHANGELOG.md
ADDED
|
@@ -0,0 +1,203 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to the rust-kgdb TypeScript SDK will be documented in this file.
|
|
4
|
+
|
|
5
|
+
## [0.3.0] - 2025-12-11
|
|
6
|
+
|
|
7
|
+
### Major New Features
|
|
8
|
+
|
|
9
|
+
This release introduces four major new APIs that transform rust-kgdb from a simple RDF database into a comprehensive graph analytics platform.
|
|
10
|
+
|
|
11
|
+
#### GraphFrames API - Powered by Apache DataFusion
|
|
12
|
+
|
|
13
|
+
Complete graph analytics with DataFrame semantics, inspired by Apache Spark GraphFrames but running entirely in Rust for maximum performance.
|
|
14
|
+
|
|
15
|
+
**New Classes:**
|
|
16
|
+
- `GraphFrame` - High-performance graph analytics with DataFrame semantics
|
|
17
|
+
|
|
18
|
+
**New Factory Functions:**
|
|
19
|
+
- `friendsGraph()` - Create sample social network graph
|
|
20
|
+
- `chainGraph(length)` - Create linear chain graph
|
|
21
|
+
- `starGraph(spokes)` - Create hub-and-spoke topology
|
|
22
|
+
- `completeGraph(n)` - Create fully connected K_n graph
|
|
23
|
+
- `cycleGraph(n)` - Create circular graph
|
|
24
|
+
- `binaryTreeGraph(depth)` - Create binary tree
|
|
25
|
+
- `bipartiteGraph(leftSize, rightSize)` - Create bipartite graph
|
|
26
|
+
|
|
27
|
+
**Graph Algorithms:**
|
|
28
|
+
- `pageRank(resetProb, maxIter)` - PageRank algorithm with configurable reset probability (1 - damping factor)
|
|
29
|
+
- `connectedComponents()` - Find connected components
|
|
30
|
+
- `shortestPaths(landmarks)` - BFS shortest paths from landmarks
|
|
31
|
+
- `labelPropagation(maxIter)` - Community detection via label propagation
|
|
32
|
+
- `triangleCount()` - Count triangles in graph (WCOJ optimized)
|
|
33
|
+
- `find(pattern)` - Motif finding with GraphFrame DSL pattern syntax
|
|
34
|
+
|
|
35
|
+
**Degree Operations:**
|
|
36
|
+
- `inDegrees()` - Compute in-degree for all vertices
|
|
37
|
+
- `outDegrees()` - Compute out-degree for all vertices
|
|
38
|
+
- `degrees()` - Compute total degree for all vertices
|
|
39
|
+
|
|
40
|
+
```typescript
|
|
41
|
+
// Example: PageRank on a social network
|
|
42
|
+
const graph = new GraphFrame(
|
|
43
|
+
JSON.stringify([{id: "alice"}, {id: "bob"}, {id: "carol"}]),
|
|
44
|
+
JSON.stringify([{src: "alice", dst: "bob"}, {src: "bob", dst: "carol"}])
|
|
45
|
+
)
|
|
46
|
+
const ranks = JSON.parse(graph.pageRank(0.15, 20))
|
|
47
|
+
console.log(ranks) // e.g. { ranks: { alice: ..., bob: ..., carol: ... }, iterations: ..., converged: ... }
|
|
48
|
+
|
|
49
|
+
// Example: Find triangles
|
|
50
|
+
const triangles = graph.triangleCount() // Uses WCOJ for optimal performance
|
|
51
|
+
|
|
52
|
+
// Example: Motif pattern matching
|
|
53
|
+
const patterns = JSON.parse(graph.find("(a)-[]->(b); (b)-[]->(c)"))
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
#### EmbeddingService - Multi-Provider Vector Embeddings
|
|
57
|
+
|
|
58
|
+
HNSW-based similarity search with 1-hop ARCADE neighbor cache for graph-aware retrieval.
|
|
59
|
+
|
|
60
|
+
**Core Operations:**
|
|
61
|
+
- `storeVector(entityId, vector)` - Store 384-dim embedding for entity
|
|
62
|
+
- `getVector(entityId)` - Retrieve embedding (returns null if not found)
|
|
63
|
+
- `deleteVector(entityId)` - Remove embedding
|
|
64
|
+
- `rebuildIndex()` - Rebuild HNSW similarity index
|
|
65
|
+
|
|
66
|
+
**Similarity Search:**
|
|
67
|
+
- `findSimilar(entityId, k, threshold)` - Find k most similar entities above threshold
|
|
68
|
+
- `findSimilarGraceful(entityId, k, threshold)` - Same but returns empty array if disabled
|
|
69
|
+
|
|
70
|
+
**1-Hop ARCADE Cache:**
|
|
71
|
+
- `getNeighborsOut(entityId)` - Get outgoing 1-hop neighbors
|
|
72
|
+
- `getNeighborsIn(entityId)` - Get incoming 1-hop neighbors
|
|
73
|
+
- `onTripleInsert(subject, predicate, object, graph)` - Notify on triple insert
|
|
74
|
+
- `onTripleDelete(subject, predicate, object, graph)` - Notify on triple delete
|
|
75
|
+
|
|
76
|
+
**Composite Multi-Provider Embeddings:**
|
|
77
|
+
- `storeComposite(entityId, embeddingsJson)` - Store embeddings from multiple providers
|
|
78
|
+
- `getComposite(entityId)` - Get composite embedding
|
|
79
|
+
- `findSimilarComposite(entityId, k, threshold, strategy)` - Search with aggregation (RRF, voting, max)
|
|
80
|
+
- `listCompositeEntities()` - List all entities with composite embeddings
|
|
81
|
+
- `countComposites()` - Count composite embeddings
|
|
82
|
+
- `registeredProviders()` - List registered provider names
|
|
83
|
+
|
|
84
|
+
**Metrics & Monitoring:**
|
|
85
|
+
- `isEnabled()` - Check if service is enabled
|
|
86
|
+
- `getMetrics()` - Get service metrics as JSON
|
|
87
|
+
- `getCacheStats()` - Get cache statistics as JSON
|
|
88
|
+
|
|
89
|
+
```typescript
|
|
90
|
+
// Example: Store and search embeddings
|
|
91
|
+
const service = new EmbeddingService()
|
|
92
|
+
service.storeVector('entity1', new Array(384).fill(0.1))
|
|
93
|
+
service.storeVector('entity2', new Array(384).fill(0.2))
|
|
94
|
+
const similar = JSON.parse(service.findSimilar('entity1', 10, 0.7))
|
|
95
|
+
|
|
96
|
+
// Example: Multi-provider composite embeddings
|
|
97
|
+
service.storeComposite('entity', JSON.stringify({
|
|
98
|
+
openai: [...], // 384-dim from OpenAI
|
|
99
|
+
voyage: [...], // 384-dim from Voyage
|
|
100
|
+
cohere: [...] // 384-dim from Cohere
|
|
101
|
+
}))
|
|
102
|
+
const results = JSON.parse(service.findSimilarComposite('entity', 10, 0.7, 'rrf'))
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
#### DatalogProgram - Rule-Based Reasoning Engine
|
|
106
|
+
|
|
107
|
+
Logic programming with semi-naive evaluation for efficient fixpoint computation.
|
|
108
|
+
|
|
109
|
+
**Program Construction:**
|
|
110
|
+
- `new DatalogProgram()` - Create new Datalog program
|
|
111
|
+
- `addFact(factJson)` - Add extensional fact (EDB)
|
|
112
|
+
- `addRule(ruleJson)` - Add inference rule (IDB)
|
|
113
|
+
- `factCount()` - Get number of facts
|
|
114
|
+
- `ruleCount()` - Get number of rules
|
|
115
|
+
|
|
116
|
+
**Evaluation Functions:**
|
|
117
|
+
- `evaluateDatalog(program)` - Evaluate program using semi-naive algorithm
|
|
118
|
+
- `queryDatalog(program, predicate)` - Query specific predicate
|
|
119
|
+
|
|
120
|
+
```typescript
|
|
121
|
+
// Example: Transitive closure (ancestor relationship)
|
|
122
|
+
const program = new DatalogProgram()
|
|
123
|
+
|
|
124
|
+
// Add facts
|
|
125
|
+
program.addFact(JSON.stringify({predicate: 'parent', terms: ['alice', 'bob']}))
|
|
126
|
+
program.addFact(JSON.stringify({predicate: 'parent', terms: ['bob', 'charlie']}))
|
|
127
|
+
|
|
128
|
+
// Add rule: grandparent(X, Z) :- parent(X, Y), parent(Y, Z)
|
|
129
|
+
program.addRule(JSON.stringify({
|
|
130
|
+
head: {predicate: 'grandparent', terms: ['?X', '?Z']},
|
|
131
|
+
body: [
|
|
132
|
+
{predicate: 'parent', terms: ['?X', '?Y']},
|
|
133
|
+
{predicate: 'parent', terms: ['?Y', '?Z']}
|
|
134
|
+
]
|
|
135
|
+
}))
|
|
136
|
+
|
|
137
|
+
// Evaluate and query
|
|
138
|
+
const result = evaluateDatalog(program)
|
|
139
|
+
const grandparents = JSON.parse(queryDatalog(program, 'grandparent'))
|
|
140
|
+
// grandparents = [{predicate: 'grandparent', terms: ['alice', 'charlie']}]
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
#### Pregel API - Bulk Synchronous Parallel Processing
|
|
144
|
+
|
|
145
|
+
Iterative graph algorithms using the BSP model.
|
|
146
|
+
|
|
147
|
+
**Functions:**
|
|
148
|
+
- `pregelShortestPaths(graph, landmark, maxSupersteps)` - Compute shortest paths from landmark vertex
|
|
149
|
+
|
|
150
|
+
```typescript
|
|
151
|
+
// Example: Shortest paths in a chain graph
|
|
152
|
+
const graph = chainGraph(10) // v0 -> v1 -> v2 -> ... -> v9
|
|
153
|
+
const result = JSON.parse(pregelShortestPaths(graph, 'v0', 20))
|
|
154
|
+
console.log(result.distances) // { v0: 0, v1: 1, v2: 2, ..., v9: 9 }
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
### Core GraphDB Improvements
|
|
158
|
+
|
|
159
|
+
- All existing SPARQL/RDF operations continue to work unchanged
|
|
160
|
+
- `loadTtl()` - Load Turtle/N-Triples format RDF data
|
|
161
|
+
- `querySelect()` - Execute SPARQL SELECT queries
|
|
162
|
+
- `query()` - Execute SPARQL CONSTRUCT/ASK/DESCRIBE queries
|
|
163
|
+
- `countTriples()` - Count total triples
|
|
164
|
+
- `clear()` - Clear all data
|
|
165
|
+
- `getGraphUri()` - Get app graph URI
|
|
166
|
+
- `getVersion()` - Get library version
|
|
167
|
+
|
|
168
|
+
### Test Coverage
|
|
169
|
+
|
|
170
|
+
- 42 comprehensive tests covering all new APIs
|
|
171
|
+
- 100% feature coverage for GraphFrames, Embeddings, Datalog, and Pregel
|
|
172
|
+
- Automated test script: `npm test`
|
|
173
|
+
|
|
174
|
+
### Breaking Changes
|
|
175
|
+
|
|
176
|
+
None. All existing APIs remain unchanged and backward compatible.
|
|
177
|
+
|
|
178
|
+
### Dependencies
|
|
179
|
+
|
|
180
|
+
- NAPI-RS 2.18 for native Node.js bindings
|
|
181
|
+
- Rust 1.75+ for compilation
|
|
182
|
+
- Node.js 14+ runtime
|
|
183
|
+
|
|
184
|
+
## [0.2.2] - 2025-12-01
|
|
185
|
+
|
|
186
|
+
### Added
|
|
187
|
+
- Initial GraphFrames implementation (internal)
|
|
188
|
+
- Distributed cluster support
|
|
189
|
+
|
|
190
|
+
## [0.2.0] - 2025-11-25
|
|
191
|
+
|
|
192
|
+
### Added
|
|
193
|
+
- Core GraphDB with RDF/SPARQL support
|
|
194
|
+
- 100% W3C SPARQL 1.1 compliance
|
|
195
|
+
- WCOJ (Worst-Case Optimal Join) execution
|
|
196
|
+
- Multi-platform native binaries (macOS, Linux, Windows)
|
|
197
|
+
|
|
198
|
+
## [0.1.0] - 2025-11-15
|
|
199
|
+
|
|
200
|
+
### Added
|
|
201
|
+
- Initial release
|
|
202
|
+
- Basic RDF triple storage
|
|
203
|
+
- SPARQL query support
|
package/README.md
CHANGED
|
@@ -20,15 +20,32 @@ rust-kgdb supports three deployment modes:
|
|
|
20
20
|
| **Single Node (RocksDB/LMDB)** | Production, persistence needed | Single node, persistent | Via Rust crate |
|
|
21
21
|
| **Distributed Cluster** | Enterprise, 1B+ triples | Horizontal scaling, 9+ partitions | Contact us |
|
|
22
22
|
|
|
23
|
-
###
|
|
23
|
+
### Distributed Cluster Mode (Enterprise)
|
|
24
24
|
|
|
25
|
-
For enterprise deployments requiring:
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
-
|
|
29
|
-
- **OLAP
|
|
30
|
-
-
|
|
31
|
-
- Kubernetes-
|
|
25
|
+
For enterprise deployments requiring 1B+ triples and horizontal scaling:
|
|
26
|
+
|
|
27
|
+
**Key Features:**
|
|
28
|
+
- **Subject-Anchored Partitioning**: All triples for a subject are guaranteed on the same partition for optimal locality
|
|
29
|
+
- **Arrow-Powered OLAP**: High-performance analytical queries executed as optimized SQL at scale
|
|
30
|
+
- **Automatic Query Routing**: The coordinator intelligently routes queries to the right executors
|
|
31
|
+
- **Kubernetes-Native**: StatefulSet-based executors with automatic failover
|
|
32
|
+
- **Linear Horizontal Scaling**: Add more executor pods to scale throughput
|
|
33
|
+
|
|
34
|
+
**How It Works:**
|
|
35
|
+
|
|
36
|
+
Your SPARQL queries work unchanged. For large-scale aggregations, the cluster automatically optimizes execution:
|
|
37
|
+
|
|
38
|
+
```sparql
|
|
39
|
+
# Your SPARQL query
|
|
40
|
+
SELECT (COUNT(*) AS ?count) (AVG(?salary) AS ?avgSalary)
|
|
41
|
+
WHERE {
|
|
42
|
+
?employee <http://ex/type> <http://ex/Employee> .
|
|
43
|
+
?employee <http://ex/salary> ?salary .
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
# Cluster executes as optimized SQL internally
|
|
47
|
+
# Results aggregated across all partitions automatically
|
|
48
|
+
```
|
|
32
49
|
|
|
33
50
|
**Request a demo: gonnect.uk@gmail.com**
|
|
34
51
|
|
|
@@ -282,18 +299,17 @@ fs.writeFileSync('backup.nt', ntriples)
|
|
|
282
299
|
npm install rust-kgdb
|
|
283
300
|
```
|
|
284
301
|
|
|
285
|
-
### Platform Support
|
|
302
|
+
### Platform Support (v0.3.0)
|
|
286
303
|
|
|
287
|
-
| Platform | Architecture | Status |
|
|
288
|
-
|
|
289
|
-
| **macOS** | Intel (x64) | ✅ |
|
|
290
|
-
| **macOS** | Apple Silicon (arm64) |
|
|
291
|
-
| **Linux** | x64 |
|
|
292
|
-
| **Linux** | arm64 |
|
|
293
|
-
| **Windows** | x64 |
|
|
294
|
-
| **Windows** | arm64 | ⏳ v0.2.0 | — |
|
|
304
|
+
| Platform | Architecture | Status | Notes |
|
|
305
|
+
|----------|-------------|--------|-------|
|
|
306
|
+
| **macOS** | Intel (x64) | ✅ **Works out of the box** | Pre-built binary included |
|
|
307
|
+
| **macOS** | Apple Silicon (arm64) | ⏳ v0.2.2 | Coming soon |
|
|
308
|
+
| **Linux** | x64 | ⏳ v0.2.2 | Coming soon |
|
|
309
|
+
| **Linux** | arm64 | ⏳ v0.2.2 | Coming soon |
|
|
310
|
+
| **Windows** | x64 | ⏳ v0.2.2 | Coming soon |
|
|
295
311
|
|
|
296
|
-
**
|
|
312
|
+
**This release (v0.3.0)** includes a pre-built binary for **macOS x64 only**. Other platforms will be added in a future release.
|
|
297
313
|
|
|
298
314
|
---
|
|
299
315
|
|
|
@@ -446,15 +462,64 @@ db.querySelect(`
|
|
|
446
462
|
### UPDATE Operations
|
|
447
463
|
|
|
448
464
|
```typescript
|
|
449
|
-
// INSERT DATA
|
|
465
|
+
// INSERT DATA - Add new triples
|
|
450
466
|
db.updateInsert(`
|
|
451
|
-
|
|
467
|
+
PREFIX ex: <http://example.org/>
|
|
468
|
+
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
|
|
469
|
+
|
|
470
|
+
INSERT DATA {
|
|
471
|
+
ex:david a foaf:Person ;
|
|
472
|
+
foaf:name "David" ;
|
|
473
|
+
foaf:age 28 ;
|
|
474
|
+
foaf:email "david@example.org" .
|
|
475
|
+
|
|
476
|
+
ex:project1 ex:hasLead ex:david ;
|
|
477
|
+
ex:budget 50000 ;
|
|
478
|
+
ex:status "active" .
|
|
479
|
+
}
|
|
452
480
|
`)
|
|
453
481
|
|
|
454
|
-
//
|
|
482
|
+
// Verify insert
|
|
483
|
+
const count = db.countTriples()
|
|
484
|
+
console.log(`Total triples after insert: ${count}`)
|
|
485
|
+
|
|
486
|
+
// DELETE WHERE - Remove matching triples
|
|
455
487
|
db.updateDelete(`
|
|
456
|
-
|
|
488
|
+
PREFIX ex: <http://example.org/>
|
|
489
|
+
DELETE WHERE { ?s ex:status "completed" }
|
|
490
|
+
`)
|
|
491
|
+
```
|
|
492
|
+
|
|
493
|
+
### Bulk Data Loading Example
|
|
494
|
+
|
|
495
|
+
```typescript
|
|
496
|
+
import { GraphDB } from 'rust-kgdb'
|
|
497
|
+
import { readFileSync } from 'fs'
|
|
498
|
+
|
|
499
|
+
const db = new GraphDB('http://example.org/bulk-load')
|
|
500
|
+
|
|
501
|
+
// Load Turtle file
|
|
502
|
+
const ttlData = readFileSync('data/knowledge-graph.ttl', 'utf-8')
|
|
503
|
+
db.loadTtl(ttlData, null) // null = default graph
|
|
504
|
+
|
|
505
|
+
// Load into named graph
|
|
506
|
+
const orgData = readFileSync('data/organization.ttl', 'utf-8')
|
|
507
|
+
db.loadTtl(orgData, 'http://example.org/graphs/org')
|
|
508
|
+
|
|
509
|
+
// Load N-Triples format
|
|
510
|
+
const ntData = readFileSync('data/triples.nt', 'utf-8')
|
|
511
|
+
db.loadNTriples(ntData, null)
|
|
512
|
+
|
|
513
|
+
console.log(`Loaded ${db.countTriples()} triples`)
|
|
514
|
+
|
|
515
|
+
// Query across all graphs
|
|
516
|
+
const results = db.querySelect(`
|
|
517
|
+
SELECT ?g (COUNT(*) AS ?count) WHERE {
|
|
518
|
+
GRAPH ?g { ?s ?p ?o }
|
|
519
|
+
}
|
|
520
|
+
GROUP BY ?g
|
|
457
521
|
`)
|
|
522
|
+
console.log('Triples per graph:', results)
|
|
458
523
|
```
|
|
459
524
|
|
|
460
525
|
---
|
|
@@ -614,11 +679,24 @@ Total: ~120 bytes/triple including indexes
|
|
|
614
679
|
|
|
615
680
|
## Version History
|
|
616
681
|
|
|
682
|
+
### v0.2.2 (2025-12-08) - Enhanced Documentation
|
|
683
|
+
|
|
684
|
+
- Added comprehensive INSERT DATA examples with PREFIX syntax
|
|
685
|
+
- Added bulk data loading example with named graphs
|
|
686
|
+
- Enhanced SPARQL UPDATE section with real-world patterns
|
|
687
|
+
- Improved documentation for data import workflows
|
|
688
|
+
|
|
689
|
+
### v0.2.1 (2025-12-08) - npm Platform Fix
|
|
690
|
+
|
|
691
|
+
- Fixed native module loading for platform-specific binaries
|
|
692
|
+
- This release includes a pre-built binary for **macOS x64** only
|
|
693
|
+
- Other platforms coming in next release
|
|
694
|
+
|
|
617
695
|
### v0.2.0 (2025-12-08) - Distributed Cluster Support
|
|
618
696
|
|
|
619
697
|
- **NEW: Distributed cluster architecture** with HDRF partitioning
|
|
620
698
|
- **Subject-Hash Filter** for accurate COUNT deduplication across replicas
|
|
621
|
-
- **
|
|
699
|
+
- **Arrow-powered OLAP** query path for high-performance analytical queries
|
|
622
700
|
- Coordinator-Executor pattern with gRPC communication
|
|
623
701
|
- 9-partition default for optimal data distribution
|
|
624
702
|
- **Contact for cluster deployment**: gonnect.uk@gmail.com
|
package/index.d.ts
CHANGED
|
@@ -77,3 +77,350 @@ export class GraphDB {
|
|
|
77
77
|
* Get library version
|
|
78
78
|
*/
|
|
79
79
|
export function getVersion(): string
|
|
80
|
+
|
|
81
|
+
// ==============================================
|
|
82
|
+
// GraphFrames API - Powered by Apache DataFusion
|
|
83
|
+
// ==============================================
|
|
84
|
+
|
|
85
|
+
/**
|
|
86
|
+
* GraphFrame: High-performance graph analytics with DataFrame semantics
|
|
87
|
+
* **Powered by Apache DataFusion** for OLAP aggregations
|
|
88
|
+
*
|
|
89
|
+
* @example
|
|
90
|
+
* ```typescript
|
|
91
|
+
* const graph = new GraphFrame(
|
|
92
|
+
* JSON.stringify([{id: "alice"}, {id: "bob"}]),
|
|
93
|
+
* JSON.stringify([{src: "alice", dst: "bob"}])
|
|
94
|
+
* )
|
|
95
|
+
*
|
|
96
|
+
* // Run PageRank
|
|
97
|
+
* const ranks = JSON.parse(graph.pageRank(0.15, 20))
|
|
98
|
+
*
|
|
99
|
+
* // Find triangles
|
|
100
|
+
* const triangles = graph.triangleCount()
|
|
101
|
+
*
|
|
102
|
+
* // Motif finding
|
|
103
|
+
* const patterns = JSON.parse(graph.find("(a)-[]->(b); (b)-[]->(c)"))
|
|
104
|
+
* ```
|
|
105
|
+
*/
|
|
106
|
+
export class GraphFrame {
|
|
107
|
+
/**
|
|
108
|
+
* Create from JSON vertex and edge arrays
|
|
109
|
+
* @param verticesJson - JSON array of vertices (must have "id" field)
|
|
110
|
+
* @param edgesJson - JSON array of edges (must have "src" and "dst" fields)
|
|
111
|
+
*/
|
|
112
|
+
constructor(verticesJson: string, edgesJson: string)
|
|
113
|
+
|
|
114
|
+
/** Get vertex count */
|
|
115
|
+
vertexCount(): number
|
|
116
|
+
|
|
117
|
+
/** Get edge count */
|
|
118
|
+
edgeCount(): number
|
|
119
|
+
|
|
120
|
+
/** Check if empty */
|
|
121
|
+
isEmpty(): boolean
|
|
122
|
+
|
|
123
|
+
/** Get in-degrees as JSON */
|
|
124
|
+
inDegrees(): string
|
|
125
|
+
|
|
126
|
+
/** Get out-degrees as JSON */
|
|
127
|
+
outDegrees(): string
|
|
128
|
+
|
|
129
|
+
/** Get total degrees as JSON */
|
|
130
|
+
degrees(): string
|
|
131
|
+
|
|
132
|
+
/**
|
|
133
|
+
* Run PageRank algorithm
|
|
134
|
+
* @param resetProb - Reset probability (1 - damping factor), typically 0.15
|
|
135
|
+
* @param maxIter - Maximum iterations
|
|
136
|
+
* @returns JSON with ranks, iterations, converged
|
|
137
|
+
*/
|
|
138
|
+
pageRank(resetProb: number, maxIter: number): string
|
|
139
|
+
|
|
140
|
+
/**
|
|
141
|
+
* Find connected components
|
|
142
|
+
* @returns JSON with component assignments
|
|
143
|
+
*/
|
|
144
|
+
connectedComponents(): string
|
|
145
|
+
|
|
146
|
+
/**
|
|
147
|
+
* Compute shortest paths from landmarks
|
|
148
|
+
* @param landmarks - Array of vertex IDs to compute distances from
|
|
149
|
+
* @returns JSON with distances from each landmark
|
|
150
|
+
*/
|
|
151
|
+
shortestPaths(landmarks: string[]): string
|
|
152
|
+
|
|
153
|
+
/**
|
|
154
|
+
* Run label propagation for community detection
|
|
155
|
+
* @param maxIter - Maximum iterations
|
|
156
|
+
* @returns JSON with label assignments
|
|
157
|
+
*/
|
|
158
|
+
labelPropagation(maxIter: number): string
|
|
159
|
+
|
|
160
|
+
/**
|
|
161
|
+
* Count triangles in the graph
|
|
162
|
+
* @returns Number of triangles
|
|
163
|
+
*/
|
|
164
|
+
triangleCount(): number
|
|
165
|
+
|
|
166
|
+
/**
|
|
167
|
+
* Find motif patterns using GraphFrame DSL
|
|
168
|
+
* Pattern syntax: "(a)-[e]->(b); (b)-[]->(c); !(c)-[]->(a)"
|
|
169
|
+
* @param pattern - Motif pattern string
|
|
170
|
+
* @returns JSON array of matches
|
|
171
|
+
*/
|
|
172
|
+
find(pattern: string): string
|
|
173
|
+
|
|
174
|
+
/**
|
|
175
|
+
* Convert to JSON
|
|
176
|
+
* @returns JSON representation of the graph
|
|
177
|
+
*/
|
|
178
|
+
toJson(): string
|
|
179
|
+
}
|
|
180
|
+
|
|
181
|
+
// Example graph factory functions
|
|
182
|
+
|
|
183
|
+
/** Create friends example graph */
|
|
184
|
+
export function friendsGraph(): GraphFrame
|
|
185
|
+
|
|
186
|
+
/** Create chain graph with given length */
|
|
187
|
+
export function chainGraph(length: number): GraphFrame
|
|
188
|
+
|
|
189
|
+
/** Create star graph with given spokes */
|
|
190
|
+
export function starGraph(spokes: number): GraphFrame
|
|
191
|
+
|
|
192
|
+
/** Create complete graph K_n */
|
|
193
|
+
export function completeGraph(n: number): GraphFrame
|
|
194
|
+
|
|
195
|
+
/** Create cycle graph with n vertices */
|
|
196
|
+
export function cycleGraph(n: number): GraphFrame
|
|
197
|
+
|
|
198
|
+
/** Create binary tree with given depth */
|
|
199
|
+
export function binaryTreeGraph(depth: number): GraphFrame
|
|
200
|
+
|
|
201
|
+
/** Create bipartite graph */
|
|
202
|
+
export function bipartiteGraph(leftSize: number, rightSize: number): GraphFrame
|
|
203
|
+
|
|
204
|
+
// ==============================================
|
|
205
|
+
// Pregel API - Bulk Synchronous Parallel Processing
|
|
206
|
+
// ==============================================
|
|
207
|
+
|
|
208
|
+
/**
|
|
209
|
+
* Run Pregel shortest paths algorithm on a GraphFrame
|
|
210
|
+
* @param graph - The GraphFrame to process
|
|
211
|
+
* @param landmark - Source vertex ID for shortest paths calculation
|
|
212
|
+
* @param maxSupersteps - Maximum number of supersteps before termination
|
|
213
|
+
* @returns JSON with distances and superstep count
|
|
214
|
+
*
|
|
215
|
+
* @example
|
|
216
|
+
* ```typescript
|
|
217
|
+
* const graph = chainGraph(5)
|
|
218
|
+
* const result = JSON.parse(pregelShortestPaths(graph, "v0", 10))
|
|
219
|
+
* console.log(result.distances) // Distances from v0 to all vertices
|
|
220
|
+
* ```
|
|
221
|
+
*/
|
|
222
|
+
export function pregelShortestPaths(graph: GraphFrame, landmark: string, maxSupersteps: number): string
|
|
223
|
+
|
|
224
|
+
// ==============================================
|
|
225
|
+
// Embeddings API - Multi-Provider Semantic Search
|
|
226
|
+
// ==============================================
|
|
227
|
+
|
|
228
|
+
/**
|
|
229
|
+
* EmbeddingService: Multi-provider vector embeddings with HNSW similarity search
|
|
230
|
+
*
|
|
231
|
+
* Features:
|
|
232
|
+
* - Store and retrieve vector embeddings for entities
|
|
233
|
+
* - HNSW-based similarity search with configurable threshold
|
|
234
|
+
* - 1-hop ARCADE neighbor cache for graph-aware retrieval
|
|
235
|
+
* - Multi-provider composite embeddings with aggregation strategies (RRF, voting, max)
|
|
236
|
+
*
|
|
237
|
+
* @example
|
|
238
|
+
* ```typescript
|
|
239
|
+
* const service = new EmbeddingService()
|
|
240
|
+
*
|
|
241
|
+
* // Store embeddings
|
|
242
|
+
* service.storeVector('entity1', [0.1, 0.2, 0.3, ...])
|
|
243
|
+
*
|
|
244
|
+
* // Find similar entities
|
|
245
|
+
* const similar = JSON.parse(service.findSimilar('entity1', 10, 0.7))
|
|
246
|
+
*
|
|
247
|
+
* // Composite multi-provider embeddings
|
|
248
|
+
* service.storeComposite('entity1', JSON.stringify({
|
|
249
|
+
* openai: [0.1, 0.2, ...],
|
|
250
|
+
* voyage: [0.3, 0.4, ...],
|
|
251
|
+
* cohere: [0.5, 0.6, ...]
|
|
252
|
+
* }))
|
|
253
|
+
* const results = JSON.parse(service.findSimilarComposite('entity1', 10, 0.7, 'rrf'))
|
|
254
|
+
* ```
|
|
255
|
+
*/
|
|
256
|
+
export class EmbeddingService {
|
|
257
|
+
/** Create new embedding service */
|
|
258
|
+
constructor()
|
|
259
|
+
|
|
260
|
+
/** Check if service is enabled */
|
|
261
|
+
isEnabled(): boolean
|
|
262
|
+
|
|
263
|
+
/** Store embedding vector for entity */
|
|
264
|
+
storeVector(entityId: string, vector: number[]): void
|
|
265
|
+
|
|
266
|
+
/** Get embedding for entity (returns null if not found) */
|
|
267
|
+
getVector(entityId: string): number[] | null
|
|
268
|
+
|
|
269
|
+
/** Delete embedding for entity */
|
|
270
|
+
deleteVector(entityId: string): void
|
|
271
|
+
|
|
272
|
+
/**
|
|
273
|
+
* Find similar entities using HNSW search
|
|
274
|
+
* @param entityId - Source entity to find similar entities for
|
|
275
|
+
* @param k - Number of results to return
|
|
276
|
+
* @param threshold - Minimum similarity threshold (0.0 to 1.0)
|
|
277
|
+
* @returns JSON array of {entity, similarity} objects
|
|
278
|
+
*/
|
|
279
|
+
findSimilar(entityId: string, k: number, threshold: number): string
|
|
280
|
+
|
|
281
|
+
/**
|
|
282
|
+
* Find similar with graceful degradation (returns empty array if service disabled)
|
|
283
|
+
*/
|
|
284
|
+
findSimilarGraceful(entityId: string, k: number, threshold: number): string
|
|
285
|
+
|
|
286
|
+
/**
|
|
287
|
+
* Get 1-hop outgoing neighbors (ARCADE cache)
|
|
288
|
+
* @returns JSON array of neighbor IDs
|
|
289
|
+
*/
|
|
290
|
+
getNeighborsOut(entityId: string): string
|
|
291
|
+
|
|
292
|
+
/**
|
|
293
|
+
* Get 1-hop incoming neighbors (ARCADE cache)
|
|
294
|
+
* @returns JSON array of neighbor IDs
|
|
295
|
+
*/
|
|
296
|
+
getNeighborsIn(entityId: string): string
|
|
297
|
+
|
|
298
|
+
/**
|
|
299
|
+
* Notify service of triple insert (for cache updates)
|
|
300
|
+
*/
|
|
301
|
+
onTripleInsert(subject: string, predicate: string, object: string, graph: string | null): void
|
|
302
|
+
|
|
303
|
+
/**
|
|
304
|
+
* Notify service of triple delete (for cache updates)
|
|
305
|
+
*/
|
|
306
|
+
onTripleDelete(subject: string, predicate: string, object: string, graph: string | null): void
|
|
307
|
+
|
|
308
|
+
/**
|
|
309
|
+
* Store composite embedding from multiple providers
|
|
310
|
+
* @param entityId - Entity ID
|
|
311
|
+
* @param embeddingsJson - JSON object mapping provider name to embedding vector
|
|
312
|
+
*/
|
|
313
|
+
storeComposite(entityId: string, embeddingsJson: string): void
|
|
314
|
+
|
|
315
|
+
/**
|
|
316
|
+
* Get composite embedding for entity
|
|
317
|
+
* @returns JSON with embeddings from all providers, or null if not found
|
|
318
|
+
*/
|
|
319
|
+
getComposite(entityId: string): string | null
|
|
320
|
+
|
|
321
|
+
/**
|
|
322
|
+
* Find similar using composite embeddings with aggregation
|
|
323
|
+
* @param entityId - Source entity
|
|
324
|
+
* @param k - Number of results
|
|
325
|
+
* @param threshold - Minimum similarity
|
|
326
|
+
* @param strategy - Aggregation strategy: 'rrf' | 'max' | 'voting' | 'weighted'
|
|
327
|
+
* @returns JSON array of similarity results
|
|
328
|
+
*/
|
|
329
|
+
findSimilarComposite(entityId: string, k: number, threshold: number, strategy: string): string
|
|
330
|
+
|
|
331
|
+
/**
|
|
332
|
+
* List all entities with composite embeddings
|
|
333
|
+
*/
|
|
334
|
+
listCompositeEntities(): string[]
|
|
335
|
+
|
|
336
|
+
/**
|
|
337
|
+
* Count total composite embeddings
|
|
338
|
+
*/
|
|
339
|
+
countComposites(): number
|
|
340
|
+
|
|
341
|
+
/**
|
|
342
|
+
* Get list of registered provider names
|
|
343
|
+
*/
|
|
344
|
+
registeredProviders(): string[]
|
|
345
|
+
|
|
346
|
+
/** Get service metrics as JSON */
|
|
347
|
+
getMetrics(): string
|
|
348
|
+
|
|
349
|
+
/** Get cache statistics as JSON */
|
|
350
|
+
getCacheStats(): string
|
|
351
|
+
|
|
352
|
+
/** Rebuild HNSW similarity index */
|
|
353
|
+
rebuildIndex(): void
|
|
354
|
+
}
|
|
355
|
+
|
|
356
|
+
// ==============================================
|
|
357
|
+
// Datalog API - Rule-Based Reasoning Engine
|
|
358
|
+
// ==============================================
|
|
359
|
+
|
|
360
|
+
/**
|
|
361
|
+
* DatalogProgram: Logic programming with semi-naive evaluation
|
|
362
|
+
*
|
|
363
|
+
* Features:
|
|
364
|
+
* - Add extensional facts (EDB)
|
|
365
|
+
* - Define inference rules (IDB)
|
|
366
|
+
* - Semi-naive evaluation for efficient fixpoint computation
|
|
367
|
+
*
|
|
368
|
+
* @example
|
|
369
|
+
* ```typescript
|
|
370
|
+
* const program = new DatalogProgram()
|
|
371
|
+
*
|
|
372
|
+
* // Add facts (JSON format)
|
|
373
|
+
* program.addFact(JSON.stringify({predicate: 'parent', terms: ['alice', 'bob']}))
|
|
374
|
+
* program.addFact(JSON.stringify({predicate: 'parent', terms: ['bob', 'charlie']}))
|
|
375
|
+
*
|
|
376
|
+
* // Add rule: grandparent(X, Z) :- parent(X, Y), parent(Y, Z)
|
|
377
|
+
* program.addRule(JSON.stringify({
|
|
378
|
+
* head: {predicate: 'grandparent', terms: ['?X', '?Z']},
|
|
379
|
+
* body: [
|
|
380
|
+
* {predicate: 'parent', terms: ['?X', '?Y']},
|
|
381
|
+
* {predicate: 'parent', terms: ['?Y', '?Z']}
|
|
382
|
+
* ]
|
|
383
|
+
* }))
|
|
384
|
+
*
|
|
385
|
+
* // Evaluate and query
|
|
386
|
+
* const results = evaluateDatalog(program)
|
|
387
|
+
* const grandparents = JSON.parse(queryDatalog(program, 'grandparent'))
|
|
388
|
+
* ```
|
|
389
|
+
*/
|
|
390
|
+
export class DatalogProgram {
|
|
391
|
+
/** Create new Datalog program */
|
|
392
|
+
constructor()
|
|
393
|
+
|
|
394
|
+
/**
|
|
395
|
+
* Add a fact (extensional database tuple) in JSON format
|
|
396
|
+
* @param factJson - JSON: {"predicate": "name", "terms": ["a", "b"]}
|
|
397
|
+
*/
|
|
398
|
+
addFact(factJson: string): void
|
|
399
|
+
|
|
400
|
+
/**
|
|
401
|
+
* Add an inference rule in JSON format
|
|
402
|
+
* @param ruleJson - JSON: {"head": {...}, "body": [...]}
|
|
403
|
+
*/
|
|
404
|
+
addRule(ruleJson: string): void
|
|
405
|
+
|
|
406
|
+
/** Get count of facts in EDB */
|
|
407
|
+
factCount(): number
|
|
408
|
+
|
|
409
|
+
/** Get count of rules */
|
|
410
|
+
ruleCount(): number
|
|
411
|
+
}
|
|
412
|
+
|
|
413
|
+
/**
|
|
414
|
+
* Evaluate a Datalog program using semi-naive evaluation
|
|
415
|
+
* @param program - The DatalogProgram to evaluate
|
|
416
|
+
* @returns JSON with evaluation results
|
|
417
|
+
*/
|
|
418
|
+
export function evaluateDatalog(program: DatalogProgram): string
|
|
419
|
+
|
|
420
|
+
/**
|
|
421
|
+
* Query a predicate from a Datalog program
|
|
422
|
+
* @param program - The DatalogProgram to query
|
|
423
|
+
* @param predicate - The predicate name to query
|
|
424
|
+
* @returns JSON array of matching facts
|
|
425
|
+
*/
|
|
426
|
+
export function queryDatalog(program: DatalogProgram, predicate: string): string
|
package/index.js
CHANGED
|
@@ -32,10 +32,48 @@ function loadNativeBinding() {
|
|
|
32
32
|
return nativeBinding
|
|
33
33
|
}
|
|
34
34
|
|
|
35
|
-
const {
|
|
35
|
+
const {
|
|
36
|
+
GraphDb,
|
|
37
|
+
getVersion,
|
|
38
|
+
// GraphFrames - Powered by Apache DataFusion
|
|
39
|
+
GraphFrame,
|
|
40
|
+
friendsGraph,
|
|
41
|
+
chainGraph,
|
|
42
|
+
starGraph,
|
|
43
|
+
completeGraph,
|
|
44
|
+
cycleGraph,
|
|
45
|
+
binaryTreeGraph,
|
|
46
|
+
bipartiteGraph,
|
|
47
|
+
// Embeddings API - Multi-Provider Semantic Search
|
|
48
|
+
EmbeddingService,
|
|
49
|
+
// Datalog API - Rule-Based Reasoning Engine
|
|
50
|
+
DatalogProgram,
|
|
51
|
+
evaluateDatalog,
|
|
52
|
+
queryDatalog,
|
|
53
|
+
// Pregel API - Bulk Synchronous Parallel Processing
|
|
54
|
+
pregelShortestPaths,
|
|
55
|
+
} = loadNativeBinding()
|
|
36
56
|
|
|
37
57
|
module.exports = {
|
|
58
|
+
// Core GraphDB
|
|
38
59
|
GraphDB: GraphDb, // Export as GraphDB for consistency
|
|
39
60
|
GraphDb, // Also export as GraphDb
|
|
40
61
|
getVersion,
|
|
62
|
+
// GraphFrames API - Powered by Apache DataFusion
|
|
63
|
+
GraphFrame,
|
|
64
|
+
friendsGraph,
|
|
65
|
+
chainGraph,
|
|
66
|
+
starGraph,
|
|
67
|
+
completeGraph,
|
|
68
|
+
cycleGraph,
|
|
69
|
+
binaryTreeGraph,
|
|
70
|
+
bipartiteGraph,
|
|
71
|
+
// Embeddings API - Multi-Provider Semantic Search
|
|
72
|
+
EmbeddingService,
|
|
73
|
+
// Datalog API - Rule-Based Reasoning Engine
|
|
74
|
+
DatalogProgram,
|
|
75
|
+
evaluateDatalog,
|
|
76
|
+
queryDatalog,
|
|
77
|
+
// Pregel API - Bulk Synchronous Parallel Processing
|
|
78
|
+
pregelShortestPaths,
|
|
41
79
|
}
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "rust-kgdb",
|
|
3
|
-
"version": "0.
|
|
4
|
-
"description": "High-performance RDF/SPARQL database with
|
|
3
|
+
"version": "0.3.0",
|
|
4
|
+
"description": "High-performance RDF/SPARQL database with GraphFrames analytics, vector embeddings, Datalog reasoning, and Pregel BSP processing",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"types": "index.d.ts",
|
|
7
7
|
"napi": {
|
|
@@ -19,9 +19,8 @@
|
|
|
19
19
|
"scripts": {
|
|
20
20
|
"build": "napi build --platform --release native/rust-kgdb-napi",
|
|
21
21
|
"build:debug": "napi build --platform native/rust-kgdb-napi",
|
|
22
|
-
"
|
|
23
|
-
"test": "jest"
|
|
24
|
-
"version": "0.2.0"
|
|
22
|
+
"test": "node test-all-features.js",
|
|
23
|
+
"test:jest": "jest"
|
|
25
24
|
},
|
|
26
25
|
"keywords": [
|
|
27
26
|
"rdf",
|
|
@@ -30,6 +29,12 @@
|
|
|
30
29
|
"knowledge-graph",
|
|
31
30
|
"database",
|
|
32
31
|
"triplestore",
|
|
32
|
+
"graphframes",
|
|
33
|
+
"pagerank",
|
|
34
|
+
"embeddings",
|
|
35
|
+
"vector-search",
|
|
36
|
+
"datalog",
|
|
37
|
+
"pregel",
|
|
33
38
|
"napi-rs",
|
|
34
39
|
"rust"
|
|
35
40
|
],
|
|
@@ -44,6 +49,7 @@
|
|
|
44
49
|
"@types/jest": "^29.5.0",
|
|
45
50
|
"@types/node": "^20.0.0",
|
|
46
51
|
"jest": "^29.7.0",
|
|
52
|
+
"ts-jest": "^29.4.6",
|
|
47
53
|
"typescript": "^5.0.0"
|
|
48
54
|
},
|
|
49
55
|
"engines": {
|
|
@@ -53,13 +59,7 @@
|
|
|
53
59
|
"index.js",
|
|
54
60
|
"index.d.ts",
|
|
55
61
|
"README.md",
|
|
62
|
+
"CHANGELOG.md",
|
|
56
63
|
"*.node"
|
|
57
|
-
]
|
|
58
|
-
"optionalDependencies": {
|
|
59
|
-
"rust-kgdb-win32-x64-msvc": "0.2.1",
|
|
60
|
-
"rust-kgdb-darwin-x64": "0.2.1",
|
|
61
|
-
"rust-kgdb-linux-x64-gnu": "0.2.1",
|
|
62
|
-
"rust-kgdb-darwin-arm64": "0.2.1",
|
|
63
|
-
"rust-kgdb-linux-arm64-gnu": "0.2.1"
|
|
64
|
-
}
|
|
64
|
+
]
|
|
65
65
|
}
|
|
Binary file
|