rust-kgdb 0.6.64 → 0.6.66
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +607 -32
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -38,16 +38,25 @@ For Enterprises:
|
|
|
38
38
|
- Zero hallucinations - Every answer traces back to your actual data
|
|
39
39
|
- Full audit trail - Regulators can verify every AI decision (SOX, GDPR, FDA 21 CFR Part 11)
|
|
40
40
|
- No infrastructure - Runs embedded in your app, no servers to manage
|
|
41
|
+
- Idempotent responses - Same question always returns same answer (semantic hashing)
|
|
41
42
|
|
|
42
43
|
For Engineering Teams:
|
|
43
44
|
- 449ns lookups - 35x faster than RDFox
|
|
44
45
|
- 24 bytes per triple - 25% more memory efficient than competitors
|
|
45
46
|
- 132K writes/sec - Handle enterprise transaction volumes
|
|
47
|
+
- Long-term memory - Agent remembers past conversations (94% recall at 10K depth)
|
|
46
48
|
|
|
47
49
|
For AI/ML Teams:
|
|
48
50
|
- 86.4% SPARQL accuracy - vs 0% with vanilla LLMs on LUBM benchmark
|
|
49
51
|
- 16ms similarity search - Find related entities across 10K vectors
|
|
50
52
|
- Schema-aware generation - AI uses YOUR ontology, not guessed class names
|
|
53
|
+
- Conversation knowledge extraction - Auto-extract entities and relationships from chat
|
|
54
|
+
|
|
55
|
+
For Knowledge Management:
|
|
56
|
+
- Memory Hypergraph - Episodes link to KG entities via hyper-edges
|
|
57
|
+
- Temporal decay - Recent memories weighted higher than old ones
|
|
58
|
+
- Semantic deduplication - "What about Provider X?" and "Tell me about Provider X" return the same cached result
|
|
59
|
+
- Single query traversal - SPARQL walks both memory AND knowledge graph in one query
|
|
51
60
|
|
|
52
61
|
## What Is rust-kgdb?
|
|
53
62
|
|
|
@@ -77,6 +86,11 @@ A high-performance RDF/SPARQL database that runs inside your application. No ser
|
|
|
77
86
|
| Memory/Triple | 24 bytes | 32 bytes | 50-60 bytes |
|
|
78
87
|
| Bulk Insert | 146K/sec | 200K/sec | 50K/sec |
|
|
79
88
|
|
|
89
|
+
Sources:
|
|
90
|
+
- rust-kgdb: Criterion benchmarks on LUBM(1) dataset, Apple Silicon
|
|
91
|
+
- RDFox: [Oxford Semantic Technologies benchmarks](https://www.oxfordsemantic.tech/product)
|
|
92
|
+
- Apache Jena: [Jena performance documentation](https://jena.apache.org/documentation/tdb/performance.html)
|
|
93
|
+
|
|
80
94
|
Like SQLite - but for knowledge graphs.
|
|
81
95
|
|
|
82
96
|
### HyperMind: Neuro-Symbolic Agent Framework
|
|
@@ -145,18 +159,31 @@ const { GraphDB, HyperMindAgent } = require('rust-kgdb');
|
|
|
145
159
|
|
|
146
160
|
const db = new GraphDB('http://insurance.org/');
|
|
147
161
|
db.loadTtl(`
|
|
148
|
-
|
|
149
|
-
|
|
162
|
+
<http://insurance.org/Provider_445> <http://insurance.org/totalClaims> "89" .
|
|
163
|
+
<http://insurance.org/Provider_445> <http://insurance.org/avgClaimAmount> "47000" .
|
|
164
|
+
<http://insurance.org/Provider_445> <http://insurance.org/denialRate> "0.34" .
|
|
165
|
+
<http://insurance.org/Provider_445> <http://insurance.org/hasPattern> <http://insurance.org/UnbundledBilling> .
|
|
166
|
+
<http://insurance.org/Provider_445> <http://insurance.org/flaggedBy> <http://insurance.org/SIU_2024_Q1> .
|
|
150
167
|
`);
|
|
151
168
|
|
|
152
|
-
|
|
153
|
-
const
|
|
169
|
+
// Create agent with knowledge graph binding
|
|
170
|
+
const agent = new HyperMindAgent({
|
|
171
|
+
kg: db, // REQUIRED: GraphDB instance
|
|
172
|
+
name: 'fraud-detector', // Optional: Agent name
|
|
173
|
+
apiKey: process.env.OPENAI_API_KEY // Optional: LLM API key
|
|
174
|
+
});
|
|
175
|
+
|
|
176
|
+
// Natural language query -> Grounded results
|
|
177
|
+
const result = await agent.call("Which providers show suspicious billing patterns?");
|
|
154
178
|
|
|
155
179
|
console.log(result.answer);
|
|
156
180
|
// "Provider_445: 34% denial rate, flagged by SIU Q1 2024, unbundled billing pattern"
|
|
157
181
|
|
|
158
|
-
console.log(result.
|
|
159
|
-
// Full
|
|
182
|
+
console.log(result.explanation);
|
|
183
|
+
// Full execution trace showing tool calls
|
|
184
|
+
|
|
185
|
+
console.log(result.proof);
|
|
186
|
+
// Cryptographic proof DAG with SHA-256 hashes
|
|
160
187
|
```
|
|
161
188
|
|
|
162
189
|
## Core Components
|
|
@@ -227,6 +254,48 @@ embeddings.rebuildIndex();
|
|
|
227
254
|
const similar = embeddings.findSimilar('claim_001', 10, 0.7);
|
|
228
255
|
```
|
|
229
256
|
|
|
257
|
+
### Embedding Triggers: Auto-Generate on Insert
|
|
258
|
+
|
|
259
|
+
```javascript
|
|
260
|
+
const { GraphDB, EmbeddingService, TriggerManager } = require('rust-kgdb');
|
|
261
|
+
|
|
262
|
+
const db = new GraphDB('http://example.org/');
|
|
263
|
+
const embeddings = new EmbeddingService();
|
|
264
|
+
|
|
265
|
+
// Configure trigger to auto-generate embeddings on triple insert
|
|
266
|
+
const triggers = new TriggerManager({
|
|
267
|
+
db,
|
|
268
|
+
embeddings,
|
|
269
|
+
provider: 'openai', // or 'ollama', 'anthropic'
|
|
270
|
+
providerConfig: {
|
|
271
|
+
apiKey: process.env.OPENAI_API_KEY,
|
|
272
|
+
model: 'text-embedding-3-small'
|
|
273
|
+
}
|
|
274
|
+
});
|
|
275
|
+
|
|
276
|
+
// Register trigger: generate embedding when entity is inserted
|
|
277
|
+
triggers.register({
|
|
278
|
+
event: 'INSERT',
|
|
279
|
+
pattern: '?entity rdf:type ?class',
|
|
280
|
+
action: 'GENERATE_EMBEDDING',
|
|
281
|
+
config: {
|
|
282
|
+
fields: ['rdfs:label', 'rdfs:comment', 'schema:description'],
|
|
283
|
+
concatenate: true
|
|
284
|
+
}
|
|
285
|
+
});
|
|
286
|
+
|
|
287
|
+
// Now when you insert data, embeddings are auto-generated
|
|
288
|
+
db.loadTtl(`
|
|
289
|
+
:claim_001 a :Claim ;
|
|
290
|
+
rdfs:label "Suspicious orthopedic claim" ;
|
|
291
|
+
rdfs:comment "High-value claim from flagged provider" .
|
|
292
|
+
`);
|
|
293
|
+
// Trigger fires -> embedding generated for :claim_001
|
|
294
|
+
|
|
295
|
+
// Query by similarity (uses auto-generated embeddings)
|
|
296
|
+
const similar = embeddings.findSimilar('claim_001', 10, 0.7);
|
|
297
|
+
```
|
|
298
|
+
|
|
230
299
|
### DatalogProgram: Rule-Based Reasoning
|
|
231
300
|
|
|
232
301
|
```javascript
|
|
@@ -361,6 +430,417 @@ Memory Retrieval Performance:
|
|
|
361
430
|
- 16.7ms search speed for 10K queries
|
|
362
431
|
- 132K ops/sec write throughput
|
|
363
432
|
|
|
433
|
+
### Conversation Knowledge Extraction
|
|
434
|
+
|
|
435
|
+
Entities and relationships are automatically extracted from every conversation into the knowledge graph:
|
|
436
|
+
|
|
437
|
+
```javascript
|
|
438
|
+
// Agent conversation automatically extracts knowledge
|
|
439
|
+
const result = await agent.ask("Provider P001 submitted 5 claims last month totaling $47,000");
|
|
440
|
+
|
|
441
|
+
// Behind the scenes, HyperMind extracts and stores:
|
|
442
|
+
// :Conversation_001 :mentions :Provider_P001 .
|
|
443
|
+
// :Provider_P001 :claimCount "5" ; :claimTotal "47000" ; :period "last_month" .
|
|
444
|
+
// :Conversation_001 :timestamp "2024-12-17" ; :extractedFacts 3 .
|
|
445
|
+
|
|
446
|
+
// Later queries can use this extracted knowledge
|
|
447
|
+
const followUp = await agent.ask("What do we know about Provider P001?");
|
|
448
|
+
// Returns facts from BOTH original data AND extracted conversation knowledge
|
|
449
|
+
```
|
|
450
|
+
|
|
451
|
+
### Idempotent Responses (Same Question = Same Answer)
|
|
452
|
+
|
|
453
|
+
```javascript
|
|
454
|
+
// First call: Compute answer, store with semantic hash
|
|
455
|
+
const result1 = await agent.ask("Which providers have high denial rates?");
|
|
456
|
+
// Execution time: 450ms, stores result with hash
|
|
457
|
+
|
|
458
|
+
// Second call: Different wording, SAME semantic meaning
|
|
459
|
+
const result2 = await agent.ask("Show me providers with lots of denials");
|
|
460
|
+
// Execution time: 2ms (cache hit via semantic hash)
|
|
461
|
+
// Returns IDENTICAL result - no LLM call needed
|
|
462
|
+
|
|
463
|
+
// Why this matters:
|
|
464
|
+
// - Consistent answers across team members
|
|
465
|
+
// - No LLM cost for repeated questions
|
|
466
|
+
// - Audit trail shows same query = same result
|
|
467
|
+
```
|
|
468
|
+
|
|
469
|
+
## HyperAgent Core Concepts
|
|
470
|
+
|
|
471
|
+
```
|
|
472
|
+
+-----------------------------------------------------------------------------+
|
|
473
|
+
| HYPERAGENT EXECUTION MODEL |
|
|
474
|
+
| |
|
|
475
|
+
| User: "Find suspicious claims" |
|
|
476
|
+
| | |
|
|
477
|
+
| v |
|
|
478
|
+
| +-------------------------------------------------------------+ |
|
|
479
|
+
| | 1. INTENT ANALYSIS (deterministic, no LLM) | |
|
|
480
|
+
| | Keywords: "suspicious" -> FRAUD_DETECTION | |
|
|
481
|
+
| | Keywords: "claims" -> CLAIM_ENTITY | |
|
|
482
|
+
| +-------------------------------------------------------------+ |
|
|
483
|
+
| | |
|
|
484
|
+
| v |
|
|
485
|
+
| +-------------------------------------------------------------+ |
|
|
486
|
+
| | 2. SCHEMA BINDING | |
|
|
487
|
+
| | SchemaContext has: Claim, Provider, Claimant classes | |
|
|
488
|
+
| | Properties: denialRate, totalClaims, flaggedBy | |
|
|
489
|
+
| +-------------------------------------------------------------+ |
|
|
490
|
+
| | |
|
|
491
|
+
| v |
|
|
492
|
+
| +-------------------------------------------------------------+ |
|
|
493
|
+
| | 3. STEP GENERATION (schema-driven) | |
|
|
494
|
+
| | Step 1: kg.sparql.query -> Find high denial providers | |
|
|
495
|
+
| | Step 2: kg.datalog.infer -> Apply fraud rules | |
|
|
496
|
+
| | Step 3: kg.motif.find -> Detect circular patterns | |
|
|
497
|
+
| +-------------------------------------------------------------+ |
|
|
498
|
+
| | |
|
|
499
|
+
| v |
|
|
500
|
+
| +-------------------------------------------------------------+ |
|
|
501
|
+
| | 4. VALIDATED EXECUTION (sandbox + audit) | |
|
|
502
|
+
| | Each step: Proxy -> Sandbox -> Tool -> ProofDAG | |
|
|
503
|
+
| +-------------------------------------------------------------+ |
|
|
504
|
+
| | |
|
|
505
|
+
| v |
|
|
506
|
+
| Result: Facts from YOUR data with full audit trail |
|
|
507
|
+
+-----------------------------------------------------------------------------+
|
|
508
|
+
```
|
|
509
|
+
|
|
510
|
+
Key Principles:
|
|
511
|
+
- LLM is OPTIONAL - Only used for natural language summarization
|
|
512
|
+
- Query generation is DETERMINISTIC from SchemaContext
|
|
513
|
+
- Every step produces a cryptographic witness (SHA-256)
|
|
514
|
+
- Capability-based security prevents unauthorized operations
|
|
515
|
+
|
|
516
|
+
## SPARQL Query Examples
|
|
517
|
+
|
|
518
|
+
```javascript
|
|
519
|
+
const { GraphDB } = require('rust-kgdb');
|
|
520
|
+
const db = new GraphDB('http://example.org/');
|
|
521
|
+
|
|
522
|
+
// Load sample data
|
|
523
|
+
db.loadTtl(`
|
|
524
|
+
:alice :knows :bob ; :age 30 ; :city "London" .
|
|
525
|
+
:bob :knows :charlie ; :age 25 ; :city "Paris" .
|
|
526
|
+
:charlie :knows :alice ; :age 35 ; :city "London" .
|
|
527
|
+
`);
|
|
528
|
+
|
|
529
|
+
// Basic SELECT query
|
|
530
|
+
const friends = db.querySelect(`
|
|
531
|
+
SELECT ?person ?friend WHERE {
|
|
532
|
+
?person :knows ?friend
|
|
533
|
+
}
|
|
534
|
+
`);
|
|
535
|
+
|
|
536
|
+
// FILTER with comparison
|
|
537
|
+
const adults = db.querySelect(`
|
|
538
|
+
SELECT ?person ?age WHERE {
|
|
539
|
+
?person :age ?age .
|
|
540
|
+
FILTER(?age >= 30)
|
|
541
|
+
}
|
|
542
|
+
`);
|
|
543
|
+
|
|
544
|
+
// OPTIONAL pattern
|
|
545
|
+
const withCity = db.querySelect(`
|
|
546
|
+
SELECT ?person ?city WHERE {
|
|
547
|
+
?person :knows ?someone .
|
|
548
|
+
OPTIONAL { ?person :city ?city }
|
|
549
|
+
}
|
|
550
|
+
`);
|
|
551
|
+
|
|
552
|
+
// Aggregation
|
|
553
|
+
const avgAge = db.querySelect(`
|
|
554
|
+
SELECT (AVG(?age) as ?average) WHERE {
|
|
555
|
+
?person :age ?age
|
|
556
|
+
}
|
|
557
|
+
`);
|
|
558
|
+
|
|
559
|
+
// CONSTRUCT new triples
|
|
560
|
+
const inferred = db.queryConstruct(`
|
|
561
|
+
CONSTRUCT { ?a :friendOfFriend ?c }
|
|
562
|
+
WHERE {
|
|
563
|
+
?a :knows ?b .
|
|
564
|
+
?b :knows ?c .
|
|
565
|
+
FILTER(?a != ?c)
|
|
566
|
+
}
|
|
567
|
+
`);
|
|
568
|
+
|
|
569
|
+
// Named Graph operations
|
|
570
|
+
db.loadTtl(':data1 :value "100" .', 'http://example.org/graph1');
|
|
571
|
+
db.loadTtl(':data2 :value "200" .', 'http://example.org/graph2');
|
|
572
|
+
const fromGraph = db.querySelect(`
|
|
573
|
+
SELECT ?s ?v FROM <http://example.org/graph1> WHERE {
|
|
574
|
+
?s :value ?v
|
|
575
|
+
}
|
|
576
|
+
`);
|
|
577
|
+
```
|
|
578
|
+
|
|
579
|
+
## Datalog Reasoning Examples
|
|
580
|
+
|
|
581
|
+
```javascript
|
|
582
|
+
const { DatalogProgram, evaluateDatalog } = require('rust-kgdb');
|
|
583
|
+
|
|
584
|
+
const datalog = new DatalogProgram();
|
|
585
|
+
|
|
586
|
+
// Add base facts
|
|
587
|
+
datalog.addFact(JSON.stringify({predicate:'parent', terms:['alice','bob']}));
|
|
588
|
+
datalog.addFact(JSON.stringify({predicate:'parent', terms:['bob','charlie']}));
|
|
589
|
+
datalog.addFact(JSON.stringify({predicate:'parent', terms:['charlie','dave']}));
|
|
590
|
+
|
|
591
|
+
// Transitive closure rule: ancestor(X,Z) :- parent(X,Y), ancestor(Y,Z)
|
|
592
|
+
datalog.addRule(JSON.stringify({
|
|
593
|
+
head: {predicate:'ancestor', terms:['?X','?Y']},
|
|
594
|
+
body: [
|
|
595
|
+
{predicate:'parent', terms:['?X','?Y']}
|
|
596
|
+
]
|
|
597
|
+
}));
|
|
598
|
+
datalog.addRule(JSON.stringify({
|
|
599
|
+
head: {predicate:'ancestor', terms:['?X','?Z']},
|
|
600
|
+
body: [
|
|
601
|
+
{predicate:'parent', terms:['?X','?Y']},
|
|
602
|
+
{predicate:'ancestor', terms:['?Y','?Z']}
|
|
603
|
+
]
|
|
604
|
+
}));
|
|
605
|
+
|
|
606
|
+
// Semi-naive evaluation (fixpoint)
|
|
607
|
+
const inferred = evaluateDatalog(datalog);
|
|
608
|
+
// Results: ancestor(alice,bob), ancestor(alice,charlie), ancestor(alice,dave)
|
|
609
|
+
// ancestor(bob,charlie), ancestor(bob,dave)
|
|
610
|
+
// ancestor(charlie,dave)
|
|
611
|
+
|
|
612
|
+
// Fraud detection rules
|
|
613
|
+
const fraudDatalog = new DatalogProgram();
|
|
614
|
+
fraudDatalog.addFact(JSON.stringify({predicate:'claim', terms:['C001','P001','50000']}));
|
|
615
|
+
fraudDatalog.addFact(JSON.stringify({predicate:'claim', terms:['C002','P001','48000']}));
|
|
616
|
+
fraudDatalog.addFact(JSON.stringify({predicate:'sameAddress', terms:['P001','P002']}));
|
|
617
|
+
fraudDatalog.addFact(JSON.stringify({predicate:'claim', terms:['C003','P002','51000']}));
|
|
618
|
+
|
|
619
|
+
// Collusion rule
|
|
620
|
+
fraudDatalog.addRule(JSON.stringify({
|
|
621
|
+
head: {predicate:'potential_collusion', terms:['?P1','?P2']},
|
|
622
|
+
body: [
|
|
623
|
+
{predicate:'sameAddress', terms:['?P1','?P2']},
|
|
624
|
+
{predicate:'claim', terms:['?C1','?P1','?A1']},
|
|
625
|
+
{predicate:'claim', terms:['?C2','?P2','?A2']}
|
|
626
|
+
]
|
|
627
|
+
}));
|
|
628
|
+
```
|
|
629
|
+
|
|
630
|
+
## Motif Finding Examples
|
|
631
|
+
|
|
632
|
+
```javascript
|
|
633
|
+
const { GraphFrame, friendsGraph } = require('rust-kgdb');
|
|
634
|
+
|
|
635
|
+
// Create graph
|
|
636
|
+
const gf = new GraphFrame(
|
|
637
|
+
JSON.stringify([
|
|
638
|
+
{id:'alice'}, {id:'bob'}, {id:'charlie'},
|
|
639
|
+
{id:'dave'}, {id:'eve'}
|
|
640
|
+
]),
|
|
641
|
+
JSON.stringify([
|
|
642
|
+
{src:'alice', dst:'bob'},
|
|
643
|
+
{src:'bob', dst:'charlie'},
|
|
644
|
+
{src:'charlie', dst:'alice'},
|
|
645
|
+
{src:'dave', dst:'alice'},
|
|
646
|
+
{src:'eve', dst:'dave'}
|
|
647
|
+
])
|
|
648
|
+
);
|
|
649
|
+
|
|
650
|
+
// Find triangles: (a)->(b)->(c)->(a)
|
|
651
|
+
const triangles = gf.find('(a)-[e1]->(b); (b)-[e2]->(c); (c)-[e3]->(a)');
|
|
652
|
+
// Returns: [{a:'alice', b:'bob', c:'charlie', ...}]
|
|
653
|
+
|
|
654
|
+
// Find chains: (a)->(b)->(c)
|
|
655
|
+
const chains = gf.find('(a)-[e1]->(b); (b)-[e2]->(c)');
|
|
656
|
+
|
|
657
|
+
// Find stars: hub with multiple spokes
|
|
658
|
+
const stars = gf.find('(hub)-[e1]->(spoke1); (hub)-[e2]->(spoke2)');
|
|
659
|
+
|
|
660
|
+
// Find bidirectional edges
|
|
661
|
+
const bidir = gf.find('(a)-[e1]->(b); (b)-[e2]->(a)');
|
|
662
|
+
|
|
663
|
+
// Fraud pattern: circular payments
|
|
664
|
+
// A pays B, B pays C, C pays A
|
|
665
|
+
const circular = gf.find('(a)-[pay1]->(b); (b)-[pay2]->(c); (c)-[pay3]->(a)');
|
|
666
|
+
```
|
|
667
|
+
|
|
668
|
+
## Clustered KGDB
|
|
669
|
+
|
|
670
|
+
For datasets exceeding single-node capacity (1B+ triples), rust-kgdb supports distributed deployment:
|
|
671
|
+
|
|
672
|
+
```
|
|
673
|
+
+-----------------------------------------------------------------------------+
|
|
674
|
+
| DISTRIBUTED CLUSTER ARCHITECTURE |
|
|
675
|
+
| |
|
|
676
|
+
| +-------------------+ |
|
|
677
|
+
| | COORDINATOR | <- Routes queries, manages partitions |
|
|
678
|
+
| | (Raft consensus) | |
|
|
679
|
+
| +--------+----------+ |
|
|
680
|
+
| | |
|
|
681
|
+
| +--------+--------+--------+--------+ |
|
|
682
|
+
| | | | | | |
|
|
683
|
+
| v v v v v |
|
|
684
|
+
| +----+ +----+ +----+ +----+ +----+ |
|
|
685
|
+
| |Exec| |Exec| |Exec| |Exec| |Exec| <- Partition executors |
|
|
686
|
+
| | 0 | | 1 | | 2 | | 3 | | 4 | |
|
|
687
|
+
| +----+ +----+ +----+ +----+ +----+ |
|
|
688
|
+
| | | | | | |
|
|
689
|
+
| v v v v v |
|
|
690
|
+
| [===] [===] [===] [===] [===] <- Local RocksDB partitions |
|
|
691
|
+
| |
|
|
692
|
+
| HDRF Partitioning: Subject-anchored streaming (load factor < 1.1) |
|
|
693
|
+
| Shadow Partitions: Zero-downtime rebalancing (~10ms pause) |
|
|
694
|
+
| DataFusion: Arrow-native OLAP for analytical queries |
|
|
695
|
+
+-----------------------------------------------------------------------------+
|
|
696
|
+
```
|
|
697
|
+
|
|
698
|
+
Cluster Features:
|
|
699
|
+
- HDRF streaming partitioner (subject-anchored, maintains locality)
|
|
700
|
+
- Raft consensus for distributed coordination
|
|
701
|
+
- gRPC for inter-node communication
|
|
702
|
+
- DataFusion integration for OLAP queries
|
|
703
|
+
- Shadow partitions for zero-downtime rebalancing
|
|
704
|
+
|
|
705
|
+
Deployment:
|
|
706
|
+
|
|
707
|
+
```bash
|
|
708
|
+
# Kubernetes deployment
|
|
709
|
+
kubectl apply -f infra/k8s/coordinator.yaml
|
|
710
|
+
kubectl apply -f infra/k8s/executor.yaml
|
|
711
|
+
|
|
712
|
+
# Helm chart
|
|
713
|
+
helm install rust-kgdb ./infra/helm -n rust-kgdb --create-namespace
|
|
714
|
+
|
|
715
|
+
# Verify cluster
|
|
716
|
+
kubectl get pods -n rust-kgdb
|
|
717
|
+
curl http://<coordinator-ip>:8080/api/v1/health
|
|
718
|
+
```
|
|
719
|
+
|
|
720
|
+
## HyperAgent: Fraud Detection Example
|
|
721
|
+
|
|
722
|
+
```javascript
|
|
723
|
+
const { GraphDB, HyperMindAgent, DatalogProgram, evaluateDatalog } = require('rust-kgdb');
|
|
724
|
+
|
|
725
|
+
// Create database with insurance claims data (N-Triples format for reliability)
|
|
726
|
+
const db = new GraphDB('http://insurance.org/');
|
|
727
|
+
db.loadTtl(`
|
|
728
|
+
<http://insurance.org/PROV001> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://insurance.org/Provider> .
|
|
729
|
+
<http://insurance.org/PROV001> <http://insurance.org/name> "ABC Medical" .
|
|
730
|
+
<http://insurance.org/PROV001> <http://insurance.org/specialty> "Orthopedics" .
|
|
731
|
+
<http://insurance.org/PROV001> <http://insurance.org/totalClaims> "89" .
|
|
732
|
+
<http://insurance.org/PROV001> <http://insurance.org/denialRate> "0.34" .
|
|
733
|
+
<http://insurance.org/PROV001> <http://insurance.org/hasPattern> <http://insurance.org/UnbundledBilling> .
|
|
734
|
+
<http://insurance.org/PROV001> <http://insurance.org/flaggedBy> <http://insurance.org/SIU_2024_Q1> .
|
|
735
|
+
|
|
736
|
+
<http://insurance.org/CLMT001> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://insurance.org/Claimant> .
|
|
737
|
+
<http://insurance.org/CLMT001> <http://insurance.org/name> "John Smith" .
|
|
738
|
+
<http://insurance.org/CLMT001> <http://insurance.org/address> "123 Main St" .
|
|
739
|
+
<http://insurance.org/CLMT002> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://insurance.org/Claimant> .
|
|
740
|
+
<http://insurance.org/CLMT002> <http://insurance.org/name> "Jane Doe" .
|
|
741
|
+
<http://insurance.org/CLMT002> <http://insurance.org/address> "123 Main St" .
|
|
742
|
+
<http://insurance.org/CLMT001> <http://insurance.org/knows> <http://insurance.org/CLMT002> .
|
|
743
|
+
`, null);
|
|
744
|
+
|
|
745
|
+
// Create agent with knowledge graph binding
|
|
746
|
+
const agent = new HyperMindAgent({
|
|
747
|
+
kg: db,
|
|
748
|
+
name: 'fraud-detector',
|
|
749
|
+
apiKey: process.env.OPENAI_API_KEY,
|
|
750
|
+
sandbox: {
|
|
751
|
+
capabilities: ['ReadKG', 'ExecuteTool'], // Read-only by default
|
|
752
|
+
fuelLimit: 1000000
|
|
753
|
+
}
|
|
754
|
+
});
|
|
755
|
+
|
|
756
|
+
// Natural language fraud detection
|
|
757
|
+
const result = await agent.call("Which providers show suspicious billing patterns?");
|
|
758
|
+
|
|
759
|
+
console.log(result.answer);
|
|
760
|
+
// "Provider PROV001 (ABC Medical) shows concerning patterns:
|
|
761
|
+
// - 34% denial rate (industry average: 8%)
|
|
762
|
+
// - Flagged by SIU in Q1 2024 for unbundled billing"
|
|
763
|
+
|
|
764
|
+
console.log(result.explanation);
|
|
765
|
+
// Full execution trace showing tool calls
|
|
766
|
+
|
|
767
|
+
console.log(result.proof);
|
|
768
|
+
// Cryptographic proof DAG with SHA-256 hashes
|
|
769
|
+
|
|
770
|
+
// Use Datalog for collusion detection rules
|
|
771
|
+
const datalog = new DatalogProgram();
|
|
772
|
+
datalog.addFact(JSON.stringify({predicate:'knows', terms:['CLMT001','CLMT002']}));
|
|
773
|
+
datalog.addFact(JSON.stringify({predicate:'sameAddress', terms:['CLMT001','CLMT002']}));
|
|
774
|
+
datalog.addRule(JSON.stringify({
|
|
775
|
+
head: {predicate:'potential_collusion', terms:['?X','?Y']},
|
|
776
|
+
body: [
|
|
777
|
+
{predicate:'knows', terms:['?X','?Y']},
|
|
778
|
+
{predicate:'sameAddress', terms:['?X','?Y']}
|
|
779
|
+
]
|
|
780
|
+
}));
|
|
781
|
+
const inferred = evaluateDatalog(datalog);
|
|
782
|
+
console.log('Collusion detected:', JSON.parse(inferred));
|
|
783
|
+
```
|
|
784
|
+
|
|
785
|
+
## HyperAgent: Underwriting Example
|
|
786
|
+
|
|
787
|
+
```javascript
|
|
788
|
+
const { GraphDB, HyperMindAgent, EmbeddingService } = require('rust-kgdb');
|
|
789
|
+
|
|
790
|
+
// Create database with underwriting data (N-Triples format)
|
|
791
|
+
const db = new GraphDB('http://underwriting.org/');
|
|
792
|
+
db.loadTtl(`
|
|
793
|
+
<http://underwriting.org/APP001> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://underwriting.org/Applicant> .
|
|
794
|
+
<http://underwriting.org/APP001> <http://underwriting.org/name> "Acme Corp" .
|
|
795
|
+
<http://underwriting.org/APP001> <http://underwriting.org/industry> "Manufacturing" .
|
|
796
|
+
<http://underwriting.org/APP001> <http://underwriting.org/employees> "250" .
|
|
797
|
+
<http://underwriting.org/APP001> <http://underwriting.org/creditScore> "720" .
|
|
798
|
+
<http://underwriting.org/APP001> <http://underwriting.org/yearsInBusiness> "15" .
|
|
799
|
+
|
|
800
|
+
<http://underwriting.org/COMP001> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://underwriting.org/Applicant> .
|
|
801
|
+
<http://underwriting.org/COMP001> <http://underwriting.org/industry> "Manufacturing" .
|
|
802
|
+
<http://underwriting.org/COMP001> <http://underwriting.org/employees> "230" .
|
|
803
|
+
<http://underwriting.org/COMP001> <http://underwriting.org/premium> "625000" .
|
|
804
|
+
`, null);
|
|
805
|
+
|
|
806
|
+
// Optional: Add embeddings for similarity search
|
|
807
|
+
const embeddings = new EmbeddingService();
|
|
808
|
+
const appVector = new Array(384).fill(0).map((_, i) => Math.sin(i / 10));
|
|
809
|
+
embeddings.storeVector('APP001', appVector);
|
|
810
|
+
embeddings.storeVector('COMP001', appVector.map(x => x * 0.95));
|
|
811
|
+
|
|
812
|
+
// Create underwriting agent
|
|
813
|
+
const agent = new HyperMindAgent({
|
|
814
|
+
kg: db,
|
|
815
|
+
embeddings: embeddings, // Optional: for similarity search
|
|
816
|
+
name: 'underwriter',
|
|
817
|
+
apiKey: process.env.OPENAI_API_KEY
|
|
818
|
+
});
|
|
819
|
+
|
|
820
|
+
// Risk assessment via natural language
|
|
821
|
+
const risk = await agent.call("Assess the risk profile for Acme Corp");
|
|
822
|
+
|
|
823
|
+
console.log(risk.answer);
|
|
824
|
+
// "Acme Corp (APP001) Risk Assessment:
|
|
825
|
+
// - Credit score 720 (above 700 threshold)
|
|
826
|
+
// - 15 years in business (stable operations)
|
|
827
|
+
// - Comparable: COMP001 (230 employees, $625K premium)"
|
|
828
|
+
|
|
829
|
+
// Find similar accounts using embeddings
|
|
830
|
+
const similar = embeddings.findSimilar('APP001', 5, 0.7);
|
|
831
|
+
console.log('Similar accounts:', JSON.parse(similar));
|
|
832
|
+
|
|
833
|
+
// Direct SPARQL query for engineering teams
|
|
834
|
+
const comparables = db.querySelect(`
|
|
835
|
+
SELECT ?company ?employees ?premium WHERE {
|
|
836
|
+
?company <http://underwriting.org/industry> "Manufacturing" .
|
|
837
|
+
?company <http://underwriting.org/employees> ?employees .
|
|
838
|
+
OPTIONAL { ?company <http://underwriting.org/premium> ?premium }
|
|
839
|
+
}
|
|
840
|
+
`);
|
|
841
|
+
console.log('Comparables:', comparables);
|
|
842
|
+
```
|
|
843
|
+
|
|
364
844
|
## Real-World Examples
|
|
365
845
|
|
|
366
846
|
### Legal: Contract Analysis
|
|
@@ -424,8 +904,9 @@ const inferred = evaluateDatalog(datalog);
|
|
|
424
904
|
All measurements verified. Run them yourself:
|
|
425
905
|
|
|
426
906
|
```bash
|
|
427
|
-
node benchmark.js
|
|
428
|
-
node
|
|
907
|
+
node benchmark.js # Core engine benchmarks
|
|
908
|
+
node concurrency-benchmark.js # Multi-worker concurrency
|
|
909
|
+
node vanilla-vs-hypermind-benchmark.js # HyperMind vs vanilla LLM
|
|
429
910
|
```
|
|
430
911
|
|
|
431
912
|
### Rust Core Engine
|
|
@@ -436,30 +917,124 @@ node vanilla-vs-hypermind-benchmark.js
|
|
|
436
917
|
| Memory/Triple | 24 bytes | 32 bytes | 50-60 bytes |
|
|
437
918
|
| Bulk Insert | 146K/sec | 200K/sec | 50K/sec |
|
|
438
919
|
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
|
457
|
-
|
|
458
|
-
|
|
|
459
|
-
|
|
|
460
|
-
|
|
|
461
|
-
|
|
|
462
|
-
|
|
920
|
+
Sources:
|
|
921
|
+
- rust-kgdb: Criterion benchmarks on LUBM(1) dataset, Apple Silicon
|
|
922
|
+
- RDFox: [Oxford Semantic Technologies benchmarks](https://www.oxfordsemantic.tech/product)
|
|
923
|
+
- Apache Jena: [Jena performance documentation](https://jena.apache.org/documentation/tdb/performance.html)
|
|
924
|
+
|
|
925
|
+
### Concurrency Scaling (darwin-x64)
|
|
926
|
+
|
|
927
|
+
| Operation | 1 Worker | 2 Workers | 4 Workers | 8 Workers | 16 Workers |
|
|
928
|
+
|-----------|----------|-----------|-----------|-----------|------------|
|
|
929
|
+
| Writes | 66K/sec | 79K/sec | 96K/sec | 111K/sec | 132K/sec |
|
|
930
|
+
| Reads | 290/sec | 305/sec | 307/sec | 282/sec | 302/sec |
|
|
931
|
+
| GraphFrame | 6.0K/sec | 6.5K/sec | 6.5K/sec | 6.7K/sec | 6.5K/sec |
|
|
932
|
+
|
|
933
|
+
Source: `node concurrency-benchmark.js` (100 ops/worker, LUBM data)
|
|
934
|
+
|
|
935
|
+
### HyperMind Agent Accuracy (LUBM Benchmark)
|
|
936
|
+
|
|
937
|
+
| Framework | Without Schema | With Schema |
|
|
938
|
+
|-----------|----------------|-------------|
|
|
939
|
+
| Vanilla LLM | 0% | - |
|
|
940
|
+
| LangChain | 0% | 71.4% |
|
|
941
|
+
| DSPy | 14.3% | 71.4% |
|
|
942
|
+
| HyperMind | - | 86.4% |
|
|
943
|
+
|
|
944
|
+
Source: `python3 benchmark-frameworks.py` with 7 LUBM queries
|
|
945
|
+
|
|
946
|
+
### Memory Retrieval (10K Queries)
|
|
947
|
+
|
|
948
|
+
| Metric | Value |
|
|
949
|
+
|--------|-------|
|
|
950
|
+
| Recall @ 10K | 94% |
|
|
951
|
+
| Search Speed | 16.7ms |
|
|
952
|
+
| Write Throughput | 132K ops/sec |
|
|
953
|
+
|
|
954
|
+
Source: `node memory-retrieval-benchmark.js`
|
|
955
|
+
|
|
956
|
+
## Complete Feature List
|
|
957
|
+
|
|
958
|
+
### Core Database
|
|
959
|
+
|
|
960
|
+
| Feature | Description | Performance |
|
|
961
|
+
|---------|-------------|-------------|
|
|
962
|
+
| SPARQL 1.1 Engine | Full query/update support | 449ns lookups |
|
|
963
|
+
| RDF 1.2 Support | Quoted triples, annotations | W3C compliant |
|
|
964
|
+
| Named Graphs | Quad store with graph isolation | O(1) graph switching |
|
|
965
|
+
| Triple Indexing | SPOC/POCS/OCSP/CSPO indexes | Sub-microsecond pattern match |
|
|
966
|
+
| Bulk Loading | Streaming Turtle/N-Triples parser | 146K triples/sec |
|
|
967
|
+
| Storage Backends | InMemory, RocksDB, LMDB | Pluggable persistence |
|
|
968
|
+
|
|
969
|
+
### Concurrency (Measured on 16 Workers)
|
|
970
|
+
|
|
971
|
+
| Operation | 1 Worker | 16 Workers | Scaling |
|
|
972
|
+
|-----------|----------|------------|---------|
|
|
973
|
+
| Writes | 66K ops/sec | 132K ops/sec | 1.99x |
|
|
974
|
+
| Reads | 290 ops/sec | 302 ops/sec | 1.04x |
|
|
975
|
+
| GraphFrame | 6.0K ops/sec | 6.5K ops/sec | 1.09x |
|
|
976
|
+
| Mixed R/W | 148K ops/sec | 642 ops/sec | - |
|
|
977
|
+
|
|
978
|
+
Source: `node concurrency-benchmark.js` on darwin-x64
|
|
979
|
+
|
|
980
|
+
### Graph Analytics (GraphFrame API)
|
|
981
|
+
|
|
982
|
+
| Algorithm | Complexity | Description |
|
|
983
|
+
|-----------|------------|-------------|
|
|
984
|
+
| PageRank | O(V + E) per iteration | Configurable damping, iterations |
|
|
985
|
+
| Connected Components | O(V + E) | Union-find implementation |
|
|
986
|
+
| Triangle Count | O(E^1.5) | Optimized edge iteration |
|
|
987
|
+
| Shortest Paths | O((V + E) log V) | Single-source Dijkstra |
|
|
988
|
+
| Motif Finding | Pattern-dependent | DSL: `(a)-[e]->(b)` syntax |
|
|
989
|
+
|
|
990
|
+
### AI/ML Features
|
|
991
|
+
|
|
992
|
+
| Feature | Performance | Description |
|
|
993
|
+
|---------|-------------|-------------|
|
|
994
|
+
| HNSW Embeddings | 16ms/10K vectors | 384-dimensional vectors |
|
|
995
|
+
| Similarity Search | O(log n) | Approximate nearest neighbor |
|
|
996
|
+
| Agent Memory | 94% recall @ 10K depth | Episodic + semantic memory |
|
|
997
|
+
| Embedding Triggers | Auto on INSERT | OpenAI/Ollama/Anthropic providers |
|
|
998
|
+
| Semantic Deduplication | 2ms cache hit | Hash-based query caching |
|
|
999
|
+
|
|
1000
|
+
### Reasoning Engine
|
|
1001
|
+
|
|
1002
|
+
| Feature | Algorithm | Description |
|
|
1003
|
+
|---------|-----------|-------------|
|
|
1004
|
+
| Datalog | Semi-naive evaluation | Recursive rule support |
|
|
1005
|
+
| Transitive Closure | Fixpoint iteration | ancestor(X,Z) :- parent(X,Y), ancestor(Y,Z) |
|
|
1006
|
+
| Negation | Stratified | NOT in rule bodies |
|
|
1007
|
+
| Aggregation | Group-by support | COUNT, SUM, AVG in rules |
|
|
1008
|
+
|
|
1009
|
+
### Security and Audit
|
|
1010
|
+
|
|
1011
|
+
| Feature | Implementation | Description |
|
|
1012
|
+
|---------|----------------|-------------|
|
|
1013
|
+
| WASM Sandbox | wasmtime + fuel metering | 1M ops max, 64MB memory |
|
|
1014
|
+
| Capability System | Set-based permissions | ReadKG, WriteKG, DatalogInfer |
|
|
1015
|
+
| ProofDAG | SHA-256 hash chains | Cryptographic audit trail |
|
|
1016
|
+
| Tool Validation | Type checking | Morphism composition verified |
|
|
1017
|
+
|
|
1018
|
+
### HyperAgent Framework
|
|
1019
|
+
|
|
1020
|
+
| Feature | Description |
|
|
1021
|
+
|---------|-------------|
|
|
1022
|
+
| Schema-Aware Query Gen | Uses YOUR ontology classes/properties |
|
|
1023
|
+
| Deterministic Planning | No LLM for query generation |
|
|
1024
|
+
| Multi-Step Execution | Chain SPARQL + Datalog + Motif |
|
|
1025
|
+
| Memory Hypergraph | Episodes link to KG entities |
|
|
1026
|
+
| Conversation Extraction | Auto-extract entities from chat |
|
|
1027
|
+
| Idempotent Responses | Same question = same answer |
|
|
1028
|
+
|
|
1029
|
+
### Standards Compliance
|
|
1030
|
+
|
|
1031
|
+
| Standard | Status | Notes |
|
|
1032
|
+
|----------|--------|-------|
|
|
1033
|
+
| SPARQL 1.1 Query | 100% | All query forms |
|
|
1034
|
+
| SPARQL 1.1 Update | 100% | INSERT/DELETE/LOAD/CLEAR |
|
|
1035
|
+
| RDF 1.2 | 100% | Quoted triples, annotations |
|
|
1036
|
+
| Turtle | 100% | Full grammar support |
|
|
1037
|
+
| N-Triples | 100% | Streaming parser |
|
|
463
1038
|
|
|
464
1039
|
## API Reference
|
|
465
1040
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "rust-kgdb",
|
|
3
|
-
"version": "0.6.
|
|
3
|
+
"version": "0.6.66",
|
|
4
4
|
"description": "High-performance RDF/SPARQL database with AI agent framework. GraphDB (449ns lookups, 35x faster than RDFox), GraphFrames analytics (PageRank, motifs), Datalog reasoning, HNSW vector embeddings. HyperMindAgent for schema-aware query generation with audit trails. W3C SPARQL 1.1 compliant. Native performance via Rust + NAPI-RS.",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"types": "index.d.ts",
|