rust-kgdb 0.6.33 → 0.6.35
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +24 -0
- package/README.md +73 -2
- package/hypermind-agent.js +458 -1
- package/index.d.ts +75 -0
- package/package.json +1 -1
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,30 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to the rust-kgdb TypeScript SDK will be documented in this file.
|
|
4
4
|
|
|
5
|
+
## [0.6.34] - 2025-12-16
|
|
6
|
+
|
|
7
|
+
### Schema-Aware Motif and Datalog Generation
|
|
8
|
+
|
|
9
|
+
Added proxied tools for generating motif patterns and Datalog rules from natural language using schema injection.
|
|
10
|
+
|
|
11
|
+
#### Added
|
|
12
|
+
- **`generateMotifFromText()`**: Generate graph motif patterns from text
|
|
13
|
+
- Circular, star, chain, triangle, bridge patterns
|
|
14
|
+
- Schema-constrained: only uses predicates from your data
|
|
15
|
+
- Confidence scoring based on predicate matching
|
|
16
|
+
- **`generateDatalogFromText()`**: Generate Datalog rules from text
|
|
17
|
+
- High-risk detection, collusion, transitive closure, circular patterns
|
|
18
|
+
- Threshold extraction from natural language (e.g., "above 0.7")
|
|
19
|
+
- Converts to valid Datalog syntax
|
|
20
|
+
- **24 new tests** for schema-aware generation (`tests/schema-generation.test.ts`)
|
|
21
|
+
- Updated TypeScript definitions with full JSDoc documentation
|
|
22
|
+
- README documentation with usage examples
|
|
23
|
+
|
|
24
|
+
#### Key Insight
|
|
25
|
+
Same schema injection approach as SPARQL benchmark—ensures only valid predicates are used, eliminating hallucination.
|
|
26
|
+
|
|
27
|
+
---
|
|
28
|
+
|
|
5
29
|
## [0.6.33] - 2025-12-16
|
|
6
30
|
|
|
7
31
|
### Framework Comparison Code Snippets
|
package/README.md
CHANGED
|
@@ -14,6 +14,42 @@
|
|
|
14
14
|
|
|
15
15
|
## Results (Verified December 2025)
|
|
16
16
|
|
|
17
|
+
### End-to-End Capability Benchmark
|
|
18
|
+
|
|
19
|
+
```
|
|
20
|
+
┌─────────────────────────────────────────────────────────────────────────────┐
|
|
21
|
+
│ CAPABILITY COMPARISON: HyperMind vs Other Frameworks │
|
|
22
|
+
│ (LangChain, DSPy, Vanilla OpenAI) │
|
|
23
|
+
├─────────────────────────────────────────────────────────────────────────────┤
|
|
24
|
+
│ │
|
|
25
|
+
│ Capability │ HyperMind │ LangChain/DSPy │
|
|
26
|
+
│ ───────────────────────────────────────────────────────── │
|
|
27
|
+
│ Generate Motif Pattern │ ✅ │ ✅ │
|
|
28
|
+
│ Generate Datalog Rules │ ✅ │ ✅ │
|
|
29
|
+
│ Execute Motif on Data │ ✅ │ ❌ │
|
|
30
|
+
│ Execute Datalog Rules │ ✅ │ ❌ │
|
|
31
|
+
│ Execute SPARQL Queries │ ✅ │ ❌ │
|
|
32
|
+
│ GraphFrame Analytics │ ✅ │ ❌ │
|
|
33
|
+
│ Deterministic Results │ ✅ │ ❌ │
|
|
34
|
+
│ Audit Trail/Provenance │ ✅ │ ❌ │
|
|
35
|
+
│ ───────────────────────────────────────────────────────── │
|
|
36
|
+
│ TOTAL │ 8/8 │ 2/8 │
|
|
37
|
+
│ │ 100% │ 25% │
|
|
38
|
+
│ │
|
|
39
|
+
│ DIFFERENTIAL: +75 percentage points (8/8 vs 2/8 capabilities) │
|
|
40
|
+
│ │
|
|
41
|
+
│ KEY INSIGHT: All frameworks can GENERATE text patterns. │
|
|
42
|
+
│ ONLY HyperMind can EXECUTE them on real data and get RESULTS. │
|
|
43
|
+
│ │
|
|
44
|
+
│ Other frameworks are "prompt libraries." │
|
|
45
|
+
│ HyperMind is an "execution engine." │
|
|
46
|
+
│ │
|
|
47
|
+
│ Reproduce: node benchmark-e2e-execution.js │
|
|
48
|
+
└─────────────────────────────────────────────────────────────────────────────┘
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
### SPARQL Generation Benchmark (With Schema Injection)
|
|
52
|
+
|
|
17
53
|
```
|
|
18
54
|
┌─────────────────────────────────────────────────────────────────────────────┐
|
|
19
55
|
│ BENCHMARK: LUBM (Lehigh University Benchmark) │
|
|
@@ -29,8 +65,8 @@
|
|
|
29
65
|
│ ───────────────────────────────────────────────────────────── │
|
|
30
66
|
│ AVERAGE 4.8% 71.4% +66.7 pp │
|
|
31
67
|
│ │
|
|
32
|
-
│
|
|
33
|
-
│ HyperMind's value =
|
|
68
|
+
│ NOTE: Schema injection improves ALL frameworks equally on generation. │
|
|
69
|
+
│ HyperMind's value = full execution stack, not just generation. │
|
|
34
70
|
│ │
|
|
35
71
|
│ Reproduce: python3 benchmark-frameworks.py │
|
|
36
72
|
└─────────────────────────────────────────────────────────────────────────────┘
|
|
@@ -502,6 +538,41 @@ const result = await agent.call('Calculate risk score for entity P001')
|
|
|
502
538
|
| **Audit Trail** | Every answer is traceable |
|
|
503
539
|
| **Memory** | Working, episodic, and long-term memory |
|
|
504
540
|
|
|
541
|
+
### Schema-Aware Generation (Proxied Tools)
|
|
542
|
+
|
|
543
|
+
Generate motif patterns and Datalog rules from natural language using schema injection:
|
|
544
|
+
|
|
545
|
+
```javascript
|
|
546
|
+
const { LLMPlanner, createSchemaAwareGraphDB } = require('rust-kgdb');
|
|
547
|
+
|
|
548
|
+
const db = createSchemaAwareGraphDB('http://insurance.org/');
|
|
549
|
+
db.loadTtl(insuranceData, null);
|
|
550
|
+
|
|
551
|
+
const planner = new LLMPlanner({ kg: db, model: 'gpt-4o' });
|
|
552
|
+
|
|
553
|
+
// Generate motif pattern from text
|
|
554
|
+
const motif = await planner.generateMotifFromText('Find circular payment patterns');
|
|
555
|
+
// Returns: {
|
|
556
|
+
// pattern: "(a)-[transfers]->(b); (b)-[transfers]->(c); (c)-[transfers]->(a)",
|
|
557
|
+
// variables: ["a", "b", "c"],
|
|
558
|
+
// predicatesUsed: ["transfers"],
|
|
559
|
+
// confidence: 0.9
|
|
560
|
+
// }
|
|
561
|
+
|
|
562
|
+
// Generate Datalog rules from text
|
|
563
|
+
const datalog = await planner.generateDatalogFromText(
|
|
564
|
+
'High risk providers are those with risk score above 0.7'
|
|
565
|
+
);
|
|
566
|
+
// Returns: {
|
|
567
|
+
// rules: [{ name: "highRisk", head: {...}, body: [...] }],
|
|
568
|
+
// datalogSyntax: ["highRisk(?x) :- provider(?x), riskScore(?x, ?score), ?score > 0.7."],
|
|
569
|
+
// predicatesUsed: ["riskScore", "provider"],
|
|
570
|
+
// confidence: 0.85
|
|
571
|
+
// }
|
|
572
|
+
```
|
|
573
|
+
|
|
574
|
+
**Same approach as SPARQL benchmark**: Schema injection ensures only valid predicates are used. No hallucination.
|
|
575
|
+
|
|
505
576
|
### Available Tools
|
|
506
577
|
| Tool | Input → Output | Description |
|
|
507
578
|
|------|----------------|-------------|
|
package/hypermind-agent.js
CHANGED
|
@@ -801,6 +801,7 @@ class SchemaContext {
|
|
|
801
801
|
// STRATEGY 3: Extract property morphisms with domain/range
|
|
802
802
|
if (ctx.properties.size < 10) {
|
|
803
803
|
const propQuery = `
|
|
804
|
+
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
|
|
804
805
|
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
|
|
805
806
|
PREFIX owl: <http://www.w3.org/2002/07/owl#>
|
|
806
807
|
SELECT DISTINCT ?prop ?domain ?range ?label WHERE {
|
|
@@ -852,7 +853,9 @@ class SchemaContext {
|
|
|
852
853
|
|
|
853
854
|
ctx._computeSignature()
|
|
854
855
|
} catch (err) {
|
|
855
|
-
// Schema extraction failed -
|
|
856
|
+
// Schema extraction failed - LOG the error, don't hide it!
|
|
857
|
+
console.error('[SchemaContext.fromKG] Schema extraction error:', err.message)
|
|
858
|
+
console.error('[SchemaContext.fromKG] Stack:', err.stack)
|
|
856
859
|
}
|
|
857
860
|
|
|
858
861
|
return ctx
|
|
@@ -2390,6 +2393,460 @@ Intent types: detect_fraud, find_similar, explain, find_patterns, aggregate, gen
|
|
|
2390
2393
|
return 'SELECT ?s ?p ?o WHERE { ?s ?p ?o } LIMIT 100'
|
|
2391
2394
|
}
|
|
2392
2395
|
|
|
2396
|
+
// ============================================================================
|
|
2397
|
+
// SCHEMA-AWARE MOTIF GENERATION (Proxied Tool)
|
|
2398
|
+
// ============================================================================
|
|
2399
|
+
|
|
2400
|
+
/**
|
|
2401
|
+
* Generate motif pattern from natural language using schema context
|
|
2402
|
+
*
|
|
2403
|
+
* Schema injection approach (same as SPARQL):
|
|
2404
|
+
* - Extract predicates from schema
|
|
2405
|
+
* - Build motif patterns using ONLY valid predicates
|
|
2406
|
+
* - Deterministic: same schema + same intent = same pattern
|
|
2407
|
+
*
|
|
2408
|
+
* @param {string} text - Natural language description (e.g., "Find circular payments")
|
|
2409
|
+
* @param {Object} options - Options { schema, llmAssisted }
|
|
2410
|
+
* @returns {Object} { pattern: string, variables: string[], confidence: number }
|
|
2411
|
+
*
|
|
2412
|
+
* @example
|
|
2413
|
+
* // Given schema with predicates: [transfers, paidTo, claims, provider]
|
|
2414
|
+
* planner.generateMotifFromText("Find circular payment patterns")
|
|
2415
|
+
* // Returns: { pattern: "(a)-[transfers]->(b); (b)-[transfers]->(c); (c)-[transfers]->(a)" }
|
|
2416
|
+
*
|
|
2417
|
+
* @example
|
|
2418
|
+
* // Given schema with predicates: [knows, worksFor, manages]
|
|
2419
|
+
* planner.generateMotifFromText("Find managers who know each other")
|
|
2420
|
+
* // Returns: { pattern: "(a)-[manages]->(team); (b)-[manages]->(team2); (a)-[knows]->(b)" }
|
|
2421
|
+
*/
|
|
2422
|
+
async generateMotifFromText(text, options = {}) {
|
|
2423
|
+
const schema = options.schema || await this._getSchema()
|
|
2424
|
+
const predicates = schema.predicates || []
|
|
2425
|
+
const classes = schema.classes || []
|
|
2426
|
+
|
|
2427
|
+
// Intent detection for motif patterns
|
|
2428
|
+
const textLower = text.toLowerCase()
|
|
2429
|
+
const intent = {
|
|
2430
|
+
circular: /circular|cycle|ring|loop|round-?trip/.test(textLower),
|
|
2431
|
+
star: /star|hub|central|many.*(connect|link)|one.*(to|connects).*many/.test(textLower),
|
|
2432
|
+
chain: /chain|path|sequence|flow|cascade/.test(textLower),
|
|
2433
|
+
triangle: /triangle|triad|three.*(way|node)|mutual/.test(textLower),
|
|
2434
|
+
bridge: /bridge|connect|link.*between|intermediary/.test(textLower),
|
|
2435
|
+
clique: /clique|fully.*connected|complete|all.*know/.test(textLower)
|
|
2436
|
+
}
|
|
2437
|
+
|
|
2438
|
+
// Find relevant predicates from schema
|
|
2439
|
+
const relevantPreds = this._findRelevantPredicates(textLower, predicates)
|
|
2440
|
+
|
|
2441
|
+
// Generate pattern based on intent and schema
|
|
2442
|
+
let pattern, variables, explanation
|
|
2443
|
+
|
|
2444
|
+
if (intent.circular) {
|
|
2445
|
+
// Circular pattern: (a)-[p]->(b); (b)-[p]->(c); (c)-[p]->(a)
|
|
2446
|
+
const pred = relevantPreds[0] || predicates[0] || 'edge'
|
|
2447
|
+
pattern = `(a)-[${pred}]->(b); (b)-[${pred}]->(c); (c)-[${pred}]->(a)`
|
|
2448
|
+
variables = ['a', 'b', 'c']
|
|
2449
|
+
explanation = `Circular pattern using predicate '${pred}' from schema`
|
|
2450
|
+
} else if (intent.star) {
|
|
2451
|
+
// Star pattern: (center)-[p]->(n1); (center)-[p]->(n2); (center)-[p]->(n3)
|
|
2452
|
+
const pred = relevantPreds[0] || predicates[0] || 'edge'
|
|
2453
|
+
pattern = `(center)-[${pred}]->(n1); (center)-[${pred}]->(n2); (center)-[${pred}]->(n3)`
|
|
2454
|
+
variables = ['center', 'n1', 'n2', 'n3']
|
|
2455
|
+
explanation = `Star pattern with central node using predicate '${pred}'`
|
|
2456
|
+
} else if (intent.chain) {
|
|
2457
|
+
// Chain pattern: (a)-[p]->(b); (b)-[p]->(c)
|
|
2458
|
+
const pred = relevantPreds[0] || predicates[0] || 'edge'
|
|
2459
|
+
pattern = `(a)-[${pred}]->(b); (b)-[${pred}]->(c)`
|
|
2460
|
+
variables = ['a', 'b', 'c']
|
|
2461
|
+
explanation = `Chain/path pattern using predicate '${pred}'`
|
|
2462
|
+
} else if (intent.triangle) {
|
|
2463
|
+
// Triangle pattern with different predicates if available
|
|
2464
|
+
const p1 = relevantPreds[0] || predicates[0] || 'edge'
|
|
2465
|
+
const p2 = relevantPreds[1] || relevantPreds[0] || predicates[0] || 'edge'
|
|
2466
|
+
const p3 = relevantPreds[2] || relevantPreds[0] || predicates[0] || 'edge'
|
|
2467
|
+
pattern = `(a)-[${p1}]->(b); (b)-[${p2}]->(c); (a)-[${p3}]->(c)`
|
|
2468
|
+
variables = ['a', 'b', 'c']
|
|
2469
|
+
explanation = `Triangle pattern using predicates from schema`
|
|
2470
|
+
} else if (intent.bridge) {
|
|
2471
|
+
// Bridge pattern: (a)-[p1]->(bridge); (bridge)-[p2]->(b)
|
|
2472
|
+
const p1 = relevantPreds[0] || predicates[0] || 'edge'
|
|
2473
|
+
const p2 = relevantPreds[1] || relevantPreds[0] || predicates[0] || 'edge'
|
|
2474
|
+
pattern = `(a)-[${p1}]->(bridge); (bridge)-[${p2}]->(b)`
|
|
2475
|
+
variables = ['a', 'bridge', 'b']
|
|
2476
|
+
explanation = `Bridge/intermediary pattern`
|
|
2477
|
+
} else {
|
|
2478
|
+
// Default: simple two-hop pattern
|
|
2479
|
+
const pred = relevantPreds[0] || predicates[0] || 'edge'
|
|
2480
|
+
pattern = `(a)-[${pred}]->(b)`
|
|
2481
|
+
variables = ['a', 'b']
|
|
2482
|
+
explanation = `Simple edge pattern using predicate '${pred}'`
|
|
2483
|
+
}
|
|
2484
|
+
|
|
2485
|
+
// Optional LLM-assisted refinement
|
|
2486
|
+
if (options.llmAssisted && this.model && this.apiKey) {
|
|
2487
|
+
const refined = await this._refineMotifWithLLM(text, pattern, schema)
|
|
2488
|
+
if (refined) {
|
|
2489
|
+
pattern = refined.pattern
|
|
2490
|
+
explanation = refined.explanation || explanation
|
|
2491
|
+
}
|
|
2492
|
+
}
|
|
2493
|
+
|
|
2494
|
+
return {
|
|
2495
|
+
pattern,
|
|
2496
|
+
variables,
|
|
2497
|
+
predicatesUsed: relevantPreds,
|
|
2498
|
+
confidence: relevantPreds.length > 0 ? 0.9 : 0.6,
|
|
2499
|
+
explanation,
|
|
2500
|
+
schemaSource: !!schema.predicates?.length
|
|
2501
|
+
}
|
|
2502
|
+
}
|
|
2503
|
+
|
|
2504
|
+
/**
|
|
2505
|
+
* Find predicates from schema that match the text intent
|
|
2506
|
+
* @private
|
|
2507
|
+
*/
|
|
2508
|
+
_findRelevantPredicates(textLower, predicates) {
|
|
2509
|
+
const keywords = textLower.split(/\s+/)
|
|
2510
|
+
const matches = []
|
|
2511
|
+
|
|
2512
|
+
// Pattern-specific keyword mappings
|
|
2513
|
+
const keywordMappings = {
|
|
2514
|
+
payment: ['transfer', 'paid', 'pay', 'payment', 'amount', 'transaction'],
|
|
2515
|
+
fraud: ['claim', 'risk', 'flag', 'suspicious', 'alert'],
|
|
2516
|
+
social: ['knows', 'friend', 'follows', 'connected', 'related'],
|
|
2517
|
+
org: ['works', 'manages', 'reports', 'employs', 'member'],
|
|
2518
|
+
product: ['purchase', 'buy', 'order', 'sell', 'owns']
|
|
2519
|
+
}
|
|
2520
|
+
|
|
2521
|
+
for (const pred of predicates) {
|
|
2522
|
+
const predLower = pred.toLowerCase()
|
|
2523
|
+
|
|
2524
|
+
// Direct match
|
|
2525
|
+
if (keywords.some(kw => predLower.includes(kw) || kw.includes(predLower))) {
|
|
2526
|
+
matches.push(pred)
|
|
2527
|
+
continue
|
|
2528
|
+
}
|
|
2529
|
+
|
|
2530
|
+
// Keyword mapping match
|
|
2531
|
+
for (const [category, mappedWords] of Object.entries(keywordMappings)) {
|
|
2532
|
+
if (keywords.some(kw => category.includes(kw) || kw.includes(category))) {
|
|
2533
|
+
if (mappedWords.some(mw => predLower.includes(mw))) {
|
|
2534
|
+
matches.push(pred)
|
|
2535
|
+
break
|
|
2536
|
+
}
|
|
2537
|
+
}
|
|
2538
|
+
}
|
|
2539
|
+
}
|
|
2540
|
+
|
|
2541
|
+
return matches
|
|
2542
|
+
}
|
|
2543
|
+
|
|
2544
|
+
/**
|
|
2545
|
+
* Refine motif pattern with LLM assistance
|
|
2546
|
+
* @private
|
|
2547
|
+
*/
|
|
2548
|
+
async _refineMotifWithLLM(text, basePattern, schema) {
|
|
2549
|
+
if (!this.model || !this.apiKey) return null
|
|
2550
|
+
|
|
2551
|
+
const systemPrompt = `You are a graph motif pattern generator.
|
|
2552
|
+
|
|
2553
|
+
Available predicates from schema:
|
|
2554
|
+
${schema.predicates?.slice(0, 20).join('\n') || 'No predicates available'}
|
|
2555
|
+
|
|
2556
|
+
Motif pattern syntax:
|
|
2557
|
+
- Nodes: (name)
|
|
2558
|
+
- Edges: (a)-[predicate]->(b)
|
|
2559
|
+
- Multiple edges: (a)-[p1]->(b); (b)-[p2]->(c)
|
|
2560
|
+
|
|
2561
|
+
RULES:
|
|
2562
|
+
- ONLY use predicates from the schema above
|
|
2563
|
+
- Output ONLY the pattern, no explanation
|
|
2564
|
+
- Use semicolons to separate multiple edges
|
|
2565
|
+
|
|
2566
|
+
Example:
|
|
2567
|
+
Input: "circular payments"
|
|
2568
|
+
Output: (a)-[transfers]->(b); (b)-[transfers]->(c); (c)-[transfers]->(a)`
|
|
2569
|
+
|
|
2570
|
+
try {
|
|
2571
|
+
const response = await this._callLLM(systemPrompt, `Generate motif pattern for: "${text}"`)
|
|
2572
|
+
const pattern = response.trim().replace(/```/g, '').trim()
|
|
2573
|
+
if (pattern && pattern.includes('->')) {
|
|
2574
|
+
return { pattern, explanation: 'LLM-refined pattern using schema predicates' }
|
|
2575
|
+
}
|
|
2576
|
+
} catch (err) {
|
|
2577
|
+
// Fall back to base pattern
|
|
2578
|
+
}
|
|
2579
|
+
return null
|
|
2580
|
+
}
|
|
2581
|
+
|
|
2582
|
+
// ============================================================================
|
|
2583
|
+
// SCHEMA-AWARE DATALOG RULE GENERATION (Proxied Tool)
|
|
2584
|
+
// ============================================================================
|
|
2585
|
+
|
|
2586
|
+
/**
|
|
2587
|
+
* Generate Datalog rules from natural language using schema context
|
|
2588
|
+
*
|
|
2589
|
+
* Schema injection approach:
|
|
2590
|
+
* - Extract predicates and classes from schema
|
|
2591
|
+
* - Build rules using ONLY valid schema terms
|
|
2592
|
+
* - Deterministic: same schema + same intent = same rules
|
|
2593
|
+
*
|
|
2594
|
+
* @param {string} text - Natural language description
|
|
2595
|
+
* @param {Object} options - Options { schema, llmAssisted }
|
|
2596
|
+
* @returns {Object} { rules: Array, facts: Array, confidence: number }
|
|
2597
|
+
*
|
|
2598
|
+
* @example
|
|
2599
|
+
* // Given schema: { predicates: [riskScore, claims, provider] }
|
|
2600
|
+
* planner.generateDatalogFromText("High risk providers are those with risk score above 0.7")
|
|
2601
|
+
* // Returns: { rules: [{ head: "highRiskProvider(?p)", body: ["provider(?p)", "riskScore(?p, ?s)", "?s > 0.7"] }] }
|
|
2602
|
+
*
|
|
2603
|
+
* @example
|
|
2604
|
+
* // Given schema: { predicates: [knows, claims, provider] }
|
|
2605
|
+
* planner.generateDatalogFromText("Collusion is when two people who know each other use the same provider")
|
|
2606
|
+
* // Returns: { rules: [{ head: "collusion(?a, ?b, ?p)", body: ["knows(?a, ?b)", "claims(?a, ?p)", "claims(?b, ?p)"] }] }
|
|
2607
|
+
*/
|
|
2608
|
+
async generateDatalogFromText(text, options = {}) {
|
|
2609
|
+
const schema = options.schema || await this._getSchema()
|
|
2610
|
+
const predicates = schema.predicates || []
|
|
2611
|
+
const classes = schema.classes || []
|
|
2612
|
+
|
|
2613
|
+
// Intent detection for rule patterns
|
|
2614
|
+
const textLower = text.toLowerCase()
|
|
2615
|
+
const intent = {
|
|
2616
|
+
highRisk: /high.?risk|risky|dangerous|suspicious|flagged/.test(textLower),
|
|
2617
|
+
collusion: /collusion|collude|conspir|together|coordinated/.test(textLower),
|
|
2618
|
+
transitive: /transitive|reachable|connected|ancestor|descendant|path/.test(textLower),
|
|
2619
|
+
threshold: /above|below|greater|less|more|threshold|limit|exceed/.test(textLower),
|
|
2620
|
+
circular: /circular|cycle|ring|loop/.test(textLower),
|
|
2621
|
+
aggregation: /count|total|sum|average|many|multiple/.test(textLower)
|
|
2622
|
+
}
|
|
2623
|
+
|
|
2624
|
+
// Extract threshold values from text
|
|
2625
|
+
const thresholdMatch = text.match(/(\d+\.?\d*)\s*(%|percent)?/)
|
|
2626
|
+
const threshold = thresholdMatch ? parseFloat(thresholdMatch[1]) / (thresholdMatch[2] ? 100 : 1) : 0.7
|
|
2627
|
+
|
|
2628
|
+
// Find relevant predicates
|
|
2629
|
+
const relevantPreds = this._findRelevantPredicates(textLower, predicates)
|
|
2630
|
+
const relevantClasses = this._findRelevantClasses(textLower, classes)
|
|
2631
|
+
|
|
2632
|
+
// Generate rules based on intent
|
|
2633
|
+
const rules = []
|
|
2634
|
+
let explanation = ''
|
|
2635
|
+
|
|
2636
|
+
if (intent.highRisk) {
|
|
2637
|
+
const riskPred = relevantPreds.find(p => /risk|score|flag/i.test(p)) || 'riskScore'
|
|
2638
|
+
const entityClass = relevantClasses[0] || relevantPreds.find(p => /provider|claim|entity/i.test(p)) || 'entity'
|
|
2639
|
+
|
|
2640
|
+
rules.push({
|
|
2641
|
+
name: 'highRisk',
|
|
2642
|
+
head: { predicate: 'highRisk', args: ['?x'] },
|
|
2643
|
+
body: [
|
|
2644
|
+
{ predicate: entityClass, args: ['?x'] },
|
|
2645
|
+
{ predicate: riskPred, args: ['?x', '?score'] },
|
|
2646
|
+
{ filter: `?score > ${threshold}` }
|
|
2647
|
+
],
|
|
2648
|
+
description: `Entities with ${riskPred} above ${threshold}`
|
|
2649
|
+
})
|
|
2650
|
+
explanation = `Generated high-risk rule using ${riskPred} predicate from schema`
|
|
2651
|
+
}
|
|
2652
|
+
|
|
2653
|
+
if (intent.collusion) {
|
|
2654
|
+
const knowsPred = relevantPreds.find(p => /know|friend|connect|related/i.test(p)) || 'knows'
|
|
2655
|
+
const usesPred = relevantPreds.find(p => /claim|use|provider|service/i.test(p)) || 'uses'
|
|
2656
|
+
|
|
2657
|
+
rules.push({
|
|
2658
|
+
name: 'collusion',
|
|
2659
|
+
head: { predicate: 'collusion', args: ['?a', '?b', '?target'] },
|
|
2660
|
+
body: [
|
|
2661
|
+
{ predicate: knowsPred, args: ['?a', '?b'] },
|
|
2662
|
+
{ predicate: usesPred, args: ['?a', '?target'] },
|
|
2663
|
+
{ predicate: usesPred, args: ['?b', '?target'] },
|
|
2664
|
+
{ filter: '?a != ?b' }
|
|
2665
|
+
],
|
|
2666
|
+
description: 'Two related entities using the same target'
|
|
2667
|
+
})
|
|
2668
|
+
explanation = `Generated collusion rule using ${knowsPred} and ${usesPred} from schema`
|
|
2669
|
+
}
|
|
2670
|
+
|
|
2671
|
+
if (intent.transitive) {
|
|
2672
|
+
const edgePred = relevantPreds[0] || 'edge'
|
|
2673
|
+
|
|
2674
|
+
rules.push({
|
|
2675
|
+
name: 'reachable_base',
|
|
2676
|
+
head: { predicate: 'reachable', args: ['?x', '?y'] },
|
|
2677
|
+
body: [{ predicate: edgePred, args: ['?x', '?y'] }],
|
|
2678
|
+
description: 'Base case: direct edge'
|
|
2679
|
+
})
|
|
2680
|
+
rules.push({
|
|
2681
|
+
name: 'reachable_recursive',
|
|
2682
|
+
head: { predicate: 'reachable', args: ['?x', '?z'] },
|
|
2683
|
+
body: [
|
|
2684
|
+
{ predicate: edgePred, args: ['?x', '?y'] },
|
|
2685
|
+
{ predicate: 'reachable', args: ['?y', '?z'] }
|
|
2686
|
+
],
|
|
2687
|
+
description: 'Recursive case: transitive closure'
|
|
2688
|
+
})
|
|
2689
|
+
explanation = `Generated transitive closure rules using ${edgePred} predicate`
|
|
2690
|
+
}
|
|
2691
|
+
|
|
2692
|
+
if (intent.circular) {
|
|
2693
|
+
const edgePred = relevantPreds[0] || 'transfers'
|
|
2694
|
+
|
|
2695
|
+
rules.push({
|
|
2696
|
+
name: 'circular',
|
|
2697
|
+
head: { predicate: 'circular', args: ['?a', '?b', '?c'] },
|
|
2698
|
+
body: [
|
|
2699
|
+
{ predicate: edgePred, args: ['?a', '?b'] },
|
|
2700
|
+
{ predicate: edgePred, args: ['?b', '?c'] },
|
|
2701
|
+
{ predicate: edgePred, args: ['?c', '?a'] }
|
|
2702
|
+
],
|
|
2703
|
+
description: 'Circular pattern A→B→C→A'
|
|
2704
|
+
})
|
|
2705
|
+
explanation = `Generated circular pattern rule using ${edgePred} predicate`
|
|
2706
|
+
}
|
|
2707
|
+
|
|
2708
|
+
// Default rule if no specific intent matched
|
|
2709
|
+
if (rules.length === 0 && relevantPreds.length > 0) {
|
|
2710
|
+
const pred = relevantPreds[0]
|
|
2711
|
+
rules.push({
|
|
2712
|
+
name: 'derived',
|
|
2713
|
+
head: { predicate: 'derived', args: ['?x'] },
|
|
2714
|
+
body: [{ predicate: pred, args: ['?x', '?y'] }],
|
|
2715
|
+
description: `Entities with ${pred} relationship`
|
|
2716
|
+
})
|
|
2717
|
+
explanation = `Generated default rule using ${pred} predicate`
|
|
2718
|
+
}
|
|
2719
|
+
|
|
2720
|
+
// Optional LLM-assisted refinement
|
|
2721
|
+
if (options.llmAssisted && this.model && this.apiKey && rules.length === 0) {
|
|
2722
|
+
const refined = await this._refineDatalogWithLLM(text, schema)
|
|
2723
|
+
if (refined && refined.rules) {
|
|
2724
|
+
return refined
|
|
2725
|
+
}
|
|
2726
|
+
}
|
|
2727
|
+
|
|
2728
|
+
// Convert rules to Datalog syntax
|
|
2729
|
+
const datalogSyntax = rules.map(r => this._ruleToDatalog(r))
|
|
2730
|
+
|
|
2731
|
+
return {
|
|
2732
|
+
rules,
|
|
2733
|
+
datalogSyntax,
|
|
2734
|
+
predicatesUsed: relevantPreds,
|
|
2735
|
+
classesUsed: relevantClasses,
|
|
2736
|
+
confidence: relevantPreds.length > 0 ? 0.85 : 0.5,
|
|
2737
|
+
explanation,
|
|
2738
|
+
schemaSource: !!schema.predicates?.length
|
|
2739
|
+
}
|
|
2740
|
+
}
|
|
2741
|
+
|
|
2742
|
+
/**
|
|
2743
|
+
* Find classes from schema that match the text intent
|
|
2744
|
+
* @private
|
|
2745
|
+
*/
|
|
2746
|
+
_findRelevantClasses(textLower, classes) {
|
|
2747
|
+
const matches = []
|
|
2748
|
+
const keywords = textLower.split(/\s+/)
|
|
2749
|
+
|
|
2750
|
+
for (const cls of classes) {
|
|
2751
|
+
const clsLower = cls.toLowerCase()
|
|
2752
|
+
if (keywords.some(kw => clsLower.includes(kw) || kw.includes(clsLower))) {
|
|
2753
|
+
matches.push(cls)
|
|
2754
|
+
}
|
|
2755
|
+
}
|
|
2756
|
+
return matches
|
|
2757
|
+
}
|
|
2758
|
+
|
|
2759
|
+
/**
|
|
2760
|
+
* Convert rule object to Datalog syntax string
|
|
2761
|
+
* @private
|
|
2762
|
+
*/
|
|
2763
|
+
_ruleToDatalog(rule) {
|
|
2764
|
+
const head = `${rule.head.predicate}(${rule.head.args.join(', ')})`
|
|
2765
|
+
const bodyParts = rule.body.map(b => {
|
|
2766
|
+
if (b.filter) return b.filter
|
|
2767
|
+
return `${b.predicate}(${b.args.join(', ')})`
|
|
2768
|
+
})
|
|
2769
|
+
return `${head} :- ${bodyParts.join(', ')}.`
|
|
2770
|
+
}
|
|
2771
|
+
|
|
2772
|
+
/**
|
|
2773
|
+
* Refine Datalog rules with LLM assistance
|
|
2774
|
+
* @private
|
|
2775
|
+
*/
|
|
2776
|
+
async _refineDatalogWithLLM(text, schema) {
|
|
2777
|
+
if (!this.model || !this.apiKey) return null
|
|
2778
|
+
|
|
2779
|
+
const systemPrompt = `You are a Datalog rule generator.
|
|
2780
|
+
|
|
2781
|
+
Available predicates from schema:
|
|
2782
|
+
${schema.predicates?.slice(0, 20).join('\n') || 'No predicates available'}
|
|
2783
|
+
|
|
2784
|
+
Available classes:
|
|
2785
|
+
${schema.classes?.slice(0, 10).join('\n') || 'No classes available'}
|
|
2786
|
+
|
|
2787
|
+
Datalog syntax:
|
|
2788
|
+
- Rules: head(?x) :- body1(?x, ?y), body2(?y, ?z).
|
|
2789
|
+
- Variables start with ?
|
|
2790
|
+
- Filters: ?x > 0.7
|
|
2791
|
+
|
|
2792
|
+
RULES:
|
|
2793
|
+
- ONLY use predicates/classes from the schema above
|
|
2794
|
+
- Output valid Datalog syntax only
|
|
2795
|
+
- One rule per line
|
|
2796
|
+
|
|
2797
|
+
Example:
|
|
2798
|
+
Input: "high risk providers"
|
|
2799
|
+
Output: highRisk(?p) :- provider(?p), riskScore(?p, ?s), ?s > 0.7.`
|
|
2800
|
+
|
|
2801
|
+
try {
|
|
2802
|
+
const response = await this._callLLM(systemPrompt, `Generate Datalog rules for: "${text}"`)
|
|
2803
|
+
const lines = response.trim().split('\n').filter(l => l.includes(':-'))
|
|
2804
|
+
if (lines.length > 0) {
|
|
2805
|
+
return {
|
|
2806
|
+
rules: lines.map((line, i) => ({
|
|
2807
|
+
name: `rule_${i}`,
|
|
2808
|
+
datalogSyntax: line.trim(),
|
|
2809
|
+
description: 'LLM-generated rule'
|
|
2810
|
+
})),
|
|
2811
|
+
datalogSyntax: lines,
|
|
2812
|
+
explanation: 'LLM-refined rules using schema predicates',
|
|
2813
|
+
confidence: 0.75
|
|
2814
|
+
}
|
|
2815
|
+
}
|
|
2816
|
+
} catch (err) {
|
|
2817
|
+
// Fall back
|
|
2818
|
+
}
|
|
2819
|
+
return null
|
|
2820
|
+
}
|
|
2821
|
+
|
|
2822
|
+
/**
|
|
2823
|
+
* Get schema from KG or cache
|
|
2824
|
+
* @private
|
|
2825
|
+
*/
|
|
2826
|
+
async _getSchema() {
|
|
2827
|
+
if (this._schemaContext) {
|
|
2828
|
+
return {
|
|
2829
|
+
predicates: Array.from(this._schemaContext.properties?.keys() || []),
|
|
2830
|
+
classes: Array.from(this._schemaContext.classes || [])
|
|
2831
|
+
}
|
|
2832
|
+
}
|
|
2833
|
+
|
|
2834
|
+
if (this._schemaCache) {
|
|
2835
|
+
return this._schemaCache
|
|
2836
|
+
}
|
|
2837
|
+
|
|
2838
|
+
// Build from KG
|
|
2839
|
+
if (this.kg) {
|
|
2840
|
+
const context = await this.buildSchemaContext()
|
|
2841
|
+
return {
|
|
2842
|
+
predicates: Array.from(context.properties?.keys() || []),
|
|
2843
|
+
classes: Array.from(context.classes || [])
|
|
2844
|
+
}
|
|
2845
|
+
}
|
|
2846
|
+
|
|
2847
|
+
return { predicates: [], classes: [] }
|
|
2848
|
+
}
|
|
2849
|
+
|
|
2393
2850
|
_buildTypeChain(steps) {
|
|
2394
2851
|
return steps.map(s => `${s.input_type} → ${s.output_type}`).join(' ; ')
|
|
2395
2852
|
}
|
package/index.d.ts
CHANGED
|
@@ -843,6 +843,81 @@ export class LLMPlanner {
|
|
|
843
843
|
confidence: number
|
|
844
844
|
explanation: string
|
|
845
845
|
}>
|
|
846
|
+
|
|
847
|
+
/**
 * Turn a natural-language description into a graph motif pattern.
 *
 * The description is classified into a motif shape (circular, star, chain,
 * triangle, bridge, or a single edge by default) and the shape is filled in
 * with predicates taken from the schema, the same schema-injection approach
 * used for SPARQL generation. Without LLM assistance the result is
 * deterministic: same schema + same text = same pattern.
 *
 * @param text - Natural language description (e.g., "Find circular payments")
 * @param options - `schema` overrides the schema read from the knowledge graph;
 *   `llmAssisted` lets the configured LLM refine the generated pattern
 * @returns The motif together with:
 *   - `pattern`: edges written as `(a)-[pred]->(b)`, separated by `; `
 *   - `variables`: node names used in the pattern
 *   - `predicatesUsed`: schema predicates judged relevant to the text
 *   - `confidence`: 0.9 when a relevant predicate was found, otherwise 0.6
 *   - `explanation`: one-line description of how the pattern was built
 *   - `schemaSource`: true when the schema supplied at least one predicate
 *
 * @example
 * ```typescript
 * // Given schema with predicates: [transfers, paidTo, claims, provider]
 * const result = await planner.generateMotifFromText("Find circular payment patterns")
 * // Returns: { pattern: "(a)-[transfers]->(b); (b)-[transfers]->(c); (c)-[transfers]->(a)" }
 * ```
 */
generateMotifFromText(
  text: string,
  options?: {
    schema?: { predicates: string[]; classes: string[] };
    llmAssisted?: boolean;
  },
): Promise<{
  pattern: string;
  variables: string[];
  predicatesUsed: string[];
  confidence: number;
  explanation: string;
  schemaSource: boolean;
}>;
|
|
877
|
+
|
|
878
|
+
/**
 * Generate Datalog rules from natural language using schema context.
 *
 * Schema injection approach:
 * - Extract predicates and classes from schema
 * - Build rules using schema terms where a suitable one is found
 * - Deterministic unless LLM assistance is used: same schema + same text = same rules
 *
 * Recognised intents are high risk, collusion, transitive closure and
 * circular patterns. A threshold such as "above 0.7" or "70%" is read from
 * the text and defaults to 0.7.
 *
 * @param text - Natural language description
 * @param options - `schema` overrides the schema read from the knowledge graph;
 *   `llmAssisted` lets the configured LLM produce rules when none could be
 *   built deterministically
 * @returns Datalog rules with syntax and confidence. `rules[i]` and
 *   `datalogSyntax[i]` describe the same rule. Rules produced by the LLM carry
 *   their text in the per-rule `datalogSyntax` member and are not decomposed
 *   into `head`/`body`, so check for `head` before reading it when
 *   `llmAssisted` is enabled.
 *
 * @example
 * ```typescript
 * // Given schema: { predicates: [riskScore, claims, provider] }
 * const result = await planner.generateDatalogFromText(
 *   "High risk providers are those with risk score above 0.7"
 * )
 * // Returns: { rules: [...], datalogSyntax: ["highRisk(?x) :- provider(?x), riskScore(?x, ?score), ?score > 0.7."] }
 * ```
 */
generateDatalogFromText(text: string, options?: {
  schema?: { predicates: string[], classes: string[] }
  llmAssisted?: boolean
}): Promise<{
  rules: Array<{
    name: string
    head: { predicate: string, args: string[] }
    body: Array<{ predicate?: string, args?: string[], filter?: string }>
    /** Raw rule text; present on LLM-generated rules. */
    datalogSyntax?: string
    description: string
  }>
  datalogSyntax: string[]
  predicatesUsed: string[]
  classesUsed: string[]
  confidence: number
  explanation: string
  schemaSource: boolean
}>
|
|
916
|
+
|
|
917
|
+
/**
|
|
918
|
+
* Build the type-theoretic schema context (classes and property morphisms) from the planner's knowledge graph
|
|
919
|
+
*/
|
|
920
|
+
buildSchemaContext(forceRefresh?: boolean): Promise<SchemaContext>
|
|
846
921
|
}
|
|
847
922
|
|
|
848
923
|
/**
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "rust-kgdb",
|
|
3
|
-
"version": "0.6.33",
|
|
3
|
+
"version": "0.6.35",
|
|
4
4
|
"description": "Production-grade Neuro-Symbolic AI Framework with Schema-Aware GraphDB, Context Theory, and Memory Hypergraph: +86.4% accuracy over vanilla LLMs. Features Schema-Aware GraphDB (auto schema extraction), BYOO (Bring Your Own Ontology) for enterprise, cross-agent schema caching, LLM Planner for natural language to typed SPARQL, ProofDAG with Curry-Howard witnesses. High-performance (2.78µs lookups, 35x faster than RDFox). W3C SPARQL 1.1 compliant.",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"types": "index.d.ts",
|