rust-kgdb 0.8.22 → 0.8.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/hypermind-agent.js +161 -154
- package/package.json +1 -1
- package/rust-kgdb-napi.darwin-arm64.node +0 -0
- package/rust-kgdb-napi.darwin-x64.node +0 -0
- package/rust-kgdb-napi.node +0 -0
package/hypermind-agent.js
CHANGED
|
@@ -47,27 +47,41 @@ function loadNativeBindingDirect() {
|
|
|
47
47
|
|
|
48
48
|
const native = loadNativeBindingDirect()
|
|
49
49
|
|
|
50
|
-
// Native Rust bindings - SDK is THIN,
|
|
50
|
+
// Native Rust bindings - SDK is THIN, only pure functions
|
|
51
|
+
// GraphDB handles all RDF/SPARQL operations
|
|
52
|
+
// tokenizeIdentifier and computeSimilarity are pure utility functions
|
|
51
53
|
const {
|
|
52
|
-
OlogSchema,
|
|
53
|
-
PredicateResolverService,
|
|
54
|
-
SchemaValidatorService,
|
|
55
|
-
ThinkingReasoner: NativeThinkingReasoner,
|
|
56
|
-
computeSimilarity,
|
|
57
54
|
tokenizeIdentifier,
|
|
58
|
-
|
|
59
|
-
extractKeywords: nativeExtractKeywords
|
|
55
|
+
computeSimilarity
|
|
60
56
|
} = native
|
|
61
57
|
|
|
62
58
|
/**
|
|
63
|
-
* Extract keywords from natural language prompt
|
|
64
|
-
*
|
|
59
|
+
* Extract keywords from natural language prompt
|
|
60
|
+
* Uses schema predicates for domain-aware extraction
|
|
65
61
|
* @param {string} prompt - Natural language prompt
|
|
62
|
+
* @param {string[]} schemaPredicates - Optional schema predicates for domain hints
|
|
66
63
|
* @returns {string[]} Extracted keywords
|
|
67
64
|
*/
|
|
68
|
-
function extractKeywords(prompt) {
|
|
65
|
+
function extractKeywords(prompt, schemaPredicates = []) {
|
|
69
66
|
if (!prompt) return []
|
|
70
|
-
|
|
67
|
+
// Split on whitespace and filter short words
|
|
68
|
+
const words = prompt.toLowerCase()
|
|
69
|
+
.replace(/[^\w\s]/g, ' ')
|
|
70
|
+
.split(/\s+/)
|
|
71
|
+
.filter(w => w.length > 2)
|
|
72
|
+
|
|
73
|
+
// If schema predicates provided, boost domain-relevant keywords
|
|
74
|
+
if (schemaPredicates.length > 0) {
|
|
75
|
+
const predicateWords = new Set()
|
|
76
|
+
for (const pred of schemaPredicates) {
|
|
77
|
+
const tokens = tokenizeIdentifier ? tokenizeIdentifier(pred.split('/').pop().split('#').pop()) : []
|
|
78
|
+
tokens.forEach(t => predicateWords.add(t.toLowerCase()))
|
|
79
|
+
}
|
|
80
|
+
// Return words that match schema or are content words
|
|
81
|
+
return words.filter(w => predicateWords.has(w) || w.length > 3)
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
return words
|
|
71
85
|
}
|
|
72
86
|
|
|
73
87
|
// ============================================================================
|
|
@@ -2938,100 +2952,18 @@ class LLMPlanner {
|
|
|
2938
2952
|
o: r.bindings?.o || r.o
|
|
2939
2953
|
}))
|
|
2940
2954
|
|
|
2941
|
-
//
|
|
2942
|
-
|
|
2943
|
-
|
|
2944
|
-
|
|
2945
|
-
|
|
2946
|
-
|
|
2947
|
-
|
|
2948
|
-
|
|
2949
|
-
|
|
2950
|
-
|
|
2951
|
-
|
|
2952
|
-
const localName = cls.split('/').pop().split('#').pop()
|
|
2953
|
-
olog.addClass(localName)
|
|
2954
|
-
} catch (e) { /* skip invalid class */ }
|
|
2955
|
-
}
|
|
2956
|
-
|
|
2957
|
-
// Add properties with aliases extracted from local names
|
|
2958
|
-
for (const prop of (schema.predicates || [])) {
|
|
2959
|
-
try {
|
|
2960
|
-
const localName = prop.split('/').pop().split('#').pop()
|
|
2961
|
-
// Generate aliases from tokenized form
|
|
2962
|
-
const tokens = native.tokenizeIdentifier(localName)
|
|
2963
|
-
const aliases = tokens.length > 1 ? [tokens.join(''), tokens.join('_')] : []
|
|
2964
|
-
olog.addProperty(prop, 'Thing', 'Thing', [localName, ...aliases])
|
|
2965
|
-
} catch (e) { /* skip invalid property */ }
|
|
2966
|
-
}
|
|
2967
|
-
|
|
2968
|
-
olog.build()
|
|
2969
|
-
|
|
2970
|
-
// ============================================================
|
|
2971
|
-
// ENTITY RESOLUTION: Populate Olog with entities from RDF data
|
|
2972
|
-
// This enables NL entity references like "Fifth Amendment" to
|
|
2973
|
-
// resolve to canonical URIs like "legal:FifthAmendment"
|
|
2974
|
-
//
|
|
2975
|
-
// CRITICAL FIX (2025-12-23): Use SchemaContext's extracted entities
|
|
2976
|
-
// The entities extracted with Strategy 6 (rdfs:label) are now used
|
|
2977
|
-
// to populate the Olog's label_to_entity map for O(1) lookup.
|
|
2978
|
-
// ============================================================
|
|
2979
|
-
try {
|
|
2980
|
-
let entityCount = 0
|
|
2981
|
-
|
|
2982
|
-
// PRIMARY: Use SchemaContext entities (extracted via Strategy 6)
|
|
2983
|
-
// These are the entities with rdfs:label that we need for resolution
|
|
2984
|
-
// Use this._schemaContext if available (from getSchemaContext())
|
|
2985
|
-
const schemaCtx = this._schemaContext || await this.getSchemaContext?.()
|
|
2986
|
-
if (schemaCtx && schemaCtx.entities && schemaCtx.entities.size > 0) {
|
|
2987
|
-
const entityTriples = []
|
|
2988
|
-
for (const [uri, info] of schemaCtx.entities) {
|
|
2989
|
-
// Create triples for entity labels
|
|
2990
|
-
if (info.label) {
|
|
2991
|
-
entityTriples.push([uri, 'http://www.w3.org/2000/01/rdf-schema#label', info.label])
|
|
2992
|
-
}
|
|
2993
|
-
if (info.type) {
|
|
2994
|
-
entityTriples.push([uri, 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type', info.type])
|
|
2995
|
-
}
|
|
2996
|
-
}
|
|
2997
|
-
if (entityTriples.length > 0 && olog.populateEntitiesFromTriples) {
|
|
2998
|
-
entityCount = olog.populateEntitiesFromTriples(JSON.stringify(entityTriples))
|
|
2999
|
-
schema._entityCount = entityCount
|
|
3000
|
-
schema._entitySource = 'schemaContext'
|
|
3001
|
-
}
|
|
3002
|
-
}
|
|
3003
|
-
|
|
3004
|
-
// FALLBACK: Query triples if SchemaContext has no entities
|
|
3005
|
-
if (entityCount === 0 && this.kg && typeof this.kg.querySelect === 'function') {
|
|
3006
|
-
const allTriples = this.kg.querySelect('SELECT ?s ?p ?o WHERE { ?s ?p ?o } LIMIT 10000')
|
|
3007
|
-
if (allTriples && allTriples.length > 0) {
|
|
3008
|
-
// Convert to triple array format expected by Rust
|
|
3009
|
-
const triplesArray = allTriples.map(result => [
|
|
3010
|
-
result.bindings?.s || result.bindings?.subject || '',
|
|
3011
|
-
result.bindings?.p || result.bindings?.predicate || '',
|
|
3012
|
-
result.bindings?.o || result.bindings?.object || ''
|
|
3013
|
-
]).filter(t => t[0] && t[1] && t[2])
|
|
3014
|
-
|
|
3015
|
-
// Populate entities in Olog using Rust extraction
|
|
3016
|
-
if (triplesArray.length > 0 && olog.populateEntitiesFromTriples) {
|
|
3017
|
-
entityCount = olog.populateEntitiesFromTriples(JSON.stringify(triplesArray))
|
|
3018
|
-
schema._entityCount = entityCount
|
|
3019
|
-
schema._entitySource = 'triples'
|
|
3020
|
-
}
|
|
3021
|
-
}
|
|
3022
|
-
}
|
|
3023
|
-
} catch (entityErr) {
|
|
3024
|
-
// Entity extraction is optional - continue without it
|
|
3025
|
-
schema._entityExtractionError = entityErr.message
|
|
2955
|
+
// Build predicate index for fast keyword→URI lookup
|
|
2956
|
+
// NO STATE - computed fresh from schema.predicates
|
|
2957
|
+
schema._predicateIndex = new Map()
|
|
2958
|
+
for (const pred of (schema.predicates || [])) {
|
|
2959
|
+
const localName = pred.split('/').pop().split('#').pop()
|
|
2960
|
+
const tokens = tokenizeIdentifier ? tokenizeIdentifier(localName) : [localName.toLowerCase()]
|
|
2961
|
+
// Index by local name and tokens
|
|
2962
|
+
schema._predicateIndex.set(localName.toLowerCase(), pred)
|
|
2963
|
+
for (const token of tokens) {
|
|
2964
|
+
if (token.length > 2) {
|
|
2965
|
+
schema._predicateIndex.set(token.toLowerCase(), pred)
|
|
3026
2966
|
}
|
|
3027
|
-
|
|
3028
|
-
schema._nativeResolver = new native.PredicateResolverService(olog, threshold)
|
|
3029
|
-
schema._nativeOlog = olog
|
|
3030
|
-
} catch (e) {
|
|
3031
|
-
// NO FALLBACKS - propagate error with context
|
|
3032
|
-
console.error('[extractSchema] Native resolver initialization failed:', e.message)
|
|
3033
|
-
schema._nativeResolverError = e.message
|
|
3034
|
-
schema._nativeResolver = null
|
|
3035
2967
|
}
|
|
3036
2968
|
}
|
|
3037
2969
|
|
|
@@ -3210,10 +3142,15 @@ ${schemaText}
|
|
|
3210
3142
|
${memoryText}
|
|
3211
3143
|
|
|
3212
3144
|
RULES:
|
|
3213
|
-
-
|
|
3214
|
-
-
|
|
3215
|
-
-
|
|
3216
|
-
-
|
|
3145
|
+
- Use predicates from the schema to construct SPARQL queries
|
|
3146
|
+
- For pattern queries (fraud rings, collusion, networks, relationships):
|
|
3147
|
+
- Map semantic intent to relationship predicates (e.g., 'knows', 'referredBy', 'claimsWith')
|
|
3148
|
+
- Generate triangle/cycle patterns: ?a :knows ?b . ?b :knows ?c . ?c :knows ?a
|
|
3149
|
+
- A "fraud ring" = entities connected in cycles via relationship predicates
|
|
3150
|
+
- For risk queries, use 'riskScore' with FILTER (e.g., FILTER(?score > 0.7))
|
|
3151
|
+
- For similarity queries, look for shared attributes (same address, overlapping claims)
|
|
3152
|
+
- Always return valid SPARQL using actual schema predicates
|
|
3153
|
+
- Use proper SPARQL 1.1 syntax with correct prefixes
|
|
3217
3154
|
|
|
3218
3155
|
Respond in JSON:
|
|
3219
3156
|
{
|
|
@@ -3328,14 +3265,26 @@ Intent types: detect_fraud, find_similar, explain, find_patterns, aggregate, gen
|
|
|
3328
3265
|
|
|
3329
3266
|
// Generate SPARQL based on intent and schema
|
|
3330
3267
|
if (intent.query || intent.compliance || intent.aggregate) {
|
|
3331
|
-
|
|
3332
|
-
|
|
3333
|
-
|
|
3334
|
-
|
|
3335
|
-
|
|
3336
|
-
|
|
3337
|
-
|
|
3338
|
-
|
|
3268
|
+
let sparql = null
|
|
3269
|
+
|
|
3270
|
+
// Try schema-driven SPARQL generation first (fast, deterministic)
|
|
3271
|
+
try {
|
|
3272
|
+
sparql = this._generateSchemaSparql(intent, schema, context)
|
|
3273
|
+
} catch (schemaErr) {
|
|
3274
|
+
// Keyword matching failed - return empty steps (let LLM handle in call())
|
|
3275
|
+
// This is NOT a fallback - complex queries go through LLM path
|
|
3276
|
+
sparql = null
|
|
3277
|
+
}
|
|
3278
|
+
|
|
3279
|
+
if (sparql) {
|
|
3280
|
+
steps.push({
|
|
3281
|
+
id: stepId++,
|
|
3282
|
+
tool: 'kg.sparql.query',
|
|
3283
|
+
input_type: 'Query',
|
|
3284
|
+
output_type: 'BindingSet',
|
|
3285
|
+
args: { sparql }
|
|
3286
|
+
})
|
|
3287
|
+
}
|
|
3339
3288
|
}
|
|
3340
3289
|
|
|
3341
3290
|
if (intent.pattern) {
|
|
@@ -3422,39 +3371,77 @@ Intent types: detect_fraud, find_similar, explain, find_patterns, aggregate, gen
|
|
|
3422
3371
|
}
|
|
3423
3372
|
|
|
3424
3373
|
// ============================================================
|
|
3425
|
-
//
|
|
3426
|
-
//
|
|
3427
|
-
//
|
|
3428
|
-
// The Rust PredicateResolver.generate_federated_sql() handles:
|
|
3429
|
-
// - Entity resolution (NL → URI)
|
|
3430
|
-
// - Predicate resolution (NL → schema morphism)
|
|
3431
|
-
// - SPARQL pattern generation (category theory composition)
|
|
3432
|
-
// - SQL wrapping with HyperFederate UDFs
|
|
3374
|
+
// SCHEMA-DRIVEN SPARQL GENERATION
|
|
3375
|
+
// Uses schema._predicateIndex built during extractSchema
|
|
3376
|
+
// NO FALLBACKS - requires valid schema match
|
|
3433
3377
|
// ============================================================
|
|
3434
3378
|
|
|
3435
|
-
|
|
3379
|
+
const predicateIndex = schema._predicateIndex || new Map()
|
|
3380
|
+
const predicates = schema.predicates || []
|
|
3381
|
+
|
|
3382
|
+
if (predicates.length === 0) {
|
|
3436
3383
|
throw new Error(JSON.stringify({
|
|
3437
|
-
type: '
|
|
3438
|
-
message: '
|
|
3439
|
-
suggestion: '
|
|
3440
|
-
recoverable:
|
|
3384
|
+
type: 'SchemaError',
|
|
3385
|
+
message: 'No schema predicates available.',
|
|
3386
|
+
suggestion: 'Load data with valid RDF predicates first.',
|
|
3387
|
+
recoverable: true
|
|
3441
3388
|
}))
|
|
3442
3389
|
}
|
|
3443
3390
|
|
|
3444
|
-
|
|
3445
|
-
|
|
3391
|
+
// Extract keywords from prompt
|
|
3392
|
+
const keywords = extractKeywords(prompt, predicates)
|
|
3393
|
+
|
|
3394
|
+
// Find ALL matching predicates using tokenized comparison
|
|
3395
|
+
const matches = []
|
|
3396
|
+
for (const keyword of keywords) {
|
|
3397
|
+
const kwLower = keyword.toLowerCase()
|
|
3398
|
+
|
|
3399
|
+
// Direct index lookup (index has tokenized predicate names)
|
|
3400
|
+
if (predicateIndex.has(kwLower)) {
|
|
3401
|
+
matches.push({ predicate: predicateIndex.get(kwLower), score: 1.0, keyword })
|
|
3402
|
+
continue
|
|
3403
|
+
}
|
|
3446
3404
|
|
|
3447
|
-
|
|
3448
|
-
|
|
3449
|
-
|
|
3450
|
-
|
|
3451
|
-
|
|
3452
|
-
|
|
3453
|
-
|
|
3405
|
+
// Token-based matching: tokenize predicate and check for substring/exact match
|
|
3406
|
+
for (const pred of predicates) {
|
|
3407
|
+
const localName = pred.split('/').pop().split('#').pop()
|
|
3408
|
+
const tokens = tokenizeIdentifier ? tokenizeIdentifier(localName) : [localName.toLowerCase()]
|
|
3409
|
+
|
|
3410
|
+
for (const token of tokens) {
|
|
3411
|
+
if (token === kwLower) {
|
|
3412
|
+
matches.push({ predicate: pred, score: 1.0, keyword })
|
|
3413
|
+
break
|
|
3414
|
+
} else if (token.includes(kwLower) || kwLower.includes(token)) {
|
|
3415
|
+
matches.push({ predicate: pred, score: 0.7, keyword })
|
|
3416
|
+
break
|
|
3417
|
+
}
|
|
3418
|
+
}
|
|
3454
3419
|
}
|
|
3455
3420
|
}
|
|
3456
3421
|
|
|
3457
|
-
|
|
3422
|
+
if (matches.length === 0) {
|
|
3423
|
+
throw new Error(JSON.stringify({
|
|
3424
|
+
type: 'NoMatchError',
|
|
3425
|
+
message: `No schema predicates match prompt: "${prompt}"`,
|
|
3426
|
+
keywords,
|
|
3427
|
+
availablePredicates: predicates.slice(0, 10),
|
|
3428
|
+
suggestion: 'Rephrase query using predicates from schema.',
|
|
3429
|
+
recoverable: true
|
|
3430
|
+
}))
|
|
3431
|
+
}
|
|
3432
|
+
|
|
3433
|
+
// Sort by score and get best match
|
|
3434
|
+
matches.sort((a, b) => b.score - a.score)
|
|
3435
|
+
const bestMatch = matches[0]
|
|
3436
|
+
|
|
3437
|
+
// Build SPARQL from schema predicate
|
|
3438
|
+
const sparql = `SELECT ?subject ?object WHERE { ?subject <${bestMatch.predicate}> ?object } LIMIT ${limit}`
|
|
3439
|
+
context._matchedPredicate = bestMatch.predicate
|
|
3440
|
+
context._matchConfidence = bestMatch.score
|
|
3441
|
+
context._matchKeyword = bestMatch.keyword
|
|
3442
|
+
context._allMatches = matches.slice(0, 5)
|
|
3443
|
+
|
|
3444
|
+
return sparql
|
|
3458
3445
|
}
|
|
3459
3446
|
|
|
3460
3447
|
/**
|
|
@@ -4487,19 +4474,9 @@ class ThinkingReasoner {
|
|
|
4487
4474
|
this.contextId = config.contextId || `thinking-${Date.now()}`
|
|
4488
4475
|
this.actorId = config.actorId || 'hypermind-agent'
|
|
4489
4476
|
|
|
4490
|
-
//
|
|
4491
|
-
//
|
|
4492
|
-
|
|
4493
|
-
if (NativeThinkingReasoner) {
|
|
4494
|
-
try {
|
|
4495
|
-
this._native = new NativeThinkingReasoner()
|
|
4496
|
-
this._hasNative = true
|
|
4497
|
-
} catch (e) {
|
|
4498
|
-
this._hasNative = false
|
|
4499
|
-
}
|
|
4500
|
-
} else {
|
|
4501
|
-
this._hasNative = false
|
|
4502
|
-
}
|
|
4477
|
+
// Native reasoning not available in thin SDK
|
|
4478
|
+
// All reasoning handled via SPARQL and schema-driven approach
|
|
4479
|
+
this._hasNative = false
|
|
4503
4480
|
|
|
4504
4481
|
// Fallback stores (only used if native not available)
|
|
4505
4482
|
this.events = []
|
|
@@ -5770,7 +5747,37 @@ class HyperMindAgent {
|
|
|
5770
5747
|
trace.addStep({ type: 'intent_classification', intent })
|
|
5771
5748
|
|
|
5772
5749
|
// 3. Generate typed execution plan
|
|
5773
|
-
|
|
5750
|
+
let plan
|
|
5751
|
+
try {
|
|
5752
|
+
plan = this._generatePlan(intent, prompt)
|
|
5753
|
+
} catch (planErr) {
|
|
5754
|
+
// Schema-based SPARQL generation failed - try LLM for semantic understanding
|
|
5755
|
+
// This is NOT a fallback, it's the proper path for complex queries
|
|
5756
|
+
if (this.apiKey && this.planner && this.planner.model) {
|
|
5757
|
+
const schema = this.planner._schemaCache || { predicates: [], classes: [] }
|
|
5758
|
+
const llmResult = await this.planner._planWithLLM(prompt, schema, memories)
|
|
5759
|
+
if (llmResult && llmResult.sparql) {
|
|
5760
|
+
// Create plan with LLM-generated SPARQL
|
|
5761
|
+
plan = {
|
|
5762
|
+
id: `plan_llm_${Date.now()}`,
|
|
5763
|
+
intent: llmResult.type || intent.type,
|
|
5764
|
+
steps: [{
|
|
5765
|
+
id: 1,
|
|
5766
|
+
tool: 'kg.sparql.query',
|
|
5767
|
+
args: { sparql: llmResult.sparql }
|
|
5768
|
+
}],
|
|
5769
|
+
type_chain: 'kg.sparql.query',
|
|
5770
|
+
_llmGenerated: true
|
|
5771
|
+
}
|
|
5772
|
+
trace.addStep({ type: 'llm_sparql_generation', sparql: llmResult.sparql })
|
|
5773
|
+
}
|
|
5774
|
+
}
|
|
5775
|
+
|
|
5776
|
+
// If still no plan, throw the original error
|
|
5777
|
+
if (!plan) {
|
|
5778
|
+
throw planErr
|
|
5779
|
+
}
|
|
5780
|
+
}
|
|
5774
5781
|
trace.addStep({ type: 'execution_plan', plan })
|
|
5775
5782
|
|
|
5776
5783
|
// 4. Execute plan in WASM sandbox
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "rust-kgdb",
|
|
3
|
-
"version": "0.8.
|
|
3
|
+
"version": "0.8.23",
|
|
4
4
|
"description": "High-performance RDF/SPARQL database with AI agent framework and cross-database federation. GraphDB (449ns lookups, 5-11x faster than RDFox), HyperFederate (KGDB + Snowflake + BigQuery), GraphFrames analytics, Datalog reasoning, HNSW vector embeddings. HyperMindAgent for schema-aware query generation with audit trails. W3C SPARQL 1.1 compliant. Native performance via Rust + NAPI-RS.",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"types": "index.d.ts",
|
|
Binary file
|
|
Binary file
|
package/rust-kgdb-napi.node
CHANGED
|
Binary file
|