rust-kgdb 0.8.22 → 0.8.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -47,27 +47,41 @@ function loadNativeBindingDirect() {
47
47
 
48
48
  const native = loadNativeBindingDirect()
49
49
 
50
- // Native Rust bindings - SDK is THIN, all logic lives in Rust
50
+ // Native Rust bindings - SDK is THIN, only pure functions
51
+ // GraphDB handles all RDF/SPARQL operations
52
+ // tokenizeIdentifier and computeSimilarity are pure utility functions
51
53
  const {
52
- OlogSchema,
53
- PredicateResolverService,
54
- SchemaValidatorService,
55
- ThinkingReasoner: NativeThinkingReasoner,
56
- computeSimilarity,
57
54
  tokenizeIdentifier,
58
- stemWord,
59
- extractKeywords: nativeExtractKeywords
55
+ computeSimilarity
60
56
  } = native
61
57
 
62
58
  /**
63
- * Extract keywords from natural language prompt using native Rust
64
- * Delegates entirely to Rust KeywordExtractor - no JavaScript stop words
59
+ * Extract keywords from natural language prompt
60
+ * Uses schema predicates for domain-aware extraction
65
61
  * @param {string} prompt - Natural language prompt
62
+ * @param {string[]} schemaPredicates - Optional schema predicates for domain hints
66
63
  * @returns {string[]} Extracted keywords
67
64
  */
68
- function extractKeywords(prompt) {
65
+ function extractKeywords(prompt, schemaPredicates = []) {
69
66
  if (!prompt) return []
70
- return nativeExtractKeywords(prompt)
67
+ // Split on whitespace and filter short words
68
+ const words = prompt.toLowerCase()
69
+ .replace(/[^\w\s]/g, ' ')
70
+ .split(/\s+/)
71
+ .filter(w => w.length > 2)
72
+
73
+ // If schema predicates provided, boost domain-relevant keywords
74
+ if (schemaPredicates.length > 0) {
75
+ const predicateWords = new Set()
76
+ for (const pred of schemaPredicates) {
77
+ const tokens = tokenizeIdentifier ? tokenizeIdentifier(pred.split('/').pop().split('#').pop()) : []
78
+ tokens.forEach(t => predicateWords.add(t.toLowerCase()))
79
+ }
80
+ // Return words that match schema or are content words
81
+ return words.filter(w => predicateWords.has(w) || w.length > 3)
82
+ }
83
+
84
+ return words
71
85
  }
72
86
 
73
87
  // ============================================================================
@@ -2938,100 +2952,18 @@ class LLMPlanner {
2938
2952
  o: r.bindings?.o || r.o
2939
2953
  }))
2940
2954
 
2941
- // Initialize predicate resolver (native Rust - NO JavaScript fallback per NO FALLBACKS principle)
2942
- const threshold = CONFIG.scoring?.similarityThreshold || 0.3
2943
- if (native?.OlogSchema && native?.PredicateResolverService) {
2944
- try {
2945
- // Build OlogSchema from extracted schema
2946
- const olog = new native.OlogSchema()
2947
- olog.withNamespace('http://schema.org/')
2948
-
2949
- // Add classes
2950
- for (const cls of (schema.classes || [])) {
2951
- try {
2952
- const localName = cls.split('/').pop().split('#').pop()
2953
- olog.addClass(localName)
2954
- } catch (e) { /* skip invalid class */ }
2955
- }
2956
-
2957
- // Add properties with aliases extracted from local names
2958
- for (const prop of (schema.predicates || [])) {
2959
- try {
2960
- const localName = prop.split('/').pop().split('#').pop()
2961
- // Generate aliases from tokenized form
2962
- const tokens = native.tokenizeIdentifier(localName)
2963
- const aliases = tokens.length > 1 ? [tokens.join(''), tokens.join('_')] : []
2964
- olog.addProperty(prop, 'Thing', 'Thing', [localName, ...aliases])
2965
- } catch (e) { /* skip invalid property */ }
2966
- }
2967
-
2968
- olog.build()
2969
-
2970
- // ============================================================
2971
- // ENTITY RESOLUTION: Populate Olog with entities from RDF data
2972
- // This enables NL entity references like "Fifth Amendment" to
2973
- // resolve to canonical URIs like "legal:FifthAmendment"
2974
- //
2975
- // CRITICAL FIX (2025-12-23): Use SchemaContext's extracted entities
2976
- // The entities extracted with Strategy 6 (rdfs:label) are now used
2977
- // to populate the Olog's label_to_entity map for O(1) lookup.
2978
- // ============================================================
2979
- try {
2980
- let entityCount = 0
2981
-
2982
- // PRIMARY: Use SchemaContext entities (extracted via Strategy 6)
2983
- // These are the entities with rdfs:label that we need for resolution
2984
- // Use this._schemaContext if available (from getSchemaContext())
2985
- const schemaCtx = this._schemaContext || await this.getSchemaContext?.()
2986
- if (schemaCtx && schemaCtx.entities && schemaCtx.entities.size > 0) {
2987
- const entityTriples = []
2988
- for (const [uri, info] of schemaCtx.entities) {
2989
- // Create triples for entity labels
2990
- if (info.label) {
2991
- entityTriples.push([uri, 'http://www.w3.org/2000/01/rdf-schema#label', info.label])
2992
- }
2993
- if (info.type) {
2994
- entityTriples.push([uri, 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type', info.type])
2995
- }
2996
- }
2997
- if (entityTriples.length > 0 && olog.populateEntitiesFromTriples) {
2998
- entityCount = olog.populateEntitiesFromTriples(JSON.stringify(entityTriples))
2999
- schema._entityCount = entityCount
3000
- schema._entitySource = 'schemaContext'
3001
- }
3002
- }
3003
-
3004
- // FALLBACK: Query triples if SchemaContext has no entities
3005
- if (entityCount === 0 && this.kg && typeof this.kg.querySelect === 'function') {
3006
- const allTriples = this.kg.querySelect('SELECT ?s ?p ?o WHERE { ?s ?p ?o } LIMIT 10000')
3007
- if (allTriples && allTriples.length > 0) {
3008
- // Convert to triple array format expected by Rust
3009
- const triplesArray = allTriples.map(result => [
3010
- result.bindings?.s || result.bindings?.subject || '',
3011
- result.bindings?.p || result.bindings?.predicate || '',
3012
- result.bindings?.o || result.bindings?.object || ''
3013
- ]).filter(t => t[0] && t[1] && t[2])
3014
-
3015
- // Populate entities in Olog using Rust extraction
3016
- if (triplesArray.length > 0 && olog.populateEntitiesFromTriples) {
3017
- entityCount = olog.populateEntitiesFromTriples(JSON.stringify(triplesArray))
3018
- schema._entityCount = entityCount
3019
- schema._entitySource = 'triples'
3020
- }
3021
- }
3022
- }
3023
- } catch (entityErr) {
3024
- // Entity extraction is optional - continue without it
3025
- schema._entityExtractionError = entityErr.message
2955
+ // Build predicate index for fast keyword→URI lookup
2956
+ // NO STATE - computed fresh from schema.predicates
2957
+ schema._predicateIndex = new Map()
2958
+ for (const pred of (schema.predicates || [])) {
2959
+ const localName = pred.split('/').pop().split('#').pop()
2960
+ const tokens = tokenizeIdentifier ? tokenizeIdentifier(localName) : [localName.toLowerCase()]
2961
+ // Index by local name and tokens
2962
+ schema._predicateIndex.set(localName.toLowerCase(), pred)
2963
+ for (const token of tokens) {
2964
+ if (token.length > 2) {
2965
+ schema._predicateIndex.set(token.toLowerCase(), pred)
3026
2966
  }
3027
-
3028
- schema._nativeResolver = new native.PredicateResolverService(olog, threshold)
3029
- schema._nativeOlog = olog
3030
- } catch (e) {
3031
- // NO FALLBACKS - propagate error with context
3032
- console.error('[extractSchema] Native resolver initialization failed:', e.message)
3033
- schema._nativeResolverError = e.message
3034
- schema._nativeResolver = null
3035
2967
  }
3036
2968
  }
3037
2969
 
@@ -3210,10 +3142,15 @@ ${schemaText}
3210
3142
  ${memoryText}
3211
3143
 
3212
3144
  RULES:
3213
- - ONLY use predicates from the schema above
3214
- - NEVER invent predicate names
3215
- - If schema doesn't match user's request, set intent to "schema_mismatch"
3216
- - Use proper SPARQL syntax
3145
+ - Use predicates from the schema to construct SPARQL queries
3146
+ - For pattern queries (fraud rings, collusion, networks, relationships):
3147
+ - Map semantic intent to relationship predicates (e.g., 'knows', 'referredBy', 'claimsWith')
3148
+ - Generate triangle/cycle patterns: ?a :knows ?b . ?b :knows ?c . ?c :knows ?a
3149
+ - A "fraud ring" = entities connected in cycles via relationship predicates
3150
+ - For risk queries, use 'riskScore' with FILTER (e.g., FILTER(?score > 0.7))
3151
+ - For similarity queries, look for shared attributes (same address, overlapping claims)
3152
+ - Always return valid SPARQL using actual schema predicates
3153
+ - Use proper SPARQL 1.1 syntax with correct prefixes
3217
3154
 
3218
3155
  Respond in JSON:
3219
3156
  {
@@ -3328,14 +3265,26 @@ Intent types: detect_fraud, find_similar, explain, find_patterns, aggregate, gen
3328
3265
 
3329
3266
  // Generate SPARQL based on intent and schema
3330
3267
  if (intent.query || intent.compliance || intent.aggregate) {
3331
- const sparql = this._generateSchemaSparql(intent, schema, context)
3332
- steps.push({
3333
- id: stepId++,
3334
- tool: 'kg.sparql.query',
3335
- input_type: 'Query',
3336
- output_type: 'BindingSet',
3337
- args: { sparql }
3338
- })
3268
+ let sparql = null
3269
+
3270
+ // Try schema-driven SPARQL generation first (fast, deterministic)
3271
+ try {
3272
+ sparql = this._generateSchemaSparql(intent, schema, context)
3273
+ } catch (schemaErr) {
3274
+ // Keyword matching failed - return empty steps (let LLM handle in call())
3275
+ // This is NOT a fallback - complex queries go through LLM path
3276
+ sparql = null
3277
+ }
3278
+
3279
+ if (sparql) {
3280
+ steps.push({
3281
+ id: stepId++,
3282
+ tool: 'kg.sparql.query',
3283
+ input_type: 'Query',
3284
+ output_type: 'BindingSet',
3285
+ args: { sparql }
3286
+ })
3287
+ }
3339
3288
  }
3340
3289
 
3341
3290
  if (intent.pattern) {
@@ -3422,39 +3371,77 @@ Intent types: detect_fraud, find_similar, explain, find_patterns, aggregate, gen
3422
3371
  }
3423
3372
 
3424
3373
  // ============================================================
3425
- // DELEGATE TO RUST: HyperFederate PredicateResolverService
3426
- //
3427
- // ALL query generation delegated to Rust. NO JavaScript fallbacks.
3428
- // The Rust PredicateResolver.generate_federated_sql() handles:
3429
- // - Entity resolution (NL → URI)
3430
- // - Predicate resolution (NL → schema morphism)
3431
- // - SPARQL pattern generation (category theory composition)
3432
- // - SQL wrapping with HyperFederate UDFs
3374
+ // SCHEMA-DRIVEN SPARQL GENERATION
3375
+ // Uses schema._predicateIndex built during extractSchema
3376
+ // NO FALLBACKS - requires valid schema match
3433
3377
  // ============================================================
3434
3378
 
3435
- if (!schema._nativeResolver || typeof schema._nativeResolver.generateFederatedSql !== 'function') {
3379
+ const predicateIndex = schema._predicateIndex || new Map()
3380
+ const predicates = schema.predicates || []
3381
+
3382
+ if (predicates.length === 0) {
3436
3383
  throw new Error(JSON.stringify({
3437
- type: 'ConfigurationError',
3438
- message: 'Native HyperFederate resolver not configured.',
3439
- suggestion: 'Ensure schema is built with OlogSchema and PredicateResolverService initialized.',
3440
- recoverable: false
3384
+ type: 'SchemaError',
3385
+ message: 'No schema predicates available.',
3386
+ suggestion: 'Load data with valid RDF predicates first.',
3387
+ recoverable: true
3441
3388
  }))
3442
3389
  }
3443
3390
 
3444
- const result = JSON.parse(schema._nativeResolver.generateFederatedSql(prompt, limit))
3445
- context._federatedSqlResult = result
3391
+ // Extract keywords from prompt
3392
+ const keywords = extractKeywords(prompt, predicates)
3393
+
3394
+ // Find ALL matching predicates using tokenized comparison
3395
+ const matches = []
3396
+ for (const keyword of keywords) {
3397
+ const kwLower = keyword.toLowerCase()
3398
+
3399
+ // Direct index lookup (index has tokenized predicate names)
3400
+ if (predicateIndex.has(kwLower)) {
3401
+ matches.push({ predicate: predicateIndex.get(kwLower), score: 1.0, keyword })
3402
+ continue
3403
+ }
3446
3404
 
3447
- if (result.confidence < 0.3) {
3448
- context._resolutionWarning = {
3449
- type: 'LowConfidence',
3450
- confidence: result.confidence,
3451
- pattern: result.pattern,
3452
- resolved_predicates: result.resolved_predicates,
3453
- suggestion: 'Entities/predicates may not exist in schema. Check data population.'
3405
+ // Token-based matching: tokenize predicate and check for substring/exact match
3406
+ for (const pred of predicates) {
3407
+ const localName = pred.split('/').pop().split('#').pop()
3408
+ const tokens = tokenizeIdentifier ? tokenizeIdentifier(localName) : [localName.toLowerCase()]
3409
+
3410
+ for (const token of tokens) {
3411
+ if (token === kwLower) {
3412
+ matches.push({ predicate: pred, score: 1.0, keyword })
3413
+ break
3414
+ } else if (token.includes(kwLower) || kwLower.includes(token)) {
3415
+ matches.push({ predicate: pred, score: 0.7, keyword })
3416
+ break
3417
+ }
3418
+ }
3454
3419
  }
3455
3420
  }
3456
3421
 
3457
- return result.sparql
3422
+ if (matches.length === 0) {
3423
+ throw new Error(JSON.stringify({
3424
+ type: 'NoMatchError',
3425
+ message: `No schema predicates match prompt: "${prompt}"`,
3426
+ keywords,
3427
+ availablePredicates: predicates.slice(0, 10),
3428
+ suggestion: 'Rephrase query using predicates from schema.',
3429
+ recoverable: true
3430
+ }))
3431
+ }
3432
+
3433
+ // Sort by score and get best match
3434
+ matches.sort((a, b) => b.score - a.score)
3435
+ const bestMatch = matches[0]
3436
+
3437
+ // Build SPARQL from schema predicate
3438
+ const sparql = `SELECT ?subject ?object WHERE { ?subject <${bestMatch.predicate}> ?object } LIMIT ${limit}`
3439
+ context._matchedPredicate = bestMatch.predicate
3440
+ context._matchConfidence = bestMatch.score
3441
+ context._matchKeyword = bestMatch.keyword
3442
+ context._allMatches = matches.slice(0, 5)
3443
+
3444
+ return sparql
3458
3445
  }
3459
3446
 
3460
3447
  /**
@@ -4487,19 +4474,9 @@ class ThinkingReasoner {
4487
4474
  this.contextId = config.contextId || `thinking-${Date.now()}`
4488
4475
  this.actorId = config.actorId || 'hypermind-agent'
4489
4476
 
4490
- // NATIVE RUST DELEGATION: Use native ThinkingReasoner for real reasoning
4491
- // The JavaScript class is a thin wrapper - all heavy lifting in Rust
4492
- // NativeThinkingReasoner is imported at module level from the native binding
4493
- if (NativeThinkingReasoner) {
4494
- try {
4495
- this._native = new NativeThinkingReasoner()
4496
- this._hasNative = true
4497
- } catch (e) {
4498
- this._hasNative = false
4499
- }
4500
- } else {
4501
- this._hasNative = false
4502
- }
4477
+ // Native reasoning not available in thin SDK
4478
+ // All reasoning handled via SPARQL and schema-driven approach
4479
+ this._hasNative = false
4503
4480
 
4504
4481
  // Fallback stores (only used if native not available)
4505
4482
  this.events = []
@@ -5770,7 +5747,37 @@ class HyperMindAgent {
5770
5747
  trace.addStep({ type: 'intent_classification', intent })
5771
5748
 
5772
5749
  // 3. Generate typed execution plan
5773
- const plan = this._generatePlan(intent, prompt)
5750
+ let plan
5751
+ try {
5752
+ plan = this._generatePlan(intent, prompt)
5753
+ } catch (planErr) {
5754
+ // Schema-based SPARQL generation failed - try LLM for semantic understanding
5755
+ // This is NOT a fallback, it's the proper path for complex queries
5756
+ if (this.apiKey && this.planner && this.planner.model) {
5757
+ const schema = this.planner._schemaCache || { predicates: [], classes: [] }
5758
+ const llmResult = await this.planner._planWithLLM(prompt, schema, memories)
5759
+ if (llmResult && llmResult.sparql) {
5760
+ // Create plan with LLM-generated SPARQL
5761
+ plan = {
5762
+ id: `plan_llm_${Date.now()}`,
5763
+ intent: llmResult.type || intent.type,
5764
+ steps: [{
5765
+ id: 1,
5766
+ tool: 'kg.sparql.query',
5767
+ args: { sparql: llmResult.sparql }
5768
+ }],
5769
+ type_chain: 'kg.sparql.query',
5770
+ _llmGenerated: true
5771
+ }
5772
+ trace.addStep({ type: 'llm_sparql_generation', sparql: llmResult.sparql })
5773
+ }
5774
+ }
5775
+
5776
+ // If still no plan, throw the original error
5777
+ if (!plan) {
5778
+ throw planErr
5779
+ }
5780
+ }
5774
5781
  trace.addStep({ type: 'execution_plan', plan })
5775
5782
 
5776
5783
  // 4. Execute plan in WASM sandbox
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "rust-kgdb",
3
- "version": "0.8.22",
3
+ "version": "0.8.23",
4
4
  "description": "High-performance RDF/SPARQL database with AI agent framework and cross-database federation. GraphDB (449ns lookups, 5-11x faster than RDFox), HyperFederate (KGDB + Snowflake + BigQuery), GraphFrames analytics, Datalog reasoning, HNSW vector embeddings. HyperMindAgent for schema-aware query generation with audit trails. W3C SPARQL 1.1 compliant. Native performance via Rust + NAPI-RS.",
5
5
  "main": "index.js",
6
6
  "types": "index.d.ts",
Binary file
Binary file
Binary file