npm - rust-kgdb - Versions diffs - 0.8.21 → 0.8.23 - Mend

rust-kgdb 0.8.21 → 0.8.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/hypermind-agent.js +193 -190
package/package.json +1 -1
package/rust-kgdb-napi.darwin-arm64.node +0 -0
package/rust-kgdb-napi.darwin-x64.node +0 -0
package/rust-kgdb-napi.node +0 -0

package/hypermind-agent.js CHANGED Viewed

@@ -46,26 +46,42 @@ function loadNativeBindingDirect() {
 }
 const native = loadNativeBindingDirect()
+// Native Rust bindings - SDK is THIN, only pure functions
+// GraphDB handles all RDF/SPARQL operations
+// tokenizeIdentifier and computeSimilarity are pure utility functions
 const {
-  OlogSchema,
-  PredicateResolverService,
-  SchemaValidatorService,
-  ThinkingReasoner: NativeThinkingReasoner,
-  computeSimilarity,
   tokenizeIdentifier,
-  stemWord,
-  extractKeywords: nativeExtractKeywords
+  computeSimilarity
 } = native
 /**
- * Extract keywords from natural language prompt using native Rust
- * Delegates entirely to Rust KeywordExtractor - no JavaScript stop words
+ * Extract keywords from natural language prompt
+ * Uses schema predicates for domain-aware extraction
  * @param {string} prompt - Natural language prompt
+ * @param {string[]} schemaPredicates - Optional schema predicates for domain hints
  * @returns {string[]} Extracted keywords
  */
-function extractKeywords(prompt) {
+function extractKeywords(prompt, schemaPredicates = []) {
   if (!prompt) return []
-  return nativeExtractKeywords(prompt)
+  // Split on whitespace and filter short words
+  const words = prompt.toLowerCase()
+    .replace(/[^\w\s]/g, ' ')
+    .split(/\s+/)
+    .filter(w => w.length > 2)
+  // If schema predicates provided, boost domain-relevant keywords
+  if (schemaPredicates.length > 0) {
+    const predicateWords = new Set()
+    for (const pred of schemaPredicates) {
+      const tokens = tokenizeIdentifier ? tokenizeIdentifier(pred.split('/').pop().split('#').pop()) : []
+      tokens.forEach(t => predicateWords.add(t.toLowerCase()))
+    }
+    // Return words that match schema or are content words
+    return words.filter(w => predicateWords.has(w) || w.length > 3)
+  }
+  return words
 }
 // ============================================================================
@@ -838,12 +854,12 @@ class SchemaContext {
       // STRATEGY 2: Extract RDFS/OWL explicit schema (if VoID incomplete)
       if (ctx.classes.size < 10) {
         const classQuery = `
-          PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
-          PREFIX owl: <http://www.w3.org/2002/07/owl#>
           SELECT DISTINCT ?class ?super ?label WHERE {
-            { ?class a rdfs:Class } UNION { ?class a owl:Class }
-            OPTIONAL { ?class rdfs:subClassOf ?super }
-            OPTIONAL { ?class rdfs:label ?label }
+            { ?class <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2000/01/rdf-schema#Class> }
+            UNION
+            { ?class <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2002/07/owl#Class> }
+            OPTIONAL { ?class <http://www.w3.org/2000/01/rdf-schema#subClassOf> ?super }
+            OPTIONAL { ?class <http://www.w3.org/2000/01/rdf-schema#label> ?label }
           } LIMIT ${config.maxClasses}
         `
         const classResults = kg.querySelect(classQuery)
@@ -860,14 +876,15 @@ class SchemaContext {
       // STRATEGY 3: Extract property morphisms with domain/range
       if (ctx.properties.size < 10) {
         const propQuery = `
-          PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
-          PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
-          PREFIX owl: <http://www.w3.org/2002/07/owl#>
           SELECT DISTINCT ?prop ?domain ?range ?label WHERE {
-            { ?prop a rdf:Property } UNION { ?prop a owl:ObjectProperty } UNION { ?prop a owl:DatatypeProperty }
-            OPTIONAL { ?prop rdfs:domain ?domain }
-            OPTIONAL { ?prop rdfs:range ?range }
-            OPTIONAL { ?prop rdfs:label ?label }
+            { ?prop <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/1999/02/22-rdf-syntax-ns#Property> }
+            UNION
+            { ?prop <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2002/07/owl#ObjectProperty> }
+            UNION
+            { ?prop <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2002/07/owl#DatatypeProperty> }
+            OPTIONAL { ?prop <http://www.w3.org/2000/01/rdf-schema#domain> ?domain }
+            OPTIONAL { ?prop <http://www.w3.org/2000/01/rdf-schema#range> ?range }
+            OPTIONAL { ?prop <http://www.w3.org/2000/01/rdf-schema#label> ?label }
           } LIMIT ${config.maxProperties}
         `
         const propResults = kg.querySelect(propQuery)
@@ -922,11 +939,9 @@ class SchemaContext {
       //
       // ALWAYS extract entities - they are essential for entity resolution
       const entityQuery = `
-        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
-        PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
         SELECT DISTINCT ?entity ?label ?type WHERE {
-          ?entity rdfs:label ?label .
-          OPTIONAL { ?entity a ?type }
+          ?entity <http://www.w3.org/2000/01/rdf-schema#label> ?label .
+          OPTIONAL { ?entity <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> ?type }
         } LIMIT ${config.maxEntities || 1000}
       `
       try {
@@ -1037,15 +1052,13 @@ class SchemaContext {
       try {
         // Extract classes (Objects in schema category)
         const classQuery = `
-          PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
-          PREFIX owl: <http://www.w3.org/2002/07/owl#>
-          PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
           SELECT DISTINCT ?class ?super ?label ?comment WHERE {
-            { ?class a rdfs:Class }
-            UNION { ?class a owl:Class }
-            OPTIONAL { ?class rdfs:subClassOf ?super }
-            OPTIONAL { ?class rdfs:label ?label }
-            OPTIONAL { ?class rdfs:comment ?comment }
+            { ?class <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2000/01/rdf-schema#Class> }
+            UNION
+            { ?class <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2002/07/owl#Class> }
+            OPTIONAL { ?class <http://www.w3.org/2000/01/rdf-schema#subClassOf> ?super }
+            OPTIONAL { ?class <http://www.w3.org/2000/01/rdf-schema#label> ?label }
+            OPTIONAL { ?class <http://www.w3.org/2000/01/rdf-schema#comment> ?comment }
           } LIMIT ${CONFIG.schema.maxClasses}
         `
         const classResults = loadedKg.querySelect(classQuery)
@@ -1069,17 +1082,15 @@ class SchemaContext {
         // Extract properties (Morphisms with domain/range)
         const propQuery = `
-          PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
-          PREFIX owl: <http://www.w3.org/2002/07/owl#>
-          PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
-          SELECT DISTINCT ?prop ?domain ?range ?label ?functional WHERE {
-            { ?prop a rdf:Property }
-            UNION { ?prop a owl:ObjectProperty }
-            UNION { ?prop a owl:DatatypeProperty }
-            OPTIONAL { ?prop rdfs:domain ?domain }
-            OPTIONAL { ?prop rdfs:range ?range }
-            OPTIONAL { ?prop rdfs:label ?label }
-            OPTIONAL { ?prop a owl:FunctionalProperty . BIND(true AS ?functional) }
+          SELECT DISTINCT ?prop ?domain ?range ?label WHERE {
+            { ?prop <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/1999/02/22-rdf-syntax-ns#Property> }
+            UNION
+            { ?prop <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2002/07/owl#ObjectProperty> }
+            UNION
+            { ?prop <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2002/07/owl#DatatypeProperty> }
+            OPTIONAL { ?prop <http://www.w3.org/2000/01/rdf-schema#domain> ?domain }
+            OPTIONAL { ?prop <http://www.w3.org/2000/01/rdf-schema#range> ?range }
+            OPTIONAL { ?prop <http://www.w3.org/2000/01/rdf-schema#label> ?label }
           } LIMIT ${CONFIG.schema.maxProperties}
         `
         const propResults = loadedKg.querySelect(propQuery)
@@ -1088,14 +1099,13 @@ class SchemaContext {
           const domain = r.bindings?.domain || r.domain
           const range = r.bindings?.range || r.range
           const label = r.bindings?.label || r.label
-          const functional = r.bindings?.functional || r.functional
           if (prop) {
             ctx.properties.set(prop, {
               uri: prop,
               domain: domain || null,
               range: range || null,
               label: label || null,
-              functional: !!functional,
+              functional: false,
               source
             })
           }
@@ -2942,100 +2952,18 @@ class LLMPlanner {
         o: r.bindings?.o || r.o
       }))
-      // Initialize predicate resolver (native Rust - NO JavaScript fallback per NO FALLBACKS principle)
-      const threshold = CONFIG.scoring?.similarityThreshold || 0.3
-      if (native?.OlogSchema && native?.PredicateResolverService) {
-        try {
-          // Build OlogSchema from extracted schema
-          const olog = new native.OlogSchema()
-          olog.withNamespace('http://schema.org/')
-          // Add classes
-          for (const cls of (schema.classes || [])) {
-            try {
-              const localName = cls.split('/').pop().split('#').pop()
-              olog.addClass(localName)
-            } catch (e) { /* skip invalid class */ }
-          }
-          // Add properties with aliases extracted from local names
-          for (const prop of (schema.predicates || [])) {
-            try {
-              const localName = prop.split('/').pop().split('#').pop()
-              // Generate aliases from tokenized form
-              const tokens = native.tokenizeIdentifier(localName)
-              const aliases = tokens.length > 1 ? [tokens.join(''), tokens.join('_')] : []
-              olog.addProperty(prop, 'Thing', 'Thing', [localName, ...aliases])
-            } catch (e) { /* skip invalid property */ }
-          }
-          olog.build()
-          // ============================================================
-          // ENTITY RESOLUTION: Populate Olog with entities from RDF data
-          // This enables NL entity references like "Fifth Amendment" to
-          // resolve to canonical URIs like "legal:FifthAmendment"
-          //
-          // CRITICAL FIX (2025-12-23): Use SchemaContext's extracted entities
-          // The entities extracted with Strategy 6 (rdfs:label) are now used
-          // to populate the Olog's label_to_entity map for O(1) lookup.
-          // ============================================================
-          try {
-            let entityCount = 0
-            // PRIMARY: Use SchemaContext entities (extracted via Strategy 6)
-            // These are the entities with rdfs:label that we need for resolution
-            // Use this._schemaContext if available (from getSchemaContext())
-            const schemaCtx = this._schemaContext || await this.getSchemaContext?.()
-            if (schemaCtx && schemaCtx.entities && schemaCtx.entities.size > 0) {
-              const entityTriples = []
-              for (const [uri, info] of schemaCtx.entities) {
-                // Create triples for entity labels
-                if (info.label) {
-                  entityTriples.push([uri, 'http://www.w3.org/2000/01/rdf-schema#label', info.label])
-                }
-                if (info.type) {
-                  entityTriples.push([uri, 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type', info.type])
-                }
-              }
-              if (entityTriples.length > 0 && olog.populateEntitiesFromTriples) {
-                entityCount = olog.populateEntitiesFromTriples(JSON.stringify(entityTriples))
-                schema._entityCount = entityCount
-                schema._entitySource = 'schemaContext'
-              }
-            }
-            // FALLBACK: Query triples if SchemaContext has no entities
-            if (entityCount === 0 && this.kg && typeof this.kg.querySelect === 'function') {
-              const allTriples = this.kg.querySelect('SELECT ?s ?p ?o WHERE { ?s ?p ?o } LIMIT 10000')
-              if (allTriples && allTriples.length > 0) {
-                // Convert to triple array format expected by Rust
-                const triplesArray = allTriples.map(result => [
-                  result.bindings?.s || result.bindings?.subject || '',
-                  result.bindings?.p || result.bindings?.predicate || '',
-                  result.bindings?.o || result.bindings?.object || ''
-                ]).filter(t => t[0] && t[1] && t[2])
-                // Populate entities in Olog using Rust extraction
-                if (triplesArray.length > 0 && olog.populateEntitiesFromTriples) {
-                  entityCount = olog.populateEntitiesFromTriples(JSON.stringify(triplesArray))
-                  schema._entityCount = entityCount
-                  schema._entitySource = 'triples'
-                }
-              }
-            }
-          } catch (entityErr) {
-            // Entity extraction is optional - continue without it
-            schema._entityExtractionError = entityErr.message
+      // Build predicate index for fast keyword→URI lookup
+      // NO STATE - computed fresh from schema.predicates
+      schema._predicateIndex = new Map()
+      for (const pred of (schema.predicates || [])) {
+        const localName = pred.split('/').pop().split('#').pop()
+        const tokens = tokenizeIdentifier ? tokenizeIdentifier(localName) : [localName.toLowerCase()]
+        // Index by local name and tokens
+        schema._predicateIndex.set(localName.toLowerCase(), pred)
+        for (const token of tokens) {
+          if (token.length > 2) {
+            schema._predicateIndex.set(token.toLowerCase(), pred)
           }
-          schema._nativeResolver = new native.PredicateResolverService(olog, threshold)
-          schema._nativeOlog = olog
-        } catch (e) {
-          // NO FALLBACKS - propagate error with context
-          console.error('[extractSchema] Native resolver initialization failed:', e.message)
-          schema._nativeResolverError = e.message
-          schema._nativeResolver = null
         }
       }
@@ -3214,10 +3142,15 @@ ${schemaText}
 ${memoryText}
 RULES:
-- ONLY use predicates from the schema above
-- NEVER invent predicate names
-- If schema doesn't match user's request, set intent to "schema_mismatch"
-- Use proper SPARQL syntax
+- Use predicates from the schema to construct SPARQL queries
+- For pattern queries (fraud rings, collusion, networks, relationships):
+  - Map semantic intent to relationship predicates (e.g., 'knows', 'referredBy', 'claimsWith')
+  - Generate triangle/cycle patterns: ?a :knows ?b . ?b :knows ?c . ?c :knows ?a
+  - A "fraud ring" = entities connected in cycles via relationship predicates
+- For risk queries, use 'riskScore' with FILTER (e.g., FILTER(?score > 0.7))
+- For similarity queries, look for shared attributes (same address, overlapping claims)
+- Always return valid SPARQL using actual schema predicates
+- Use proper SPARQL 1.1 syntax with correct prefixes
 Respond in JSON:
 {
@@ -3332,14 +3265,26 @@ Intent types: detect_fraud, find_similar, explain, find_patterns, aggregate, gen
     // Generate SPARQL based on intent and schema
     if (intent.query || intent.compliance || intent.aggregate) {
-      const sparql = this._generateSchemaSparql(intent, schema, context)
-      steps.push({
-        id: stepId++,
-        tool: 'kg.sparql.query',
-        input_type: 'Query',
-        output_type: 'BindingSet',
-        args: { sparql }
-      })
+      let sparql = null
+      // Try schema-driven SPARQL generation first (fast, deterministic)
+      try {
+        sparql = this._generateSchemaSparql(intent, schema, context)
+      } catch (schemaErr) {
+        // Keyword matching failed - return empty steps (let LLM handle in call())
+        // This is NOT a fallback - complex queries go through LLM path
+        sparql = null
+      }
+      if (sparql) {
+        steps.push({
+          id: stepId++,
+          tool: 'kg.sparql.query',
+          input_type: 'Query',
+          output_type: 'BindingSet',
+          args: { sparql }
+        })
+      }
     }
     if (intent.pattern) {
@@ -3426,39 +3371,77 @@ Intent types: detect_fraud, find_similar, explain, find_patterns, aggregate, gen
     }
     // ============================================================
-    // DELEGATE TO RUST: HyperFederate PredicateResolverService
-    //
-    // ALL query generation delegated to Rust. NO JavaScript fallbacks.
-    // The Rust PredicateResolver.generate_federated_sql() handles:
-    // - Entity resolution (NL → URI)
-    // - Predicate resolution (NL → schema morphism)
-    // - SPARQL pattern generation (category theory composition)
-    // - SQL wrapping with HyperFederate UDFs
+    // SCHEMA-DRIVEN SPARQL GENERATION
+    // Uses schema._predicateIndex built during extractSchema
+    // NO FALLBACKS - requires valid schema match
     // ============================================================
-    if (!schema._nativeResolver || typeof schema._nativeResolver.generateFederatedSql !== 'function') {
+    const predicateIndex = schema._predicateIndex || new Map()
+    const predicates = schema.predicates || []
+    if (predicates.length === 0) {
       throw new Error(JSON.stringify({
-        type: 'ConfigurationError',
-        message: 'Native HyperFederate resolver not configured.',
-        suggestion: 'Ensure schema is built with OlogSchema and PredicateResolverService initialized.',
-        recoverable: false
+        type: 'SchemaError',
+        message: 'No schema predicates available.',
+        suggestion: 'Load data with valid RDF predicates first.',
+        recoverable: true
       }))
     }
-    const result = JSON.parse(schema._nativeResolver.generateFederatedSql(prompt, limit))
-    context._federatedSqlResult = result
+    // Extract keywords from prompt
+    const keywords = extractKeywords(prompt, predicates)
+    // Find ALL matching predicates using tokenized comparison
+    const matches = []
+    for (const keyword of keywords) {
+      const kwLower = keyword.toLowerCase()
-    if (result.confidence < 0.3) {
-      context._resolutionWarning = {
-        type: 'LowConfidence',
-        confidence: result.confidence,
-        pattern: result.pattern,
-        resolved_predicates: result.resolved_predicates,
-        suggestion: 'Entities/predicates may not exist in schema. Check data population.'
+      // Direct index lookup (index has tokenized predicate names)
+      if (predicateIndex.has(kwLower)) {
+        matches.push({ predicate: predicateIndex.get(kwLower), score: 1.0, keyword })
+        continue
       }
+      // Token-based matching: tokenize predicate and check for substring/exact match
+      for (const pred of predicates) {
+        const localName = pred.split('/').pop().split('#').pop()
+        const tokens = tokenizeIdentifier ? tokenizeIdentifier(localName) : [localName.toLowerCase()]
+        for (const token of tokens) {
+          if (token === kwLower) {
+            matches.push({ predicate: pred, score: 1.0, keyword })
+            break
+          } else if (token.includes(kwLower) || kwLower.includes(token)) {
+            matches.push({ predicate: pred, score: 0.7, keyword })
+            break
+          }
+        }
+      }
+    }
+    if (matches.length === 0) {
+      throw new Error(JSON.stringify({
+        type: 'NoMatchError',
+        message: `No schema predicates match prompt: "${prompt}"`,
+        keywords,
+        availablePredicates: predicates.slice(0, 10),
+        suggestion: 'Rephrase query using predicates from schema.',
+        recoverable: true
+      }))
     }
-    return result.sparql
+    // Sort by score and get best match
+    matches.sort((a, b) => b.score - a.score)
+    const bestMatch = matches[0]
+    // Build SPARQL from schema predicate
+    const sparql = `SELECT ?subject ?object WHERE { ?subject <${bestMatch.predicate}> ?object } LIMIT ${limit}`
+    context._matchedPredicate = bestMatch.predicate
+    context._matchConfidence = bestMatch.score
+    context._matchKeyword = bestMatch.keyword
+    context._allMatches = matches.slice(0, 5)
+    return sparql
   }
   /**
@@ -4491,19 +4474,9 @@ class ThinkingReasoner {
     this.contextId = config.contextId || `thinking-${Date.now()}`
     this.actorId = config.actorId || 'hypermind-agent'
-    // NATIVE RUST DELEGATION: Use native ThinkingReasoner for real reasoning
-    // The JavaScript class is a thin wrapper - all heavy lifting in Rust
-    // NativeThinkingReasoner is imported at module level from the native binding
-    if (NativeThinkingReasoner) {
-      try {
-        this._native = new NativeThinkingReasoner()
-        this._hasNative = true
-      } catch (e) {
-        this._hasNative = false
-      }
-    } else {
-      this._hasNative = false
-    }
+    // Native reasoning not available in thin SDK
+    // All reasoning handled via SPARQL and schema-driven approach
+    this._hasNative = false
     // Fallback stores (only used if native not available)
     this.events = []
@@ -5774,7 +5747,37 @@ class HyperMindAgent {
     trace.addStep({ type: 'intent_classification', intent })
     // 3. Generate typed execution plan
-    const plan = this._generatePlan(intent, prompt)
+    let plan
+    try {
+      plan = this._generatePlan(intent, prompt)
+    } catch (planErr) {
+      // Schema-based SPARQL generation failed - try LLM for semantic understanding
+      // This is NOT a fallback, it's the proper path for complex queries
+      if (this.apiKey && this.planner && this.planner.model) {
+        const schema = this.planner._schemaCache || { predicates: [], classes: [] }
+        const llmResult = await this.planner._planWithLLM(prompt, schema, memories)
+        if (llmResult && llmResult.sparql) {
+          // Create plan with LLM-generated SPARQL
+          plan = {
+            id: `plan_llm_${Date.now()}`,
+            intent: llmResult.type || intent.type,
+            steps: [{
+              id: 1,
+              tool: 'kg.sparql.query',
+              args: { sparql: llmResult.sparql }
+            }],
+            type_chain: 'kg.sparql.query',
+            _llmGenerated: true
+          }
+          trace.addStep({ type: 'llm_sparql_generation', sparql: llmResult.sparql })
+        }
+      }
+      // If still no plan, throw the original error
+      if (!plan) {
+        throw planErr
+      }
+    }
     trace.addStep({ type: 'execution_plan', plan })
     // 4. Execute plan in WASM sandbox

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "rust-kgdb",
-  "version": "0.8.21",
+  "version": "0.8.23",
   "description": "High-performance RDF/SPARQL database with AI agent framework and cross-database federation. GraphDB (449ns lookups, 5-11x faster than RDFox), HyperFederate (KGDB + Snowflake + BigQuery), GraphFrames analytics, Datalog reasoning, HNSW vector embeddings. HyperMindAgent for schema-aware query generation with audit trails. W3C SPARQL 1.1 compliant. Native performance via Rust + NAPI-RS.",
   "main": "index.js",
   "types": "index.d.ts",

package/rust-kgdb-napi.darwin-arm64.node ADDED Viewed

Binary file

package/rust-kgdb-napi.darwin-x64.node ADDED Viewed

Binary file

package/rust-kgdb-napi.node CHANGED Viewed

Binary file