npm - rust-kgdb - Versions diffs - 0.8.22 → 0.8.23 - Mend

rust-kgdb 0.8.22 → 0.8.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/hypermind-agent.js +161 -154
package/package.json +1 -1
package/rust-kgdb-napi.darwin-arm64.node +0 -0
package/rust-kgdb-napi.darwin-x64.node +0 -0
package/rust-kgdb-napi.node +0 -0

package/hypermind-agent.js CHANGED Viewed

@@ -47,27 +47,41 @@ function loadNativeBindingDirect() {
 const native = loadNativeBindingDirect()
-// Native Rust bindings - SDK is THIN, all logic lives in Rust
+// Native Rust bindings - SDK is THIN, only pure functions
+// GraphDB handles all RDF/SPARQL operations
+// tokenizeIdentifier and computeSimilarity are pure utility functions
 const {
-  OlogSchema,
-  PredicateResolverService,
-  SchemaValidatorService,
-  ThinkingReasoner: NativeThinkingReasoner,
-  computeSimilarity,
   tokenizeIdentifier,
-  stemWord,
-  extractKeywords: nativeExtractKeywords
+  computeSimilarity
 } = native
 /**
- * Extract keywords from natural language prompt using native Rust
- * Delegates entirely to Rust KeywordExtractor - no JavaScript stop words
+ * Extract keywords from natural language prompt
+ * Uses schema predicates for domain-aware extraction
  * @param {string} prompt - Natural language prompt
+ * @param {string[]} schemaPredicates - Optional schema predicates for domain hints
  * @returns {string[]} Extracted keywords
  */
-function extractKeywords(prompt) {
+function extractKeywords(prompt, schemaPredicates = []) {
   if (!prompt) return []
-  return nativeExtractKeywords(prompt)
+  // Split on whitespace and filter short words
+  const words = prompt.toLowerCase()
+    .replace(/[^\w\s]/g, ' ')
+    .split(/\s+/)
+    .filter(w => w.length > 2)
+  // If schema predicates provided, boost domain-relevant keywords
+  if (schemaPredicates.length > 0) {
+    const predicateWords = new Set()
+    for (const pred of schemaPredicates) {
+      const tokens = tokenizeIdentifier ? tokenizeIdentifier(pred.split('/').pop().split('#').pop()) : []
+      tokens.forEach(t => predicateWords.add(t.toLowerCase()))
+    }
+    // Return words that match schema or are content words
+    return words.filter(w => predicateWords.has(w) || w.length > 3)
+  }
+  return words
 }
 // ============================================================================
@@ -2938,100 +2952,18 @@ class LLMPlanner {
         o: r.bindings?.o || r.o
       }))
-      // Initialize predicate resolver (native Rust - NO JavaScript fallback per NO FALLBACKS principle)
-      const threshold = CONFIG.scoring?.similarityThreshold || 0.3
-      if (native?.OlogSchema && native?.PredicateResolverService) {
-        try {
-          // Build OlogSchema from extracted schema
-          const olog = new native.OlogSchema()
-          olog.withNamespace('http://schema.org/')
-          // Add classes
-          for (const cls of (schema.classes || [])) {
-            try {
-              const localName = cls.split('/').pop().split('#').pop()
-              olog.addClass(localName)
-            } catch (e) { /* skip invalid class */ }
-          }
-          // Add properties with aliases extracted from local names
-          for (const prop of (schema.predicates || [])) {
-            try {
-              const localName = prop.split('/').pop().split('#').pop()
-              // Generate aliases from tokenized form
-              const tokens = native.tokenizeIdentifier(localName)
-              const aliases = tokens.length > 1 ? [tokens.join(''), tokens.join('_')] : []
-              olog.addProperty(prop, 'Thing', 'Thing', [localName, ...aliases])
-            } catch (e) { /* skip invalid property */ }
-          }
-          olog.build()
-          // ============================================================
-          // ENTITY RESOLUTION: Populate Olog with entities from RDF data
-          // This enables NL entity references like "Fifth Amendment" to
-          // resolve to canonical URIs like "legal:FifthAmendment"
-          //
-          // CRITICAL FIX (2025-12-23): Use SchemaContext's extracted entities
-          // The entities extracted with Strategy 6 (rdfs:label) are now used
-          // to populate the Olog's label_to_entity map for O(1) lookup.
-          // ============================================================
-          try {
-            let entityCount = 0
-            // PRIMARY: Use SchemaContext entities (extracted via Strategy 6)
-            // These are the entities with rdfs:label that we need for resolution
-            // Use this._schemaContext if available (from getSchemaContext())
-            const schemaCtx = this._schemaContext || await this.getSchemaContext?.()
-            if (schemaCtx && schemaCtx.entities && schemaCtx.entities.size > 0) {
-              const entityTriples = []
-              for (const [uri, info] of schemaCtx.entities) {
-                // Create triples for entity labels
-                if (info.label) {
-                  entityTriples.push([uri, 'http://www.w3.org/2000/01/rdf-schema#label', info.label])
-                }
-                if (info.type) {
-                  entityTriples.push([uri, 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type', info.type])
-                }
-              }
-              if (entityTriples.length > 0 && olog.populateEntitiesFromTriples) {
-                entityCount = olog.populateEntitiesFromTriples(JSON.stringify(entityTriples))
-                schema._entityCount = entityCount
-                schema._entitySource = 'schemaContext'
-              }
-            }
-            // FALLBACK: Query triples if SchemaContext has no entities
-            if (entityCount === 0 && this.kg && typeof this.kg.querySelect === 'function') {
-              const allTriples = this.kg.querySelect('SELECT ?s ?p ?o WHERE { ?s ?p ?o } LIMIT 10000')
-              if (allTriples && allTriples.length > 0) {
-                // Convert to triple array format expected by Rust
-                const triplesArray = allTriples.map(result => [
-                  result.bindings?.s || result.bindings?.subject || '',
-                  result.bindings?.p || result.bindings?.predicate || '',
-                  result.bindings?.o || result.bindings?.object || ''
-                ]).filter(t => t[0] && t[1] && t[2])
-                // Populate entities in Olog using Rust extraction
-                if (triplesArray.length > 0 && olog.populateEntitiesFromTriples) {
-                  entityCount = olog.populateEntitiesFromTriples(JSON.stringify(triplesArray))
-                  schema._entityCount = entityCount
-                  schema._entitySource = 'triples'
-                }
-              }
-            }
-          } catch (entityErr) {
-            // Entity extraction is optional - continue without it
-            schema._entityExtractionError = entityErr.message
+      // Build predicate index for fast keyword→URI lookup
+      // NO STATE - computed fresh from schema.predicates
+      schema._predicateIndex = new Map()
+      for (const pred of (schema.predicates || [])) {
+        const localName = pred.split('/').pop().split('#').pop()
+        const tokens = tokenizeIdentifier ? tokenizeIdentifier(localName) : [localName.toLowerCase()]
+        // Index by local name and tokens
+        schema._predicateIndex.set(localName.toLowerCase(), pred)
+        for (const token of tokens) {
+          if (token.length > 2) {
+            schema._predicateIndex.set(token.toLowerCase(), pred)
           }
-          schema._nativeResolver = new native.PredicateResolverService(olog, threshold)
-          schema._nativeOlog = olog
-        } catch (e) {
-          // NO FALLBACKS - propagate error with context
-          console.error('[extractSchema] Native resolver initialization failed:', e.message)
-          schema._nativeResolverError = e.message
-          schema._nativeResolver = null
         }
       }
@@ -3210,10 +3142,15 @@ ${schemaText}
 ${memoryText}
 RULES:
-- ONLY use predicates from the schema above
-- NEVER invent predicate names
-- If schema doesn't match user's request, set intent to "schema_mismatch"
-- Use proper SPARQL syntax
+- Use predicates from the schema to construct SPARQL queries
+- For pattern queries (fraud rings, collusion, networks, relationships):
+  - Map semantic intent to relationship predicates (e.g., 'knows', 'referredBy', 'claimsWith')
+  - Generate triangle/cycle patterns: ?a :knows ?b . ?b :knows ?c . ?c :knows ?a
+  - A "fraud ring" = entities connected in cycles via relationship predicates
+- For risk queries, use 'riskScore' with FILTER (e.g., FILTER(?score > 0.7))
+- For similarity queries, look for shared attributes (same address, overlapping claims)
+- Always return valid SPARQL using actual schema predicates
+- Use proper SPARQL 1.1 syntax with correct prefixes
 Respond in JSON:
 {
@@ -3328,14 +3265,26 @@ Intent types: detect_fraud, find_similar, explain, find_patterns, aggregate, gen
     // Generate SPARQL based on intent and schema
     if (intent.query || intent.compliance || intent.aggregate) {
-      const sparql = this._generateSchemaSparql(intent, schema, context)
-      steps.push({
-        id: stepId++,
-        tool: 'kg.sparql.query',
-        input_type: 'Query',
-        output_type: 'BindingSet',
-        args: { sparql }
-      })
+      let sparql = null
+      // Try schema-driven SPARQL generation first (fast, deterministic)
+      try {
+        sparql = this._generateSchemaSparql(intent, schema, context)
+      } catch (schemaErr) {
+        // Keyword matching failed - return empty steps (let LLM handle in call())
+        // This is NOT a fallback - complex queries go through LLM path
+        sparql = null
+      }
+      if (sparql) {
+        steps.push({
+          id: stepId++,
+          tool: 'kg.sparql.query',
+          input_type: 'Query',
+          output_type: 'BindingSet',
+          args: { sparql }
+        })
+      }
     }
     if (intent.pattern) {
@@ -3422,39 +3371,77 @@ Intent types: detect_fraud, find_similar, explain, find_patterns, aggregate, gen
     }
     // ============================================================
-    // DELEGATE TO RUST: HyperFederate PredicateResolverService
-    //
-    // ALL query generation delegated to Rust. NO JavaScript fallbacks.
-    // The Rust PredicateResolver.generate_federated_sql() handles:
-    // - Entity resolution (NL → URI)
-    // - Predicate resolution (NL → schema morphism)
-    // - SPARQL pattern generation (category theory composition)
-    // - SQL wrapping with HyperFederate UDFs
+    // SCHEMA-DRIVEN SPARQL GENERATION
+    // Uses schema._predicateIndex built during extractSchema
+    // NO FALLBACKS - requires valid schema match
     // ============================================================
-    if (!schema._nativeResolver || typeof schema._nativeResolver.generateFederatedSql !== 'function') {
+    const predicateIndex = schema._predicateIndex || new Map()
+    const predicates = schema.predicates || []
+    if (predicates.length === 0) {
       throw new Error(JSON.stringify({
-        type: 'ConfigurationError',
-        message: 'Native HyperFederate resolver not configured.',
-        suggestion: 'Ensure schema is built with OlogSchema and PredicateResolverService initialized.',
-        recoverable: false
+        type: 'SchemaError',
+        message: 'No schema predicates available.',
+        suggestion: 'Load data with valid RDF predicates first.',
+        recoverable: true
       }))
     }
-    const result = JSON.parse(schema._nativeResolver.generateFederatedSql(prompt, limit))
-    context._federatedSqlResult = result
+    // Extract keywords from prompt
+    const keywords = extractKeywords(prompt, predicates)
+    // Find ALL matching predicates using tokenized comparison
+    const matches = []
+    for (const keyword of keywords) {
+      const kwLower = keyword.toLowerCase()
+      // Direct index lookup (index has tokenized predicate names)
+      if (predicateIndex.has(kwLower)) {
+        matches.push({ predicate: predicateIndex.get(kwLower), score: 1.0, keyword })
+        continue
+      }
-    if (result.confidence < 0.3) {
-      context._resolutionWarning = {
-        type: 'LowConfidence',
-        confidence: result.confidence,
-        pattern: result.pattern,
-        resolved_predicates: result.resolved_predicates,
-        suggestion: 'Entities/predicates may not exist in schema. Check data population.'
+      // Token-based matching: tokenize predicate and check for substring/exact match
+      for (const pred of predicates) {
+        const localName = pred.split('/').pop().split('#').pop()
+        const tokens = tokenizeIdentifier ? tokenizeIdentifier(localName) : [localName.toLowerCase()]
+        for (const token of tokens) {
+          if (token === kwLower) {
+            matches.push({ predicate: pred, score: 1.0, keyword })
+            break
+          } else if (token.includes(kwLower) || kwLower.includes(token)) {
+            matches.push({ predicate: pred, score: 0.7, keyword })
+            break
+          }
+        }
       }
     }
-    return result.sparql
+    if (matches.length === 0) {
+      throw new Error(JSON.stringify({
+        type: 'NoMatchError',
+        message: `No schema predicates match prompt: "${prompt}"`,
+        keywords,
+        availablePredicates: predicates.slice(0, 10),
+        suggestion: 'Rephrase query using predicates from schema.',
+        recoverable: true
+      }))
+    }
+    // Sort by score and get best match
+    matches.sort((a, b) => b.score - a.score)
+    const bestMatch = matches[0]
+    // Build SPARQL from schema predicate
+    const sparql = `SELECT ?subject ?object WHERE { ?subject <${bestMatch.predicate}> ?object } LIMIT ${limit}`
+    context._matchedPredicate = bestMatch.predicate
+    context._matchConfidence = bestMatch.score
+    context._matchKeyword = bestMatch.keyword
+    context._allMatches = matches.slice(0, 5)
+    return sparql
   }
   /**
@@ -4487,19 +4474,9 @@ class ThinkingReasoner {
     this.contextId = config.contextId || `thinking-${Date.now()}`
     this.actorId = config.actorId || 'hypermind-agent'
-    // NATIVE RUST DELEGATION: Use native ThinkingReasoner for real reasoning
-    // The JavaScript class is a thin wrapper - all heavy lifting in Rust
-    // NativeThinkingReasoner is imported at module level from the native binding
-    if (NativeThinkingReasoner) {
-      try {
-        this._native = new NativeThinkingReasoner()
-        this._hasNative = true
-      } catch (e) {
-        this._hasNative = false
-      }
-    } else {
-      this._hasNative = false
-    }
+    // Native reasoning not available in thin SDK
+    // All reasoning handled via SPARQL and schema-driven approach
+    this._hasNative = false
     // Fallback stores (only used if native not available)
     this.events = []
@@ -5770,7 +5747,37 @@ class HyperMindAgent {
     trace.addStep({ type: 'intent_classification', intent })
     // 3. Generate typed execution plan
-    const plan = this._generatePlan(intent, prompt)
+    let plan
+    try {
+      plan = this._generatePlan(intent, prompt)
+    } catch (planErr) {
+      // Schema-based SPARQL generation failed - try LLM for semantic understanding
+      // This is NOT a fallback, it's the proper path for complex queries
+      if (this.apiKey && this.planner && this.planner.model) {
+        const schema = this.planner._schemaCache || { predicates: [], classes: [] }
+        const llmResult = await this.planner._planWithLLM(prompt, schema, memories)
+        if (llmResult && llmResult.sparql) {
+          // Create plan with LLM-generated SPARQL
+          plan = {
+            id: `plan_llm_${Date.now()}`,
+            intent: llmResult.type || intent.type,
+            steps: [{
+              id: 1,
+              tool: 'kg.sparql.query',
+              args: { sparql: llmResult.sparql }
+            }],
+            type_chain: 'kg.sparql.query',
+            _llmGenerated: true
+          }
+          trace.addStep({ type: 'llm_sparql_generation', sparql: llmResult.sparql })
+        }
+      }
+      // If still no plan, throw the original error
+      if (!plan) {
+        throw planErr
+      }
+    }
     trace.addStep({ type: 'execution_plan', plan })
     // 4. Execute plan in WASM sandbox

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "rust-kgdb",
-  "version": "0.8.22",
+  "version": "0.8.23",
   "description": "High-performance RDF/SPARQL database with AI agent framework and cross-database federation. GraphDB (449ns lookups, 5-11x faster than RDFox), HyperFederate (KGDB + Snowflake + BigQuery), GraphFrames analytics, Datalog reasoning, HNSW vector embeddings. HyperMindAgent for schema-aware query generation with audit trails. W3C SPARQL 1.1 compliant. Native performance via Rust + NAPI-RS.",
   "main": "index.js",
   "types": "index.d.ts",

package/rust-kgdb-napi.darwin-arm64.node CHANGED Viewed

Binary file

package/rust-kgdb-napi.darwin-x64.node CHANGED Viewed

Binary file

package/rust-kgdb-napi.node CHANGED Viewed

Binary file