npm - rust-kgdb - Versions diffs - 0.6.40 → 0.6.43 - Mend

rust-kgdb 0.6.40 → 0.6.43

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

package/CHANGELOG.md +81 -0
package/README.md +69 -24
package/examples/quadstore-capabilities-demo.js +407 -0
package/hypermind-agent.js +432 -48
package/index.d.ts +28 -0
package/index.js +6 -0
package/package.json +2 -2
package/rust-kgdb-napi.darwin-x64.node +0 -0
package/vanilla-vs-hypermind-benchmark.js +164 -12

package/hypermind-agent.js CHANGED Viewed

@@ -13,6 +13,59 @@
  */
 const crypto = require('crypto')
+const os = require('os')
+// Native Rust FFI for predicate resolution (via NAPI-RS).
+// ALL predicate resolution happens in Rust - no JavaScript duplication.
+// IMPORTANT: Load native binding directly to avoid circular dependency with index.js. Selects the prebuilt ./rust-kgdb-napi.<target>.node file from os.platform()/os.arch() (darwin and linux: x64 or arm64; win32: x64 only), returns the loaded module, and throws Error("Unsupported platform: ...") for any other combination.
+function loadNativeBindingDirect() {
+  const platform = os.platform()
+  const arch = os.arch()
+  let nativeBinding
+  if (platform === 'darwin') {
+    if (arch === 'x64') {
+      nativeBinding = require('./rust-kgdb-napi.darwin-x64.node')
+    } else if (arch === 'arm64') {
+      nativeBinding = require('./rust-kgdb-napi.darwin-arm64.node')
+    }
+  } else if (platform === 'linux') {
+    if (arch === 'x64') {
+      nativeBinding = require('./rust-kgdb-napi.linux-x64-gnu.node')
+    } else if (arch === 'arm64') {
+      nativeBinding = require('./rust-kgdb-napi.linux-arm64-gnu.node')
+    }
+  } else if (platform === 'win32' && arch === 'x64') {
+    nativeBinding = require('./rust-kgdb-napi.win32-x64-msvc.node')
+  }
+  if (!nativeBinding) {
+    throw new Error(`Unsupported platform: ${platform}-${arch}. Please contact support.`)
+  }
+  return nativeBinding
+}
+const native = loadNativeBindingDirect()
+const {
+  OlogSchema,
+  PredicateResolverService,
+  SchemaValidatorService,
+  computeSimilarity,
+  tokenizeIdentifier,
+  stemWord,
+  extractKeywords: nativeExtractKeywords
+} = native
+/**
+ * Extract keywords from a natural language prompt using the native binding.
+ * Thin wrapper: delegates to the native `extractKeywords` export (bound here as
+ * `nativeExtractKeywords`); no keyword or stop-word filtering is done in JavaScript.
+ * A falsy prompt (empty string, null, undefined) short-circuits to [] without
+ * calling into native code.
+ * @param {string} prompt - Natural language prompt
+ * @returns {string[]} Extracted keywords ([] for a falsy prompt)
+ */
+function extractKeywords(prompt) {
+  if (!prompt) return []
+  return nativeExtractKeywords(prompt)
+}
 // ============================================================================
 // CONFIGURATION - All tunable parameters (NO hardcoding)
@@ -1979,7 +2032,13 @@ class LLMPlanner {
   }
   /**
-   * Extract schema from knowledge graph
+   * Extract schema from knowledge graph with pagination
+   *
+   * Improvement over MCP YAML tools:
+   * - No fixed LIMIT - pages through predicates (paging stops once the offset exceeds 10000, setting schema.truncated)
+   * - Schema is used for deterministic query generation
+   * - Enables predicate ranking for accurate matching
+   *
    * @returns {Object} Schema with predicates, classes, examples
    */
   async extractSchema(forceRefresh = false) {
@@ -1990,33 +2049,124 @@ class LLMPlanner {
       return this._schemaCache
     }
-    const schema = { predicates: [], classes: [], examples: [], timestamp: new Date().toISOString() }
+    const schema = {
+      predicates: [],
+      classes: [],
+      examples: [],
+      timestamp: new Date().toISOString(),
+      extractionMethod: 'paginated'  // Track extraction method
+    }
+    const pageSize = CONFIG.schema.maxProperties || 500
     try {
-      // Get unique predicates
-      const predResults = this.kg.querySelect('SELECT DISTINCT ?p WHERE { ?s ?p ?o } LIMIT 200')
-      schema.predicates = predResults.map(r => r.bindings?.p || r.p).filter(Boolean)
-      // Get RDF types
-      const typeResults = this.kg.querySelect(`
-        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
-        SELECT DISTINCT ?type WHERE { ?s rdf:type ?type } LIMIT 100
-      `)
-      schema.classes = typeResults.map(r => r.bindings?.type || r.type).filter(Boolean)
-      // Get sample triples
-      const sampleResults = this.kg.querySelect('SELECT ?s ?p ?o WHERE { ?s ?p ?o } LIMIT 30')
+      // Extract predicates with pagination - NO hard limit
+      const predicateSet = new Set()
+      let offset = 0
+      let hasMore = true
+      while (hasMore) {
+        const query = `SELECT DISTINCT ?p WHERE { ?s ?p ?o } LIMIT ${pageSize} OFFSET ${offset}`
+        const results = this.kg.querySelect(query)
+        if (results.length === 0) {
+          hasMore = false
+        } else {
+          results.forEach(r => {
+            const pred = r.bindings?.p || r.p
+            if (pred) predicateSet.add(pred)
+          })
+          offset += pageSize
+          // Safety limit to prevent infinite loops on very large graphs
+          if (offset > 10000) {
+            hasMore = false
+            schema.truncated = true
+          }
+        }
+      }
+      schema.predicates = Array.from(predicateSet)
+      // Extract classes with pagination
+      const classSet = new Set()
+      offset = 0
+      hasMore = true
+      while (hasMore) {
+        const query = `
+          PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+          SELECT DISTINCT ?type WHERE { ?s rdf:type ?type } LIMIT ${pageSize} OFFSET ${offset}
+        `
+        const results = this.kg.querySelect(query)
+        if (results.length === 0) {
+          hasMore = false
+        } else {
+          results.forEach(r => {
+            const type = r.bindings?.type || r.type
+            if (type) classSet.add(type)
+          })
+          offset += pageSize
+          if (offset > 5000) {
+            hasMore = false
+          }
+        }
+      }
+      schema.classes = Array.from(classSet)
+      // Get sample triples for examples
+      const sampleResults = this.kg.querySelect(
+        `SELECT ?s ?p ?o WHERE { ?s ?p ?o } LIMIT ${CONFIG.schema.maxSamples || 30}`
+      )
       schema.examples = sampleResults.map(r => ({
         s: r.bindings?.s || r.s,
         p: r.bindings?.p || r.p,
         o: r.bindings?.o || r.o
       }))
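+      // Initialize predicate resolver (native Rust; on failure _nativeResolver is set to null). NOTE(review): nativeResolver is not declared in the visible hunks (the bindings are destructured from `native`) - confirm it is defined elsewhere in this file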
+      const threshold = CONFIG.scoring?.similarityThreshold || 0.3
+      if (nativeResolver?.OlogSchema && nativeResolver?.PredicateResolverService) {
+        try {
+          // Build OlogSchema from extracted schema
+          const olog = new nativeResolver.OlogSchema()
+          olog.withNamespace('http://schema.org/')
+          // Add classes
+          for (const cls of (schema.classes || [])) {
+            try {
+              const localName = cls.split('/').pop().split('#').pop()
+              olog.addClass(localName)
+            } catch (e) { /* skip invalid class */ }
+          }
+          // Add properties with aliases extracted from local names
+          for (const prop of (schema.predicates || [])) {
+            try {
+              const localName = prop.split('/').pop().split('#').pop()
+              // Generate aliases from tokenized form
+              const tokens = nativeResolver.tokenizeIdentifier(localName)
+              const aliases = tokens.length > 1 ? [tokens.join(''), tokens.join('_')] : []
+              olog.addProperty(localName, 'Thing', 'Thing', aliases)
+            } catch (e) { /* skip invalid property */ }
+          }
+          olog.build()
+          schema._nativeResolver = new nativeResolver.PredicateResolverService(olog, threshold)
+          schema._nativeOlog = olog
+        } catch (e) {
+          // Fallback to JS ranker on error
+          schema._nativeResolver = null
+        }
+      }
     } catch (err) {
       schema.error = err.message
     }
     this._schemaCache = schema
-    this._schemaCacheExpiry = now + 5 * 60 * 1000  // 5 minute cache
+    this._schemaCacheExpiry = now + CONFIG.schema.cacheExpiryMs
     return schema
   }
@@ -2368,29 +2518,132 @@ Intent types: detect_fraud, find_similar, explain, find_patterns, aggregate, gen
     return steps
   }
+  /**
+   * Generate a SPARQL query string using schema-aware predicate ranking.
+   *
+   * Resolution order (first match wins):
+   * 1. context.sparql, returned verbatim when set
+   * 2. intent.aggregate: a COUNT(*) query over all triples
+   * 3. Best predicate ranked against the lowercased context.originalPrompt
+   *    (ranking threshold 0.3, used only when its score is >= 0.5); a local name
+   *    matching the numeric-name regex yields an ORDER BY DESC query, anything
+   *    else a subject/object listing
+   * 4. intent.query or intent.compliance: top predicate ranked against
+   *    'type class' (threshold 0.4)
+   * 5. Fallback: sample triples
+   *
+   * Improvement over MCP YAML tools:
+   * - Uses ranked similarity matching (_findRelevantPredicatesRanked) for predicates
+   * - No hardcoded domain predicate mappings (a fixed name regex is still used
+   *   to guess numeric properties)
+   * - Only predicates taken from schema.predicates are placed in generated queries
+   *
+   * Generated queries are capped with CONFIG.query.defaultLimit, except the
+   * aggregate query, which has no LIMIT.
+   *
+   * @private
+   * @param {Object} intent - Intent flags; reads aggregate, query, compliance
+   * @param {Object} schema - Schema context; reads schema.predicates
+   * @param {Object} context - Reads context.sparql and context.originalPrompt
+   * @returns {string} SPARQL query text (no confidence score is returned)
+   */
   _generateSchemaSparql(intent, schema, context) {
-    // Use schema-aware SPARQL generation
+    // Use explicit SPARQL if provided
     if (context.sparql) return context.sparql
-    // Check if schema has relevant predicates
     const predicates = schema.predicates || []
+    const prompt = context.originalPrompt || ''
+    // Aggregate queries don't need specific predicates
     if (intent.aggregate) {
       return 'SELECT (COUNT(*) as ?count) WHERE { ?s ?p ?o }'
     }
-    // Try to match predicates based on intent
-    const riskPreds = predicates.filter(p => p.toLowerCase().includes('risk') || p.toLowerCase().includes('score'))
-    const typePreds = predicates.filter(p => p.includes('type') || p.includes('Type'))
+    // Use ranker to find relevant predicates from prompt
+    const rankedPreds = this._findRelevantPredicatesRanked
+      ? this._findRelevantPredicatesRanked(prompt.toLowerCase(), predicates, { threshold: 0.3 })
+      : []
+    // If we have high-confidence predicate matches, use them
+    if (rankedPreds.length > 0 && rankedPreds[0].score >= 0.5) {
+      const bestPred = rankedPreds[0]
+      // Check if it looks like a numeric property (for ordering)
+      const localName = bestPred.localName || ''
+      const isNumeric = /score|amount|value|count|total|number|rank|rating|level|degree/i.test(localName)
+      if (isNumeric) {
+        return `SELECT ?s ?value WHERE { ?s <${bestPred.predicate}> ?value } ORDER BY DESC(?value) LIMIT ${CONFIG.query.defaultLimit}`
+      }
+      // Object property - return subject-object pairs
+      return `SELECT ?s ?o WHERE { ?s <${bestPred.predicate}> ?o } LIMIT ${CONFIG.query.defaultLimit}`
+    }
+    // If we have type-related predicates, use for class queries
+    if (intent.query || intent.compliance) {
+      const typePredsRanked = this._findRelevantPredicatesRanked
+        ? this._findRelevantPredicatesRanked('type class', predicates, { threshold: 0.4 })
+        : []
-    if (intent.pattern || intent.rank) {
-      if (riskPreds.length > 0) {
-        return `SELECT ?s ?score WHERE { ?s <${riskPreds[0]}> ?score } ORDER BY DESC(?score) LIMIT 100`
+      if (typePredsRanked.length > 0) {
+        return `SELECT ?s ?type WHERE { ?s <${typePredsRanked[0].predicate}> ?type } LIMIT ${CONFIG.query.defaultLimit}`
       }
     }
-    // Default: return all triples
-    return 'SELECT ?s ?p ?o WHERE { ?s ?p ?o } LIMIT 100'
+    // Default: return sample triples
+    return `SELECT ?s ?p ?o WHERE { ?s ?p ?o } LIMIT ${CONFIG.query.defaultLimit}`
+  }
+  /**
+   * Check the IRIs referenced in a SPARQL query against the schema's predicates.
+   *
+   * Every <...> token in the query text is treated as a predicate candidate,
+   * whatever position it occupies; prefixed names (prefix:local) are not parsed.
+   * IRIs not found in schema.predicates only produce warnings. Nothing in this
+   * method pushes to `errors`, so `valid` is always true as written.
+   * A falsy query or a schema without predicates returns the empty, valid result.
+   *
+   * @param {string} sparql - SPARQL query string
+   * @param {Object} schema - Schema context with predicates
+   * @returns {Object} { valid: boolean, errors: [], warnings: [], predicatesUsed: [] };
+   *   each warning is { predicate, message } plus `suggestion` when a close match exists
+   */
+  _validateQueryPredicates(sparql, schema) {
+    const result = { valid: true, errors: [], warnings: [], predicatesUsed: [] }
+    if (!sparql || !schema?.predicates) return result
+    const predicateSet = new Set(schema.predicates)
+    // Extract URIs from query (simple regex - matches <uri> tokens only, not prefix:local)
+    const uriPattern = /<([^>]+)>/g
+    let match
+    while ((match = uriPattern.exec(sparql)) !== null) {
+      const uri = match[1]
+      // Skip well-known vocabulary URIs (anything under w3.org or xmlns.com, plus rdf-syntax-ns / rdf-schema)
+      if (uri.startsWith('http://www.w3.org/') ||
+          uri.startsWith('http://xmlns.com/') ||
+          uri.includes('rdf-syntax-ns') ||
+          uri.includes('rdf-schema')) {
+        continue
+      }
+      result.predicatesUsed.push(uri)
+      // Check if this predicate exists in schema (bare or wrapped in angle brackets)
+      if (!predicateSet.has(uri) && !predicateSet.has(`<${uri}>`)) {
+        // Try fuzzy match on lowercased local names using native Rust similarity (no JS fallback)
+        let bestMatch = null
+        let bestScore = 0.8  // threshold: a suggestion must score strictly above this
+        {
+          const uriLocalName = uri.split('/').pop().split('#').pop().toLowerCase()
+          for (const pred of schema.predicates) {
+            const predLocalName = pred.split('/').pop().split('#').pop().toLowerCase()
+            const score = computeSimilarity(uriLocalName, predLocalName)
+            if (score > bestScore) {
+              bestScore = score
+              bestMatch = { predicate: pred, score }
+            }
+          }
+        }
+        if (bestMatch) {
+          result.warnings.push({
+            predicate: uri,
+            message: `Predicate not in schema. Did you mean: ${bestMatch.predicate}?`,
+            suggestion: bestMatch.predicate
+          })
+        } else {
+          result.warnings.push({
+            predicate: uri,
+            message: `Predicate not found in schema: ${uri}`
+          })
+        }
+      }
+    }
+    // If we have errors, mark as invalid (currently never true: no code path above adds errors)
+    if (result.errors.length > 0) {
+      result.valid = false
+    }
+    return result
   }
   // ============================================================================
@@ -2503,42 +2756,173 @@ Intent types: detect_fraud, find_similar, explain, find_patterns, aggregate, gen
   /**
    * Find predicates from schema that match the text intent
+   *
+   * Improvement over MCP YAML tools:
+   * - NO hardcoded domain mappings (works with ANY ontology)
+   * - Uses ensemble similarity (Jaro-Winkler, N-gram, token overlap)
+   * - Returns matches ordered by score, best first (the scores themselves are dropped)
+   * - Generic: same algorithm works for LUBM, fraud, social, etc.
+   *
    * @private
+   * @param {string} textLower - Natural language text (lowercase)
+   * @param {string[]} predicates - Schema predicates
+   * @param {Object} options - Options { threshold, maxResults }; threshold defaults to CONFIG.scoring.similarityThreshold, else 0.3; maxResults defaults to 5
+   * @returns {string[]} Up to maxResults predicate URIs, highest score first ([] when there are no predicates or no keywords)
    */
-  _findRelevantPredicates(textLower, predicates) {
-    const keywords = textLower.split(/\s+/)
-    const matches = []
+  _findRelevantPredicates(textLower, predicates, options = {}) {
+    if (!predicates || predicates.length === 0) return []
+    const threshold = options.threshold ?? CONFIG.scoring?.similarityThreshold ?? 0.3
+    const maxResults = options.maxResults ?? 5
+    // Extract meaningful keywords (generic - no domain-specific stopwords)
+    const keywords = extractKeywords(textLower)
+    if (keywords.length === 0) return []
+    // Use native Rust similarity with stemming and tokenization
+    // Multi-method ranking: direct + stemmed + token-based
+    const allMatches = new Map()  // predicate -> { predicate, score, localName }; keeps the best score seen per predicate
+    for (const keyword of keywords) {
+      // Stem the keyword once
+      const stemmedKeyword = stemWord(keyword)
+      for (const pred of predicates) {
+        // Extract local name from predicate URI
+        const localName = pred.split('/').pop().split('#').pop()
+        const localNameLower = localName.toLowerCase()
+        // Method 1: Direct string similarity
+        const directScore = computeSimilarity(keyword, localNameLower)
+        // Method 2: Stemmed similarity
+        const stemmedLocalName = stemWord(localNameLower)
+        const stemmedScore = computeSimilarity(stemmedKeyword, stemmedLocalName)
+        // Method 3: Token-based matching (CamelCase/snake_case decomposition)
+        const tokens = tokenizeIdentifier(localName)
+        let tokenScore = 0
+        for (const token of tokens) {
+          const tokenLower = token.toLowerCase()
+          const directTokenScore = computeSimilarity(keyword, tokenLower)
+          const stemmedTokenScore = computeSimilarity(stemmedKeyword, stemWord(tokenLower))
+          tokenScore = Math.max(tokenScore, directTokenScore, stemmedTokenScore)
+        }
-    // Pattern-specific keyword mappings
-    const keywordMappings = {
-      payment: ['transfer', 'paid', 'pay', 'payment', 'amount', 'transaction'],
-      fraud: ['claim', 'risk', 'flag', 'suspicious', 'alert'],
-      social: ['knows', 'friend', 'follows', 'connected', 'related'],
-      org: ['works', 'manages', 'reports', 'employs', 'member'],
-      product: ['purchase', 'buy', 'order', 'sell', 'owns']
+        // Take the best score from all methods
+        const bestScore = Math.max(directScore, stemmedScore, tokenScore)
+        if (bestScore >= threshold) {
+          const existing = allMatches.get(pred)
+          if (!existing || bestScore > existing.score) {
+            allMatches.set(pred, { predicate: pred, score: bestScore, localName })
+          }
+        }
+      }
     }
+    // Also try full text match (for compound queries)
     for (const pred of predicates) {
-      const predLower = pred.toLowerCase()
+      const localName = pred.split('/').pop().split('#').pop()
+      const localNameLower = localName.toLowerCase()
-      // Direct match
-      if (keywords.some(kw => predLower.includes(kw) || kw.includes(predLower))) {
-        matches.push(pred)
-        continue
+      // Direct full text
+      const directScore = computeSimilarity(textLower, localNameLower)
+      // Stemmed full text (recomputed for every predicate although it does not depend on pred)
+      const stemmedText = textLower.split(/\s+/).map(w => stemWord(w)).join(' ')
+      const stemmedLocal = stemWord(localNameLower)
+      const stemmedScore = computeSimilarity(stemmedText, stemmedLocal)
+      const bestScore = Math.max(directScore, stemmedScore)
+      if (bestScore >= threshold) {
+        const existing = allMatches.get(pred)
+        if (!existing || bestScore > existing.score) {
+          allMatches.set(pred, { predicate: pred, score: bestScore, localName })
+        }
       }
+    }
-      // Keyword mapping match
-      for (const [category, mappedWords] of Object.entries(keywordMappings)) {
-        if (keywords.some(kw => category.includes(kw) || kw.includes(category))) {
-          if (mappedWords.some(mw => predLower.includes(mw))) {
-            matches.push(pred)
-            break
+    // Sort by score and return top matches
+    const sorted = Array.from(allMatches.values())
+      .sort((a, b) => b.score - a.score)
+      .slice(0, maxResults)
+    // Return just predicate URIs for backward compatibility
+    // (callers expect string[] not object[])
+    return sorted.map(m => m.predicate)
+  }
+  /**
+   * Find predicates with full ranking info (for advanced use)
+   * Uses native Rust ensemble similarity with stemming and tokenization
+   *
+   * Algorithm:
+   * 1. Direct similarity: keyword vs localName (Jaro-Winkler + Levenshtein + N-gram)
+   * 2. Stemmed similarity: stem(keyword) vs stem(localName) - handles "professor" → "profess"
+   * 3. Token similarity: keyword vs each token of CamelCase/snake_case name
+   *
+   * Final score = max(direct, stemmed, tokenMatch) - takes best match method
+   *
+   * Each predicate keeps the best score reached by any keyword. Results are not
+   * capped (no maxResults), and an empty keyword list yields [].
+   *
+   * @private
+   * @param {string} textLower - Natural language text; keywords are extracted from it
+   * @param {string[]} predicates - Schema predicates
+   * @param {Object} options - Options { threshold }; defaults to CONFIG.scoring.similarityThreshold, else 0.3
+   * @returns {Array} Entries { predicate, score, localName, matchMethod, tokens } sorted by
+   *   descending score; matchMethod is 'direct', 'stemmed' or 'token' (checked in that order on ties)
+   */
+  _findRelevantPredicatesRanked(textLower, predicates, options = {}) {
+    if (!predicates || predicates.length === 0) return []
+    const threshold = options.threshold ?? CONFIG.scoring?.similarityThreshold ?? 0.3
+    const keywords = extractKeywords(textLower)
+    // Use native Rust similarity with stemming and tokenization
+    const allMatches = new Map()
+    for (const keyword of keywords) {
+      // Stem the keyword once
+      const stemmedKeyword = stemWord(keyword)
+      for (const pred of predicates) {
+        const localName = pred.split('/').pop().split('#').pop()
+        const localNameLower = localName.toLowerCase()
+        // Method 1: Direct string similarity
+        const directScore = computeSimilarity(keyword, localNameLower)
+        // Method 2: Stemmed similarity (handles "professor" vs "fullProfessor")
+        const stemmedLocalName = stemWord(localNameLower)
+        const stemmedScore = computeSimilarity(stemmedKeyword, stemmedLocalName)
+        // Method 3: Token-based matching (CamelCase/snake_case decomposition)
+        // "fullProfessor" → ["full", "professor"]
+        const tokens = tokenizeIdentifier(localName)
+        let tokenScore = 0
+        for (const token of tokens) {
+          const tokenLower = token.toLowerCase()
+          const directTokenScore = computeSimilarity(keyword, tokenLower)
+          const stemmedTokenScore = computeSimilarity(stemmedKeyword, stemWord(tokenLower))
+          tokenScore = Math.max(tokenScore, directTokenScore, stemmedTokenScore)
+        }
+        // Take the best score from all methods
+        const bestScore = Math.max(directScore, stemmedScore, tokenScore)
+        if (bestScore >= threshold) {
+          const existing = allMatches.get(pred)
+          if (!existing || bestScore > existing.score) {
+            allMatches.set(pred, {
+              predicate: pred,
+              score: bestScore,
+              localName,
+              matchMethod: bestScore === directScore ? 'direct' :
+                          bestScore === stemmedScore ? 'stemmed' : 'token',
+              tokens
+            })
           }
         }
       }
     }
-    return matches
+    return Array.from(allMatches.values())
+      .sort((a, b) => b.score - a.score)
   }
   /**

package/index.d.ts CHANGED Viewed

@@ -2170,3 +2170,31 @@ export function tokenizeIdentifier(identifier: string): string[]
  * ```
  */
 export function stemWord(word: string): string
+/**
+ * Extract keywords from natural language text.
+ *
+ * Uses tokenization without hardcoded stop words.
+ * Ensemble similarity scoring naturally downweights generic words.
+ *
+ * Reference: Native Rust implementation in hypermind-tools
+ *
+ * NOTE(review): the examples below omit 'all' and 'Get', which reads like
+ * stop-word filtering and conflicts with the statement above - confirm the
+ * actual output against the native implementation.
+ *
+ * @param text - Natural language text
+ * @returns Array of extracted keywords
+ *
+ * @example
+ * ```typescript
+ * extractKeywords('Find all teachers')           // ['find', 'teachers']
+ * extractKeywords('Get student email addresses') // ['student', 'email', 'addresses']
+ * ```
+ */
+export function extractKeywords(text: string): string[]
+// =============================================================================
+// NOTE: Query Memory Store, Hybrid Reranker, and Trigger System
+// have been moved to Rust core accessed via HyperAgentProxy/WASM runtime.
+// SDK remains thin - heavy logic stays in Rust core.
+// See: crates/hypermind-runtime/src/memory/query_store.rs
+// See: crates/hypermind-runtime/src/memory/reranker.rs
+// See: crates/embeddings/src/trigger/
+// =============================================================================

package/index.js CHANGED Viewed

@@ -59,6 +59,9 @@ const {
   computeSimilarity,
   tokenizeIdentifier,
   stemWord,
+  extractKeywords,
+  // NOTE: QueryMemoryStore, HybridReranker, TriggerManager moved to Rust core
+  // Access via HyperAgentProxy/WASM runtime (SDK remains thin)
 } = loadNativeBinding()
 // HyperMind Agentic Framework
@@ -178,4 +181,7 @@ module.exports = {
   computeSimilarity,        // Ensemble string similarity
   tokenizeIdentifier,       // CamelCase/snake_case tokenization
   stemWord,                 // Porter Stemmer
+  extractKeywords,          // Keyword extraction from natural language
+  // NOTE: QueryMemoryStore, HybridReranker, TriggerManager moved to Rust core
+  // Access via HyperAgentProxy/WASM runtime (SDK remains thin)
 }

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "rust-kgdb",
-  "version": "0.6.40",
-  "description": "Neuro-Symbolic AI Framework: 85.7% accuracy on LUBM benchmark (+14.3pp over schema injection alone). Schema-aware predicate resolution using grammar-based parsing. Features: GraphDB (449ns lookups, 2.2M ops/sec, 156K inserts/sec), HyperMindAgent with audit trail, Datalog reasoning, GraphFrames analytics. W3C SPARQL 1.1 compliant. Benchmarked on Intel i9-9980HK with BSBM/LDBC methodology.",
+  "version": "0.6.43",
+  "description": "High-performance RDF/SPARQL database with AI agent framework. GraphDB (449ns lookups, 35x faster than RDFox), GraphFrames analytics (PageRank, motifs), Datalog reasoning, HNSW vector embeddings. HyperMindAgent for schema-aware query generation with audit trails. W3C SPARQL 1.1 compliant. Native performance via Rust + NAPI-RS.",
   "main": "index.js",
   "types": "index.d.ts",
   "napi": {

package/rust-kgdb-napi.darwin-x64.node CHANGED Viewed

Binary file