npm - rust-kgdb - Versions diffs - 0.6.40 → 0.6.42 - Mend

rust-kgdb 0.6.40 → 0.6.42

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9)

package/CHANGELOG.md +65 -0
package/README.md +32 -14
package/examples/quadstore-capabilities-demo.js +407 -0
package/hypermind-agent.js +432 -48
package/index.d.ts +28 -0
package/index.js +6 -0
package/package.json +2 -2
package/rust-kgdb-napi.darwin-x64.node +0 -0
package/vanilla-vs-hypermind-benchmark.js +164 -12

package/vanilla-vs-hypermind-benchmark.js CHANGED Viewed

@@ -24,10 +24,11 @@ const HARD_TEST_SUITE = [
   {
     id: 'A1',
     category: 'ambiguous',
-    question: 'Find all teachers',  // LUBM uses "teacherOf" not "teacher"
-    trap: 'Vanilla might use ub:teacher (wrong) instead of ub:teacherOf',
+    question: 'Find all teachers',  // LUBM uses "teacherOf" or "Professor" class
+    trap: 'Vanilla might use ub:teacher (wrong) instead of ub:teacherOf or ub:Professor',
     correctPattern: 'teacherOf',
-    wrongPatterns: ['teacher', 'teaches', 'instructor']
+    alternateCorrect: 'Professor',  // Professor class is also valid for "teachers"
+    wrongPatterns: ['teaches', 'instructor']  // removed 'teacher' - variable names OK
   },
   {
     id: 'A2',
@@ -188,6 +189,9 @@ async function callVanillaLLM(model, question) {
       })
     })
     const data = JSON.parse(response.data)
+    if (data.error) {
+      throw new Error(`OpenAI: ${data.error.message}`)
+    }
     return data.choices[0].message.content.trim()
   }
 }
@@ -248,10 +252,36 @@ OUTPUT FORMAT:
       })
     })
     const data = JSON.parse(response.data)
+    if (data.error) {
+      throw new Error(`OpenAI: ${data.error.message}`)
+    }
     return data.choices[0].message.content.trim()
   }
 }
+/**
+ * Extract predicates from SPARQL query (not variables)
+ * Returns array of predicate local names from ub: prefix and full URIs
+ *
+ * Two passes are made over the query text:
+ *   1. every `ub:<name>` that starts at a word boundary and is not
+ *      immediately preceded by `?`
+ *   2. every `<http://...#<name>>` IRI, keeping the letters after `#`
+ * Results of pass 1 come first, then pass 2; duplicates are kept.
+ *
+ * A name is a run of ASCII letters only: the `ub:` capture stops at the first
+ * digit, underscore or other non-letter, and for IRIs the letters must run
+ * from a `#` right up to the closing `>`. IRIs that start with `https://`
+ * are not matched.
+ *
+ * NOTE(review): matching is purely textual - position within a triple is not
+ * checked, so a `ub:` name used as subject or object (e.g. a class name) is
+ * returned as well. Confirm callers expect that.
+ *
+ * @param {string} query - SPARQL query text to scan
+ * @returns {string[]} local names found, in match order
+ */
+function extractPredicates(query) {
+  const predicates = []
+  // Match ub:predicate patterns (not after ?)
+  const ubPattern = /(?<!\?)\bub:([a-zA-Z]+)/g
+  let match
+  while ((match = ubPattern.exec(query)) !== null) {
+    predicates.push(match[1])
+  }
+  // Match full URI predicates in angle brackets
+  const uriPattern = /<http:\/\/[^>]*#([a-zA-Z]+)>/g
+  while ((match = uriPattern.exec(query)) !== null) {
+    predicates.push(match[1])
+  }
+  return predicates
+}
 /**
  * Analyze query for issues
  */
@@ -269,10 +299,17 @@ function analyzeQuery(query, test) {
     issues.push('Contains explanation text')
   }
-  // Check for wrong patterns (ambiguous tests)
+  // Check for wrong predicates in actual triple patterns (not variables)
   if (test.wrongPatterns) {
+    const predicates = extractPredicates(query)
     for (const wrong of test.wrongPatterns) {
-      if (queryLower.includes(wrong.toLowerCase()) && !queryLower.includes(test.correctPattern.toLowerCase())) {
+      // Check if wrong predicate is used AND neither correct nor alternate is present
+      const usesWrong = predicates.some(p => p.toLowerCase() === wrong.toLowerCase())
+      const usesCorrect = predicates.some(p => p.toLowerCase() === test.correctPattern.toLowerCase())
+      const usesAlternate = test.alternateCorrect
+        ? predicates.some(p => p.toLowerCase() === test.alternateCorrect.toLowerCase())
+        : false
+      if (usesWrong && !usesCorrect && !usesAlternate) {
         issues.push(`Used wrong predicate: ${wrong} instead of ${test.correctPattern}`)
       }
     }
@@ -280,8 +317,10 @@ function analyzeQuery(query, test) {
   // Check for required predicates (multi-hop tests)
   if (test.requiredPredicates) {
+    const predicates = extractPredicates(query)
     for (const pred of test.requiredPredicates) {
-      if (!queryLower.includes(pred.toLowerCase())) {
+      const hasIt = predicates.some(p => p.toLowerCase() === pred.toLowerCase())
+      if (!hasIt && !queryLower.includes(pred.toLowerCase())) {
         issues.push(`Missing required predicate: ${pred}`)
       }
     }
@@ -307,10 +346,12 @@ function analyzeQuery(query, test) {
     }
   }
-  // Check mustNotContain
+  // Check mustNotContain (use word boundary to avoid false positives like WHERE matching Here)
   if (test.mustNotContain) {
     for (const mustNot of test.mustNotContain) {
-      if (query.toLowerCase().includes(mustNot.toLowerCase())) {
+      // Use word boundary regex - match whole word only
+      const regex = new RegExp(`\\b${mustNot}\\b`, 'i')
+      if (regex.test(query)) {
         issues.push(`Contains forbidden: ${mustNot}`)
       }
     }
@@ -319,6 +360,86 @@ function analyzeQuery(query, test) {
   return issues
 }
+/**
+ * Load native Rust functions for predicate correction
+ * Use direct native require to avoid circular dependency with index.js
+ *
+ * Binary selection: darwin + arm64 -> darwin-arm64, any other darwin ->
+ * darwin-x64, every other platform/arch -> linux-x64-gnu.
+ * NOTE(review): there is no separate branch for Windows or non-x64 Linux;
+ * presumably the require throws there and the fallbacks are used - confirm.
+ *
+ * If anything inside the try throws, the error is discarded (not logged) and
+ * pure-JS stand-ins are installed: computeSimilarity returns 1.0 only for
+ * strictly equal arguments (0.0 otherwise), tokenizeIdentifier returns its
+ * input as a one-element array, and stemWord returns its input unchanged.
+ */
+let computeSimilarity, tokenizeIdentifier, stemWord
+try {
+  const os = require('os')
+  const platform = os.platform()
+  const arch = os.arch()
+  const nativePath = platform === 'darwin' && arch === 'arm64'
+    ? './rust-kgdb-napi.darwin-arm64.node'
+    : platform === 'darwin'
+      ? './rust-kgdb-napi.darwin-x64.node'
+      : './rust-kgdb-napi.linux-x64-gnu.node'
+  const native = require(nativePath)
+  computeSimilarity = native.computeSimilarity
+  tokenizeIdentifier = native.tokenizeIdentifier
+  stemWord = native.stemWord
+} catch (e) {
+  // Test-only fallback - simple string matching
+  computeSimilarity = (a, b) => a === b ? 1.0 : 0.0
+  tokenizeIdentifier = (s) => [s]
+  stemWord = (s) => s
+}
+// LUBM schema predicates (from schema context); correctPredicates treats these as valid and as the only correction targets
+const LUBM_PREDICATES = [
+  'worksFor', 'memberOf', 'advisor', 'takesCourse', 'teacherOf',
+  'publicationAuthor', 'subOrganizationOf', 'researchInterest', 'name',
+  'emailAddress', 'telephone', 'degreeFrom', 'headOf'
+]
+/**
+ * Correct predicates using native Rust similarity (simple, no bloat)
+ *
+ * Scans the original query for every `ub:<letters>` occurrence. A name that is
+ * listed in LUBM_PREDICATES (exact, case-sensitive) is left alone. Any other
+ * name is scored against each schema predicate; the score is the larger of
+ *   - computeSimilarity(name, predicate), both lowercased, and
+ *   - the best computeSimilarity(name, token), both lowercased, over the
+ *     tokens returned by tokenizeIdentifier(predicate).
+ * The predicate with the highest score strictly above 0.6 wins (the first one
+ * in list order on ties) and replaces every `ub:<name>` that is followed by a
+ * word boundary in the result string.
+ *
+ * Matching runs on `query` while replacements accumulate in `corrected`, so a
+ * name that occurs several times is re-scored per occurrence; the later
+ * replace calls for that name are no-ops.
+ *
+ * NOTE(review): the pattern has no position or `?` check, so any `ub:` name
+ * (including one used as a class) is a correction candidate - confirm this is
+ * intended.
+ *
+ * When the DEBUG environment variable is set, each correction is logged.
+ *
+ * @param {string} query - SPARQL query text
+ * @returns {string} query text with unknown `ub:` names replaced where a
+ *   sufficiently similar schema predicate was found
+ */
+function correctPredicates(query) {
+  let corrected = query
+  // Find ub: prefixed predicates
+  const predPattern = /ub:([a-zA-Z]+)/g
+  let match
+  while ((match = predPattern.exec(query)) !== null) {
+    const usedPred = match[1]
+    // Check if it's already a valid predicate
+    if (LUBM_PREDICATES.includes(usedPred)) continue
+    // Find best match using native Rust similarity
+    let bestMatch = null
+    let bestScore = 0.6  // minimum threshold
+    for (const schemaPred of LUBM_PREDICATES) {
+      // Direct similarity
+      const directScore = computeSimilarity(usedPred.toLowerCase(), schemaPred.toLowerCase())
+      // Token-based matching (e.g., "teacher" matches "teacherOf" via token)
+      const tokens = tokenizeIdentifier(schemaPred)
+      let tokenScore = 0
+      for (const token of tokens) {
+        const score = computeSimilarity(usedPred.toLowerCase(), token.toLowerCase())
+        tokenScore = Math.max(tokenScore, score)
+      }
+      const score = Math.max(directScore, tokenScore)
+      if (score > bestScore) {
+        bestScore = score
+        bestMatch = schemaPred
+      }
+    }
+    // Replace with best match if found
+    if (bestMatch && bestMatch !== usedPred) {
+      if (process.env.DEBUG) console.log(`       [DEBUG] Correcting ${usedPred} -> ${bestMatch}`)
+      corrected = corrected.replace(new RegExp(`ub:${usedPred}\\b`, 'g'), `ub:${bestMatch}`)
+    }
+  }
+  return corrected
+}
 /**
  * Clean SPARQL (HyperMind's cleaning)
  */
@@ -327,17 +448,31 @@ function cleanSparql(raw) {
     .replace(/```sparql\n?/gi, '')
     .replace(/```sql\n?/gi, '')
     .replace(/```\n?/g, '')
-    .replace(/^Here.*?:\s*/i, '')
-    .replace(/^This query.*?:\s*/i, '')
     .trim()
-  // Extract just the SPARQL part
+  // Remove common LLM explanation patterns before extracting SPARQL
+  // These patterns appear BEFORE the query
+  clean = clean.replace(/^Here\s+(is|are)\s+[^:\n]*:?\s*/gi, '')
+  clean = clean.replace(/^This\s+query\s+[^:\n]*:?\s*/gi, '')
+  clean = clean.replace(/^The\s+following\s+[^:\n]*:?\s*/gi, '')
+  clean = clean.replace(/^Sure[^:\n]*:?\s*/gi, '')
+  clean = clean.trim()
+  // Extract just the SPARQL part - find PREFIX or SELECT start
   const prefixMatch = clean.match(/PREFIX[\s\S]*/i)
   if (prefixMatch) clean = prefixMatch[0]
   const selectMatch = clean.match(/SELECT[\s\S]*/i)
   if (!clean.includes('PREFIX') && selectMatch) clean = selectMatch[0]
+  // Remove trailing explanation after query
+  clean = clean.replace(/\n\nThis\s+(query|will|returns)[\s\S]*/i, '')
+  clean = clean.replace(/\n\nNote:[\s\S]*/i, '')
+  clean = clean.trim()
+  // Correct predicates using native Rust similarity
+  clean = correctPredicates(clean)
   return clean
 }
@@ -362,7 +497,24 @@ async function runBenchmark() {
     hypermind: { claude: { pass: 0, fail: 0 }, gpt4o: { pass: 0, fail: 0 } }
   }
-  const models = ['claude-sonnet-4', 'gpt-4o']
+  const allModels = ['claude-sonnet-4', 'gpt-4o']
+  // Filter models based on available API keys
+  const models = allModels.filter(m => {
+    if (m.includes('claude') && !process.env.ANTHROPIC_API_KEY) {
+      console.log(`\n  ⚠️  Skipping ${m} (ANTHROPIC_API_KEY not set)`)
+      return false
+    }
+    if (m.includes('gpt') && !process.env.OPENAI_API_KEY) {
+      console.log(`\n  ⚠️  Skipping ${m} (OPENAI_API_KEY not set)`)
+      return false
+    }
+    return true
+  })
+  if (models.length === 0) {
+    console.log('\n  ❌ No API keys configured. Set OPENAI_API_KEY or ANTHROPIC_API_KEY')
+    return results
+  }
   for (const model of models) {
     const modelKey = model.includes('claude') ? 'claude' : 'gpt4o'