rust-kgdb 0.6.40 → 0.6.43

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -13,6 +13,59 @@
13
13
  */
14
14
 
15
15
  const crypto = require('crypto')
16
+ const os = require('os')
17
+
// Predicate resolution is delegated to the native Rust addon (NAPI-RS); it is
// not re-implemented in JavaScript.
// The platform binary is required here directly, instead of going through
// index.js, to avoid a circular dependency with index.js.

/**
 * Load the prebuilt native addon that matches the current OS and CPU.
 *
 * Every entry is a thunk around a literal require(), so only the binary for
 * the running platform is ever resolved.
 *
 * @returns {Object} Exports of the platform-specific `.node` binding
 * @throws {Error} When no binding is listed for this platform/arch pair
 */
function loadNativeBindingDirect() {
  const platform = os.platform()
  const arch = os.arch()

  const loaders = {
    'darwin-x64': () => require('./rust-kgdb-napi.darwin-x64.node'),
    'darwin-arm64': () => require('./rust-kgdb-napi.darwin-arm64.node'),
    'linux-x64': () => require('./rust-kgdb-napi.linux-x64-gnu.node'),
    'linux-arm64': () => require('./rust-kgdb-napi.linux-arm64-gnu.node'),
    'win32-x64': () => require('./rust-kgdb-napi.win32-x64-msvc.node')
  }

  const load = loaders[`${platform}-${arch}`]
  const nativeBinding = load ? load() : undefined

  if (!nativeBinding) {
    throw new Error(`Unsupported platform: ${platform}-${arch}. Please contact support.`)
  }
  return nativeBinding
}
47
+
48
+ const native = loadNativeBindingDirect()
49
+ const {
50
+ OlogSchema,
51
+ PredicateResolverService,
52
+ SchemaValidatorService,
53
+ computeSimilarity,
54
+ tokenizeIdentifier,
55
+ stemWord,
56
+ extractKeywords: nativeExtractKeywords
57
+ } = native
58
+
/**
 * Turn a natural-language prompt into keywords using the native binding.
 *
 * A falsy prompt (empty string, null, undefined) short-circuits to an empty
 * list without crossing into native code. No stop-word filtering is applied
 * on the JavaScript side; the native `extractKeywords` export does all the work.
 *
 * @param {string} prompt - Natural language prompt
 * @returns {string[]} Extracted keywords
 */
function extractKeywords(prompt) {
  return prompt ? nativeExtractKeywords(prompt) : []
}
16
69
 
17
70
  // ============================================================================
18
71
  // CONFIGURATION - All tunable parameters (NO hardcoding)
@@ -1979,7 +2032,13 @@ class LLMPlanner {
1979
2032
  }
1980
2033
 
1981
2034
  /**
1982
- * Extract schema from knowledge graph
2035
+ * Extract schema from knowledge graph with pagination
2036
+ *
2037
+ * Improvement over MCP YAML tools:
2038
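+ * - Pages through results instead of one fixed LIMIT query; safety caps (offset > 10000 for predicates, > 5000 for classes) still bound extraction, and only the predicate cap sets schema.truncated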
2039
+ * - Schema is used for deterministic query generation
2040
+ * - Enables predicate ranking for accurate matching
2041
+ *
1983
2042
  * @returns {Object} Schema with predicates, classes, examples
1984
2043
  */
1985
2044
  async extractSchema(forceRefresh = false) {
@@ -1990,33 +2049,124 @@ class LLMPlanner {
1990
2049
  return this._schemaCache
1991
2050
  }
1992
2051
 
1993
- const schema = { predicates: [], classes: [], examples: [], timestamp: new Date().toISOString() }
2052
+ const schema = {
2053
+ predicates: [],
2054
+ classes: [],
2055
+ examples: [],
2056
+ timestamp: new Date().toISOString(),
2057
+ extractionMethod: 'paginated' // Track extraction method
2058
+ }
2059
+
2060
+ const pageSize = CONFIG.schema.maxProperties || 500
1994
2061
 
1995
2062
  try {
1996
- // Get unique predicates
1997
- const predResults = this.kg.querySelect('SELECT DISTINCT ?p WHERE { ?s ?p ?o } LIMIT 200')
1998
- schema.predicates = predResults.map(r => r.bindings?.p || r.p).filter(Boolean)
1999
-
2000
- // Get RDF types
2001
- const typeResults = this.kg.querySelect(`
2002
- PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
2003
- SELECT DISTINCT ?type WHERE { ?s rdf:type ?type } LIMIT 100
2004
- `)
2005
- schema.classes = typeResults.map(r => r.bindings?.type || r.type).filter(Boolean)
2006
-
2007
- // Get sample triples
2008
- const sampleResults = this.kg.querySelect('SELECT ?s ?p ?o WHERE { ?s ?p ?o } LIMIT 30')
2063
+ // Extract predicates with pagination - NO hard limit
2064
+ const predicateSet = new Set()
2065
+ let offset = 0
2066
+ let hasMore = true
2067
+
2068
+ while (hasMore) {
2069
+ const query = `SELECT DISTINCT ?p WHERE { ?s ?p ?o } LIMIT ${pageSize} OFFSET ${offset}`
2070
+ const results = this.kg.querySelect(query)
2071
+
2072
+ if (results.length === 0) {
2073
+ hasMore = false
2074
+ } else {
2075
+ results.forEach(r => {
2076
+ const pred = r.bindings?.p || r.p
2077
+ if (pred) predicateSet.add(pred)
2078
+ })
2079
+ offset += pageSize
2080
+
2081
+ // Safety limit to prevent infinite loops on very large graphs
2082
+ if (offset > 10000) {
2083
+ hasMore = false
2084
+ schema.truncated = true
2085
+ }
2086
+ }
2087
+ }
2088
+ schema.predicates = Array.from(predicateSet)
2089
+
2090
+ // Extract classes with pagination
2091
+ const classSet = new Set()
2092
+ offset = 0
2093
+ hasMore = true
2094
+
2095
+ while (hasMore) {
2096
+ const query = `
2097
+ PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
2098
+ SELECT DISTINCT ?type WHERE { ?s rdf:type ?type } LIMIT ${pageSize} OFFSET ${offset}
2099
+ `
2100
+ const results = this.kg.querySelect(query)
2101
+
2102
+ if (results.length === 0) {
2103
+ hasMore = false
2104
+ } else {
2105
+ results.forEach(r => {
2106
+ const type = r.bindings?.type || r.type
2107
+ if (type) classSet.add(type)
2108
+ })
2109
+ offset += pageSize
2110
+
2111
+ if (offset > 5000) {
2112
+ hasMore = false
2113
+ }
2114
+ }
2115
+ }
2116
+ schema.classes = Array.from(classSet)
2117
+
2118
+ // Get sample triples for examples
2119
+ const sampleResults = this.kg.querySelect(
2120
+ `SELECT ?s ?p ?o WHERE { ?s ?p ?o } LIMIT ${CONFIG.schema.maxSamples || 30}`
2121
+ )
2009
2122
  schema.examples = sampleResults.map(r => ({
2010
2123
  s: r.bindings?.s || r.s,
2011
2124
  p: r.bindings?.p || r.p,
2012
2125
  o: r.bindings?.o || r.o
2013
2126
  }))
2127
+
2128
+ // Initialize predicate resolver (native Rust or JS fallback)
2129
+ const threshold = CONFIG.scoring?.similarityThreshold || 0.3
2130
+ if (nativeResolver?.OlogSchema && nativeResolver?.PredicateResolverService) {
2131
+ try {
2132
+ // Build OlogSchema from extracted schema
2133
+ const olog = new nativeResolver.OlogSchema()
2134
+ olog.withNamespace('http://schema.org/')
2135
+
2136
+ // Add classes
2137
+ for (const cls of (schema.classes || [])) {
2138
+ try {
2139
+ const localName = cls.split('/').pop().split('#').pop()
2140
+ olog.addClass(localName)
2141
+ } catch (e) { /* skip invalid class */ }
2142
+ }
2143
+
2144
+ // Add properties with aliases extracted from local names
2145
+ for (const prop of (schema.predicates || [])) {
2146
+ try {
2147
+ const localName = prop.split('/').pop().split('#').pop()
2148
+ // Generate aliases from tokenized form
2149
+ const tokens = nativeResolver.tokenizeIdentifier(localName)
2150
+ const aliases = tokens.length > 1 ? [tokens.join(''), tokens.join('_')] : []
2151
+ olog.addProperty(localName, 'Thing', 'Thing', aliases)
2152
+ } catch (e) { /* skip invalid property */ }
2153
+ }
2154
+
2155
+ olog.build()
2156
+ schema._nativeResolver = new nativeResolver.PredicateResolverService(olog, threshold)
2157
+ schema._nativeOlog = olog
2158
+ } catch (e) {
2159
+ // Fallback to JS ranker on error
2160
+ schema._nativeResolver = null
2161
+ }
2162
+ }
2163
+
2014
2164
  } catch (err) {
2015
2165
  schema.error = err.message
2016
2166
  }
2017
2167
 
2018
2168
  this._schemaCache = schema
2019
- this._schemaCacheExpiry = now + 5 * 60 * 1000 // 5 minute cache
2169
+ this._schemaCacheExpiry = now + CONFIG.schema.cacheExpiryMs
2020
2170
  return schema
2021
2171
  }
2022
2172
 
@@ -2368,29 +2518,132 @@ Intent types: detect_fraud, find_similar, explain, find_patterns, aggregate, gen
2368
2518
  return steps
2369
2519
  }
2370
2520
 
2521
+ /**
2522
+ * Generate SPARQL query using schema-aware predicate ranking
2523
+ *
2524
+ * Improvement over MCP YAML tools:
2525
+ * - Uses ensemble similarity for predicate matching
2526
+ * - NO hardcoded domain keywords
2527
+ * - Validates predicates exist in schema before using
2528
+ * - Returns query with confidence score
2529
+ *
2530
+ * @private
2531
+ */
2371
2532
  _generateSchemaSparql(intent, schema, context) {
2372
- // Use schema-aware SPARQL generation
2533
+ // Use explicit SPARQL if provided
2373
2534
  if (context.sparql) return context.sparql
2374
2535
 
2375
- // Check if schema has relevant predicates
2376
2536
  const predicates = schema.predicates || []
2537
+ const prompt = context.originalPrompt || ''
2377
2538
 
2539
+ // Aggregate queries don't need specific predicates
2378
2540
  if (intent.aggregate) {
2379
2541
  return 'SELECT (COUNT(*) as ?count) WHERE { ?s ?p ?o }'
2380
2542
  }
2381
2543
 
2382
- // Try to match predicates based on intent
2383
- const riskPreds = predicates.filter(p => p.toLowerCase().includes('risk') || p.toLowerCase().includes('score'))
2384
- const typePreds = predicates.filter(p => p.includes('type') || p.includes('Type'))
2544
+ // Use ranker to find relevant predicates from prompt
2545
+ const rankedPreds = this._findRelevantPredicatesRanked
2546
+ ? this._findRelevantPredicatesRanked(prompt.toLowerCase(), predicates, { threshold: 0.3 })
2547
+ : []
2548
+
2549
+ // If we have high-confidence predicate matches, use them
2550
+ if (rankedPreds.length > 0 && rankedPreds[0].score >= 0.5) {
2551
+ const bestPred = rankedPreds[0]
2552
+
2553
+ // Check if it looks like a numeric property (for ordering)
2554
+ const localName = bestPred.localName || ''
2555
+ const isNumeric = /score|amount|value|count|total|number|rank|rating|level|degree/i.test(localName)
2556
+
2557
+ if (isNumeric) {
2558
+ return `SELECT ?s ?value WHERE { ?s <${bestPred.predicate}> ?value } ORDER BY DESC(?value) LIMIT ${CONFIG.query.defaultLimit}`
2559
+ }
2560
+
2561
+ // Object property - return subject-object pairs
2562
+ return `SELECT ?s ?o WHERE { ?s <${bestPred.predicate}> ?o } LIMIT ${CONFIG.query.defaultLimit}`
2563
+ }
2564
+
2565
+ // If we have type-related predicates, use for class queries
2566
+ if (intent.query || intent.compliance) {
2567
+ const typePredsRanked = this._findRelevantPredicatesRanked
2568
+ ? this._findRelevantPredicatesRanked('type class', predicates, { threshold: 0.4 })
2569
+ : []
2385
2570
 
2386
- if (intent.pattern || intent.rank) {
2387
- if (riskPreds.length > 0) {
2388
- return `SELECT ?s ?score WHERE { ?s <${riskPreds[0]}> ?score } ORDER BY DESC(?score) LIMIT 100`
2571
+ if (typePredsRanked.length > 0) {
2572
+ return `SELECT ?s ?type WHERE { ?s <${typePredsRanked[0].predicate}> ?type } LIMIT ${CONFIG.query.defaultLimit}`
2389
2573
  }
2390
2574
  }
2391
2575
 
2392
- // Default: return all triples
2393
- return 'SELECT ?s ?p ?o WHERE { ?s ?p ?o } LIMIT 100'
2576
+ // Default: return sample triples
2577
+ return `SELECT ?s ?p ?o WHERE { ?s ?p ?o } LIMIT ${CONFIG.query.defaultLimit}`
2578
+ }
2579
+
2580
+ /**
2581
+ * Validate that a SPARQL query only uses predicates from schema
2582
+ *
2583
+ * @param {string} sparql - SPARQL query string
2584
+ * @param {Object} schema - Schema context with predicates
2585
+ * @returns {Object} { valid: boolean, errors: [], warnings: [] }
2586
+ */
2587
+ _validateQueryPredicates(sparql, schema) {
2588
+ const result = { valid: true, errors: [], warnings: [], predicatesUsed: [] }
2589
+ if (!sparql || !schema?.predicates) return result
2590
+
2591
+ const predicateSet = new Set(schema.predicates)
2592
+
2593
+ // Extract URIs from query (simple regex - handles <uri> and prefix:local)
2594
+ const uriPattern = /<([^>]+)>/g
2595
+ let match
2596
+ while ((match = uriPattern.exec(sparql)) !== null) {
2597
+ const uri = match[1]
2598
+ // Skip common RDF/RDFS/OWL URIs
2599
+ if (uri.startsWith('http://www.w3.org/') ||
2600
+ uri.startsWith('http://xmlns.com/') ||
2601
+ uri.includes('rdf-syntax-ns') ||
2602
+ uri.includes('rdf-schema')) {
2603
+ continue
2604
+ }
2605
+
2606
+ result.predicatesUsed.push(uri)
2607
+
2608
+ // Check if this predicate exists in schema
2609
+ if (!predicateSet.has(uri) && !predicateSet.has(`<${uri}>`)) {
2610
+ // Try fuzzy match using native Rust similarity (no JS fallback)
2611
+ let bestMatch = null
2612
+ let bestScore = 0.8 // threshold
2613
+
2614
+ {
2615
+ const uriLocalName = uri.split('/').pop().split('#').pop().toLowerCase()
2616
+ for (const pred of schema.predicates) {
2617
+ const predLocalName = pred.split('/').pop().split('#').pop().toLowerCase()
2618
+ const score = computeSimilarity(uriLocalName, predLocalName)
2619
+ if (score > bestScore) {
2620
+ bestScore = score
2621
+ bestMatch = { predicate: pred, score }
2622
+ }
2623
+ }
2624
+ }
2625
+
2626
+ if (bestMatch) {
2627
+ result.warnings.push({
2628
+ predicate: uri,
2629
+ message: `Predicate not in schema. Did you mean: ${bestMatch.predicate}?`,
2630
+ suggestion: bestMatch.predicate
2631
+ })
2632
+ } else {
2633
+ result.warnings.push({
2634
+ predicate: uri,
2635
+ message: `Predicate not found in schema: ${uri}`
2636
+ })
2637
+ }
2638
+ }
2639
+ }
2640
+
2641
+ // If we have errors (strict mode), mark as invalid
2642
+ if (result.errors.length > 0) {
2643
+ result.valid = false
2644
+ }
2645
+
2646
+ return result
2394
2647
  }
2395
2648
 
2396
2649
  // ============================================================================
@@ -2503,42 +2756,173 @@ Intent types: detect_fraud, find_similar, explain, find_patterns, aggregate, gen
2503
2756
 
2504
2757
  /**
2505
2758
  * Find predicates from schema that match the text intent
2759
+ *
2760
+ * Improvement over MCP YAML tools:
2761
+ * - NO hardcoded domain mappings (works with ANY ontology)
2762
+ * - Uses ensemble similarity (Jaro-Winkler, N-gram, token overlap)
2763
+ * - Returns RANKED matches with confidence scores
2764
+ * - Generic: same algorithm works for LUBM, fraud, social, etc.
2765
+ *
2506
2766
  * @private
2767
+ * @param {string} textLower - Natural language text (lowercase)
2768
+ * @param {string[]} predicates - Schema predicates
2769
+ * @param {Object} options - Options { threshold, maxResults }
2770
+ * @returns {Array} Ranked predicates with scores
2507
2771
  */
2508
- _findRelevantPredicates(textLower, predicates) {
2509
- const keywords = textLower.split(/\s+/)
2510
- const matches = []
2772
+ _findRelevantPredicates(textLower, predicates, options = {}) {
2773
+ if (!predicates || predicates.length === 0) return []
2774
+
2775
+ const threshold = options.threshold ?? CONFIG.scoring?.similarityThreshold ?? 0.3
2776
+ const maxResults = options.maxResults ?? 5
2777
+
2778
+ // Extract meaningful keywords (generic - no domain-specific stopwords)
2779
+ const keywords = extractKeywords(textLower)
2780
+ if (keywords.length === 0) return []
2781
+
2782
+ // Use native Rust similarity with stemming and tokenization
2783
+ // Multi-method ranking: direct + stemmed + token-based
2784
+ const allMatches = new Map() // predicate -> { predicate, score }
2785
+
2786
+ for (const keyword of keywords) {
2787
+ // Stem the keyword once
2788
+ const stemmedKeyword = stemWord(keyword)
2789
+
2790
+ for (const pred of predicates) {
2791
+ // Extract local name from predicate URI
2792
+ const localName = pred.split('/').pop().split('#').pop()
2793
+ const localNameLower = localName.toLowerCase()
2794
+
2795
+ // Method 1: Direct string similarity
2796
+ const directScore = computeSimilarity(keyword, localNameLower)
2797
+
2798
+ // Method 2: Stemmed similarity
2799
+ const stemmedLocalName = stemWord(localNameLower)
2800
+ const stemmedScore = computeSimilarity(stemmedKeyword, stemmedLocalName)
2801
+
2802
+ // Method 3: Token-based matching (CamelCase/snake_case decomposition)
2803
+ const tokens = tokenizeIdentifier(localName)
2804
+ let tokenScore = 0
2805
+ for (const token of tokens) {
2806
+ const tokenLower = token.toLowerCase()
2807
+ const directTokenScore = computeSimilarity(keyword, tokenLower)
2808
+ const stemmedTokenScore = computeSimilarity(stemmedKeyword, stemWord(tokenLower))
2809
+ tokenScore = Math.max(tokenScore, directTokenScore, stemmedTokenScore)
2810
+ }
2511
2811
 
2512
- // Pattern-specific keyword mappings
2513
- const keywordMappings = {
2514
- payment: ['transfer', 'paid', 'pay', 'payment', 'amount', 'transaction'],
2515
- fraud: ['claim', 'risk', 'flag', 'suspicious', 'alert'],
2516
- social: ['knows', 'friend', 'follows', 'connected', 'related'],
2517
- org: ['works', 'manages', 'reports', 'employs', 'member'],
2518
- product: ['purchase', 'buy', 'order', 'sell', 'owns']
2812
+ // Take the best score from all methods
2813
+ const bestScore = Math.max(directScore, stemmedScore, tokenScore)
2814
+
2815
+ if (bestScore >= threshold) {
2816
+ const existing = allMatches.get(pred)
2817
+ if (!existing || bestScore > existing.score) {
2818
+ allMatches.set(pred, { predicate: pred, score: bestScore, localName })
2819
+ }
2820
+ }
2821
+ }
2519
2822
  }
2520
2823
 
2824
+ // Also try full text match (for compound queries)
2521
2825
  for (const pred of predicates) {
2522
- const predLower = pred.toLowerCase()
2826
+ const localName = pred.split('/').pop().split('#').pop()
2827
+ const localNameLower = localName.toLowerCase()
2523
2828
 
2524
- // Direct match
2525
- if (keywords.some(kw => predLower.includes(kw) || kw.includes(predLower))) {
2526
- matches.push(pred)
2527
- continue
2829
+ // Direct full text
2830
+ const directScore = computeSimilarity(textLower, localNameLower)
2831
+
2832
+ // Stemmed full text
2833
+ const stemmedText = textLower.split(/\s+/).map(w => stemWord(w)).join(' ')
2834
+ const stemmedLocal = stemWord(localNameLower)
2835
+ const stemmedScore = computeSimilarity(stemmedText, stemmedLocal)
2836
+
2837
+ const bestScore = Math.max(directScore, stemmedScore)
2838
+
2839
+ if (bestScore >= threshold) {
2840
+ const existing = allMatches.get(pred)
2841
+ if (!existing || bestScore > existing.score) {
2842
+ allMatches.set(pred, { predicate: pred, score: bestScore, localName })
2843
+ }
2528
2844
  }
2845
+ }
2529
2846
 
2530
- // Keyword mapping match
2531
- for (const [category, mappedWords] of Object.entries(keywordMappings)) {
2532
- if (keywords.some(kw => category.includes(kw) || kw.includes(category))) {
2533
- if (mappedWords.some(mw => predLower.includes(mw))) {
2534
- matches.push(pred)
2535
- break
2847
+ // Sort by score and return top matches
2848
+ const sorted = Array.from(allMatches.values())
2849
+ .sort((a, b) => b.score - a.score)
2850
+ .slice(0, maxResults)
2851
+
2852
+ // Return just predicate URIs for backward compatibility
2853
+ // (callers expect string[] not object[])
2854
+ return sorted.map(m => m.predicate)
2855
+ }
2856
+
2857
+ /**
2858
+ * Find predicates with full ranking info (for advanced use)
2859
+ * Uses native Rust ensemble similarity with stemming and tokenization
2860
+ *
2861
+ * Algorithm:
2862
+ * 1. Direct similarity: keyword vs localName (Jaro-Winkler + Levenshtein + N-gram)
2863
+ * 2. Stemmed similarity: stem(keyword) vs stem(localName) - handles "professor" → "profess"
2864
+ * 3. Token similarity: keyword vs each token of CamelCase/snake_case name
2865
+ *
2866
+ * Final score = max(direct, stemmed, tokenMatch) - takes best match method
2867
+ *
2868
+ * @private
2869
+ */
2870
+ _findRelevantPredicatesRanked(textLower, predicates, options = {}) {
2871
+ if (!predicates || predicates.length === 0) return []
2872
+
2873
+ const threshold = options.threshold ?? CONFIG.scoring?.similarityThreshold ?? 0.3
2874
+ const keywords = extractKeywords(textLower)
2875
+
2876
+ // Use native Rust similarity with stemming and tokenization
2877
+ const allMatches = new Map()
2878
+
2879
+ for (const keyword of keywords) {
2880
+ // Stem the keyword once
2881
+ const stemmedKeyword = stemWord(keyword)
2882
+
2883
+ for (const pred of predicates) {
2884
+ const localName = pred.split('/').pop().split('#').pop()
2885
+ const localNameLower = localName.toLowerCase()
2886
+
2887
+ // Method 1: Direct string similarity
2888
+ const directScore = computeSimilarity(keyword, localNameLower)
2889
+
2890
+ // Method 2: Stemmed similarity (handles "professor" vs "fullProfessor")
2891
+ const stemmedLocalName = stemWord(localNameLower)
2892
+ const stemmedScore = computeSimilarity(stemmedKeyword, stemmedLocalName)
2893
+
2894
+ // Method 3: Token-based matching (CamelCase/snake_case decomposition)
2895
+ // "fullProfessor" → ["full", "professor"]
2896
+ const tokens = tokenizeIdentifier(localName)
2897
+ let tokenScore = 0
2898
+ for (const token of tokens) {
2899
+ const tokenLower = token.toLowerCase()
2900
+ const directTokenScore = computeSimilarity(keyword, tokenLower)
2901
+ const stemmedTokenScore = computeSimilarity(stemmedKeyword, stemWord(tokenLower))
2902
+ tokenScore = Math.max(tokenScore, directTokenScore, stemmedTokenScore)
2903
+ }
2904
+
2905
+ // Take the best score from all methods
2906
+ const bestScore = Math.max(directScore, stemmedScore, tokenScore)
2907
+
2908
+ if (bestScore >= threshold) {
2909
+ const existing = allMatches.get(pred)
2910
+ if (!existing || bestScore > existing.score) {
2911
+ allMatches.set(pred, {
2912
+ predicate: pred,
2913
+ score: bestScore,
2914
+ localName,
2915
+ matchMethod: bestScore === directScore ? 'direct' :
2916
+ bestScore === stemmedScore ? 'stemmed' : 'token',
2917
+ tokens
2918
+ })
2536
2919
  }
2537
2920
  }
2538
2921
  }
2539
2922
  }
2540
2923
 
2541
- return matches
2924
+ return Array.from(allMatches.values())
2925
+ .sort((a, b) => b.score - a.score)
2542
2926
  }
2543
2927
 
2544
2928
  /**
package/index.d.ts CHANGED
@@ -2170,3 +2170,31 @@ export function tokenizeIdentifier(identifier: string): string[]
2170
2170
  * ```
2171
2171
  */
2172
2172
  export function stemWord(word: string): string
2173
+
2174
+ /**
2175
+ * Extract keywords from natural language text.
2176
+ *
2177
+ * Uses tokenization without hardcoded stop words.
2178
+ * Ensemble similarity scoring naturally downweights generic words.
2179
+ *
2180
+ * Reference: Native Rust implementation in hypermind-tools
2181
+ *
2182
+ * @param text - Natural language text
2183
+ * @returns Array of extracted keywords
2184
+ *
2185
+ * @example
2186
+ * ```typescript
2187
+ * extractKeywords('Find all teachers') // ['find', 'teachers']
2188
+ * extractKeywords('Get student email addresses') // ['student', 'email', 'addresses']
2189
+ * ```
2190
+ */
2191
+ export function extractKeywords(text: string): string[]
2192
+
2193
+ // =============================================================================
2194
+ // NOTE: Query Memory Store, Hybrid Reranker, and Trigger System
2195
+ // have been moved to Rust core accessed via HyperAgentProxy/WASM runtime.
2196
+ // SDK remains thin - heavy logic stays in Rust core.
2197
+ // See: crates/hypermind-runtime/src/memory/query_store.rs
2198
+ // See: crates/hypermind-runtime/src/memory/reranker.rs
2199
+ // See: crates/embeddings/src/trigger/
2200
+ // =============================================================================
package/index.js CHANGED
@@ -59,6 +59,9 @@ const {
59
59
  computeSimilarity,
60
60
  tokenizeIdentifier,
61
61
  stemWord,
62
+ extractKeywords,
63
+ // NOTE: QueryMemoryStore, HybridReranker, TriggerManager moved to Rust core
64
+ // Access via HyperAgentProxy/WASM runtime (SDK remains thin)
62
65
  } = loadNativeBinding()
63
66
 
64
67
  // HyperMind Agentic Framework
@@ -178,4 +181,7 @@ module.exports = {
178
181
  computeSimilarity, // Ensemble string similarity
179
182
  tokenizeIdentifier, // CamelCase/snake_case tokenization
180
183
  stemWord, // Porter Stemmer
184
+ extractKeywords, // Keyword extraction from natural language
185
+ // NOTE: QueryMemoryStore, HybridReranker, TriggerManager moved to Rust core
186
+ // Access via HyperAgentProxy/WASM runtime (SDK remains thin)
181
187
  }
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "rust-kgdb",
3
- "version": "0.6.40",
4
- "description": "Neuro-Symbolic AI Framework: 85.7% accuracy on LUBM benchmark (+14.3pp over schema injection alone). Schema-aware predicate resolution using grammar-based parsing. Features: GraphDB (449ns lookups, 2.2M ops/sec, 156K inserts/sec), HyperMindAgent with audit trail, Datalog reasoning, GraphFrames analytics. W3C SPARQL 1.1 compliant. Benchmarked on Intel i9-9980HK with BSBM/LDBC methodology.",
3
+ "version": "0.6.43",
4
+ "description": "High-performance RDF/SPARQL database with AI agent framework. GraphDB (449ns lookups, 35x faster than RDFox), GraphFrames analytics (PageRank, motifs), Datalog reasoning, HNSW vector embeddings. HyperMindAgent for schema-aware query generation with audit trails. W3C SPARQL 1.1 compliant. Native performance via Rust + NAPI-RS.",
5
5
  "main": "index.js",
6
6
  "types": "index.d.ts",
7
7
  "napi": {
Binary file