rust-kgdb 0.6.40 → 0.6.43
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +81 -0
- package/README.md +69 -24
- package/examples/quadstore-capabilities-demo.js +407 -0
- package/hypermind-agent.js +432 -48
- package/index.d.ts +28 -0
- package/index.js +6 -0
- package/package.json +2 -2
- package/rust-kgdb-napi.darwin-x64.node +0 -0
- package/vanilla-vs-hypermind-benchmark.js +164 -12
package/hypermind-agent.js
CHANGED
|
@@ -13,6 +13,59 @@
|
|
|
13
13
|
*/
|
|
14
14
|
|
|
15
15
|
const crypto = require('crypto')
|
|
16
|
+
const os = require('os')
|
|
17
|
+
|
|
18
|
+
// Native Rust FFI for predicate resolution (via NAPI-RS)
|
|
19
|
+
// ALL predicate resolution happens in Rust - no JavaScript duplication
|
|
20
|
+
// IMPORTANT: Load native binding directly to avoid circular dependency with index.js
|
|
21
|
+
function loadNativeBindingDirect() {
|
|
22
|
+
const platform = os.platform()
|
|
23
|
+
const arch = os.arch()
|
|
24
|
+
|
|
25
|
+
let nativeBinding
|
|
26
|
+
if (platform === 'darwin') {
|
|
27
|
+
if (arch === 'x64') {
|
|
28
|
+
nativeBinding = require('./rust-kgdb-napi.darwin-x64.node')
|
|
29
|
+
} else if (arch === 'arm64') {
|
|
30
|
+
nativeBinding = require('./rust-kgdb-napi.darwin-arm64.node')
|
|
31
|
+
}
|
|
32
|
+
} else if (platform === 'linux') {
|
|
33
|
+
if (arch === 'x64') {
|
|
34
|
+
nativeBinding = require('./rust-kgdb-napi.linux-x64-gnu.node')
|
|
35
|
+
} else if (arch === 'arm64') {
|
|
36
|
+
nativeBinding = require('./rust-kgdb-napi.linux-arm64-gnu.node')
|
|
37
|
+
}
|
|
38
|
+
} else if (platform === 'win32' && arch === 'x64') {
|
|
39
|
+
nativeBinding = require('./rust-kgdb-napi.win32-x64-msvc.node')
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
if (!nativeBinding) {
|
|
43
|
+
throw new Error(`Unsupported platform: ${platform}-${arch}. Please contact support.`)
|
|
44
|
+
}
|
|
45
|
+
return nativeBinding
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
const native = loadNativeBindingDirect()
|
|
49
|
+
const {
|
|
50
|
+
OlogSchema,
|
|
51
|
+
PredicateResolverService,
|
|
52
|
+
SchemaValidatorService,
|
|
53
|
+
computeSimilarity,
|
|
54
|
+
tokenizeIdentifier,
|
|
55
|
+
stemWord,
|
|
56
|
+
extractKeywords: nativeExtractKeywords
|
|
57
|
+
} = native
|
|
58
|
+
|
|
59
|
+
/**
|
|
60
|
+
* Extract keywords from natural language prompt using native Rust
|
|
61
|
+
* Delegates entirely to Rust KeywordExtractor - no JavaScript stop words
|
|
62
|
+
* @param {string} prompt - Natural language prompt
|
|
63
|
+
* @returns {string[]} Extracted keywords
|
|
64
|
+
*/
|
|
65
|
+
function extractKeywords(prompt) {
|
|
66
|
+
if (!prompt) return []
|
|
67
|
+
return nativeExtractKeywords(prompt)
|
|
68
|
+
}
|
|
16
69
|
|
|
17
70
|
// ============================================================================
|
|
18
71
|
// CONFIGURATION - All tunable parameters (NO hardcoding)
|
|
@@ -1979,7 +2032,13 @@ class LLMPlanner {
|
|
|
1979
2032
|
}
|
|
1980
2033
|
|
|
1981
2034
|
/**
|
|
1982
|
-
* Extract schema from knowledge graph
|
|
2035
|
+
* Extract schema from knowledge graph with pagination
|
|
2036
|
+
*
|
|
2037
|
+
* Improvement over MCP YAML tools:
|
|
2038
|
+
* - NO hard limits - extracts ALL predicates via pagination
|
|
2039
|
+
* - Schema is used for deterministic query generation
|
|
2040
|
+
* - Enables predicate ranking for accurate matching
|
|
2041
|
+
*
|
|
1983
2042
|
* @returns {Object} Schema with predicates, classes, examples
|
|
1984
2043
|
*/
|
|
1985
2044
|
async extractSchema(forceRefresh = false) {
|
|
@@ -1990,33 +2049,124 @@ class LLMPlanner {
|
|
|
1990
2049
|
return this._schemaCache
|
|
1991
2050
|
}
|
|
1992
2051
|
|
|
1993
|
-
const schema = {
|
|
2052
|
+
const schema = {
|
|
2053
|
+
predicates: [],
|
|
2054
|
+
classes: [],
|
|
2055
|
+
examples: [],
|
|
2056
|
+
timestamp: new Date().toISOString(),
|
|
2057
|
+
extractionMethod: 'paginated' // Track extraction method
|
|
2058
|
+
}
|
|
2059
|
+
|
|
2060
|
+
const pageSize = CONFIG.schema.maxProperties || 500
|
|
1994
2061
|
|
|
1995
2062
|
try {
|
|
1996
|
-
//
|
|
1997
|
-
const
|
|
1998
|
-
|
|
1999
|
-
|
|
2000
|
-
|
|
2001
|
-
|
|
2002
|
-
|
|
2003
|
-
|
|
2004
|
-
|
|
2005
|
-
|
|
2006
|
-
|
|
2007
|
-
|
|
2008
|
-
|
|
2063
|
+
// Extract predicates with pagination - NO hard limit
|
|
2064
|
+
const predicateSet = new Set()
|
|
2065
|
+
let offset = 0
|
|
2066
|
+
let hasMore = true
|
|
2067
|
+
|
|
2068
|
+
while (hasMore) {
|
|
2069
|
+
const query = `SELECT DISTINCT ?p WHERE { ?s ?p ?o } LIMIT ${pageSize} OFFSET ${offset}`
|
|
2070
|
+
const results = this.kg.querySelect(query)
|
|
2071
|
+
|
|
2072
|
+
if (results.length === 0) {
|
|
2073
|
+
hasMore = false
|
|
2074
|
+
} else {
|
|
2075
|
+
results.forEach(r => {
|
|
2076
|
+
const pred = r.bindings?.p || r.p
|
|
2077
|
+
if (pred) predicateSet.add(pred)
|
|
2078
|
+
})
|
|
2079
|
+
offset += pageSize
|
|
2080
|
+
|
|
2081
|
+
// Safety limit to prevent infinite loops on very large graphs
|
|
2082
|
+
if (offset > 10000) {
|
|
2083
|
+
hasMore = false
|
|
2084
|
+
schema.truncated = true
|
|
2085
|
+
}
|
|
2086
|
+
}
|
|
2087
|
+
}
|
|
2088
|
+
schema.predicates = Array.from(predicateSet)
|
|
2089
|
+
|
|
2090
|
+
// Extract classes with pagination
|
|
2091
|
+
const classSet = new Set()
|
|
2092
|
+
offset = 0
|
|
2093
|
+
hasMore = true
|
|
2094
|
+
|
|
2095
|
+
while (hasMore) {
|
|
2096
|
+
const query = `
|
|
2097
|
+
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
|
|
2098
|
+
SELECT DISTINCT ?type WHERE { ?s rdf:type ?type } LIMIT ${pageSize} OFFSET ${offset}
|
|
2099
|
+
`
|
|
2100
|
+
const results = this.kg.querySelect(query)
|
|
2101
|
+
|
|
2102
|
+
if (results.length === 0) {
|
|
2103
|
+
hasMore = false
|
|
2104
|
+
} else {
|
|
2105
|
+
results.forEach(r => {
|
|
2106
|
+
const type = r.bindings?.type || r.type
|
|
2107
|
+
if (type) classSet.add(type)
|
|
2108
|
+
})
|
|
2109
|
+
offset += pageSize
|
|
2110
|
+
|
|
2111
|
+
if (offset > 5000) {
|
|
2112
|
+
hasMore = false
|
|
2113
|
+
}
|
|
2114
|
+
}
|
|
2115
|
+
}
|
|
2116
|
+
schema.classes = Array.from(classSet)
|
|
2117
|
+
|
|
2118
|
+
// Get sample triples for examples
|
|
2119
|
+
const sampleResults = this.kg.querySelect(
|
|
2120
|
+
`SELECT ?s ?p ?o WHERE { ?s ?p ?o } LIMIT ${CONFIG.schema.maxSamples || 30}`
|
|
2121
|
+
)
|
|
2009
2122
|
schema.examples = sampleResults.map(r => ({
|
|
2010
2123
|
s: r.bindings?.s || r.s,
|
|
2011
2124
|
p: r.bindings?.p || r.p,
|
|
2012
2125
|
o: r.bindings?.o || r.o
|
|
2013
2126
|
}))
|
|
2127
|
+
|
|
2128
|
+
// Initialize predicate resolver (native Rust or JS fallback)
|
|
2129
|
+
const threshold = CONFIG.scoring?.similarityThreshold || 0.3
|
|
2130
|
+
if (nativeResolver?.OlogSchema && nativeResolver?.PredicateResolverService) {
|
|
2131
|
+
try {
|
|
2132
|
+
// Build OlogSchema from extracted schema
|
|
2133
|
+
const olog = new nativeResolver.OlogSchema()
|
|
2134
|
+
olog.withNamespace('http://schema.org/')
|
|
2135
|
+
|
|
2136
|
+
// Add classes
|
|
2137
|
+
for (const cls of (schema.classes || [])) {
|
|
2138
|
+
try {
|
|
2139
|
+
const localName = cls.split('/').pop().split('#').pop()
|
|
2140
|
+
olog.addClass(localName)
|
|
2141
|
+
} catch (e) { /* skip invalid class */ }
|
|
2142
|
+
}
|
|
2143
|
+
|
|
2144
|
+
// Add properties with aliases extracted from local names
|
|
2145
|
+
for (const prop of (schema.predicates || [])) {
|
|
2146
|
+
try {
|
|
2147
|
+
const localName = prop.split('/').pop().split('#').pop()
|
|
2148
|
+
// Generate aliases from tokenized form
|
|
2149
|
+
const tokens = nativeResolver.tokenizeIdentifier(localName)
|
|
2150
|
+
const aliases = tokens.length > 1 ? [tokens.join(''), tokens.join('_')] : []
|
|
2151
|
+
olog.addProperty(localName, 'Thing', 'Thing', aliases)
|
|
2152
|
+
} catch (e) { /* skip invalid property */ }
|
|
2153
|
+
}
|
|
2154
|
+
|
|
2155
|
+
olog.build()
|
|
2156
|
+
schema._nativeResolver = new nativeResolver.PredicateResolverService(olog, threshold)
|
|
2157
|
+
schema._nativeOlog = olog
|
|
2158
|
+
} catch (e) {
|
|
2159
|
+
// Fallback to JS ranker on error
|
|
2160
|
+
schema._nativeResolver = null
|
|
2161
|
+
}
|
|
2162
|
+
}
|
|
2163
|
+
|
|
2014
2164
|
} catch (err) {
|
|
2015
2165
|
schema.error = err.message
|
|
2016
2166
|
}
|
|
2017
2167
|
|
|
2018
2168
|
this._schemaCache = schema
|
|
2019
|
-
this._schemaCacheExpiry = now +
|
|
2169
|
+
this._schemaCacheExpiry = now + CONFIG.schema.cacheExpiryMs
|
|
2020
2170
|
return schema
|
|
2021
2171
|
}
|
|
2022
2172
|
|
|
@@ -2368,29 +2518,132 @@ Intent types: detect_fraud, find_similar, explain, find_patterns, aggregate, gen
|
|
|
2368
2518
|
return steps
|
|
2369
2519
|
}
|
|
2370
2520
|
|
|
2521
|
+
/**
|
|
2522
|
+
* Generate SPARQL query using schema-aware predicate ranking
|
|
2523
|
+
*
|
|
2524
|
+
* Improvement over MCP YAML tools:
|
|
2525
|
+
* - Uses ensemble similarity for predicate matching
|
|
2526
|
+
* - NO hardcoded domain keywords
|
|
2527
|
+
* - Validates predicates exist in schema before using
|
|
2528
|
+
* - Returns query with confidence score
|
|
2529
|
+
*
|
|
2530
|
+
* @private
|
|
2531
|
+
*/
|
|
2371
2532
|
_generateSchemaSparql(intent, schema, context) {
|
|
2372
|
-
// Use
|
|
2533
|
+
// Use explicit SPARQL if provided
|
|
2373
2534
|
if (context.sparql) return context.sparql
|
|
2374
2535
|
|
|
2375
|
-
// Check if schema has relevant predicates
|
|
2376
2536
|
const predicates = schema.predicates || []
|
|
2537
|
+
const prompt = context.originalPrompt || ''
|
|
2377
2538
|
|
|
2539
|
+
// Aggregate queries don't need specific predicates
|
|
2378
2540
|
if (intent.aggregate) {
|
|
2379
2541
|
return 'SELECT (COUNT(*) as ?count) WHERE { ?s ?p ?o }'
|
|
2380
2542
|
}
|
|
2381
2543
|
|
|
2382
|
-
//
|
|
2383
|
-
const
|
|
2384
|
-
|
|
2544
|
+
// Use ranker to find relevant predicates from prompt
|
|
2545
|
+
const rankedPreds = this._findRelevantPredicatesRanked
|
|
2546
|
+
? this._findRelevantPredicatesRanked(prompt.toLowerCase(), predicates, { threshold: 0.3 })
|
|
2547
|
+
: []
|
|
2548
|
+
|
|
2549
|
+
// If we have high-confidence predicate matches, use them
|
|
2550
|
+
if (rankedPreds.length > 0 && rankedPreds[0].score >= 0.5) {
|
|
2551
|
+
const bestPred = rankedPreds[0]
|
|
2552
|
+
|
|
2553
|
+
// Check if it looks like a numeric property (for ordering)
|
|
2554
|
+
const localName = bestPred.localName || ''
|
|
2555
|
+
const isNumeric = /score|amount|value|count|total|number|rank|rating|level|degree/i.test(localName)
|
|
2556
|
+
|
|
2557
|
+
if (isNumeric) {
|
|
2558
|
+
return `SELECT ?s ?value WHERE { ?s <${bestPred.predicate}> ?value } ORDER BY DESC(?value) LIMIT ${CONFIG.query.defaultLimit}`
|
|
2559
|
+
}
|
|
2560
|
+
|
|
2561
|
+
// Object property - return subject-object pairs
|
|
2562
|
+
return `SELECT ?s ?o WHERE { ?s <${bestPred.predicate}> ?o } LIMIT ${CONFIG.query.defaultLimit}`
|
|
2563
|
+
}
|
|
2564
|
+
|
|
2565
|
+
// If we have type-related predicates, use for class queries
|
|
2566
|
+
if (intent.query || intent.compliance) {
|
|
2567
|
+
const typePredsRanked = this._findRelevantPredicatesRanked
|
|
2568
|
+
? this._findRelevantPredicatesRanked('type class', predicates, { threshold: 0.4 })
|
|
2569
|
+
: []
|
|
2385
2570
|
|
|
2386
|
-
|
|
2387
|
-
|
|
2388
|
-
return `SELECT ?s ?score WHERE { ?s <${riskPreds[0]}> ?score } ORDER BY DESC(?score) LIMIT 100`
|
|
2571
|
+
if (typePredsRanked.length > 0) {
|
|
2572
|
+
return `SELECT ?s ?type WHERE { ?s <${typePredsRanked[0].predicate}> ?type } LIMIT ${CONFIG.query.defaultLimit}`
|
|
2389
2573
|
}
|
|
2390
2574
|
}
|
|
2391
2575
|
|
|
2392
|
-
// Default: return
|
|
2393
|
-
return
|
|
2576
|
+
// Default: return sample triples
|
|
2577
|
+
return `SELECT ?s ?p ?o WHERE { ?s ?p ?o } LIMIT ${CONFIG.query.defaultLimit}`
|
|
2578
|
+
}
|
|
2579
|
+
|
|
2580
|
+
/**
|
|
2581
|
+
* Validate that a SPARQL query only uses predicates from schema
|
|
2582
|
+
*
|
|
2583
|
+
* @param {string} sparql - SPARQL query string
|
|
2584
|
+
* @param {Object} schema - Schema context with predicates
|
|
2585
|
+
* @returns {Object} { valid: boolean, errors: [], warnings: [] }
|
|
2586
|
+
*/
|
|
2587
|
+
_validateQueryPredicates(sparql, schema) {
|
|
2588
|
+
const result = { valid: true, errors: [], warnings: [], predicatesUsed: [] }
|
|
2589
|
+
if (!sparql || !schema?.predicates) return result
|
|
2590
|
+
|
|
2591
|
+
const predicateSet = new Set(schema.predicates)
|
|
2592
|
+
|
|
2593
|
+
// Extract URIs from query (simple regex - handles <uri> and prefix:local)
|
|
2594
|
+
const uriPattern = /<([^>]+)>/g
|
|
2595
|
+
let match
|
|
2596
|
+
while ((match = uriPattern.exec(sparql)) !== null) {
|
|
2597
|
+
const uri = match[1]
|
|
2598
|
+
// Skip common RDF/RDFS/OWL URIs
|
|
2599
|
+
if (uri.startsWith('http://www.w3.org/') ||
|
|
2600
|
+
uri.startsWith('http://xmlns.com/') ||
|
|
2601
|
+
uri.includes('rdf-syntax-ns') ||
|
|
2602
|
+
uri.includes('rdf-schema')) {
|
|
2603
|
+
continue
|
|
2604
|
+
}
|
|
2605
|
+
|
|
2606
|
+
result.predicatesUsed.push(uri)
|
|
2607
|
+
|
|
2608
|
+
// Check if this predicate exists in schema
|
|
2609
|
+
if (!predicateSet.has(uri) && !predicateSet.has(`<${uri}>`)) {
|
|
2610
|
+
// Try fuzzy match using native Rust similarity (no JS fallback)
|
|
2611
|
+
let bestMatch = null
|
|
2612
|
+
let bestScore = 0.8 // threshold
|
|
2613
|
+
|
|
2614
|
+
{
|
|
2615
|
+
const uriLocalName = uri.split('/').pop().split('#').pop().toLowerCase()
|
|
2616
|
+
for (const pred of schema.predicates) {
|
|
2617
|
+
const predLocalName = pred.split('/').pop().split('#').pop().toLowerCase()
|
|
2618
|
+
const score = computeSimilarity(uriLocalName, predLocalName)
|
|
2619
|
+
if (score > bestScore) {
|
|
2620
|
+
bestScore = score
|
|
2621
|
+
bestMatch = { predicate: pred, score }
|
|
2622
|
+
}
|
|
2623
|
+
}
|
|
2624
|
+
}
|
|
2625
|
+
|
|
2626
|
+
if (bestMatch) {
|
|
2627
|
+
result.warnings.push({
|
|
2628
|
+
predicate: uri,
|
|
2629
|
+
message: `Predicate not in schema. Did you mean: ${bestMatch.predicate}?`,
|
|
2630
|
+
suggestion: bestMatch.predicate
|
|
2631
|
+
})
|
|
2632
|
+
} else {
|
|
2633
|
+
result.warnings.push({
|
|
2634
|
+
predicate: uri,
|
|
2635
|
+
message: `Predicate not found in schema: ${uri}`
|
|
2636
|
+
})
|
|
2637
|
+
}
|
|
2638
|
+
}
|
|
2639
|
+
}
|
|
2640
|
+
|
|
2641
|
+
// If we have errors (strict mode), mark as invalid
|
|
2642
|
+
if (result.errors.length > 0) {
|
|
2643
|
+
result.valid = false
|
|
2644
|
+
}
|
|
2645
|
+
|
|
2646
|
+
return result
|
|
2394
2647
|
}
|
|
2395
2648
|
|
|
2396
2649
|
// ============================================================================
|
|
@@ -2503,42 +2756,173 @@ Intent types: detect_fraud, find_similar, explain, find_patterns, aggregate, gen
|
|
|
2503
2756
|
|
|
2504
2757
|
/**
|
|
2505
2758
|
* Find predicates from schema that match the text intent
|
|
2759
|
+
*
|
|
2760
|
+
* Improvement over MCP YAML tools:
|
|
2761
|
+
* - NO hardcoded domain mappings (works with ANY ontology)
|
|
2762
|
+
* - Uses ensemble similarity (Jaro-Winkler, N-gram, token overlap)
|
|
2763
|
+
* - Returns RANKED matches with confidence scores
|
|
2764
|
+
* - Generic: same algorithm works for LUBM, fraud, social, etc.
|
|
2765
|
+
*
|
|
2506
2766
|
* @private
|
|
2767
|
+
* @param {string} textLower - Natural language text (lowercase)
|
|
2768
|
+
* @param {string[]} predicates - Schema predicates
|
|
2769
|
+
* @param {Object} options - Options { threshold, maxResults }
|
|
2770
|
+
* @returns {Array} Ranked predicates with scores
|
|
2507
2771
|
*/
|
|
2508
|
-
_findRelevantPredicates(textLower, predicates) {
|
|
2509
|
-
|
|
2510
|
-
|
|
2772
|
+
_findRelevantPredicates(textLower, predicates, options = {}) {
|
|
2773
|
+
if (!predicates || predicates.length === 0) return []
|
|
2774
|
+
|
|
2775
|
+
const threshold = options.threshold ?? CONFIG.scoring?.similarityThreshold ?? 0.3
|
|
2776
|
+
const maxResults = options.maxResults ?? 5
|
|
2777
|
+
|
|
2778
|
+
// Extract meaningful keywords (generic - no domain-specific stopwords)
|
|
2779
|
+
const keywords = extractKeywords(textLower)
|
|
2780
|
+
if (keywords.length === 0) return []
|
|
2781
|
+
|
|
2782
|
+
// Use native Rust similarity with stemming and tokenization
|
|
2783
|
+
// Multi-method ranking: direct + stemmed + token-based
|
|
2784
|
+
const allMatches = new Map() // predicate -> { predicate, score }
|
|
2785
|
+
|
|
2786
|
+
for (const keyword of keywords) {
|
|
2787
|
+
// Stem the keyword once
|
|
2788
|
+
const stemmedKeyword = stemWord(keyword)
|
|
2789
|
+
|
|
2790
|
+
for (const pred of predicates) {
|
|
2791
|
+
// Extract local name from predicate URI
|
|
2792
|
+
const localName = pred.split('/').pop().split('#').pop()
|
|
2793
|
+
const localNameLower = localName.toLowerCase()
|
|
2794
|
+
|
|
2795
|
+
// Method 1: Direct string similarity
|
|
2796
|
+
const directScore = computeSimilarity(keyword, localNameLower)
|
|
2797
|
+
|
|
2798
|
+
// Method 2: Stemmed similarity
|
|
2799
|
+
const stemmedLocalName = stemWord(localNameLower)
|
|
2800
|
+
const stemmedScore = computeSimilarity(stemmedKeyword, stemmedLocalName)
|
|
2801
|
+
|
|
2802
|
+
// Method 3: Token-based matching (CamelCase/snake_case decomposition)
|
|
2803
|
+
const tokens = tokenizeIdentifier(localName)
|
|
2804
|
+
let tokenScore = 0
|
|
2805
|
+
for (const token of tokens) {
|
|
2806
|
+
const tokenLower = token.toLowerCase()
|
|
2807
|
+
const directTokenScore = computeSimilarity(keyword, tokenLower)
|
|
2808
|
+
const stemmedTokenScore = computeSimilarity(stemmedKeyword, stemWord(tokenLower))
|
|
2809
|
+
tokenScore = Math.max(tokenScore, directTokenScore, stemmedTokenScore)
|
|
2810
|
+
}
|
|
2511
2811
|
|
|
2512
|
-
|
|
2513
|
-
|
|
2514
|
-
|
|
2515
|
-
|
|
2516
|
-
|
|
2517
|
-
|
|
2518
|
-
|
|
2812
|
+
// Take the best score from all methods
|
|
2813
|
+
const bestScore = Math.max(directScore, stemmedScore, tokenScore)
|
|
2814
|
+
|
|
2815
|
+
if (bestScore >= threshold) {
|
|
2816
|
+
const existing = allMatches.get(pred)
|
|
2817
|
+
if (!existing || bestScore > existing.score) {
|
|
2818
|
+
allMatches.set(pred, { predicate: pred, score: bestScore, localName })
|
|
2819
|
+
}
|
|
2820
|
+
}
|
|
2821
|
+
}
|
|
2519
2822
|
}
|
|
2520
2823
|
|
|
2824
|
+
// Also try full text match (for compound queries)
|
|
2521
2825
|
for (const pred of predicates) {
|
|
2522
|
-
const
|
|
2826
|
+
const localName = pred.split('/').pop().split('#').pop()
|
|
2827
|
+
const localNameLower = localName.toLowerCase()
|
|
2523
2828
|
|
|
2524
|
-
// Direct
|
|
2525
|
-
|
|
2526
|
-
|
|
2527
|
-
|
|
2829
|
+
// Direct full text
|
|
2830
|
+
const directScore = computeSimilarity(textLower, localNameLower)
|
|
2831
|
+
|
|
2832
|
+
// Stemmed full text
|
|
2833
|
+
const stemmedText = textLower.split(/\s+/).map(w => stemWord(w)).join(' ')
|
|
2834
|
+
const stemmedLocal = stemWord(localNameLower)
|
|
2835
|
+
const stemmedScore = computeSimilarity(stemmedText, stemmedLocal)
|
|
2836
|
+
|
|
2837
|
+
const bestScore = Math.max(directScore, stemmedScore)
|
|
2838
|
+
|
|
2839
|
+
if (bestScore >= threshold) {
|
|
2840
|
+
const existing = allMatches.get(pred)
|
|
2841
|
+
if (!existing || bestScore > existing.score) {
|
|
2842
|
+
allMatches.set(pred, { predicate: pred, score: bestScore, localName })
|
|
2843
|
+
}
|
|
2528
2844
|
}
|
|
2845
|
+
}
|
|
2529
2846
|
|
|
2530
|
-
|
|
2531
|
-
|
|
2532
|
-
|
|
2533
|
-
|
|
2534
|
-
|
|
2535
|
-
|
|
2847
|
+
// Sort by score and return top matches
|
|
2848
|
+
const sorted = Array.from(allMatches.values())
|
|
2849
|
+
.sort((a, b) => b.score - a.score)
|
|
2850
|
+
.slice(0, maxResults)
|
|
2851
|
+
|
|
2852
|
+
// Return just predicate URIs for backward compatibility
|
|
2853
|
+
// (callers expect string[] not object[])
|
|
2854
|
+
return sorted.map(m => m.predicate)
|
|
2855
|
+
}
|
|
2856
|
+
|
|
2857
|
+
/**
|
|
2858
|
+
* Find predicates with full ranking info (for advanced use)
|
|
2859
|
+
* Uses native Rust ensemble similarity with stemming and tokenization
|
|
2860
|
+
*
|
|
2861
|
+
* Algorithm:
|
|
2862
|
+
* 1. Direct similarity: keyword vs localName (Jaro-Winkler + Levenshtein + N-gram)
|
|
2863
|
+
* 2. Stemmed similarity: stem(keyword) vs stem(localName) - handles "professor" → "profess"
|
|
2864
|
+
* 3. Token similarity: keyword vs each token of CamelCase/snake_case name
|
|
2865
|
+
*
|
|
2866
|
+
* Final score = max(direct, stemmed, tokenMatch) - takes best match method
|
|
2867
|
+
*
|
|
2868
|
+
* @private
|
|
2869
|
+
*/
|
|
2870
|
+
_findRelevantPredicatesRanked(textLower, predicates, options = {}) {
|
|
2871
|
+
if (!predicates || predicates.length === 0) return []
|
|
2872
|
+
|
|
2873
|
+
const threshold = options.threshold ?? CONFIG.scoring?.similarityThreshold ?? 0.3
|
|
2874
|
+
const keywords = extractKeywords(textLower)
|
|
2875
|
+
|
|
2876
|
+
// Use native Rust similarity with stemming and tokenization
|
|
2877
|
+
const allMatches = new Map()
|
|
2878
|
+
|
|
2879
|
+
for (const keyword of keywords) {
|
|
2880
|
+
// Stem the keyword once
|
|
2881
|
+
const stemmedKeyword = stemWord(keyword)
|
|
2882
|
+
|
|
2883
|
+
for (const pred of predicates) {
|
|
2884
|
+
const localName = pred.split('/').pop().split('#').pop()
|
|
2885
|
+
const localNameLower = localName.toLowerCase()
|
|
2886
|
+
|
|
2887
|
+
// Method 1: Direct string similarity
|
|
2888
|
+
const directScore = computeSimilarity(keyword, localNameLower)
|
|
2889
|
+
|
|
2890
|
+
// Method 2: Stemmed similarity (handles "professor" vs "fullProfessor")
|
|
2891
|
+
const stemmedLocalName = stemWord(localNameLower)
|
|
2892
|
+
const stemmedScore = computeSimilarity(stemmedKeyword, stemmedLocalName)
|
|
2893
|
+
|
|
2894
|
+
// Method 3: Token-based matching (CamelCase/snake_case decomposition)
|
|
2895
|
+
// "fullProfessor" → ["full", "professor"]
|
|
2896
|
+
const tokens = tokenizeIdentifier(localName)
|
|
2897
|
+
let tokenScore = 0
|
|
2898
|
+
for (const token of tokens) {
|
|
2899
|
+
const tokenLower = token.toLowerCase()
|
|
2900
|
+
const directTokenScore = computeSimilarity(keyword, tokenLower)
|
|
2901
|
+
const stemmedTokenScore = computeSimilarity(stemmedKeyword, stemWord(tokenLower))
|
|
2902
|
+
tokenScore = Math.max(tokenScore, directTokenScore, stemmedTokenScore)
|
|
2903
|
+
}
|
|
2904
|
+
|
|
2905
|
+
// Take the best score from all methods
|
|
2906
|
+
const bestScore = Math.max(directScore, stemmedScore, tokenScore)
|
|
2907
|
+
|
|
2908
|
+
if (bestScore >= threshold) {
|
|
2909
|
+
const existing = allMatches.get(pred)
|
|
2910
|
+
if (!existing || bestScore > existing.score) {
|
|
2911
|
+
allMatches.set(pred, {
|
|
2912
|
+
predicate: pred,
|
|
2913
|
+
score: bestScore,
|
|
2914
|
+
localName,
|
|
2915
|
+
matchMethod: bestScore === directScore ? 'direct' :
|
|
2916
|
+
bestScore === stemmedScore ? 'stemmed' : 'token',
|
|
2917
|
+
tokens
|
|
2918
|
+
})
|
|
2536
2919
|
}
|
|
2537
2920
|
}
|
|
2538
2921
|
}
|
|
2539
2922
|
}
|
|
2540
2923
|
|
|
2541
|
-
return
|
|
2924
|
+
return Array.from(allMatches.values())
|
|
2925
|
+
.sort((a, b) => b.score - a.score)
|
|
2542
2926
|
}
|
|
2543
2927
|
|
|
2544
2928
|
/**
|
package/index.d.ts
CHANGED
|
@@ -2170,3 +2170,31 @@ export function tokenizeIdentifier(identifier: string): string[]
|
|
|
2170
2170
|
* ```
|
|
2171
2171
|
*/
|
|
2172
2172
|
export function stemWord(word: string): string
|
|
2173
|
+
|
|
2174
|
+
/**
|
|
2175
|
+
* Extract keywords from natural language text.
|
|
2176
|
+
*
|
|
2177
|
+
* Uses tokenization without hardcoded stop words.
|
|
2178
|
+
* Ensemble similarity scoring naturally downweights generic words.
|
|
2179
|
+
*
|
|
2180
|
+
* Reference: Native Rust implementation in hypermind-tools
|
|
2181
|
+
*
|
|
2182
|
+
* @param text - Natural language text
|
|
2183
|
+
* @returns Array of extracted keywords
|
|
2184
|
+
*
|
|
2185
|
+
* @example
|
|
2186
|
+
* ```typescript
|
|
2187
|
+
* extractKeywords('Find all teachers') // ['find', 'teachers']
|
|
2188
|
+
* extractKeywords('Get student email addresses') // ['student', 'email', 'addresses']
|
|
2189
|
+
* ```
|
|
2190
|
+
*/
|
|
2191
|
+
export function extractKeywords(text: string): string[]
|
|
2192
|
+
|
|
2193
|
+
// =============================================================================
|
|
2194
|
+
// NOTE: Query Memory Store, Hybrid Reranker, and Trigger System
|
|
2195
|
+
// have been moved to Rust core accessed via HyperAgentProxy/WASM runtime.
|
|
2196
|
+
// SDK remains thin - heavy logic stays in Rust core.
|
|
2197
|
+
// See: crates/hypermind-runtime/src/memory/query_store.rs
|
|
2198
|
+
// See: crates/hypermind-runtime/src/memory/reranker.rs
|
|
2199
|
+
// See: crates/embeddings/src/trigger/
|
|
2200
|
+
// =============================================================================
|
package/index.js
CHANGED
|
@@ -59,6 +59,9 @@ const {
|
|
|
59
59
|
computeSimilarity,
|
|
60
60
|
tokenizeIdentifier,
|
|
61
61
|
stemWord,
|
|
62
|
+
extractKeywords,
|
|
63
|
+
// NOTE: QueryMemoryStore, HybridReranker, TriggerManager moved to Rust core
|
|
64
|
+
// Access via HyperAgentProxy/WASM runtime (SDK remains thin)
|
|
62
65
|
} = loadNativeBinding()
|
|
63
66
|
|
|
64
67
|
// HyperMind Agentic Framework
|
|
@@ -178,4 +181,7 @@ module.exports = {
|
|
|
178
181
|
computeSimilarity, // Ensemble string similarity
|
|
179
182
|
tokenizeIdentifier, // CamelCase/snake_case tokenization
|
|
180
183
|
stemWord, // Porter Stemmer
|
|
184
|
+
extractKeywords, // Keyword extraction from natural language
|
|
185
|
+
// NOTE: QueryMemoryStore, HybridReranker, TriggerManager moved to Rust core
|
|
186
|
+
// Access via HyperAgentProxy/WASM runtime (SDK remains thin)
|
|
181
187
|
}
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "rust-kgdb",
|
|
3
|
-
"version": "0.6.
|
|
4
|
-
"description": "
|
|
3
|
+
"version": "0.6.43",
|
|
4
|
+
"description": "High-performance RDF/SPARQL database with AI agent framework. GraphDB (449ns lookups, 35x faster than RDFox), GraphFrames analytics (PageRank, motifs), Datalog reasoning, HNSW vector embeddings. HyperMindAgent for schema-aware query generation with audit trails. W3C SPARQL 1.1 compliant. Native performance via Rust + NAPI-RS.",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"types": "index.d.ts",
|
|
7
7
|
"napi": {
|
|
Binary file
|