rust-kgdb 0.6.9 → 0.6.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +46 -0
- package/README.archive.md +2632 -0
- package/README.md +839 -2267
- package/examples/fraud-detection-agent.js +458 -7
- package/examples/underwriting-agent.js +651 -20
- package/hypermind-agent.js +2221 -76
- package/index.js +28 -0
- package/ontology/agent-memory.ttl +421 -0
- package/package.json +10 -2
package/hypermind-agent.js
CHANGED
|
@@ -14,51 +14,1665 @@
|
|
|
14
14
|
|
|
15
15
|
const crypto = require('crypto')
|
|
16
16
|
|
|
17
|
+
// ============================================================================
// CONFIGURATION - All tunable parameters (NO hardcoding)
// ============================================================================

/**
 * CONFIG - Centralized configuration for all tunable parameters.
 *
 * Design Principle: No magic numbers in code. All thresholds, limits, and
 * parameters are defined here and derived from schema where possible.
 * Consumers read these values at call time, so CONFIG must stay a plain,
 * module-level constant object.
 */
const CONFIG = {
  // Schema extraction limits (derived from KG size heuristics).
  // Used by SchemaContext.fromKG as SPARQL LIMIT clauses and sampling caps.
  schema: {
    maxClasses: 500,          // Upper bound on classes pulled from the KG
    maxProperties: 500,       // Upper bound on properties pulled from the KG
    maxSamples: 30,           // Instance-sampling size for schema inference
    fallbackLimit: 200,       // LIMIT for the predicate-frequency fallback query
    cacheExpiryMs: 5 * 60 * 1000 // 5 minutes - TTL for SchemaCache entries
  },

  // Query generation defaults
  query: {
    defaultLimit: 100,        // LIMIT applied when the caller gives none
    maxResultLimit: 1000      // Hard cap on result-set size
  },

  // Similarity and scoring (from research: TypeQL, Ologs)
  scoring: {
    similarityThreshold: 0.5,   // Minimum Jaccard similarity for suggestions
    validationConfidence: 0.95, // Confidence when validation passes
    fallbackConfidence: 0.6     // Confidence when validation fails
  },

  // Memory temporal scoring (from agent-memory.ttl ontology)
  memory: {
    decayRate: 0.995, // Per hour (~12% per day)
    weights: {
      // Weighted blend for memory ranking; weights sum to 1.0.
      recency: 0.3,
      relevance: 0.5,
      importance: 0.2
    },
    defaultGraph: 'http://hypermind.ai/memory/' // Named graph for agent memory
  },

  // Graph algorithms (standard defaults)
  algorithms: {
    pageRank: {
      dampingFactor: 0.85, // Canonical PageRank damping factor
      maxIterations: 20
    },
    embedding: {
      k: 10,          // Nearest-neighbour count for embedding search
      threshold: 0.7  // Minimum cosine similarity to report a match
    }
  },

  // LLM settings
  llm: {
    maxTokens: 1024,
    temperature: 0.1,      // Low for determinism
    defaultConfidence: 0.8 // Confidence attributed to un-validated LLM output
  }
}
|
|
80
|
+
|
|
81
|
+
// ============================================================================
|
|
82
|
+
// SCHEMA CACHE - Shared across all agents (Singleton Pattern)
|
|
83
|
+
// ============================================================================
|
|
84
|
+
|
|
85
|
+
/**
 * SchemaCache - Global schema cache shared across all HyperMind agents.
 *
 * Design Principles:
 * 1. Once computed, a schema is cached under a signature-derived key
 * 2. Same KG/ontology → same key → cache hit
 * 3. TTL-based expiry (configurable via CONFIG.schema.cacheExpiryMs)
 * 4. Cross-agent sharing via a module-level singleton
 * 5. Safe under Node.js's single-threaded event loop
 *
 * Cache key is built from the KG base URI (KG-derived schemas) and/or an
 * ontology hash (imported ontologies), which gives determinism (same input →
 * same cached schema), efficiency (agents share one schema) and freshness
 * (entries expire after the TTL).
 */
class SchemaCache {
  constructor() {
    this._cache = new Map() // key → { schema, timestamp, hits }
    this._stats = { hits: 0, misses: 0, evictions: 0 }
  }

  /**
   * Derive the cache key from the KG identifier and/or ontology hash.
   * Falls back to 'default' when neither part is present.
   */
  _generateKey(kgBaseUri, ontologyHash) {
    const segments = []
    if (kgBaseUri) segments.push(`kg:${kgBaseUri}`)
    if (ontologyHash) segments.push(`onto:${ontologyHash}`)
    return segments.length > 0 ? segments.join('|') : 'default'
  }

  /**
   * Look up a schema. Expired entries are evicted on access.
   * @returns {SchemaContext|null} the cached schema, or null on miss/expiry
   */
  get(kgBaseUri, ontologyHash = null) {
    const key = this._generateKey(kgBaseUri, ontologyHash)
    const entry = this._cache.get(key)

    if (entry === undefined) {
      this._stats.misses++
      return null
    }

    // TTL check: stale entries count as both an eviction and a miss.
    const ageMs = Date.now() - entry.timestamp
    if (ageMs > CONFIG.schema.cacheExpiryMs) {
      this._cache.delete(key)
      this._stats.evictions++
      this._stats.misses++
      return null
    }

    entry.hits++
    this._stats.hits++
    return entry.schema
  }

  /**
   * Insert (or overwrite) a schema entry. Chainable.
   */
  set(kgBaseUri, schema, ontologyHash = null) {
    const key = this._generateKey(kgBaseUri, ontologyHash)
    const entry = { schema, timestamp: Date.now(), hits: 0 }
    this._cache.set(key, entry)
    return this
  }

  /**
   * Cache-aside helper: return the cached schema, or compute, store and
   * return a fresh one.
   * @param {string} kgBaseUri - KG identifier
   * @param {Function} computeFn - Async factory invoked on a cache miss
   * @param {string} ontologyHash - Optional ontology hash
   * @returns {Promise<SchemaContext>}
   */
  async getOrCompute(kgBaseUri, computeFn, ontologyHash = null) {
    const existing = this.get(kgBaseUri, ontologyHash)
    if (existing) return existing

    const fresh = await computeFn()
    this.set(kgBaseUri, fresh, ontologyHash)
    return fresh
  }

  /**
   * Drop a single cache entry.
   */
  invalidate(kgBaseUri, ontologyHash = null) {
    this._cache.delete(this._generateKey(kgBaseUri, ontologyHash))
  }

  /**
   * Drop every entry and reset counters.
   */
  clear() {
    this._cache.clear()
    this._stats = { hits: 0, misses: 0, evictions: 0 }
  }

  /**
   * Snapshot of counters plus current size and hit rate.
   */
  getStats() {
    const total = this._stats.hits + this._stats.misses
    return {
      ...this._stats,
      size: this._cache.size,
      hitRate: total === 0 ? 0 : this._stats.hits / total
    }
  }
}

// Global singleton instance - shared across all agents
const SCHEMA_CACHE = new SchemaCache()
|
|
209
|
+
|
|
210
|
+
// ============================================================================
|
|
211
|
+
// SCHEMA-AWARE GRAPHDB WRAPPER - Auto schema extraction on load
|
|
212
|
+
// ============================================================================
|
|
213
|
+
|
|
214
|
+
/**
|
|
215
|
+
* SchemaAwareGraphDB - Wrapper that auto-extracts schema after load operations
|
|
216
|
+
*
|
|
217
|
+
* Design: Schema extraction is an INTERNAL part of the engine.
|
|
218
|
+
* When data is loaded, schema is extracted ONCE and cached globally.
|
|
219
|
+
*
|
|
220
|
+
* Architecture:
|
|
221
|
+
* 1. Wraps native GraphDb instance
|
|
222
|
+
* 2. Intercepts loadTtl(), loadNtriples() methods
|
|
223
|
+
* 3. After load completes, triggers ASYNC schema extraction
|
|
224
|
+
* 4. Schema stored in global SCHEMA_CACHE for cross-agent sharing
|
|
225
|
+
*
|
|
226
|
+
* Usage:
|
|
227
|
+
* ```javascript
|
|
228
|
+
* const db = new SchemaAwareGraphDB('http://example.org/')
|
|
229
|
+
* await db.loadTtl(ttlData, null) // Schema extracted automatically!
|
|
230
|
+
* const schema = db.getSchema() // Instant access to cached schema
|
|
231
|
+
* ```
|
|
232
|
+
*
|
|
233
|
+
* Mathematical Foundation:
|
|
234
|
+
* - Schema = Category where Objects = Classes, Morphisms = Properties
|
|
235
|
+
* - Load operation = Functor from RDF Instance → Schema Category
|
|
236
|
+
* - Cache = Memoization of functor application
|
|
237
|
+
*/
|
|
238
|
+
class SchemaAwareGraphDB {
|
|
239
|
+
/**
|
|
240
|
+
* @param {string|Object} baseUriOrNativeDb - Base URI string or existing GraphDb instance
|
|
241
|
+
* @param {Object} options - Configuration options
|
|
242
|
+
* @param {string} options.ontology - Pre-built ontology TTL (BYOO)
|
|
243
|
+
* @param {boolean} options.autoExtract - Auto-extract schema on load (default: true)
|
|
244
|
+
* @param {string} options.kgId - Unique identifier for this KG (for cache key)
|
|
245
|
+
*/
|
|
246
|
+
constructor(baseUriOrNativeDb, options = {}) {
|
|
247
|
+
// Handle both string (create new) and object (wrap existing)
|
|
248
|
+
if (typeof baseUriOrNativeDb === 'string') {
|
|
249
|
+
// Lazy load native GraphDb to avoid circular dependency
|
|
250
|
+
const { GraphDb } = require('./index')
|
|
251
|
+
this._db = new GraphDb(baseUriOrNativeDb)
|
|
252
|
+
this._baseUri = baseUriOrNativeDb
|
|
253
|
+
} else if (baseUriOrNativeDb && typeof baseUriOrNativeDb.querySelect === 'function') {
|
|
254
|
+
// Wrap existing GraphDb instance
|
|
255
|
+
this._db = baseUriOrNativeDb
|
|
256
|
+
this._baseUri = baseUriOrNativeDb.baseUri || options.kgId || 'wrapped-kg'
|
|
257
|
+
} else {
|
|
258
|
+
throw new Error('SchemaAwareGraphDB requires a base URI string or GraphDb instance')
|
|
259
|
+
}
|
|
260
|
+
|
|
261
|
+
// Configuration
|
|
262
|
+
this._autoExtract = options.autoExtract !== false // Default: true
|
|
263
|
+
this._kgId = options.kgId || this._baseUri
|
|
264
|
+
this._ontologyTtl = options.ontology || null
|
|
265
|
+
|
|
266
|
+
// Schema state
|
|
267
|
+
this._schema = null
|
|
268
|
+
this._schemaPromise = null
|
|
269
|
+
this._schemaReady = false
|
|
270
|
+
this._schemaExtracted = false // Has initial extraction been done?
|
|
271
|
+
this._dataModified = false // Has data been modified since last extraction?
|
|
272
|
+
|
|
273
|
+
// If ontology provided, parse it immediately
|
|
274
|
+
if (this._ontologyTtl) {
|
|
275
|
+
this._initOntologySchema()
|
|
276
|
+
}
|
|
277
|
+
}
|
|
278
|
+
|
|
279
|
+
/**
|
|
280
|
+
* Initialize schema from provided ontology (synchronous)
|
|
281
|
+
*/
|
|
282
|
+
_initOntologySchema() {
|
|
283
|
+
const ontologyHash = this._computeHash(this._ontologyTtl)
|
|
284
|
+
const cached = SCHEMA_CACHE.get(this._kgId, ontologyHash)
|
|
285
|
+
if (cached) {
|
|
286
|
+
this._schema = cached
|
|
287
|
+
this._schemaReady = true
|
|
288
|
+
return
|
|
289
|
+
}
|
|
290
|
+
|
|
291
|
+
// Parse ontology synchronously (it's just string parsing)
|
|
292
|
+
this._schema = SchemaContext.fromOntology(this._db, this._ontologyTtl, {
|
|
293
|
+
source: 'ontology',
|
|
294
|
+
graphUri: 'http://hypermind.ai/ontology/'
|
|
295
|
+
})
|
|
296
|
+
SCHEMA_CACHE.set(this._kgId, this._schema, ontologyHash)
|
|
297
|
+
this._schemaReady = true
|
|
298
|
+
}
|
|
299
|
+
|
|
300
|
+
/**
|
|
301
|
+
* Simple hash for cache keys
|
|
302
|
+
*/
|
|
303
|
+
_computeHash(str) {
|
|
304
|
+
if (!str) return null
|
|
305
|
+
let hash = 0
|
|
306
|
+
for (let i = 0; i < Math.min(str.length, 500); i++) {
|
|
307
|
+
hash = ((hash << 5) - hash) + str.charCodeAt(i)
|
|
308
|
+
hash = hash & hash
|
|
309
|
+
}
|
|
310
|
+
return 'h_' + Math.abs(hash).toString(16)
|
|
311
|
+
}
|
|
312
|
+
|
|
313
|
+
/**
|
|
314
|
+
* Trigger async schema extraction (non-blocking)
|
|
315
|
+
*
|
|
316
|
+
* TRIGGER CONDITIONS (schema extraction happens ONLY when):
|
|
317
|
+
* 1. loadTtl() or loadNtriples() called (new data)
|
|
318
|
+
* 2. updateInsert() called (data modified)
|
|
319
|
+
* 3. refreshSchema() explicitly called
|
|
320
|
+
* 4. First time (no schema yet)
|
|
321
|
+
*
|
|
322
|
+
* NO TRIGGER (reuses existing schema):
|
|
323
|
+
* - waitForSchema() - just waits for existing
|
|
324
|
+
* - getSchema() - returns cached
|
|
325
|
+
* - querySelect() - read only
|
|
326
|
+
*
|
|
327
|
+
* RACE CONDITION HANDLING:
|
|
328
|
+
* - If agent requests schema before extraction completes, it waits
|
|
329
|
+
* - If schema already in cache (TTL not expired), returns immediately
|
|
330
|
+
* - Promise is stored so multiple waiters share the same extraction
|
|
331
|
+
*
|
|
332
|
+
* @param {boolean} forceExtract - Force new extraction (used by load/insert)
|
|
333
|
+
*/
|
|
334
|
+
_triggerSchemaExtraction(forceExtract = false) {
|
|
335
|
+
if (!this._autoExtract) return Promise.resolve(null)
|
|
336
|
+
|
|
337
|
+
// If schema already extracted and no data modifications, return existing
|
|
338
|
+
if (!forceExtract && this._schemaExtracted && this._schema && !this._dataModified) {
|
|
339
|
+
this._schemaReady = true
|
|
340
|
+
return Promise.resolve(this._schema)
|
|
341
|
+
}
|
|
342
|
+
|
|
343
|
+
// If extraction already in progress, return existing promise (deduplication)
|
|
344
|
+
if (this._schemaPromise) return this._schemaPromise
|
|
345
|
+
|
|
346
|
+
this._schemaPromise = (async () => {
|
|
347
|
+
try {
|
|
348
|
+
// Check cache first (covers TTL case - if cached and no modifications, use it)
|
|
349
|
+
if (!forceExtract && !this._dataModified) {
|
|
350
|
+
const cached = SCHEMA_CACHE.get(this._kgId)
|
|
351
|
+
if (cached) {
|
|
352
|
+
this._schema = cached
|
|
353
|
+
this._schemaReady = true
|
|
354
|
+
this._schemaExtracted = true
|
|
355
|
+
return cached
|
|
356
|
+
}
|
|
357
|
+
}
|
|
358
|
+
|
|
359
|
+
// Extract from KG (async)
|
|
360
|
+
const kgSchema = await SchemaContext.fromKG(this._db)
|
|
361
|
+
|
|
362
|
+
// If we have ontology, merge; otherwise use KG schema
|
|
363
|
+
if (this._ontologyTtl && this._schema) {
|
|
364
|
+
this._schema = SchemaContext.merge(this._schema, kgSchema)
|
|
365
|
+
} else {
|
|
366
|
+
this._schema = kgSchema
|
|
367
|
+
}
|
|
368
|
+
|
|
369
|
+
// Cache globally
|
|
370
|
+
SCHEMA_CACHE.set(this._kgId, this._schema)
|
|
371
|
+
this._schemaReady = true
|
|
372
|
+
this._schemaExtracted = true
|
|
373
|
+
this._dataModified = false // Reset modification flag
|
|
374
|
+
|
|
375
|
+
return this._schema
|
|
376
|
+
} catch (err) {
|
|
377
|
+
// Schema extraction failed - continue without schema
|
|
378
|
+
console.warn('Schema extraction failed:', err.message)
|
|
379
|
+
this._schemaReady = true
|
|
380
|
+
this._schemaExtracted = true
|
|
381
|
+
return null
|
|
382
|
+
} finally {
|
|
383
|
+
// Keep promise for a short time to handle rapid sequential calls
|
|
384
|
+
setTimeout(() => { this._schemaPromise = null }, 100)
|
|
385
|
+
}
|
|
386
|
+
})()
|
|
387
|
+
|
|
388
|
+
return this._schemaPromise
|
|
389
|
+
}
|
|
390
|
+
|
|
391
|
+
/**
|
|
392
|
+
* Wait for schema to be ready (BLOCKING for callers)
|
|
393
|
+
*
|
|
394
|
+
* This is the KEY method for handling race conditions:
|
|
395
|
+
* - If schema already ready → returns immediately
|
|
396
|
+
* - If extraction in progress → waits for completion
|
|
397
|
+
* - If not started → triggers extraction and waits
|
|
398
|
+
*
|
|
399
|
+
* Usage:
|
|
400
|
+
* ```javascript
|
|
401
|
+
* const db = new SchemaAwareGraphDB('http://example.org/')
|
|
402
|
+
* db.loadTtl(data, null) // Triggers async extraction
|
|
403
|
+
*
|
|
404
|
+
* // ... agent starts ...
|
|
405
|
+
* const schema = await db.waitForSchema() // Waits if needed
|
|
406
|
+
* // Now schema is guaranteed to be ready
|
|
407
|
+
* ```
|
|
408
|
+
*
|
|
409
|
+
* @param {number} timeoutMs - Maximum time to wait (default: 30000ms)
|
|
410
|
+
* @returns {Promise<SchemaContext>}
|
|
411
|
+
*/
|
|
412
|
+
async waitForSchema(timeoutMs = 30000) {
|
|
413
|
+
// Fast path: schema already ready
|
|
414
|
+
if (this._schemaReady && this._schema) {
|
|
415
|
+
return this._schema
|
|
416
|
+
}
|
|
417
|
+
|
|
418
|
+
// Check cache (might have been populated by another agent)
|
|
419
|
+
const cached = SCHEMA_CACHE.get(this._kgId)
|
|
420
|
+
if (cached) {
|
|
421
|
+
this._schema = cached
|
|
422
|
+
this._schemaReady = true
|
|
423
|
+
return cached
|
|
424
|
+
}
|
|
425
|
+
|
|
426
|
+
// Wait for in-progress extraction or start new one
|
|
427
|
+
const extractionPromise = this._schemaPromise || this._triggerSchemaExtraction()
|
|
428
|
+
if (!extractionPromise) {
|
|
429
|
+
return null // autoExtract disabled
|
|
430
|
+
}
|
|
431
|
+
|
|
432
|
+
// Race between extraction and timeout
|
|
433
|
+
const timeoutPromise = new Promise((_, reject) => {
|
|
434
|
+
setTimeout(() => reject(new Error(`Schema extraction timeout after ${timeoutMs}ms`)), timeoutMs)
|
|
435
|
+
})
|
|
436
|
+
|
|
437
|
+
try {
|
|
438
|
+
return await Promise.race([extractionPromise, timeoutPromise])
|
|
439
|
+
} catch (err) {
|
|
440
|
+
// Timeout or error - return whatever we have
|
|
441
|
+
console.warn('waitForSchema:', err.message)
|
|
442
|
+
return this._schema || null
|
|
443
|
+
}
|
|
444
|
+
}
|
|
445
|
+
|
|
446
|
+
// =========================================================================
|
|
447
|
+
// WRAPPED METHODS - Intercept load operations for auto schema extraction
|
|
448
|
+
// =========================================================================
|
|
449
|
+
|
|
450
|
+
/**
|
|
451
|
+
* Load TTL data with automatic schema extraction
|
|
452
|
+
*
|
|
453
|
+
* Schema extraction is triggered ONCE after load completes.
|
|
454
|
+
* Subsequent loads will re-trigger extraction.
|
|
455
|
+
*
|
|
456
|
+
* @param {string} data - TTL/Turtle format data
|
|
457
|
+
* @param {string|null} graphUri - Named graph URI (null for default graph)
|
|
458
|
+
*/
|
|
459
|
+
loadTtl(data, graphUri) {
|
|
460
|
+
const result = this._db.loadTtl(data, graphUri)
|
|
461
|
+
|
|
462
|
+
// Mark data as modified - schema needs refresh
|
|
463
|
+
this._dataModified = true
|
|
464
|
+
this._schemaReady = false
|
|
465
|
+
|
|
466
|
+
// Trigger async schema extraction (non-blocking)
|
|
467
|
+
// Schema will be ready by the time queries are issued
|
|
468
|
+
this._triggerSchemaExtraction(true) // forceExtract = true
|
|
469
|
+
|
|
470
|
+
return result
|
|
471
|
+
}
|
|
472
|
+
|
|
473
|
+
/**
|
|
474
|
+
* Load N-Triples data with automatic schema extraction
|
|
475
|
+
*/
|
|
476
|
+
loadNtriples(data, graphUri) {
|
|
477
|
+
const result = this._db.loadNtriples(data, graphUri)
|
|
478
|
+
|
|
479
|
+
// Mark data as modified
|
|
480
|
+
this._dataModified = true
|
|
481
|
+
this._schemaReady = false
|
|
482
|
+
|
|
483
|
+
this._triggerSchemaExtraction(true) // forceExtract = true
|
|
484
|
+
return result
|
|
485
|
+
}
|
|
486
|
+
|
|
487
|
+
// =========================================================================
|
|
488
|
+
// SCHEMA ACCESS METHODS
|
|
489
|
+
// =========================================================================
|
|
490
|
+
|
|
491
|
+
/**
|
|
492
|
+
* Get extracted schema (synchronous - returns cached or null)
|
|
493
|
+
* @returns {SchemaContext|null}
|
|
494
|
+
*/
|
|
495
|
+
getSchema() {
|
|
496
|
+
return this._schema
|
|
497
|
+
}
|
|
498
|
+
|
|
499
|
+
/**
|
|
500
|
+
* Wait for schema extraction to complete
|
|
501
|
+
* @returns {Promise<SchemaContext>}
|
|
502
|
+
*/
|
|
503
|
+
async getSchemaAsync() {
|
|
504
|
+
if (this._schemaReady && this._schema) {
|
|
505
|
+
return this._schema
|
|
506
|
+
}
|
|
507
|
+
if (this._schemaPromise) {
|
|
508
|
+
return this._schemaPromise
|
|
509
|
+
}
|
|
510
|
+
// Trigger extraction if not started
|
|
511
|
+
return this._triggerSchemaExtraction()
|
|
512
|
+
}
|
|
513
|
+
|
|
514
|
+
/**
|
|
515
|
+
* Check if schema is ready (non-blocking)
|
|
516
|
+
*/
|
|
517
|
+
isSchemaReady() {
|
|
518
|
+
return this._schemaReady
|
|
519
|
+
}
|
|
520
|
+
|
|
521
|
+
/**
|
|
522
|
+
* Force schema refresh
|
|
523
|
+
*/
|
|
524
|
+
async refreshSchema() {
|
|
525
|
+
SCHEMA_CACHE.invalidate(this._kgId)
|
|
526
|
+
this._schemaReady = false
|
|
527
|
+
this._schema = null
|
|
528
|
+
this._schemaPromise = null
|
|
529
|
+
return this._triggerSchemaExtraction()
|
|
530
|
+
}
|
|
531
|
+
|
|
532
|
+
// =========================================================================
|
|
533
|
+
// PASSTHROUGH METHODS - Delegate to underlying GraphDb
|
|
534
|
+
// =========================================================================
|
|
535
|
+
|
|
536
|
+
querySelect(sparql) {
|
|
537
|
+
return this._db.querySelect(sparql)
|
|
538
|
+
}
|
|
539
|
+
|
|
540
|
+
queryAsk(sparql) {
|
|
541
|
+
return this._db.queryAsk(sparql)
|
|
542
|
+
}
|
|
543
|
+
|
|
544
|
+
queryConstruct(sparql) {
|
|
545
|
+
return this._db.queryConstruct(sparql)
|
|
546
|
+
}
|
|
547
|
+
|
|
548
|
+
updateInsert(sparql) {
|
|
549
|
+
const result = this._db.updateInsert(sparql)
|
|
550
|
+
// Schema might change after INSERT - mark for lazy refresh
|
|
551
|
+
this._dataModified = true
|
|
552
|
+
this._schemaReady = false
|
|
553
|
+
// Don't trigger extraction immediately - wait until schema is actually needed
|
|
554
|
+
// This is more efficient for batch inserts
|
|
555
|
+
return result
|
|
556
|
+
}
|
|
557
|
+
|
|
558
|
+
updateDelete(sparql) {
|
|
559
|
+
const result = this._db.updateDelete(sparql)
|
|
560
|
+
// Schema might change after DELETE (properties/classes removed)
|
|
561
|
+
this._dataModified = true
|
|
562
|
+
this._schemaReady = false
|
|
563
|
+
return result
|
|
564
|
+
}
|
|
565
|
+
|
|
566
|
+
count() {
|
|
567
|
+
return this._db.count()
|
|
568
|
+
}
|
|
569
|
+
|
|
570
|
+
countTriples() {
|
|
571
|
+
return this._db.countTriples ? this._db.countTriples() : this._db.count()
|
|
572
|
+
}
|
|
573
|
+
|
|
574
|
+
clear() {
|
|
575
|
+
const result = this._db.clear()
|
|
576
|
+
// Clear schema cache too
|
|
577
|
+
SCHEMA_CACHE.invalidate(this._kgId)
|
|
578
|
+
this._schema = null
|
|
579
|
+
this._schemaReady = false
|
|
580
|
+
return result
|
|
581
|
+
}
|
|
582
|
+
|
|
583
|
+
getVersion() {
|
|
584
|
+
return this._db.getVersion ? this._db.getVersion() : 'unknown'
|
|
585
|
+
}
|
|
586
|
+
|
|
587
|
+
getGraphUri() {
|
|
588
|
+
return this._db.getGraphUri ? this._db.getGraphUri() : this._baseUri
|
|
589
|
+
}
|
|
590
|
+
|
|
591
|
+
/**
|
|
592
|
+
* Get underlying native GraphDb instance
|
|
593
|
+
*/
|
|
594
|
+
getNativeDb() {
|
|
595
|
+
return this._db
|
|
596
|
+
}
|
|
597
|
+
|
|
598
|
+
/**
|
|
599
|
+
* Get KG identifier (for cache key)
|
|
600
|
+
*/
|
|
601
|
+
getKgId() {
|
|
602
|
+
return this._kgId
|
|
603
|
+
}
|
|
604
|
+
}
|
|
605
|
+
|
|
606
|
+
/**
 * Factory function to create a schema-aware GraphDB.
 *
 * Usage:
 * ```javascript
 * const db = createSchemaAwareGraphDB('http://example.org/', {
 *   ontology: insuranceOntologyTtl, // Optional: BYOO
 *   autoExtract: true               // Default: true
 * })
 * ```
 *
 * @param {string} baseUri - Base URI for the new knowledge graph
 * @param {Object} options - Passed through to the SchemaAwareGraphDB constructor
 * @returns {SchemaAwareGraphDB}
 */
function createSchemaAwareGraphDB(baseUri, options = {}) {
  const wrapped = new SchemaAwareGraphDB(baseUri, options)
  return wrapped
}
|
|
620
|
+
|
|
621
|
+
/**
 * Wrap an existing GraphDb with schema awareness.
 *
 * Usage:
 * ```javascript
 * const nativeDb = new GraphDb('http://example.org/')
 * const smartDb = wrapWithSchemaAwareness(nativeDb, { kgId: 'my-kg' })
 * ```
 *
 * @param {Object} nativeDb - An existing GraphDb instance to wrap
 * @param {Object} options - Passed through to the SchemaAwareGraphDB constructor
 * @returns {SchemaAwareGraphDB}
 */
function wrapWithSchemaAwareness(nativeDb, options = {}) {
  const wrapper = new SchemaAwareGraphDB(nativeDb, options)
  return wrapper
}
|
|
633
|
+
|
|
17
634
|
// ============================================================================
|
|
18
635
|
// TYPE SYSTEM (Hindley-Milner + Refinement Types)
|
|
19
636
|
// ============================================================================
|
|
20
637
|
|
|
21
|
-
/**
|
|
22
|
-
* TypeId - Complete type system ensuring no hallucination
|
|
23
|
-
* Every value has a proof of its type correctness
|
|
24
|
-
*/
|
|
25
|
-
const TypeId = {
|
|
26
|
-
// Base types
|
|
27
|
-
String: 'String',
|
|
28
|
-
Int64: 'Int64',
|
|
29
|
-
Float64: 'Float64',
|
|
30
|
-
Bool: 'Bool',
|
|
31
|
-
Unit: 'Unit',
|
|
638
|
+
/**
 * TypeId - Complete type system ensuring no hallucination.
 * Every value has a proof of its type correctness.
 *
 * Base and refinement type tags are plain strings; compound types are
 * constructor functions returning the formatted tag string.
 */
const TypeId = {
  // Base types
  String: 'String',
  Int64: 'Int64',
  Float64: 'Float64',
  Bool: 'Bool',
  Unit: 'Unit',

  // RDF-native types (knowledge graph first-class citizens)
  Node: 'Node',
  Triple: 'Triple',
  Quad: 'Quad',
  BindingSet: 'BindingSet',

  // Compound types (higher-kinded)
  List: (t) => `List<${t}>`,
  Option: (t) => `Option<${t}>`,
  Result: (t, e) => `Result<${t}, ${e}>`,
  Map: (k, v) => `Map<${k}, ${v}>`,

  // Refinement types (business domain values with constraints)
  RiskScore: 'RiskScore', // Float64 where 0.0 <= x <= 1.0
  PolicyNumber: 'PolicyNumber', // String matching /^POL-\d{4}-\d{4}$/
  ClaimAmount: 'ClaimAmount', // Currency where amount > 0
  ClaimId: 'ClaimId', // String matching /^CLM-\d{4}-\d+$/
  CreditScore: 'CreditScore', // Int64 where 300 <= x <= 850
  ConfidenceScore: 'ConfidenceScore', // Float64 where 0.0 <= x <= 1.0

  // Schema types (for type-safe graph queries)
  SchemaType: (name) => `Schema<${name}>`,

  /**
   * Check whether a producer's output type may feed a consumer's input type.
   * BUGFIX: previously `TypeId.isCompatible(null, 'String')` threw a
   * TypeError (property access on null); non-string operands now safely
   * return false. Identical inputs (including null === null) still match.
   * @param {*} output - Producer's output type tag
   * @param {*} input - Consumer's expected input type tag
   * @returns {boolean}
   */
  isCompatible: (output, input) => {
    if (output === input) return true
    // Guard: compatibility rules below only apply to string type tags.
    if (typeof output !== 'string' || typeof input !== 'string') return false
    if (output === 'BindingSet' && input === 'String') return true
    if (output.startsWith('List<') && input === 'String') return true
    return false
  }
}
|
|
681
|
+
|
|
682
|
+
// ============================================================================
|
|
683
|
+
// CONTEXT THEORY - Type-theoretic foundations for SPARQL validation
|
|
684
|
+
// ============================================================================
|
|
685
|
+
|
|
686
|
+
/**
|
|
687
|
+
* SchemaContext (Γ) - Type-theoretic context for knowledge graph schema
|
|
688
|
+
*
|
|
689
|
+
* Mathematical Foundation (David Spivak's Ologs + Functorial Data Migration):
|
|
690
|
+
* - Schema S is a category where Objects = Classes, Morphisms = Properties
|
|
691
|
+
* - Context Γ = (Classes, Properties, Domains, Ranges, Constraints)
|
|
692
|
+
* - Type Judgment: Γ ⊢ e : τ ("in context Γ, expression e has type τ")
|
|
693
|
+
*
|
|
694
|
+
* References:
|
|
695
|
+
* - Spivak & Kent, "Ologs: A Categorical Framework for Knowledge Representation" (2012)
|
|
696
|
+
* - Spivak, "Functorial Data Migration" (2012)
|
|
697
|
+
* - TypeQL: "A Type-Theoretic & Polymorphic Query Language" (2024)
|
|
698
|
+
*/
|
|
699
|
+
class SchemaContext {
|
|
700
|
+
constructor() {
|
|
701
|
+
// Classes (objects in schema category)
|
|
702
|
+
this.classes = new Map() // className → { uri, superclasses, constraints }
|
|
703
|
+
|
|
704
|
+
// Properties (morphisms in schema category)
|
|
705
|
+
this.properties = new Map() // propName → { uri, domain, range, functional, inverse }
|
|
706
|
+
|
|
707
|
+
// Variable bindings (typing context Γ)
|
|
708
|
+
this.bindings = new Map() // ?var → Type
|
|
709
|
+
|
|
710
|
+
// Path equations (functorial constraints)
|
|
711
|
+
this.pathEquations = [] // [{ lhs: [p1, p2], rhs: [p3] }] meaning p1;p2 = p3
|
|
712
|
+
|
|
713
|
+
// Schema signature hash (for determinism)
|
|
714
|
+
this._signatureHash = null
|
|
715
|
+
}
|
|
716
|
+
|
|
717
|
+
/**
|
|
718
|
+
* Build context from knowledge graph schema (Functorial extraction)
|
|
719
|
+
*
|
|
720
|
+
* Design: Schema is derived from KG, not hardcoded.
|
|
721
|
+
* This implements Spivak's Ologs: KG Instance → Schema Category
|
|
722
|
+
*
|
|
723
|
+
* Research-backed scalability for Enterprise KGs (ISWC 2024, ABSTAT-HD):
|
|
724
|
+
* 1. VoID-first: Try VoID descriptions (O(1) if available)
|
|
725
|
+
* 2. RDFS/OWL metadata: Extract explicit schema declarations
|
|
726
|
+
* 3. Frequency-based sampling: For very large KGs, sample by predicate frequency
|
|
727
|
+
* 4. ShEx generation: Human-readable schema for LLM consumption
|
|
728
|
+
*
|
|
729
|
+
* References:
|
|
730
|
+
* - VoID: https://www.w3.org/TR/void/
|
|
731
|
+
* - ABSTAT-HD: Scalable KG profiling
|
|
732
|
+
* - sparql-llm: RAG over SPARQL endpoints (2024)
|
|
733
|
+
*/
|
|
734
|
+
static async fromKG(kg, options = {}) {
|
|
735
|
+
const ctx = new SchemaContext()
|
|
736
|
+
|
|
737
|
+
if (!kg) return ctx
|
|
738
|
+
|
|
739
|
+
// Merge options with CONFIG (allows override for enterprise scale)
|
|
740
|
+
const config = {
|
|
741
|
+
maxClasses: options.maxClasses || CONFIG.schema.maxClasses,
|
|
742
|
+
maxProperties: options.maxProperties || CONFIG.schema.maxProperties,
|
|
743
|
+
fallbackLimit: options.fallbackLimit || CONFIG.schema.fallbackLimit,
|
|
744
|
+
sampleSize: options.sampleSize || CONFIG.schema.maxSamples,
|
|
745
|
+
useExplicitSchemaOnly: options.useExplicitSchemaOnly || false,
|
|
746
|
+
useVoID: options.useVoID !== false // Try VoID by default
|
|
747
|
+
}
|
|
748
|
+
|
|
749
|
+
try {
|
|
750
|
+
// STRATEGY 1: Try VoID descriptions first (research-backed best practice)
|
|
751
|
+
// VoID provides schema metadata in O(1) if available
|
|
752
|
+
if (config.useVoID) {
|
|
753
|
+
const voidQuery = `
|
|
754
|
+
PREFIX void: <http://rdfs.org/ns/void#>
|
|
755
|
+
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
|
|
756
|
+
SELECT DISTINCT ?prop ?class WHERE {
|
|
757
|
+
{ [] void:property ?prop }
|
|
758
|
+
UNION
|
|
759
|
+
{ [] void:class ?class }
|
|
760
|
+
UNION
|
|
761
|
+
{ [] void:classPartition [ void:class ?class ] }
|
|
762
|
+
UNION
|
|
763
|
+
{ [] void:propertyPartition [ void:property ?prop ] }
|
|
764
|
+
} LIMIT ${config.maxProperties}
|
|
765
|
+
`
|
|
766
|
+
try {
|
|
767
|
+
const voidResults = kg.querySelect(voidQuery)
|
|
768
|
+
for (const r of voidResults) {
|
|
769
|
+
const prop = r.bindings?.prop || r.prop
|
|
770
|
+
const cls = r.bindings?.class || r.class
|
|
771
|
+
if (prop) ctx.properties.set(prop, { uri: prop, domain: null, range: null, functional: false, source: 'void' })
|
|
772
|
+
if (cls) ctx.classes.set(cls, { uri: cls, superclasses: [], constraints: [], source: 'void' })
|
|
773
|
+
}
|
|
774
|
+
} catch (e) {
|
|
775
|
+
// VoID not available, continue with other strategies
|
|
776
|
+
}
|
|
777
|
+
}
|
|
778
|
+
|
|
779
|
+
// STRATEGY 2: Extract RDFS/OWL explicit schema (if VoID incomplete)
|
|
780
|
+
if (ctx.classes.size < 10) {
|
|
781
|
+
const classQuery = `
|
|
782
|
+
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
|
|
783
|
+
PREFIX owl: <http://www.w3.org/2002/07/owl#>
|
|
784
|
+
SELECT DISTINCT ?class ?super ?label WHERE {
|
|
785
|
+
{ ?class a rdfs:Class } UNION { ?class a owl:Class }
|
|
786
|
+
OPTIONAL { ?class rdfs:subClassOf ?super }
|
|
787
|
+
OPTIONAL { ?class rdfs:label ?label }
|
|
788
|
+
} LIMIT ${config.maxClasses}
|
|
789
|
+
`
|
|
790
|
+
const classResults = kg.querySelect(classQuery)
|
|
791
|
+
for (const r of classResults) {
|
|
792
|
+
const cls = r.bindings?.class || r.class
|
|
793
|
+
const sup = r.bindings?.super || r.super
|
|
794
|
+
const label = r.bindings?.label || r.label
|
|
795
|
+
if (cls && !ctx.classes.has(cls)) {
|
|
796
|
+
ctx.classes.set(cls, { uri: cls, label, superclasses: sup ? [sup] : [], constraints: [], source: 'rdfs' })
|
|
797
|
+
}
|
|
798
|
+
}
|
|
799
|
+
}
|
|
800
|
+
|
|
801
|
+
// STRATEGY 3: Extract property morphisms with domain/range
|
|
802
|
+
if (ctx.properties.size < 10) {
|
|
803
|
+
const propQuery = `
|
|
804
|
+
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
|
|
805
|
+
PREFIX owl: <http://www.w3.org/2002/07/owl#>
|
|
806
|
+
SELECT DISTINCT ?prop ?domain ?range ?label WHERE {
|
|
807
|
+
{ ?prop a rdf:Property } UNION { ?prop a owl:ObjectProperty } UNION { ?prop a owl:DatatypeProperty }
|
|
808
|
+
OPTIONAL { ?prop rdfs:domain ?domain }
|
|
809
|
+
OPTIONAL { ?prop rdfs:range ?range }
|
|
810
|
+
OPTIONAL { ?prop rdfs:label ?label }
|
|
811
|
+
} LIMIT ${config.maxProperties}
|
|
812
|
+
`
|
|
813
|
+
const propResults = kg.querySelect(propQuery)
|
|
814
|
+
for (const r of propResults) {
|
|
815
|
+
const prop = r.bindings?.prop || r.prop
|
|
816
|
+
if (prop && !ctx.properties.has(prop)) {
|
|
817
|
+
ctx.properties.set(prop, {
|
|
818
|
+
uri: prop,
|
|
819
|
+
label: r.bindings?.label || r.label || null,
|
|
820
|
+
domain: r.bindings?.domain || r.domain || null,
|
|
821
|
+
range: r.bindings?.range || r.range || null,
|
|
822
|
+
functional: false,
|
|
823
|
+
source: 'rdfs'
|
|
824
|
+
})
|
|
825
|
+
}
|
|
826
|
+
}
|
|
827
|
+
}
|
|
828
|
+
|
|
829
|
+
// STRATEGY 4: Frequency-based sampling (for large KGs without explicit schema)
|
|
830
|
+
// This is O(sample_size), not O(total_triples) - ABSTAT-HD approach
|
|
831
|
+
if (ctx.properties.size === 0 && !config.useExplicitSchemaOnly) {
|
|
832
|
+
const instanceQuery = `SELECT DISTINCT ?p WHERE { ?s ?p ?o } LIMIT ${config.fallbackLimit}`
|
|
833
|
+
const instResults = kg.querySelect(instanceQuery)
|
|
834
|
+
for (const r of instResults) {
|
|
835
|
+
const prop = r.bindings?.p || r.p
|
|
836
|
+
if (prop) ctx.properties.set(prop, { uri: prop, domain: null, range: null, functional: false, source: 'instance' })
|
|
837
|
+
}
|
|
838
|
+
}
|
|
839
|
+
|
|
840
|
+
// STRATEGY 5: Infer classes from rdf:type usage (statistical sampling)
|
|
841
|
+
if (ctx.classes.size === 0 && !config.useExplicitSchemaOnly) {
|
|
842
|
+
const typeQuery = `
|
|
843
|
+
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
|
|
844
|
+
SELECT DISTINCT ?type WHERE { ?s rdf:type ?type } LIMIT ${config.fallbackLimit}
|
|
845
|
+
`
|
|
846
|
+
const typeResults = kg.querySelect(typeQuery)
|
|
847
|
+
for (const r of typeResults) {
|
|
848
|
+
const cls = r.bindings?.type || r.type
|
|
849
|
+
if (cls) ctx.classes.set(cls, { uri: cls, superclasses: [], constraints: [], source: 'instance' })
|
|
850
|
+
}
|
|
851
|
+
}
|
|
852
|
+
|
|
853
|
+
ctx._computeSignature()
|
|
854
|
+
} catch (err) {
|
|
855
|
+
// Schema extraction failed - return empty context
|
|
856
|
+
}
|
|
857
|
+
|
|
858
|
+
return ctx
|
|
859
|
+
}
|
|
860
|
+
|
|
861
|
+
/**
|
|
862
|
+
* Build context from existing ontology (Bring Your Own Ontology - BYOO)
|
|
863
|
+
*
|
|
864
|
+
* For enterprise organizations with dedicated ontology teams,
|
|
865
|
+
* this allows importing pre-built ontologies rather than deriving from KG.
|
|
866
|
+
*
|
|
867
|
+
* Supported formats:
|
|
868
|
+
* - TTL (Turtle) - Most common for ontologies
|
|
869
|
+
* - OWL/RDF/XML via KG loader
|
|
870
|
+
* - ShEx/SHACL shapes
|
|
871
|
+
*
|
|
872
|
+
* Design: Ontology-first approach aligns with enterprise data governance
|
|
873
|
+
* where schema is controlled and versioned separately from instance data.
|
|
874
|
+
*
|
|
875
|
+
* Mathematical Foundation (Spivak Ologs):
|
|
876
|
+
* - Classes map to Objects in schema category
|
|
877
|
+
* - Properties map to Morphisms with domain/range
|
|
878
|
+
* - Subclass relations map to functorial embeddings
|
|
879
|
+
*
|
|
880
|
+
* @param {Object} kg - GraphDB instance to load ontology into (optional)
|
|
881
|
+
* @param {string} ontologyTtl - Ontology in TTL format
|
|
882
|
+
* @param {Object} options - Configuration options
|
|
883
|
+
* @returns {SchemaContext} Populated schema context
|
|
884
|
+
*/
|
|
885
|
+
  static fromOntology(kg, ontologyTtl, options = {}) {
    // Build a SchemaContext from a user-supplied ontology string (BYOO).
    // Two extraction strategies, tried in order:
    //   1. Load the TTL into the KG and read classes/properties back via SPARQL.
    //   2. If loading or querying fails (or yields no classes), fall back to a
    //      best-effort regex scan of the raw TTL text.
    const ctx = new SchemaContext()

    // Guard: no ontology text → return an empty context.
    // NOTE(review): this early return skips ctx._computeSignature(); the
    // signature stays at whatever the constructor set — confirm intended.
    if (!ontologyTtl || typeof ontologyTtl !== 'string') {
      return ctx
    }

    // Source marker for provenance (stamped on every extracted class/property)
    const source = options.source || 'ontology'
    // Namespace used to expand bare local names found by the regex fallback
    const namespace = options.namespace || 'http://example.org/'

    // If KG provided, load ontology into a named graph for querying.
    // A load failure silently demotes us to regex parsing (loadedKg = null).
    let loadedKg = kg
    if (kg && typeof kg.loadTtl === 'function') {
      try {
        const graphUri = options.graphUri || 'http://hypermind.ai/ontology/'
        kg.loadTtl(ontologyTtl, graphUri)
        loadedKg = kg
      } catch (e) {
        // Fall back to regex parsing if KG load fails
        loadedKg = null
      }
    }

    // Strategy 1: Use KG SPARQL if loaded successfully
    if (loadedKg && typeof loadedKg.querySelect === 'function') {
      try {
        // Extract classes (Objects in schema category)
        const classQuery = `
          PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
          PREFIX owl: <http://www.w3.org/2002/07/owl#>
          PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
          SELECT DISTINCT ?class ?super ?label ?comment WHERE {
            { ?class a rdfs:Class }
            UNION { ?class a owl:Class }
            OPTIONAL { ?class rdfs:subClassOf ?super }
            OPTIONAL { ?class rdfs:label ?label }
            OPTIONAL { ?class rdfs:comment ?comment }
          } LIMIT ${CONFIG.schema.maxClasses}
        `
        const classResults = loadedKg.querySelect(classQuery)
        // A class may appear in multiple rows (one per superclass); rows are
        // folded into a single entry, accumulating superclasses.
        for (const r of classResults) {
          // Result rows may expose values under .bindings or as direct fields
          const cls = r.bindings?.class || r.class
          const sup = r.bindings?.super || r.super
          const label = r.bindings?.label || r.label
          const comment = r.bindings?.comment || r.comment
          if (cls) {
            const existing = ctx.classes.get(cls)
            ctx.classes.set(cls, {
              uri: cls,
              label: label || existing?.label,
              comment: comment || existing?.comment,
              superclasses: sup ? [...(existing?.superclasses || []), sup] : (existing?.superclasses || []),
              constraints: existing?.constraints || [],
              source
            })
          }
        }

        // Extract properties (Morphisms with domain/range)
        const propQuery = `
          PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
          PREFIX owl: <http://www.w3.org/2002/07/owl#>
          PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
          SELECT DISTINCT ?prop ?domain ?range ?label ?functional WHERE {
            { ?prop a rdf:Property }
            UNION { ?prop a owl:ObjectProperty }
            UNION { ?prop a owl:DatatypeProperty }
            OPTIONAL { ?prop rdfs:domain ?domain }
            OPTIONAL { ?prop rdfs:range ?range }
            OPTIONAL { ?prop rdfs:label ?label }
            OPTIONAL { ?prop a owl:FunctionalProperty . BIND(true AS ?functional) }
          } LIMIT ${CONFIG.schema.maxProperties}
        `
        const propResults = loadedKg.querySelect(propQuery)
        for (const r of propResults) {
          const prop = r.bindings?.prop || r.prop
          const domain = r.bindings?.domain || r.domain
          const range = r.bindings?.range || r.range
          const label = r.bindings?.label || r.label
          const functional = r.bindings?.functional || r.functional
          if (prop) {
            // Unlike classes, repeated rows simply overwrite (last row wins)
            ctx.properties.set(prop, {
              uri: prop,
              domain: domain || null,
              range: range || null,
              label: label || null,
              functional: !!functional,
              source
            })
          }
        }

        // Extract inverse properties (category theory: adjoint functors)
        const inverseQuery = `
          PREFIX owl: <http://www.w3.org/2002/07/owl#>
          SELECT ?prop ?inverse WHERE {
            ?prop owl:inverseOf ?inverse
          }
        `
        try {
          const inverseResults = loadedKg.querySelect(inverseQuery)
          for (const r of inverseResults) {
            const prop = r.bindings?.prop || r.prop
            const inverse = r.bindings?.inverse || r.inverse
            // Only annotate properties that were already extracted above
            if (prop && inverse && ctx.properties.has(prop)) {
              ctx.properties.get(prop).inverse = inverse
            }
          }
        } catch (e) {
          // Inverse query not supported - continue
        }

      } catch (e) {
        // SPARQL extraction failed - fall back to regex
        loadedKg = null
      }
    }

    // Strategy 2: Regex parsing (fallback for when no KG available)
    // Also runs when SPARQL succeeded but discovered zero classes.
    if (!loadedKg || ctx.classes.size === 0) {
      // Parse classes: @prefix lines, rdfs:Class, owl:Class declarations
      // Covers both <full-uri> and :prefixedName subject forms.
      const classPatterns = [
        /<([^>]+)>\s+a\s+(rdfs:Class|owl:Class)/gi,
        /<([^>]+)>\s+rdf:type\s+(rdfs:Class|owl:Class)/gi,
        /:(\w+)\s+a\s+(rdfs:Class|owl:Class)/gi
      ]
      for (const pattern of classPatterns) {
        let match
        while ((match = pattern.exec(ontologyTtl)) !== null) {
          // Bare local names (no ':') are expanded with the fallback namespace
          const uri = match[1].includes(':') ? match[1] : namespace + match[1]
          if (!ctx.classes.has(uri)) {
            ctx.classes.set(uri, { uri, superclasses: [], constraints: [], source })
          }
        }
      }

      // Parse properties: rdf:Property, owl:ObjectProperty, owl:DatatypeProperty
      const propPatterns = [
        /<([^>]+)>\s+a\s+(rdf:Property|owl:ObjectProperty|owl:DatatypeProperty)/gi,
        /:(\w+)\s+a\s+(rdf:Property|owl:ObjectProperty|owl:DatatypeProperty)/gi
      ]
      for (const pattern of propPatterns) {
        let match
        while ((match = pattern.exec(ontologyTtl)) !== null) {
          const uri = match[1].includes(':') ? match[1] : namespace + match[1]
          if (!ctx.properties.has(uri)) {
            ctx.properties.set(uri, { uri, domain: null, range: null, functional: false, source })
          }
        }
      }

      // Parse domain/range from TTL
      // NOTE(review): only the <full-uri> form is matched here; properties
      // declared with prefixed names keep domain/range null — confirm this
      // limitation is acceptable for the intended ontologies.
      const domainPattern = /<([^>]+)>\s+rdfs:domain\s+<([^>]+)>/gi
      let domainMatch
      while ((domainMatch = domainPattern.exec(ontologyTtl)) !== null) {
        const prop = domainMatch[1]
        const domain = domainMatch[2]
        if (ctx.properties.has(prop)) {
          ctx.properties.get(prop).domain = domain
        }
      }

      const rangePattern = /<([^>]+)>\s+rdfs:range\s+<([^>]+)>/gi
      let rangeMatch
      while ((rangeMatch = rangePattern.exec(ontologyTtl)) !== null) {
        const prop = rangeMatch[1]
        const range = rangeMatch[2]
        if (ctx.properties.has(prop)) {
          ctx.properties.get(prop).range = range
        }
      }
    }

    ctx._computeSignature()
    return ctx
  }
|
|
1062
|
+
|
|
1063
|
+
/**
|
|
1064
|
+
* Create a merged context from multiple sources (KG + Ontology)
|
|
1065
|
+
*
|
|
1066
|
+
* For enterprise scenarios where:
|
|
1067
|
+
* 1. Core ontology is maintained by ontology team
|
|
1068
|
+
* 2. Extensions/instances are discovered from KG
|
|
1069
|
+
*
|
|
1070
|
+
* @param {SchemaContext[]} contexts - Array of contexts to merge
|
|
1071
|
+
* @returns {SchemaContext} Merged context
|
|
1072
|
+
*/
|
|
1073
|
+
static merge(...contexts) {
|
|
1074
|
+
const merged = new SchemaContext()
|
|
1075
|
+
|
|
1076
|
+
for (const ctx of contexts) {
|
|
1077
|
+
if (!ctx) continue
|
|
1078
|
+
|
|
1079
|
+
// Merge classes (later contexts override earlier)
|
|
1080
|
+
for (const [uri, cls] of ctx.classes) {
|
|
1081
|
+
const existing = merged.classes.get(uri)
|
|
1082
|
+
merged.classes.set(uri, {
|
|
1083
|
+
...cls,
|
|
1084
|
+
superclasses: [...new Set([...(existing?.superclasses || []), ...cls.superclasses])],
|
|
1085
|
+
source: existing ? `${existing.source}+${cls.source}` : cls.source
|
|
1086
|
+
})
|
|
1087
|
+
}
|
|
1088
|
+
|
|
1089
|
+
// Merge properties
|
|
1090
|
+
for (const [uri, prop] of ctx.properties) {
|
|
1091
|
+
const existing = merged.properties.get(uri)
|
|
1092
|
+
merged.properties.set(uri, {
|
|
1093
|
+
...prop,
|
|
1094
|
+
domain: prop.domain || existing?.domain,
|
|
1095
|
+
range: prop.range || existing?.range,
|
|
1096
|
+
source: existing ? `${existing.source}+${prop.source}` : prop.source
|
|
1097
|
+
})
|
|
1098
|
+
}
|
|
1099
|
+
|
|
1100
|
+
// Merge bindings
|
|
1101
|
+
for (const [varName, type] of ctx.bindings) {
|
|
1102
|
+
merged.bindings.set(varName, type)
|
|
1103
|
+
}
|
|
1104
|
+
|
|
1105
|
+
// Merge path equations
|
|
1106
|
+
merged.pathEquations.push(...ctx.pathEquations)
|
|
1107
|
+
}
|
|
1108
|
+
|
|
1109
|
+
merged._computeSignature()
|
|
1110
|
+
return merged
|
|
1111
|
+
}
|
|
1112
|
+
|
|
1113
|
+
/**
|
|
1114
|
+
* Convert to simple schema format (backward compatibility)
|
|
1115
|
+
*/
|
|
1116
|
+
toSimpleSchema() {
|
|
1117
|
+
return {
|
|
1118
|
+
predicates: Array.from(this.properties.keys()),
|
|
1119
|
+
classes: Array.from(this.classes.keys()),
|
|
1120
|
+
examples: [], // Derived on demand
|
|
1121
|
+
timestamp: new Date().toISOString()
|
|
1122
|
+
}
|
|
1123
|
+
}
|
|
1124
|
+
|
|
1125
|
+
/**
|
|
1126
|
+
* Compute deterministic signature hash for the schema
|
|
1127
|
+
* Same schema → same hash (ensures idempotent query generation)
|
|
1128
|
+
*/
|
|
1129
|
+
_computeSignature() {
|
|
1130
|
+
const classKeys = Array.from(this.classes.keys()).sort()
|
|
1131
|
+
const propKeys = Array.from(this.properties.keys()).sort()
|
|
1132
|
+
const signature = JSON.stringify({ classes: classKeys, properties: propKeys })
|
|
1133
|
+
|
|
1134
|
+
// Simple hash function
|
|
1135
|
+
let hash = 0
|
|
1136
|
+
for (let i = 0; i < signature.length; i++) {
|
|
1137
|
+
const char = signature.charCodeAt(i)
|
|
1138
|
+
hash = ((hash << 5) - hash) + char
|
|
1139
|
+
hash = hash & hash
|
|
1140
|
+
}
|
|
1141
|
+
this._signatureHash = 'sig_' + Math.abs(hash).toString(16)
|
|
1142
|
+
}
|
|
1143
|
+
|
|
1144
|
+
/**
|
|
1145
|
+
* Introduce variable binding: Γ, ?x : T
|
|
1146
|
+
*/
|
|
1147
|
+
bindVariable(varName, type) {
|
|
1148
|
+
const normalized = varName.startsWith('?') ? varName : '?' + varName
|
|
1149
|
+
this.bindings.set(normalized, type)
|
|
1150
|
+
return this
|
|
1151
|
+
}
|
|
1152
|
+
|
|
1153
|
+
/**
|
|
1154
|
+
* Type lookup: Γ ⊢ ?x : τ
|
|
1155
|
+
*/
|
|
1156
|
+
getType(varName) {
|
|
1157
|
+
const normalized = varName.startsWith('?') ? varName : '?' + varName
|
|
1158
|
+
return this.bindings.get(normalized) || 'Any'
|
|
1159
|
+
}
|
|
1160
|
+
|
|
1161
|
+
/**
|
|
1162
|
+
* Check if property P has domain D: Γ contains (P : D → ?)
|
|
1163
|
+
*/
|
|
1164
|
+
getDomain(propertyUri) {
|
|
1165
|
+
const prop = this.properties.get(propertyUri)
|
|
1166
|
+
return prop?.domain || null
|
|
1167
|
+
}
|
|
1168
|
+
|
|
1169
|
+
/**
|
|
1170
|
+
* Check if property P has range R: Γ contains (P : ? → R)
|
|
1171
|
+
*/
|
|
1172
|
+
getRange(propertyUri) {
|
|
1173
|
+
const prop = this.properties.get(propertyUri)
|
|
1174
|
+
return prop?.range || null
|
|
1175
|
+
}
|
|
1176
|
+
|
|
1177
|
+
/**
|
|
1178
|
+
* Get all properties with given domain
|
|
1179
|
+
*/
|
|
1180
|
+
getPropertiesForClass(classUri) {
|
|
1181
|
+
const result = []
|
|
1182
|
+
for (const [uri, prop] of this.properties) {
|
|
1183
|
+
if (prop.domain === classUri || prop.domain === null) {
|
|
1184
|
+
result.push(uri)
|
|
1185
|
+
}
|
|
1186
|
+
}
|
|
1187
|
+
return result
|
|
1188
|
+
}
|
|
1189
|
+
|
|
1190
|
+
/**
|
|
1191
|
+
* Serialize context for hashing (determinism)
|
|
1192
|
+
*/
|
|
1193
|
+
toCanonical() {
|
|
1194
|
+
return {
|
|
1195
|
+
signature: this._signatureHash,
|
|
1196
|
+
classCount: this.classes.size,
|
|
1197
|
+
propertyCount: this.properties.size,
|
|
1198
|
+
bindings: Object.fromEntries(this.bindings)
|
|
1199
|
+
}
|
|
1200
|
+
}
|
|
1201
|
+
}
|
|
1202
|
+
|
|
1203
|
+
/**
|
|
1204
|
+
* TypeJudgment - Formal type judgment Γ ⊢ e : τ
|
|
1205
|
+
*
|
|
1206
|
+
* Based on Hindley-Milner type inference with extensions for:
|
|
1207
|
+
* - Dependent types (property domain/range)
|
|
1208
|
+
* - Refinement types (business constraints)
|
|
1209
|
+
*/
|
|
1210
|
+
class TypeJudgment {
|
|
1211
|
+
constructor(context, expression, type, rule) {
|
|
1212
|
+
this.context = context // Γ (SchemaContext)
|
|
1213
|
+
this.expression = expression // e (SPARQL triple pattern or expression)
|
|
1214
|
+
this.type = type // τ (the derived type)
|
|
1215
|
+
this.rule = rule // derivation rule name
|
|
1216
|
+
this.premises = [] // sub-judgments (for proof tree)
|
|
1217
|
+
this.timestamp = Date.now()
|
|
1218
|
+
}
|
|
1219
|
+
|
|
1220
|
+
/**
|
|
1221
|
+
* Add premise (sub-proof)
|
|
1222
|
+
*/
|
|
1223
|
+
addPremise(judgment) {
|
|
1224
|
+
this.premises.push(judgment)
|
|
1225
|
+
return this
|
|
1226
|
+
}
|
|
1227
|
+
|
|
1228
|
+
/**
|
|
1229
|
+
* Check if judgment is valid (all premises valid)
|
|
1230
|
+
*/
|
|
1231
|
+
isValid() {
|
|
1232
|
+
if (this.premises.length === 0) return true
|
|
1233
|
+
return this.premises.every(p => p.isValid())
|
|
1234
|
+
}
|
|
1235
|
+
|
|
1236
|
+
/**
|
|
1237
|
+
* Convert to proof tree string
|
|
1238
|
+
*/
|
|
1239
|
+
toProofTree(indent = 0) {
|
|
1240
|
+
const pad = ' '.repeat(indent)
|
|
1241
|
+
let result = `${pad}${this.rule}: ${this.expression} : ${this.type}\n`
|
|
1242
|
+
for (const premise of this.premises) {
|
|
1243
|
+
result += premise.toProofTree(indent + 1)
|
|
1244
|
+
}
|
|
1245
|
+
return result
|
|
1246
|
+
}
|
|
1247
|
+
|
|
1248
|
+
/**
|
|
1249
|
+
* Compute deterministic hash of judgment
|
|
1250
|
+
*/
|
|
1251
|
+
hash() {
|
|
1252
|
+
const content = JSON.stringify({
|
|
1253
|
+
ctx: this.context.toCanonical(),
|
|
1254
|
+
expr: this.expression,
|
|
1255
|
+
type: this.type,
|
|
1256
|
+
rule: this.rule
|
|
1257
|
+
})
|
|
1258
|
+
let hash = 0
|
|
1259
|
+
for (let i = 0; i < content.length; i++) {
|
|
1260
|
+
hash = ((hash << 5) - hash) + content.charCodeAt(i)
|
|
1261
|
+
hash = hash & hash
|
|
1262
|
+
}
|
|
1263
|
+
return 'judge_' + Math.abs(hash).toString(16)
|
|
1264
|
+
}
|
|
1265
|
+
}
|
|
1266
|
+
|
|
1267
|
+
/**
|
|
1268
|
+
* QueryValidator - Validates SPARQL queries using type-theoretic derivation rules
|
|
1269
|
+
*
|
|
1270
|
+
* Derivation Rules (based on categorical semantics):
|
|
1271
|
+
*
|
|
1272
|
+
* 1. VAR-INTRO (Variable Introduction):
|
|
1273
|
+
* ────────────────
|
|
1274
|
+
* Γ ⊢ ?x : Fresh
|
|
1275
|
+
*
|
|
1276
|
+
* 2. TYPE-INTRO (Type Introduction via rdf:type):
|
|
1277
|
+
* Γ ⊢ ?x rdf:type C : Valid
|
|
1278
|
+
* ─────────────────────────
|
|
1279
|
+
* Γ, ?x : C ⊢ ... : Valid
|
|
1280
|
+
*
|
|
1281
|
+
* 3. PROP-CHECK (Property Domain/Range Check):
|
|
1282
|
+
* Γ ⊢ P : D → R Γ ⊢ ?s : D Γ ⊢ ?o : R
|
|
1283
|
+
* ─────────────────────────────────────────
|
|
1284
|
+
* Γ ⊢ (?s P ?o) : Valid
|
|
1285
|
+
*
|
|
1286
|
+
* 4. COMPOSE (Morphism Composition - Category Theory):
|
|
1287
|
+
* Γ ⊢ P₁ : A → B Γ ⊢ P₂ : B → C
|
|
1288
|
+
* ─────────────────────────────────
|
|
1289
|
+
* Γ ⊢ P₁ ; P₂ : A → C
|
|
1290
|
+
*/
|
|
1291
|
+
class QueryValidator {
  /**
   * Validates SPARQL constructs against a SchemaContext using the
   * derivation rules documented above (VAR-INTRO, TYPE-INTRO, PROP-CHECK,
   * COMPOSE). Accumulates errors, warnings, and recorded derivations.
   *
   * Fix: `_suggestPredicate` previously hardcoded the similarity threshold
   * as 0.5 despite CONFIG.scoring.similarityThreshold existing for exactly
   * this purpose; it now reads the configured value (same number today, so
   * behavior is unchanged, but tuning now happens in one place).
   *
   * @param {SchemaContext} context - Typing context Γ for all judgments.
   */
  constructor(context) {
    this.context = context
    this.derivations = [] // recorded PROP-CHECK judgments
    this.errors = [] // hard failures (domain/range/composition mismatches)
    this.warnings = [] // soft issues (e.g. predicates missing from the schema)
  }

  /**
   * Validate a single SPARQL triple pattern (?s P ?o).
   * Side effects: may bind the subject variable (for rdf:type triples),
   * append to this.errors / this.warnings, and record the judgment.
   *
   * @returns {TypeJudgment} Judgment whose premises form the proof tree.
   */
  validateTriplePattern(subject, predicate, object) {
    // Rule: VAR-INTRO for subject
    const subjectType = this._inferType(subject)
    const subjectJudgment = new TypeJudgment(
      this.context, subject, subjectType, 'VAR-INTRO'
    )

    // Rule: PROP-CHECK for predicate
    const domain = this.context.getDomain(predicate)
    const range = this.context.getRange(predicate)

    // If predicate not in schema, warn but allow (open-world leniency)
    if (!this.context.properties.has(predicate)) {
      this.warnings.push({
        code: 'UNKNOWN_PREDICATE',
        message: `Predicate not in schema: ${predicate}`,
        suggestion: this._suggestPredicate(predicate)
      })
    }

    // Rule: TYPE-INTRO if predicate is rdf:type — binds subject : object in
    // the context and returns immediately (TYPE-INTRO judgments are NOT
    // added to this.derivations; only PROP-CHECK judgments are recorded).
    if (predicate === 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type' ||
        predicate === 'rdf:type' || predicate === 'a') {
      this.context.bindVariable(subject, object)
      return new TypeJudgment(
        this.context,
        `${subject} rdf:type ${object}`,
        'Valid',
        'TYPE-INTRO'
      ).addPremise(subjectJudgment)
    }

    // Rule: PROP-CHECK with domain/range validation
    const objectType = this._inferType(object)
    const objectJudgment = new TypeJudgment(
      this.context, object, objectType, 'VAR-INTRO'
    )

    // Check domain compatibility ('Any' is unconstrained and always passes)
    if (domain && subjectType !== 'Any' && subjectType !== domain) {
      this.errors.push({
        code: 'DOMAIN_MISMATCH',
        message: `Subject type ${subjectType} incompatible with property domain ${domain}`,
        expression: `${subject} ${predicate} ${object}`
      })
    }

    // Check range compatibility
    if (range && objectType !== 'Any' && objectType !== range) {
      this.errors.push({
        code: 'RANGE_MISMATCH',
        message: `Object type ${objectType} incompatible with property range ${range}`,
        expression: `${subject} ${predicate} ${object}`
      })
    }

    // NOTE: validity reflects ALL errors accumulated so far on this
    // validator, not only those raised by this triple (original behavior,
    // preserved here).
    const judgment = new TypeJudgment(
      this.context,
      `${subject} ${predicate} ${object}`,
      this.errors.length === 0 ? 'Valid' : 'Invalid',
      'PROP-CHECK'
    ).addPremise(subjectJudgment).addPremise(objectJudgment)

    this.derivations.push(judgment)
    return judgment
  }

  /**
   * Validate morphism composition (property path) P1 ; P2 via the COMPOSE
   * rule: range(P1) must equal domain(P2). Unknown (null) domains/ranges
   * are permissive.
   *
   * @returns {TypeJudgment} 'Invalid' on mismatch, otherwise the composite
   *   type `domain(P1) → range(P2)`.
   */
  validateComposition(property1, property2) {
    const range1 = this.context.getRange(property1)
    const domain2 = this.context.getDomain(property2)

    // Check composition validity: range of P1 must match domain of P2
    if (range1 && domain2 && range1 !== domain2) {
      this.errors.push({
        code: 'COMPOSITION_INVALID',
        message: `Cannot compose ${property1} (range: ${range1}) with ${property2} (domain: ${domain2})`,
        rule: 'COMPOSE'
      })
      return new TypeJudgment(
        this.context,
        `${property1} ; ${property2}`,
        'Invalid',
        'COMPOSE'
      )
    }

    const domain1 = this.context.getDomain(property1)
    const range2 = this.context.getRange(property2)

    // Composite morphism has type domain(P1) → range(P2)
    return new TypeJudgment(
      this.context,
      `${property1} ; ${property2}`,
      `${domain1 || 'Any'} → ${range2 || 'Any'}`,
      'COMPOSE'
    )
  }

  /**
   * Infer the type of an expression: context lookup for ?variables,
   * datatype for literals (honoring an explicit ^^datatype suffix),
   * 'Class' for known class IRIs, 'IRI' otherwise, 'Any' for non-strings.
   */
  _inferType(expr) {
    if (typeof expr !== 'string') return 'Any'

    // Variable: check context
    if (expr.startsWith('?')) {
      return this.context.getType(expr)
    }

    // Literal
    if (expr.startsWith('"') || expr.startsWith("'")) {
      if (expr.includes('^^')) {
        const datatypeMatch = expr.match(/\^\^<?([^>]+)>?$/)
        if (datatypeMatch) return datatypeMatch[1]
      }
      return 'xsd:string'
    }

    // IRI - check if it's a class
    if (this.context.classes.has(expr)) {
      return 'Class'
    }

    return 'IRI'
  }

  /**
   * Suggest the most similar known predicate by fuzzy matching on local
   * names. The minimum acceptable score comes from
   * CONFIG.scoring.similarityThreshold (no hardcoded magic number).
   *
   * @returns {string|null} Best matching predicate URI, or null when
   *   nothing clears the threshold.
   */
  _suggestPredicate(predicate) {
    const predicates = Array.from(this.context.properties.keys())
    const localName = predicate.split(/[#/]/).pop().toLowerCase()
    const threshold = CONFIG.scoring.similarityThreshold

    let bestMatch = null
    let bestScore = 0

    for (const p of predicates) {
      const pLocal = p.split(/[#/]/).pop().toLowerCase()
      const score = this._similarityScore(localName, pLocal)
      if (score > bestScore && score > threshold) {
        bestScore = score
        bestMatch = p
      }
    }

    return bestMatch
  }

  /**
   * Jaccard similarity over character bigrams: 1.0 for identical strings,
   * 0 when either string is too short to produce a bigram.
   */
  _similarityScore(a, b) {
    if (a === b) return 1.0
    const bigramsA = new Set()
    const bigramsB = new Set()
    for (let i = 0; i < a.length - 1; i++) bigramsA.add(a.slice(i, i + 2))
    for (let i = 0; i < b.length - 1; i++) bigramsB.add(b.slice(i, i + 2))
    const intersection = new Set([...bigramsA].filter(x => bigramsB.has(x)))
    const union = new Set([...bigramsA, ...bigramsB])
    return union.size > 0 ? intersection.size / union.size : 0
  }

  /**
   * Summarize validation: overall validity, errors, warnings, and the
   * recorded derivations with their combined proof tree.
   */
  getResult() {
    return {
      valid: this.errors.length === 0,
      errors: this.errors,
      warnings: this.warnings,
      derivations: this.derivations.map(d => ({
        expression: d.expression,
        type: d.type,
        rule: d.rule,
        hash: d.hash()
      })),
      proofTree: this.derivations.map(d => d.toProofTree()).join('\n')
    }
  }
}
|
|
1486
|
+
|
|
1487
|
+
/**
|
|
1488
|
+
* ProofDAG - Directed Acyclic Graph of reasoning steps (Curry-Howard)
|
|
1489
|
+
*
|
|
1490
|
+
* Every answer produced by the agent has a proof showing:
|
|
1491
|
+
* 1. What SPARQL queries were executed
|
|
1492
|
+
* 2. What rules were applied
|
|
1493
|
+
* 3. What intermediate results were derived
|
|
1494
|
+
* 4. Full chain from question to answer
|
|
1495
|
+
*
|
|
1496
|
+
* Based on Curry-Howard correspondence:
|
|
1497
|
+
* - Types ↔ Propositions
|
|
1498
|
+
* - Programs ↔ Proofs
|
|
1499
|
+
* - Tool executions ↔ Inference steps
|
|
1500
|
+
*/
|
|
1501
|
+
class ProofDAG {
|
|
1502
|
+
  constructor(rootClaim) {
    this.rootClaim = rootClaim // The final answer/claim the DAG justifies
    this.nodes = new Map() // nodeId → { id, claim, evidence, rule, children, timestamp }
    this.edges = [] // { from, to, relation } parent→child 'supports' links
    this._nodeCounter = 0 // monotonic id source consumed by _addNode

    // Create root node for the claim itself (rule 'ROOT', no evidence)
    this.rootId = this._addNode(rootClaim, null, 'ROOT')
  }
|
|
1511
|
+
|
|
1512
|
+
/**
|
|
1513
|
+
* Add node to proof DAG
|
|
1514
|
+
*/
|
|
1515
|
+
_addNode(claim, evidence, rule) {
|
|
1516
|
+
const nodeId = `node_${++this._nodeCounter}`
|
|
1517
|
+
this.nodes.set(nodeId, {
|
|
1518
|
+
id: nodeId,
|
|
1519
|
+
claim,
|
|
1520
|
+
evidence,
|
|
1521
|
+
rule,
|
|
1522
|
+
children: [],
|
|
1523
|
+
timestamp: Date.now()
|
|
1524
|
+
})
|
|
1525
|
+
return nodeId
|
|
1526
|
+
}
|
|
1527
|
+
|
|
1528
|
+
/**
|
|
1529
|
+
* Add evidence (sub-proof) supporting a claim
|
|
1530
|
+
*/
|
|
1531
|
+
addEvidence(parentId, claim, evidence, rule) {
|
|
1532
|
+
const nodeId = this._addNode(claim, evidence, rule)
|
|
1533
|
+
const parent = this.nodes.get(parentId)
|
|
1534
|
+
if (parent) {
|
|
1535
|
+
parent.children.push(nodeId)
|
|
1536
|
+
this.edges.push({ from: parentId, to: nodeId, relation: 'supports' })
|
|
1537
|
+
}
|
|
1538
|
+
return nodeId
|
|
1539
|
+
}
|
|
1540
|
+
|
|
1541
|
+
/**
|
|
1542
|
+
* Add SPARQL query execution as evidence
|
|
1543
|
+
*/
|
|
1544
|
+
addSparqlEvidence(parentId, sparql, bindings) {
|
|
1545
|
+
return this.addEvidence(
|
|
1546
|
+
parentId,
|
|
1547
|
+
`Query returned ${bindings.length} results`,
|
|
1548
|
+
{ type: 'sparql', query: sparql, resultCount: bindings.length },
|
|
1549
|
+
'SPARQL_EXEC'
|
|
1550
|
+
)
|
|
1551
|
+
}
|
|
1552
|
+
|
|
1553
|
+
/**
|
|
1554
|
+
* Add Datalog inference as evidence
|
|
1555
|
+
*/
|
|
1556
|
+
addDatalogEvidence(parentId, rules, inferredFacts) {
|
|
1557
|
+
return this.addEvidence(
|
|
1558
|
+
parentId,
|
|
1559
|
+
`Inferred ${inferredFacts.length} facts from ${rules.length} rules`,
|
|
1560
|
+
{ type: 'datalog', rules, factCount: inferredFacts.length },
|
|
1561
|
+
'DATALOG_INFER'
|
|
1562
|
+
)
|
|
1563
|
+
}
|
|
1564
|
+
|
|
1565
|
+
/**
|
|
1566
|
+
* Add embedding similarity as evidence
|
|
1567
|
+
*/
|
|
1568
|
+
addEmbeddingEvidence(parentId, entity, similar, threshold) {
|
|
1569
|
+
return this.addEvidence(
|
|
1570
|
+
parentId,
|
|
1571
|
+
`Found ${similar.length} entities similar to ${entity}`,
|
|
1572
|
+
{ type: 'embedding', entity, similarCount: similar.length, threshold },
|
|
1573
|
+
'EMBEDDING_SEARCH'
|
|
1574
|
+
)
|
|
1575
|
+
}
|
|
1576
|
+
|
|
1577
|
+
/**
|
|
1578
|
+
* Add memory retrieval as evidence
|
|
1579
|
+
*/
|
|
1580
|
+
addMemoryEvidence(parentId, episodes) {
|
|
1581
|
+
return this.addEvidence(
|
|
1582
|
+
parentId,
|
|
1583
|
+
`Retrieved ${episodes.length} relevant episodes from memory`,
|
|
1584
|
+
{ type: 'memory', episodeCount: episodes.length },
|
|
1585
|
+
'MEMORY_RETRIEVAL'
|
|
1586
|
+
)
|
|
1587
|
+
}
|
|
32
1588
|
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
1589
|
+
/**
|
|
1590
|
+
* Compute deterministic hash of entire proof
|
|
1591
|
+
*/
|
|
1592
|
+
computeHash() {
|
|
1593
|
+
const content = JSON.stringify({
|
|
1594
|
+
root: this.rootClaim,
|
|
1595
|
+
nodes: Array.from(this.nodes.values()).map(n => ({
|
|
1596
|
+
claim: n.claim,
|
|
1597
|
+
rule: n.rule,
|
|
1598
|
+
children: n.children
|
|
1599
|
+
}))
|
|
1600
|
+
})
|
|
38
1601
|
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
1602
|
+
let hash = 0
|
|
1603
|
+
for (let i = 0; i < content.length; i++) {
|
|
1604
|
+
hash = ((hash << 5) - hash) + content.charCodeAt(i)
|
|
1605
|
+
hash = hash & hash
|
|
1606
|
+
}
|
|
1607
|
+
return 'proof_' + Math.abs(hash).toString(16)
|
|
1608
|
+
}
|
|
44
1609
|
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
1610
|
+
  /**
   * Verify proof integrity by walking the graph from the root.
   *
   * Checks that every reachable node id resolves to a stored node and that
   * no node is reached twice. NOTE(review): reaching a node twice is
   * reported as a cycle — in a true DAG a node with two parents would also
   * trip this check, so this effectively enforces tree-shaped proofs;
   * confirm that is the intended invariant.
   *
   * @returns {{valid: boolean, nodeCount?: number, error?: string}}
   */
  verify() {
    const visited = new Set()
    const queue = [this.rootId]  // BFS frontier, seeded with the root

    while (queue.length > 0) {
      const nodeId = queue.shift()
      if (visited.has(nodeId)) {
        // Revisit means a cycle (or a shared child) — proof is rejected.
        return { valid: false, error: `Cycle detected at ${nodeId}` }
      }
      visited.add(nodeId)

      const node = this.nodes.get(nodeId)
      if (!node) {
        // A child id was referenced but no node was ever stored for it.
        return { valid: false, error: `Missing node ${nodeId}` }
      }

      queue.push(...node.children)
    }

    return { valid: true, nodeCount: visited.size }
  }
|
|
1634
|
+
|
|
1635
|
+
/**
|
|
1636
|
+
* Serialize proof for storage/transmission
|
|
1637
|
+
*/
|
|
1638
|
+
serialize() {
|
|
1639
|
+
return {
|
|
1640
|
+
rootClaim: this.rootClaim,
|
|
1641
|
+
rootId: this.rootId,
|
|
1642
|
+
proofHash: this.computeHash(),
|
|
1643
|
+
nodes: Object.fromEntries(this.nodes),
|
|
1644
|
+
edges: this.edges,
|
|
1645
|
+
verification: this.verify()
|
|
1646
|
+
}
|
|
1647
|
+
}
|
|
1648
|
+
|
|
1649
|
+
/**
|
|
1650
|
+
* Human-readable proof trace
|
|
1651
|
+
*/
|
|
1652
|
+
toExplanation(nodeId = this.rootId, indent = 0) {
|
|
1653
|
+
const node = this.nodes.get(nodeId)
|
|
1654
|
+
if (!node) return ''
|
|
1655
|
+
|
|
1656
|
+
const pad = ' '.repeat(indent)
|
|
1657
|
+
let result = `${pad}[${node.rule}] ${node.claim}\n`
|
|
1658
|
+
|
|
1659
|
+
if (node.evidence) {
|
|
1660
|
+
if (node.evidence.type === 'sparql') {
|
|
1661
|
+
result += `${pad} Query: ${node.evidence.query.slice(0, 100)}...\n`
|
|
1662
|
+
} else if (node.evidence.type === 'datalog') {
|
|
1663
|
+
result += `${pad} Applied ${node.evidence.rules.length} rules\n`
|
|
1664
|
+
} else if (node.evidence.type === 'embedding') {
|
|
1665
|
+
result += `${pad} Similarity search for: ${node.evidence.entity}\n`
|
|
1666
|
+
} else if (node.evidence.type === 'memory') {
|
|
1667
|
+
result += `${pad} From ${node.evidence.episodeCount} past episodes\n`
|
|
1668
|
+
}
|
|
1669
|
+
}
|
|
1670
|
+
|
|
1671
|
+
for (const childId of node.children) {
|
|
1672
|
+
result += this.toExplanation(childId, indent + 1)
|
|
1673
|
+
}
|
|
1674
|
+
|
|
1675
|
+
return result
|
|
62
1676
|
}
|
|
63
1677
|
}
|
|
64
1678
|
|
|
@@ -157,59 +1771,523 @@ const TOOL_REGISTRY = {
|
|
|
157
1771
|
// ============================================================================
|
|
158
1772
|
|
|
159
1773
|
/**
|
|
160
|
-
* LLMPlanner -
|
|
161
|
-
*
|
|
1774
|
+
* LLMPlanner - Schema-aware planner with Context Theory validation
|
|
1775
|
+
*
|
|
1776
|
+
* Architecture (based on David Spivak's Ologs + Functorial Data Migration):
|
|
1777
|
+
* 1. Schema Extraction: Build SchemaContext (Γ) from KG
|
|
1778
|
+
* 2. Type-theoretic Validation: Validate queries using derivation rules
|
|
1779
|
+
* 3. Deterministic Generation: Same schema + same intent = same query
|
|
1780
|
+
* 4. LLM for Summarization Only: Not for critical reasoning paths
|
|
1781
|
+
* 5. Proof DAG: Every answer has verifiable reasoning chain
|
|
1782
|
+
*
|
|
1783
|
+
* Mathematical Foundation:
|
|
1784
|
+
* - Schema S is a category: Objects = Classes, Morphisms = Properties
|
|
1785
|
+
* - Context Γ = (Classes, Properties, Domains, Ranges, Constraints)
|
|
1786
|
+
* - Type Judgment: Γ ⊢ e : τ ensures query validity
|
|
1787
|
+
* - Derivation Rules: VAR-INTRO, TYPE-INTRO, PROP-CHECK, COMPOSE
|
|
1788
|
+
*
|
|
1789
|
+
* Three modes:
|
|
1790
|
+
* - Demo Mode: Pattern matching with hardcoded templates (no LLM)
|
|
1791
|
+
* - Validated Mode: Schema context + type-theoretic validation
|
|
1792
|
+
* - Production Mode: LLM for intent + context-validated SPARQL
|
|
162
1793
|
*/
|
|
163
1794
|
class LLMPlanner {
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
1795
|
+
/**
|
|
1796
|
+
* @param {Object} config - Planner configuration
|
|
1797
|
+
* @param {Object} config.kg - Knowledge graph instance (required for schema)
|
|
1798
|
+
* @param {string} config.model - LLM model name (e.g., 'claude-sonnet-4', 'gpt-4o')
|
|
1799
|
+
* @param {string} config.apiKey - API key for LLM provider
|
|
1800
|
+
* @param {Object} config.tools - Tool registry (defaults to TOOL_REGISTRY)
|
|
1801
|
+
*/
|
|
1802
|
+
constructor(config = {}) {
|
|
1803
|
+
this.kg = config.kg || null
|
|
1804
|
+
this.model = config.model || null
|
|
1805
|
+
this.apiKey = config.apiKey || null
|
|
1806
|
+
this.tools = config.tools || TOOL_REGISTRY
|
|
1807
|
+
|
|
1808
|
+
// Bring Your Own Ontology (BYOO) support
|
|
1809
|
+
// For enterprise orgs with dedicated ontology teams
|
|
1810
|
+
this._ontologyTtl = config.ontology || null
|
|
1811
|
+
this._ontologyHash = this._ontologyTtl ? this._computeOntologyHash(config.ontology) : null
|
|
1812
|
+
|
|
1813
|
+
// Schema cache (simple schema for backward compat)
|
|
1814
|
+
this._schemaCache = null
|
|
1815
|
+
this._schemaCacheExpiry = 0
|
|
1816
|
+
|
|
1817
|
+
// Context Theory: Type-theoretic schema context (Γ)
|
|
1818
|
+
// NOTE: Uses global SCHEMA_CACHE for cross-agent sharing
|
|
1819
|
+
this._schemaContext = null
|
|
1820
|
+
this._contextCacheExpiry = 0
|
|
1821
|
+
|
|
1822
|
+
// KG identifier for cache key
|
|
1823
|
+
this._kgBaseUri = config.kgBaseUri || (config.kg?.baseUri) || 'default-kg'
|
|
1824
|
+
|
|
1825
|
+
// Intent patterns (deterministic - not LLM dependent)
|
|
1826
|
+
this.intentPatterns = {
|
|
1827
|
+
query: ['find', 'search', 'list', 'show', 'get', 'select'],
|
|
1828
|
+
infer: ['infer', 'deduce', 'derive', 'reason', 'conclude'],
|
|
1829
|
+
similar: ['similar', 'like', 'related', 'nearest', 'closest'],
|
|
1830
|
+
pattern: ['pattern', 'motif', 'circular', 'cycle', 'ring', 'fraud', 'suspicious'],
|
|
1831
|
+
rank: ['rank', 'important', 'pagerank', 'score', 'risk'],
|
|
1832
|
+
compliance: ['compliance', 'check', 'validate', 'verify'],
|
|
1833
|
+
aggregate: ['count', 'total', 'how many', 'sum', 'average']
|
|
1834
|
+
}
|
|
1835
|
+
|
|
1836
|
+
// Query template registry (deterministic - schema-based)
|
|
1837
|
+
this._queryTemplates = new Map()
|
|
1838
|
+
}
|
|
1839
|
+
|
|
1840
|
+
/**
|
|
1841
|
+
* Compute hash of ontology TTL for cache key
|
|
1842
|
+
*/
|
|
1843
|
+
_computeOntologyHash(ttl) {
|
|
1844
|
+
if (!ttl) return null
|
|
1845
|
+
let hash = 0
|
|
1846
|
+
for (let i = 0; i < Math.min(ttl.length, 1000); i++) {
|
|
1847
|
+
hash = ((hash << 5) - hash) + ttl.charCodeAt(i)
|
|
1848
|
+
hash = hash & hash
|
|
1849
|
+
}
|
|
1850
|
+
return 'onto_' + Math.abs(hash).toString(16)
|
|
1851
|
+
}
|
|
1852
|
+
|
|
1853
|
+
/**
|
|
1854
|
+
* Build type-theoretic schema context (Γ) from KG or imported ontology
|
|
1855
|
+
*
|
|
1856
|
+
* Uses global SCHEMA_CACHE for cross-agent sharing:
|
|
1857
|
+
* - Same KG/ontology → same cached schema
|
|
1858
|
+
* - Multiple agents share schema (efficiency)
|
|
1859
|
+
* - TTL-based expiry (freshness)
|
|
1860
|
+
*
|
|
1861
|
+
* Schema Sources (in priority order):
|
|
1862
|
+
* 1. Imported ontology (BYOO) - for enterprise ontology teams
|
|
1863
|
+
* 2. KG-derived schema - extract from instance data
|
|
1864
|
+
* 3. Merged (ontology + KG extensions) - hybrid approach
|
|
1865
|
+
*
|
|
1866
|
+
* @param {boolean} forceRefresh - Force schema refresh
|
|
1867
|
+
* @returns {Promise<SchemaContext>}
|
|
1868
|
+
*/
|
|
1869
|
+
async buildSchemaContext(forceRefresh = false) {
|
|
1870
|
+
// Try global cache first (cross-agent sharing)
|
|
1871
|
+
if (!forceRefresh) {
|
|
1872
|
+
const cached = SCHEMA_CACHE.get(this._kgBaseUri, this._ontologyHash)
|
|
1873
|
+
if (cached) {
|
|
1874
|
+
this._schemaContext = cached
|
|
1875
|
+
return cached
|
|
1876
|
+
}
|
|
1877
|
+
}
|
|
1878
|
+
|
|
1879
|
+
// Build schema from appropriate source
|
|
1880
|
+
let schemaContext
|
|
1881
|
+
|
|
1882
|
+
if (this._ontologyTtl) {
|
|
1883
|
+
// BYOO: Use imported ontology
|
|
1884
|
+
const ontologySchema = SchemaContext.fromOntology(this.kg, this._ontologyTtl, {
|
|
1885
|
+
source: 'ontology',
|
|
1886
|
+
graphUri: 'http://hypermind.ai/ontology/'
|
|
1887
|
+
})
|
|
1888
|
+
|
|
1889
|
+
// Optionally merge with KG-derived extensions
|
|
1890
|
+
if (this.kg) {
|
|
1891
|
+
const kgSchema = await SchemaContext.fromKG(this.kg, { useExplicitSchemaOnly: false })
|
|
1892
|
+
schemaContext = SchemaContext.merge(ontologySchema, kgSchema)
|
|
1893
|
+
} else {
|
|
1894
|
+
schemaContext = ontologySchema
|
|
1895
|
+
}
|
|
1896
|
+
} else if (this.kg) {
|
|
1897
|
+
// KG-derived schema only
|
|
1898
|
+
schemaContext = await SchemaContext.fromKG(this.kg)
|
|
1899
|
+
} else {
|
|
1900
|
+
// Empty schema
|
|
1901
|
+
schemaContext = new SchemaContext()
|
|
1902
|
+
}
|
|
1903
|
+
|
|
1904
|
+
// Store in global cache for cross-agent sharing
|
|
1905
|
+
SCHEMA_CACHE.set(this._kgBaseUri, schemaContext, this._ontologyHash)
|
|
1906
|
+
|
|
1907
|
+
// Also store local reference
|
|
1908
|
+
this._schemaContext = schemaContext
|
|
1909
|
+
this._contextCacheExpiry = Date.now() + CONFIG.schema.cacheExpiryMs
|
|
1910
|
+
|
|
1911
|
+
return schemaContext
|
|
1912
|
+
}
|
|
1913
|
+
|
|
1914
|
+
  /**
   * Get global schema cache statistics (for monitoring/debugging).
   *
   * Delegates to the shared SCHEMA_CACHE singleton, so the numbers reflect
   * activity from every agent in the process, not just this planner.
   *
   * @returns {Object} Statistics as reported by SCHEMA_CACHE.getStats()
   */
  getSchemaCacheStats() {
    return SCHEMA_CACHE.getStats()
  }
|
|
1920
|
+
|
|
1921
|
+
/**
|
|
1922
|
+
* Invalidate schema cache (call when schema changes)
|
|
1923
|
+
*/
|
|
1924
|
+
invalidateSchemaCache() {
|
|
1925
|
+
SCHEMA_CACHE.invalidate(this._kgBaseUri, this._ontologyHash)
|
|
1926
|
+
this._schemaContext = null
|
|
1927
|
+
this._contextCacheExpiry = 0
|
|
1928
|
+
}
|
|
1929
|
+
|
|
1930
|
+
/**
|
|
1931
|
+
* Validate SPARQL query using type-theoretic derivation rules
|
|
1932
|
+
* Returns validation result with proof tree
|
|
1933
|
+
*/
|
|
1934
|
+
validateQuery(sparql, schemaContext) {
|
|
1935
|
+
const validator = new QueryValidator(schemaContext || this._schemaContext || new SchemaContext())
|
|
1936
|
+
|
|
1937
|
+
// Parse SPARQL and extract triple patterns (simplified)
|
|
1938
|
+
const triplePatterns = this._extractTriplePatterns(sparql)
|
|
1939
|
+
|
|
1940
|
+
for (const { s, p, o } of triplePatterns) {
|
|
1941
|
+
validator.validateTriplePattern(s, p, o)
|
|
1942
|
+
}
|
|
1943
|
+
|
|
1944
|
+
return validator.getResult()
|
|
1945
|
+
}
|
|
1946
|
+
|
|
1947
|
+
/**
|
|
1948
|
+
* Extract triple patterns from SPARQL query (simplified parser)
|
|
1949
|
+
*/
|
|
1950
|
+
_extractTriplePatterns(sparql) {
|
|
1951
|
+
const patterns = []
|
|
1952
|
+
// Match triple patterns: ?var <uri> ?var or ?var prefix:local ?var
|
|
1953
|
+
const tripleRegex = /([?]\w+|<[^>]+>)\s+([?]\w+|<[^>]+>|[\w]+:[\w]+)\s+([?]\w+|<[^>]+>|"[^"]*")/g
|
|
1954
|
+
let match
|
|
1955
|
+
while ((match = tripleRegex.exec(sparql)) !== null) {
|
|
1956
|
+
patterns.push({ s: match[1], p: match[2], o: match[3] })
|
|
1957
|
+
}
|
|
1958
|
+
return patterns
|
|
1959
|
+
}
|
|
1960
|
+
|
|
1961
|
+
/**
|
|
1962
|
+
* Generate deterministic query hash for caching
|
|
1963
|
+
* Same schema + same intent = same hash
|
|
1964
|
+
*/
|
|
1965
|
+
_computeQueryHash(intent, schemaContext) {
|
|
1966
|
+
const intentKey = Object.entries(intent).filter(([_, v]) => v).map(([k]) => k).sort().join(':')
|
|
1967
|
+
const schemaKey = schemaContext?.toCanonical?.()?.signature || 'no-schema'
|
|
1968
|
+
const content = `${intentKey}|${schemaKey}`
|
|
1969
|
+
|
|
1970
|
+
let hash = 0
|
|
1971
|
+
for (let i = 0; i < content.length; i++) {
|
|
1972
|
+
hash = ((hash << 5) - hash) + content.charCodeAt(i)
|
|
1973
|
+
hash = hash & hash
|
|
1974
|
+
}
|
|
1975
|
+
return 'qhash_' + Math.abs(hash).toString(16)
|
|
1976
|
+
}
|
|
1977
|
+
|
|
1978
|
+
/**
|
|
1979
|
+
* Extract schema from knowledge graph
|
|
1980
|
+
* @returns {Object} Schema with predicates, classes, examples
|
|
1981
|
+
*/
|
|
1982
|
+
async extractSchema(forceRefresh = false) {
|
|
1983
|
+
if (!this.kg) return { predicates: [], classes: [], examples: [] }
|
|
1984
|
+
|
|
1985
|
+
const now = Date.now()
|
|
1986
|
+
if (!forceRefresh && this._schemaCache && now < this._schemaCacheExpiry) {
|
|
1987
|
+
return this._schemaCache
|
|
1988
|
+
}
|
|
1989
|
+
|
|
1990
|
+
const schema = { predicates: [], classes: [], examples: [], timestamp: new Date().toISOString() }
|
|
1991
|
+
|
|
1992
|
+
try {
|
|
1993
|
+
// Get unique predicates
|
|
1994
|
+
const predResults = this.kg.querySelect('SELECT DISTINCT ?p WHERE { ?s ?p ?o } LIMIT 200')
|
|
1995
|
+
schema.predicates = predResults.map(r => r.bindings?.p || r.p).filter(Boolean)
|
|
1996
|
+
|
|
1997
|
+
// Get RDF types
|
|
1998
|
+
const typeResults = this.kg.querySelect(`
|
|
1999
|
+
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
|
|
2000
|
+
SELECT DISTINCT ?type WHERE { ?s rdf:type ?type } LIMIT 100
|
|
2001
|
+
`)
|
|
2002
|
+
schema.classes = typeResults.map(r => r.bindings?.type || r.type).filter(Boolean)
|
|
2003
|
+
|
|
2004
|
+
// Get sample triples
|
|
2005
|
+
const sampleResults = this.kg.querySelect('SELECT ?s ?p ?o WHERE { ?s ?p ?o } LIMIT 30')
|
|
2006
|
+
schema.examples = sampleResults.map(r => ({
|
|
2007
|
+
s: r.bindings?.s || r.s,
|
|
2008
|
+
p: r.bindings?.p || r.p,
|
|
2009
|
+
o: r.bindings?.o || r.o
|
|
2010
|
+
}))
|
|
2011
|
+
} catch (err) {
|
|
2012
|
+
schema.error = err.message
|
|
2013
|
+
}
|
|
2014
|
+
|
|
2015
|
+
this._schemaCache = schema
|
|
2016
|
+
this._schemaCacheExpiry = now + 5 * 60 * 1000 // 5 minute cache
|
|
2017
|
+
return schema
|
|
167
2018
|
}
|
|
168
2019
|
|
|
169
2020
|
/**
|
|
170
2021
|
* Generate execution plan from natural language
|
|
2022
|
+
*
|
|
2023
|
+
* Context Theory Integration:
|
|
2024
|
+
* 1. Build SchemaContext (Γ) for type-theoretic validation
|
|
2025
|
+
* 2. Deterministic intent classification (not LLM dependent)
|
|
2026
|
+
* 3. Schema-validated SPARQL generation
|
|
2027
|
+
* 4. ProofDAG for verifiable reasoning chain
|
|
2028
|
+
* 5. LLM used ONLY for summarization (not query generation)
|
|
2029
|
+
*
|
|
2030
|
+
* Guarantees:
|
|
2031
|
+
* - Same input + same schema = same output (deterministic)
|
|
2032
|
+
* - All queries validated against schema context
|
|
2033
|
+
* - Full proof chain for every answer
|
|
2034
|
+
*
|
|
171
2035
|
* @param {string} prompt - Natural language query
|
|
172
|
-
* @param {Object} context - Optional context
|
|
173
|
-
* @returns {
|
|
2036
|
+
* @param {Object} context - Optional context (memories, schema)
|
|
2037
|
+
* @returns {Object} Execution plan with typed steps and proof
|
|
174
2038
|
*/
|
|
175
2039
|
async plan(prompt, context = {}) {
|
|
176
2040
|
const planId = `plan-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`
|
|
177
2041
|
|
|
178
|
-
//
|
|
2042
|
+
// STEP 1: Build type-theoretic schema context (Γ)
|
|
2043
|
+
const schemaContext = await this.buildSchemaContext()
|
|
2044
|
+
const schema = context.schema || await this.extractSchema()
|
|
2045
|
+
|
|
2046
|
+
// STEP 2: Deterministic intent classification (NOT LLM dependent)
|
|
2047
|
+
// This ensures same input → same intent (idempotent)
|
|
179
2048
|
const intent = this._analyzeIntent(prompt)
|
|
180
2049
|
|
|
181
|
-
//
|
|
182
|
-
|
|
2050
|
+
// STEP 3: Compute deterministic query hash
|
|
2051
|
+
// Same schema + same intent = same hash (for caching/reproducibility)
|
|
2052
|
+
const queryHash = this._computeQueryHash(intent, schemaContext)
|
|
2053
|
+
|
|
2054
|
+
// STEP 4: Generate steps using schema context
|
|
2055
|
+
const steps = this._generateSteps(intent, { ...context, schema, schemaContext })
|
|
2056
|
+
|
|
2057
|
+
// STEP 5: Extract and validate SPARQL queries
|
|
2058
|
+
const sparqlSteps = steps.filter(s => s.tool === 'kg.sparql.query')
|
|
2059
|
+
let validation = { valid: true, errors: [], warnings: [], derivations: [] }
|
|
2060
|
+
|
|
2061
|
+
if (sparqlSteps.length > 0 && sparqlSteps[0].args?.sparql) {
|
|
2062
|
+
validation = this.validateQuery(sparqlSteps[0].args.sparql, schemaContext)
|
|
2063
|
+
}
|
|
2064
|
+
|
|
2065
|
+
// STEP 6: Create ProofDAG for reasoning chain
|
|
2066
|
+
const proof = new ProofDAG(`Answer to: "${prompt.slice(0, 100)}"`)
|
|
2067
|
+
const planNode = proof.addEvidence(
|
|
2068
|
+
proof.rootId,
|
|
2069
|
+
`Plan generated with ${steps.length} steps`,
|
|
2070
|
+
{ type: 'plan', stepCount: steps.length, intent },
|
|
2071
|
+
'PLAN_GEN'
|
|
2072
|
+
)
|
|
2073
|
+
|
|
2074
|
+
// Add schema evidence
|
|
2075
|
+
proof.addEvidence(
|
|
2076
|
+
planNode,
|
|
2077
|
+
`Schema context: ${schemaContext.properties.size} properties, ${schemaContext.classes.size} classes`,
|
|
2078
|
+
{ type: 'schema', signature: schemaContext._signatureHash },
|
|
2079
|
+
'SCHEMA_EXTRACT'
|
|
2080
|
+
)
|
|
2081
|
+
|
|
2082
|
+
// Add validation evidence
|
|
2083
|
+
if (sparqlSteps.length > 0) {
|
|
2084
|
+
proof.addEvidence(
|
|
2085
|
+
planNode,
|
|
2086
|
+
validation.valid ? 'Query validated against schema' : `Validation errors: ${validation.errors.length}`,
|
|
2087
|
+
{ type: 'validation', valid: validation.valid, errors: validation.errors },
|
|
2088
|
+
'QUERY_VALIDATE'
|
|
2089
|
+
)
|
|
2090
|
+
}
|
|
183
2091
|
|
|
184
|
-
//
|
|
185
|
-
|
|
2092
|
+
// STEP 7: Optional LLM for summarization (NOT for query generation)
|
|
2093
|
+
let llmSummary = null
|
|
2094
|
+
if (this.model && this.apiKey && context.useLLMSummary) {
|
|
2095
|
+
llmSummary = await this._summarizeWithLLM(prompt, steps, validation)
|
|
2096
|
+
}
|
|
186
2097
|
|
|
187
2098
|
return {
|
|
188
2099
|
id: planId,
|
|
189
2100
|
prompt,
|
|
190
2101
|
intent,
|
|
191
2102
|
steps,
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
2103
|
+
|
|
2104
|
+
// Context Theory outputs
|
|
2105
|
+
schemaContext: schemaContext.toCanonical(),
|
|
2106
|
+
queryHash,
|
|
2107
|
+
validation,
|
|
2108
|
+
|
|
2109
|
+
// Proof chain
|
|
2110
|
+
proof: proof.serialize(),
|
|
2111
|
+
proofHash: proof.computeHash(),
|
|
2112
|
+
|
|
2113
|
+
// Metadata
|
|
2114
|
+
schema_used: !!schema.predicates.length,
|
|
2115
|
+
llm_used: !!llmSummary,
|
|
2116
|
+
type_chain: this._buildTypeChain(steps),
|
|
2117
|
+
confidence: validation.valid ? 0.95 : 0.6,
|
|
2118
|
+
explanation: llmSummary || this._generateExplanation(steps, intent)
|
|
195
2119
|
}
|
|
196
2120
|
}
|
|
197
2121
|
|
|
198
|
-
|
|
199
|
-
|
|
2122
|
+
/**
|
|
2123
|
+
* LLM used ONLY for summarization, not for query generation
|
|
2124
|
+
* This ensures deterministic queries while allowing natural language output
|
|
2125
|
+
*/
|
|
2126
|
+
async _summarizeWithLLM(prompt, steps, validation) {
|
|
2127
|
+
if (!this.model || !this.apiKey) return null
|
|
200
2128
|
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
2129
|
+
const systemPrompt = `You are a summarizer. Given a query plan, produce a one-sentence summary.
|
|
2130
|
+
Do NOT generate queries. Only summarize what the plan will do.`
|
|
2131
|
+
|
|
2132
|
+
const userPrompt = `Plan for "${prompt}":
|
|
2133
|
+
Steps: ${steps.map(s => s.tool).join(' → ')}
|
|
2134
|
+
Validation: ${validation.valid ? 'PASSED' : 'FAILED'}
|
|
2135
|
+
|
|
2136
|
+
Summarize in one sentence.`
|
|
2137
|
+
|
|
2138
|
+
try {
|
|
2139
|
+
return await this._callLLM(systemPrompt, userPrompt)
|
|
2140
|
+
} catch (err) {
|
|
2141
|
+
return null
|
|
2142
|
+
}
|
|
2143
|
+
}
|
|
2144
|
+
|
|
2145
|
+
async _planWithLLM(prompt, schema, memories) {
|
|
2146
|
+
if (!this.model || !this.apiKey) return null
|
|
2147
|
+
|
|
2148
|
+
const systemPrompt = this._buildSystemPrompt(schema, memories)
|
|
2149
|
+
const userPrompt = `User query: "${prompt}"\n\nGenerate intent classification and SPARQL query.`
|
|
2150
|
+
|
|
2151
|
+
try {
|
|
2152
|
+
const response = await this._callLLM(systemPrompt, userPrompt)
|
|
2153
|
+
return this._parseLLMResponse(response)
|
|
2154
|
+
} catch (err) {
|
|
2155
|
+
// LLM call failed - fall back to pattern matching
|
|
2156
|
+
return null
|
|
2157
|
+
}
|
|
2158
|
+
}
|
|
2159
|
+
|
|
2160
|
+
  /**
   * Build the LLM system prompt from the extracted schema and recent
   * episodic memories. Sections are capped (15 classes, 25 predicates,
   * 8 sample triples, 5 episodes) to bound prompt size.
   * @param {Object} schema - { classes, predicates, examples } from extractSchema
   * @param {Array} memories - Recent episodes (raw or wrapped in { episode })
   * @returns {string} Complete system prompt text
   */
  _buildSystemPrompt(schema, memories) {
    let schemaText = '## Knowledge Graph Schema\n\n'

    if (schema.classes.length > 0) {
      schemaText += '### Classes:\n' + schema.classes.slice(0, 15).map(c => `- ${c}`).join('\n') + '\n\n'
    }
    if (schema.predicates.length > 0) {
      schemaText += '### Predicates:\n' + schema.predicates.slice(0, 25).map(p => `- ${p}`).join('\n') + '\n\n'
    }
    if (schema.examples.length > 0) {
      schemaText += '### Sample Triples:\n' + schema.examples.slice(0, 8).map(t => `- <${t.s}> <${t.p}> ${t.o}`).join('\n') + '\n'
    }

    let memoryText = ''
    if (memories.length > 0) {
      // Memories may be raw episodes or wrappers with an .episode field.
      // NOTE(review): `??` binds tighter than `?:` here, so this reads
      // ((m.episode?.success ?? m.success) ? 'success' : 'failed') —
      // appears intended; confirm.
      memoryText = '\n## Recent Episodes:\n' + memories.slice(0, 5).map((m, i) =>
        `${i + 1}. "${m.episode?.prompt || m.prompt}" (${m.episode?.success ?? m.success ? 'success' : 'failed'})`
      ).join('\n')
    }

    return `You are a knowledge graph query planner.

${schemaText}
${memoryText}

RULES:
- ONLY use predicates from the schema above
- NEVER invent predicate names
- If schema doesn't match user's request, set intent to "schema_mismatch"
- Use proper SPARQL syntax

Respond in JSON:
{
  "intent": "<type>",
  "sparql": "<query or null>",
  "confidence": <0.0-1.0>,
  "reasoning": "<explanation>"
}

Intent types: detect_fraud, find_similar, explain, find_patterns, aggregate, general_query, schema_mismatch`
  }
|
|
2201
|
+
|
|
2202
|
+
async _callLLM(systemPrompt, userPrompt) {
|
|
2203
|
+
const model = this.model.toLowerCase()
|
|
2204
|
+
const isAnthropic = model.includes('claude') || model.includes('anthropic')
|
|
2205
|
+
|
|
2206
|
+
const endpoint = isAnthropic
|
|
2207
|
+
? 'https://api.anthropic.com/v1/messages'
|
|
2208
|
+
: 'https://api.openai.com/v1/chat/completions'
|
|
2209
|
+
|
|
2210
|
+
const headers = isAnthropic
|
|
2211
|
+
? { 'Content-Type': 'application/json', 'x-api-key': this.apiKey, 'anthropic-version': '2023-06-01' }
|
|
2212
|
+
: { 'Content-Type': 'application/json', 'Authorization': `Bearer ${this.apiKey}` }
|
|
2213
|
+
|
|
2214
|
+
const body = isAnthropic
|
|
2215
|
+
? { model: this.model, max_tokens: 1024, system: systemPrompt, messages: [{ role: 'user', content: userPrompt }] }
|
|
2216
|
+
: { model: this.model, messages: [{ role: 'system', content: systemPrompt }, { role: 'user', content: userPrompt }], temperature: 0.1 }
|
|
2217
|
+
|
|
2218
|
+
const response = await fetch(endpoint, { method: 'POST', headers, body: JSON.stringify(body) })
|
|
2219
|
+
if (!response.ok) throw new Error(`API error: ${response.status}`)
|
|
2220
|
+
|
|
2221
|
+
const data = await response.json()
|
|
2222
|
+
return isAnthropic ? data.content[0].text : data.choices[0].message.content
|
|
2223
|
+
}
|
|
2224
|
+
|
|
2225
|
+
_parseLLMResponse(response) {
|
|
2226
|
+
try {
|
|
2227
|
+
let jsonStr = response
|
|
2228
|
+
const match = response.match(/```json\s*([\s\S]*?)\s*```/) || response.match(/\{[\s\S]*\}/)
|
|
2229
|
+
if (match) jsonStr = match[1] || match[0]
|
|
2230
|
+
|
|
2231
|
+
const parsed = JSON.parse(jsonStr)
|
|
2232
|
+
return {
|
|
2233
|
+
type: parsed.intent || 'general_query',
|
|
2234
|
+
sparql: parsed.sparql,
|
|
2235
|
+
confidence: parsed.confidence || 0.8,
|
|
2236
|
+
reasoning: parsed.reasoning,
|
|
2237
|
+
tools: this._getToolsForIntent(parsed.intent)
|
|
2238
|
+
}
|
|
2239
|
+
} catch (err) {
|
|
2240
|
+
return null
|
|
2241
|
+
}
|
|
2242
|
+
}
|
|
2243
|
+
|
|
2244
|
+
_getToolsForIntent(intent) {
|
|
2245
|
+
const toolMap = {
|
|
2246
|
+
'detect_fraud': ['kg.sparql.query', 'kg.datalog.apply'],
|
|
2247
|
+
'find_similar': ['kg.embeddings.search'],
|
|
2248
|
+
'explain': ['kg.datalog.apply'],
|
|
2249
|
+
'find_patterns': ['kg.motif.find'],
|
|
2250
|
+
'aggregate': ['kg.sparql.query'],
|
|
2251
|
+
'general_query': ['kg.sparql.query'],
|
|
2252
|
+
'schema_mismatch': []
|
|
2253
|
+
}
|
|
2254
|
+
return toolMap[intent] || ['kg.sparql.query']
|
|
2255
|
+
}
|
|
2256
|
+
|
|
2257
|
+
_generateStepsFromLLM(llmResult, sparql) {
|
|
2258
|
+
const steps = []
|
|
2259
|
+
let stepId = 1
|
|
2260
|
+
|
|
2261
|
+
if (sparql) {
|
|
2262
|
+
steps.push({
|
|
2263
|
+
id: stepId++,
|
|
2264
|
+
tool: 'kg.sparql.query',
|
|
2265
|
+
input_type: 'Query',
|
|
2266
|
+
output_type: 'BindingSet',
|
|
2267
|
+
args: { sparql }
|
|
2268
|
+
})
|
|
209
2269
|
}
|
|
210
2270
|
|
|
2271
|
+
// Add additional tools based on intent
|
|
2272
|
+
const additionalTools = llmResult.tools.filter(t => t !== 'kg.sparql.query')
|
|
2273
|
+
additionalTools.forEach(tool => {
|
|
2274
|
+
steps.push({
|
|
2275
|
+
id: stepId++,
|
|
2276
|
+
tool,
|
|
2277
|
+
input_type: this.tools[tool]?.input || 'Any',
|
|
2278
|
+
output_type: this.tools[tool]?.output || 'Any',
|
|
2279
|
+
args: {}
|
|
2280
|
+
})
|
|
2281
|
+
})
|
|
2282
|
+
|
|
2283
|
+
return steps
|
|
2284
|
+
}
|
|
2285
|
+
|
|
2286
|
+
_analyzeIntent(prompt) {
|
|
2287
|
+
const lowerPrompt = prompt.toLowerCase()
|
|
211
2288
|
const detected = {}
|
|
212
|
-
|
|
2289
|
+
|
|
2290
|
+
for (const [intentType, keywords] of Object.entries(this.intentPatterns)) {
|
|
213
2291
|
detected[intentType] = keywords.some(k => lowerPrompt.includes(k))
|
|
214
2292
|
}
|
|
215
2293
|
|
|
@@ -219,19 +2297,20 @@ class LLMPlanner {
|
|
|
219
2297
|
_generateSteps(intent, context) {
|
|
220
2298
|
const steps = []
|
|
221
2299
|
let stepId = 1
|
|
2300
|
+
const schema = context.schema || { predicates: [], classes: [] }
|
|
222
2301
|
|
|
223
|
-
//
|
|
224
|
-
if (intent.query || intent.compliance) {
|
|
2302
|
+
// Generate SPARQL based on intent and schema
|
|
2303
|
+
if (intent.query || intent.compliance || intent.aggregate) {
|
|
2304
|
+
const sparql = this._generateSchemaSparql(intent, schema, context)
|
|
225
2305
|
steps.push({
|
|
226
2306
|
id: stepId++,
|
|
227
2307
|
tool: 'kg.sparql.query',
|
|
228
2308
|
input_type: 'Query',
|
|
229
2309
|
output_type: 'BindingSet',
|
|
230
|
-
args: { sparql
|
|
2310
|
+
args: { sparql }
|
|
231
2311
|
})
|
|
232
2312
|
}
|
|
233
2313
|
|
|
234
|
-
// Add pattern finding if pattern intent detected
|
|
235
2314
|
if (intent.pattern) {
|
|
236
2315
|
steps.push({
|
|
237
2316
|
id: stepId++,
|
|
@@ -242,7 +2321,6 @@ class LLMPlanner {
|
|
|
242
2321
|
})
|
|
243
2322
|
}
|
|
244
2323
|
|
|
245
|
-
// Add inference if infer intent detected
|
|
246
2324
|
if (intent.infer) {
|
|
247
2325
|
steps.push({
|
|
248
2326
|
id: stepId++,
|
|
@@ -253,7 +2331,6 @@ class LLMPlanner {
|
|
|
253
2331
|
})
|
|
254
2332
|
}
|
|
255
2333
|
|
|
256
|
-
// Add similarity search if similar intent detected
|
|
257
2334
|
if (intent.similar) {
|
|
258
2335
|
steps.push({
|
|
259
2336
|
id: stepId++,
|
|
@@ -264,7 +2341,6 @@ class LLMPlanner {
|
|
|
264
2341
|
})
|
|
265
2342
|
}
|
|
266
2343
|
|
|
267
|
-
// Add ranking if rank intent detected
|
|
268
2344
|
if (intent.rank) {
|
|
269
2345
|
steps.push({
|
|
270
2346
|
id: stepId++,
|
|
@@ -275,34 +2351,58 @@ class LLMPlanner {
|
|
|
275
2351
|
})
|
|
276
2352
|
}
|
|
277
2353
|
|
|
278
|
-
// Default
|
|
2354
|
+
// Default query if no steps
|
|
279
2355
|
if (steps.length === 0) {
|
|
280
2356
|
steps.push({
|
|
281
2357
|
id: stepId++,
|
|
282
2358
|
tool: 'kg.sparql.query',
|
|
283
2359
|
input_type: 'Query',
|
|
284
2360
|
output_type: 'BindingSet',
|
|
285
|
-
args: { sparql: 'SELECT
|
|
2361
|
+
args: { sparql: 'SELECT ?s ?p ?o WHERE { ?s ?p ?o } LIMIT 100' }
|
|
286
2362
|
})
|
|
287
2363
|
}
|
|
288
2364
|
|
|
289
2365
|
return steps
|
|
290
2366
|
}
|
|
291
2367
|
|
|
2368
|
+
_generateSchemaSparql(intent, schema, context) {
|
|
2369
|
+
// Use schema-aware SPARQL generation
|
|
2370
|
+
if (context.sparql) return context.sparql
|
|
2371
|
+
|
|
2372
|
+
// Check if schema has relevant predicates
|
|
2373
|
+
const predicates = schema.predicates || []
|
|
2374
|
+
|
|
2375
|
+
if (intent.aggregate) {
|
|
2376
|
+
return 'SELECT (COUNT(*) as ?count) WHERE { ?s ?p ?o }'
|
|
2377
|
+
}
|
|
2378
|
+
|
|
2379
|
+
// Try to match predicates based on intent
|
|
2380
|
+
const riskPreds = predicates.filter(p => p.toLowerCase().includes('risk') || p.toLowerCase().includes('score'))
|
|
2381
|
+
const typePreds = predicates.filter(p => p.includes('type') || p.includes('Type'))
|
|
2382
|
+
|
|
2383
|
+
if (intent.pattern || intent.rank) {
|
|
2384
|
+
if (riskPreds.length > 0) {
|
|
2385
|
+
return `SELECT ?s ?score WHERE { ?s <${riskPreds[0]}> ?score } ORDER BY DESC(?score) LIMIT 100`
|
|
2386
|
+
}
|
|
2387
|
+
}
|
|
2388
|
+
|
|
2389
|
+
// Default: return all triples
|
|
2390
|
+
return 'SELECT ?s ?p ?o WHERE { ?s ?p ?o } LIMIT 100'
|
|
2391
|
+
}
|
|
2392
|
+
|
|
292
2393
|
_buildTypeChain(steps) {
|
|
293
2394
|
return steps.map(s => `${s.input_type} → ${s.output_type}`).join(' ; ')
|
|
294
2395
|
}
|
|
295
2396
|
|
|
296
2397
|
_calculateConfidence(steps, intent) {
|
|
297
|
-
// Higher confidence if intent matches tool selection
|
|
298
2398
|
const matchedIntents = Object.values(intent).filter(v => v).length
|
|
299
2399
|
return Math.min(0.95, 0.7 + (matchedIntents * 0.05))
|
|
300
2400
|
}
|
|
301
2401
|
|
|
302
2402
|
_generateExplanation(steps, intent) {
|
|
303
2403
|
const toolNames = steps.map(s => s.tool).join(', ')
|
|
304
|
-
|
|
305
|
-
|
|
2404
|
+
const detectedIntents = Object.entries(intent).filter(([_, v]) => v).map(([k]) => k).join(', ')
|
|
2405
|
+
return `Plan uses ${steps.length} tool(s): ${toolNames}. Detected intents: ${detectedIntents || 'general query'}.`
|
|
306
2406
|
}
|
|
307
2407
|
}
|
|
308
2408
|
|
|
@@ -658,7 +2758,15 @@ class MemoryManager {
|
|
|
658
2758
|
|
|
659
2759
|
async _getEmbedding(text) {
|
|
660
2760
|
if (!this.embeddingService) return null
|
|
661
|
-
|
|
2761
|
+
// EmbeddingService doesn't have embed() - it's for vector storage/search
|
|
2762
|
+
// For text embedding, we generate a simple deterministic hash-based embedding
|
|
2763
|
+
// In production, integrate with OpenAI/Anthropic embedding APIs
|
|
2764
|
+
const hash = text.split('').reduce((acc, char) => ((acc << 5) - acc) + char.charCodeAt(0), 0)
|
|
2765
|
+
const embedding = new Float32Array(384)
|
|
2766
|
+
for (let i = 0; i < 384; i++) {
|
|
2767
|
+
embedding[i] = Math.sin(hash * (i + 1) * 0.01) * 0.5
|
|
2768
|
+
}
|
|
2769
|
+
return Array.from(embedding)
|
|
662
2770
|
}
|
|
663
2771
|
|
|
664
2772
|
_episodeToTurtle(episode) {
|
|
@@ -796,14 +2904,31 @@ class HyperMindAgent {
|
|
|
796
2904
|
this.memory = config.memory || new MemoryManager(config.kg, config.embeddings)
|
|
797
2905
|
this.embeddings = config.embeddings || null
|
|
798
2906
|
this.apiKey = config.apiKey || null
|
|
2907
|
+
this.model = config.model || null
|
|
799
2908
|
this.rules = config.rules || new DatalogRuleSet()
|
|
800
2909
|
this.sandbox = new WasmSandbox(config.sandbox || {})
|
|
801
2910
|
this.name = config.name || 'hypermind-agent'
|
|
802
2911
|
|
|
803
|
-
//
|
|
2912
|
+
// LLMPlanner for schema-aware planning (delegates all LLM/schema logic)
|
|
2913
|
+
this.planner = new LLMPlanner({
|
|
2914
|
+
kg: config.kg,
|
|
2915
|
+
model: config.model,
|
|
2916
|
+
apiKey: config.apiKey,
|
|
2917
|
+
tools: TOOL_REGISTRY
|
|
2918
|
+
})
|
|
2919
|
+
|
|
2920
|
+
// Intent patterns for fallback mode
|
|
804
2921
|
this.intentPatterns = this._buildIntentPatterns()
|
|
805
2922
|
}
|
|
806
2923
|
|
|
2924
|
+
/**
|
|
2925
|
+
* Extract schema from KG (delegates to planner)
|
|
2926
|
+
* @returns {Object} Schema with predicates, classes, examples
|
|
2927
|
+
*/
|
|
2928
|
+
async extractSchema(forceRefresh = false) {
|
|
2929
|
+
return this.planner.extractSchema(forceRefresh)
|
|
2930
|
+
}
|
|
2931
|
+
|
|
807
2932
|
/**
|
|
808
2933
|
* Execute a natural language request
|
|
809
2934
|
* Returns answer + full explainable AI output
|
|
@@ -1879,6 +4004,26 @@ module.exports = {
|
|
|
1879
4004
|
LLMPlanner,
|
|
1880
4005
|
TOOL_REGISTRY,
|
|
1881
4006
|
|
|
4007
|
+
// Context Theory (v0.6.11+) - Type-theoretic foundations for SPARQL validation
|
|
4008
|
+
// Based on: Spivak's Ologs, Functorial Data Migration, TypeQL
|
|
4009
|
+
SchemaContext, // Γ context with classes, properties, bindings
|
|
4010
|
+
TypeJudgment, // Γ ⊢ e : τ formal type judgment
|
|
4011
|
+
QueryValidator, // Validates SPARQL using derivation rules
|
|
4012
|
+
ProofDAG, // Curry-Howard proof of reasoning chain
|
|
4013
|
+
|
|
4014
|
+
// Schema Caching (v0.6.12+) - Cross-agent schema sharing
|
|
4015
|
+
SchemaCache, // Cache class for schema storage
|
|
4016
|
+
SCHEMA_CACHE, // Global singleton instance (shared across all agents)
|
|
4017
|
+
|
|
4018
|
+
// Schema-Aware GraphDB (v0.6.13+) - Auto schema extraction on load
|
|
4019
|
+
// Schema is extracted ONCE after data load (not on every access)
|
|
4020
|
+
SchemaAwareGraphDB, // Wrapper with auto schema extraction
|
|
4021
|
+
createSchemaAwareGraphDB, // Factory function
|
|
4022
|
+
wrapWithSchemaAwareness, // Wrap existing GraphDb
|
|
4023
|
+
|
|
4024
|
+
// Configuration (v0.6.11+) - Centralized tunable parameters
|
|
4025
|
+
CONFIG, // All CONFIG values (no hardcoding)
|
|
4026
|
+
|
|
1882
4027
|
// Supporting Classes
|
|
1883
4028
|
MemoryManager,
|
|
1884
4029
|
DatalogRuleSet,
|