rust-kgdb 0.6.9 → 0.6.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -14,51 +14,1665 @@
14
14
 
15
15
  const crypto = require('crypto')
16
16
 
17
+ // ============================================================================
18
+ // CONFIGURATION - All tunable parameters (NO hardcoding)
19
+ // ============================================================================
20
+
21
+ /**
22
+ * CONFIG - Centralized configuration for all tunable parameters
23
+ *
24
+ * Design Principle: No magic numbers in code. All thresholds, limits, and
25
+ * parameters are defined here and derived from schema where possible.
26
+ */
27
+ const CONFIG = {
28
+ // Schema extraction limits (derived from KG size heuristics)
29
+ schema: {
30
+ maxClasses: 500,
31
+ maxProperties: 500,
32
+ maxSamples: 30,
33
+ fallbackLimit: 200,
34
+ cacheExpiryMs: 5 * 60 * 1000 // 5 minutes
35
+ },
36
+
37
+ // Query generation
38
+ query: {
39
+ defaultLimit: 100,
40
+ maxResultLimit: 1000
41
+ },
42
+
43
+ // Similarity and scoring (from research: TypeQL, Ologs)
44
+ scoring: {
45
+ similarityThreshold: 0.5, // Minimum Jaccard similarity for suggestions
46
+ validationConfidence: 0.95, // Confidence when validation passes
47
+ fallbackConfidence: 0.6 // Confidence when validation fails
48
+ },
49
+
50
+ // Memory temporal scoring (from agent-memory.ttl ontology)
51
+ memory: {
52
+ decayRate: 0.995, // Per hour (~12% per day)
53
+ weights: {
54
+ recency: 0.3,
55
+ relevance: 0.5,
56
+ importance: 0.2
57
+ },
58
+ defaultGraph: 'http://hypermind.ai/memory/'
59
+ },
60
+
61
+ // Graph algorithms (standard defaults)
62
+ algorithms: {
63
+ pageRank: {
64
+ dampingFactor: 0.85,
65
+ maxIterations: 20
66
+ },
67
+ embedding: {
68
+ k: 10,
69
+ threshold: 0.7
70
+ }
71
+ },
72
+
73
+ // LLM settings
74
+ llm: {
75
+ maxTokens: 1024,
76
+ temperature: 0.1, // Low for determinism
77
+ defaultConfidence: 0.8
78
+ }
79
+ }
80
+
81
+ // ============================================================================
82
+ // SCHEMA CACHE - Shared across all agents (Singleton Pattern)
83
+ // ============================================================================
84
+
85
+ /**
86
+ * SchemaCache - Global schema cache shared across all HyperMind agents
87
+ *
88
+ * Design Principles:
89
+ * 1. Once computed, schema is cached by signature hash
90
+ * 2. Same KG/ontology → same signature → cache hit
91
+ * 3. TTL-based expiry (configurable via CONFIG.schema.cacheExpiryMs)
92
+ * 4. Cross-agent sharing via singleton pattern
93
+ * 5. Thread-safe for Node.js (single-threaded event loop)
94
+ *
95
+ * Cache Key: Combination of:
96
+ * - KG base URI (for KG-derived schemas)
97
+ * - Ontology hash (for imported ontologies)
98
+ * - Schema signature hash
99
+ *
100
+ * This ensures:
101
+ * - Same input → same cached schema (determinism)
102
+ * - Multiple agents can share schema (efficiency)
103
+ * - Schema updates propagate after TTL (freshness)
104
+ */
105
+ class SchemaCache {
106
+ constructor() {
107
+ this._cache = new Map() // key → { schema, timestamp, hits }
108
+ this._stats = { hits: 0, misses: 0, evictions: 0 }
109
+ }
110
+
111
+ /**
112
+ * Generate cache key from KG and/or ontology
113
+ */
114
+ _generateKey(kgBaseUri, ontologyHash) {
115
+ const parts = []
116
+ if (kgBaseUri) parts.push(`kg:${kgBaseUri}`)
117
+ if (ontologyHash) parts.push(`onto:${ontologyHash}`)
118
+ return parts.join('|') || 'default'
119
+ }
120
+
121
+ /**
122
+ * Get schema from cache (if valid)
123
+ * @returns {SchemaContext|null}
124
+ */
125
+ get(kgBaseUri, ontologyHash = null) {
126
+ const key = this._generateKey(kgBaseUri, ontologyHash)
127
+ const entry = this._cache.get(key)
128
+
129
+ if (!entry) {
130
+ this._stats.misses++
131
+ return null
132
+ }
133
+
134
+ // Check TTL expiry
135
+ const age = Date.now() - entry.timestamp
136
+ if (age > CONFIG.schema.cacheExpiryMs) {
137
+ this._cache.delete(key)
138
+ this._stats.evictions++
139
+ this._stats.misses++
140
+ return null
141
+ }
142
+
143
+ entry.hits++
144
+ this._stats.hits++
145
+ return entry.schema
146
+ }
147
+
148
+ /**
149
+ * Store schema in cache
150
+ */
151
+ set(kgBaseUri, schema, ontologyHash = null) {
152
+ const key = this._generateKey(kgBaseUri, ontologyHash)
153
+ this._cache.set(key, {
154
+ schema,
155
+ timestamp: Date.now(),
156
+ hits: 0
157
+ })
158
+ return this
159
+ }
160
+
161
+ /**
162
+ * Get or compute schema (cache-aside pattern)
163
+ * @param {string} kgBaseUri - KG identifier
164
+ * @param {Function} computeFn - Async function to compute schema if not cached
165
+ * @param {string} ontologyHash - Optional ontology hash
166
+ * @returns {Promise<SchemaContext>}
167
+ */
168
+ async getOrCompute(kgBaseUri, computeFn, ontologyHash = null) {
169
+ // Try cache first
170
+ const cached = this.get(kgBaseUri, ontologyHash)
171
+ if (cached) return cached
172
+
173
+ // Compute and cache
174
+ const schema = await computeFn()
175
+ this.set(kgBaseUri, schema, ontologyHash)
176
+ return schema
177
+ }
178
+
179
+ /**
180
+ * Invalidate cache entry
181
+ */
182
+ invalidate(kgBaseUri, ontologyHash = null) {
183
+ const key = this._generateKey(kgBaseUri, ontologyHash)
184
+ this._cache.delete(key)
185
+ }
186
+
187
+ /**
188
+ * Clear entire cache
189
+ */
190
+ clear() {
191
+ this._cache.clear()
192
+ this._stats = { hits: 0, misses: 0, evictions: 0 }
193
+ }
194
+
195
+ /**
196
+ * Get cache statistics
197
+ */
198
+ getStats() {
199
+ return {
200
+ ...this._stats,
201
+ size: this._cache.size,
202
+ hitRate: this._stats.hits / (this._stats.hits + this._stats.misses) || 0
203
+ }
204
+ }
205
+ }
206
+
207
+ // Global singleton instance - shared across all agents
208
+ const SCHEMA_CACHE = new SchemaCache()
209
+
210
+ // ============================================================================
211
+ // SCHEMA-AWARE GRAPHDB WRAPPER - Auto schema extraction on load
212
+ // ============================================================================
213
+
214
+ /**
215
+ * SchemaAwareGraphDB - Wrapper that auto-extracts schema after load operations
216
+ *
217
+ * Design: Schema extraction is an INTERNAL part of the engine.
218
+ * When data is loaded, schema is extracted ONCE and cached globally.
219
+ *
220
+ * Architecture:
221
+ * 1. Wraps native GraphDb instance
222
+ * 2. Intercepts loadTtl(), loadNtriples() methods
223
+ * 3. After load completes, triggers ASYNC schema extraction
224
+ * 4. Schema stored in global SCHEMA_CACHE for cross-agent sharing
225
+ *
226
+ * Usage:
227
+ * ```javascript
228
+ * const db = new SchemaAwareGraphDB('http://example.org/')
229
+ * await db.loadTtl(ttlData, null) // Schema extracted automatically!
230
+ * const schema = db.getSchema() // Instant access to cached schema
231
+ * ```
232
+ *
233
+ * Mathematical Foundation:
234
+ * - Schema = Category where Objects = Classes, Morphisms = Properties
235
+ * - Load operation = Functor from RDF Instance → Schema Category
236
+ * - Cache = Memoization of functor application
237
+ */
238
+ class SchemaAwareGraphDB {
239
+ /**
240
+ * @param {string|Object} baseUriOrNativeDb - Base URI string or existing GraphDb instance
241
+ * @param {Object} options - Configuration options
242
+ * @param {string} options.ontology - Pre-built ontology TTL (BYOO)
243
+ * @param {boolean} options.autoExtract - Auto-extract schema on load (default: true)
244
+ * @param {string} options.kgId - Unique identifier for this KG (for cache key)
245
+ */
246
+ constructor(baseUriOrNativeDb, options = {}) {
247
+ // Handle both string (create new) and object (wrap existing)
248
+ if (typeof baseUriOrNativeDb === 'string') {
249
+ // Lazy load native GraphDb to avoid circular dependency
250
+ const { GraphDb } = require('./index')
251
+ this._db = new GraphDb(baseUriOrNativeDb)
252
+ this._baseUri = baseUriOrNativeDb
253
+ } else if (baseUriOrNativeDb && typeof baseUriOrNativeDb.querySelect === 'function') {
254
+ // Wrap existing GraphDb instance
255
+ this._db = baseUriOrNativeDb
256
+ this._baseUri = baseUriOrNativeDb.baseUri || options.kgId || 'wrapped-kg'
257
+ } else {
258
+ throw new Error('SchemaAwareGraphDB requires a base URI string or GraphDb instance')
259
+ }
260
+
261
+ // Configuration
262
+ this._autoExtract = options.autoExtract !== false // Default: true
263
+ this._kgId = options.kgId || this._baseUri
264
+ this._ontologyTtl = options.ontology || null
265
+
266
+ // Schema state
267
+ this._schema = null
268
+ this._schemaPromise = null
269
+ this._schemaReady = false
270
+ this._schemaExtracted = false // Has initial extraction been done?
271
+ this._dataModified = false // Has data been modified since last extraction?
272
+
273
+ // If ontology provided, parse it immediately
274
+ if (this._ontologyTtl) {
275
+ this._initOntologySchema()
276
+ }
277
+ }
278
+
279
+ /**
280
+ * Initialize schema from provided ontology (synchronous)
281
+ */
282
+ _initOntologySchema() {
283
+ const ontologyHash = this._computeHash(this._ontologyTtl)
284
+ const cached = SCHEMA_CACHE.get(this._kgId, ontologyHash)
285
+ if (cached) {
286
+ this._schema = cached
287
+ this._schemaReady = true
288
+ return
289
+ }
290
+
291
+ // Parse ontology synchronously (it's just string parsing)
292
+ this._schema = SchemaContext.fromOntology(this._db, this._ontologyTtl, {
293
+ source: 'ontology',
294
+ graphUri: 'http://hypermind.ai/ontology/'
295
+ })
296
+ SCHEMA_CACHE.set(this._kgId, this._schema, ontologyHash)
297
+ this._schemaReady = true
298
+ }
299
+
300
+ /**
301
+ * Simple hash for cache keys
302
+ */
303
+ _computeHash(str) {
304
+ if (!str) return null
305
+ let hash = 0
306
+ for (let i = 0; i < Math.min(str.length, 500); i++) {
307
+ hash = ((hash << 5) - hash) + str.charCodeAt(i)
308
+ hash = hash & hash
309
+ }
310
+ return 'h_' + Math.abs(hash).toString(16)
311
+ }
312
+
313
+ /**
314
+ * Trigger async schema extraction (non-blocking)
315
+ *
316
+ * TRIGGER CONDITIONS (schema extraction happens ONLY when):
317
+ * 1. loadTtl() or loadNtriples() called (new data)
318
+ * 2. updateInsert() called (data modified)
319
+ * 3. refreshSchema() explicitly called
320
+ * 4. First time (no schema yet)
321
+ *
322
+ * NO TRIGGER (reuses existing schema):
323
+ * - waitForSchema() - just waits for existing
324
+ * - getSchema() - returns cached
325
+ * - querySelect() - read only
326
+ *
327
+ * RACE CONDITION HANDLING:
328
+ * - If agent requests schema before extraction completes, it waits
329
+ * - If schema already in cache (TTL not expired), returns immediately
330
+ * - Promise is stored so multiple waiters share the same extraction
331
+ *
332
+ * @param {boolean} forceExtract - Force new extraction (used by load/insert)
333
+ */
334
+ _triggerSchemaExtraction(forceExtract = false) {
335
+ if (!this._autoExtract) return Promise.resolve(null)
336
+
337
+ // If schema already extracted and no data modifications, return existing
338
+ if (!forceExtract && this._schemaExtracted && this._schema && !this._dataModified) {
339
+ this._schemaReady = true
340
+ return Promise.resolve(this._schema)
341
+ }
342
+
343
+ // If extraction already in progress, return existing promise (deduplication)
344
+ if (this._schemaPromise) return this._schemaPromise
345
+
346
+ this._schemaPromise = (async () => {
347
+ try {
348
+ // Check cache first (covers TTL case - if cached and no modifications, use it)
349
+ if (!forceExtract && !this._dataModified) {
350
+ const cached = SCHEMA_CACHE.get(this._kgId)
351
+ if (cached) {
352
+ this._schema = cached
353
+ this._schemaReady = true
354
+ this._schemaExtracted = true
355
+ return cached
356
+ }
357
+ }
358
+
359
+ // Extract from KG (async)
360
+ const kgSchema = await SchemaContext.fromKG(this._db)
361
+
362
+ // If we have ontology, merge; otherwise use KG schema
363
+ if (this._ontologyTtl && this._schema) {
364
+ this._schema = SchemaContext.merge(this._schema, kgSchema)
365
+ } else {
366
+ this._schema = kgSchema
367
+ }
368
+
369
+ // Cache globally
370
+ SCHEMA_CACHE.set(this._kgId, this._schema)
371
+ this._schemaReady = true
372
+ this._schemaExtracted = true
373
+ this._dataModified = false // Reset modification flag
374
+
375
+ return this._schema
376
+ } catch (err) {
377
+ // Schema extraction failed - continue without schema
378
+ console.warn('Schema extraction failed:', err.message)
379
+ this._schemaReady = true
380
+ this._schemaExtracted = true
381
+ return null
382
+ } finally {
383
+ // Keep promise for a short time to handle rapid sequential calls
384
+ setTimeout(() => { this._schemaPromise = null }, 100)
385
+ }
386
+ })()
387
+
388
+ return this._schemaPromise
389
+ }
390
+
391
+ /**
392
+ * Wait for schema to be ready (BLOCKING for callers)
393
+ *
394
+ * This is the KEY method for handling race conditions:
395
+ * - If schema already ready → returns immediately
396
+ * - If extraction in progress → waits for completion
397
+ * - If not started → triggers extraction and waits
398
+ *
399
+ * Usage:
400
+ * ```javascript
401
+ * const db = new SchemaAwareGraphDB('http://example.org/')
402
+ * db.loadTtl(data, null) // Triggers async extraction
403
+ *
404
+ * // ... agent starts ...
405
+ * const schema = await db.waitForSchema() // Waits if needed
406
+ * // Now schema is guaranteed to be ready
407
+ * ```
408
+ *
409
+ * @param {number} timeoutMs - Maximum time to wait (default: 30000ms)
410
+ * @returns {Promise<SchemaContext>}
411
+ */
412
+ async waitForSchema(timeoutMs = 30000) {
413
+ // Fast path: schema already ready
414
+ if (this._schemaReady && this._schema) {
415
+ return this._schema
416
+ }
417
+
418
+ // Check cache (might have been populated by another agent)
419
+ const cached = SCHEMA_CACHE.get(this._kgId)
420
+ if (cached) {
421
+ this._schema = cached
422
+ this._schemaReady = true
423
+ return cached
424
+ }
425
+
426
+ // Wait for in-progress extraction or start new one
427
+ const extractionPromise = this._schemaPromise || this._triggerSchemaExtraction()
428
+ if (!extractionPromise) {
429
+ return null // autoExtract disabled
430
+ }
431
+
432
+ // Race between extraction and timeout
433
+ const timeoutPromise = new Promise((_, reject) => {
434
+ setTimeout(() => reject(new Error(`Schema extraction timeout after ${timeoutMs}ms`)), timeoutMs)
435
+ })
436
+
437
+ try {
438
+ return await Promise.race([extractionPromise, timeoutPromise])
439
+ } catch (err) {
440
+ // Timeout or error - return whatever we have
441
+ console.warn('waitForSchema:', err.message)
442
+ return this._schema || null
443
+ }
444
+ }
445
+
446
+ // =========================================================================
447
+ // WRAPPED METHODS - Intercept load operations for auto schema extraction
448
+ // =========================================================================
449
+
450
+ /**
451
+ * Load TTL data with automatic schema extraction
452
+ *
453
+ * Schema extraction is triggered ONCE after load completes.
454
+ * Subsequent loads will re-trigger extraction.
455
+ *
456
+ * @param {string} data - TTL/Turtle format data
457
+ * @param {string|null} graphUri - Named graph URI (null for default graph)
458
+ */
459
+ loadTtl(data, graphUri) {
460
+ const result = this._db.loadTtl(data, graphUri)
461
+
462
+ // Mark data as modified - schema needs refresh
463
+ this._dataModified = true
464
+ this._schemaReady = false
465
+
466
+ // Trigger async schema extraction (non-blocking)
467
+ // Schema will be ready by the time queries are issued
468
+ this._triggerSchemaExtraction(true) // forceExtract = true
469
+
470
+ return result
471
+ }
472
+
473
+ /**
474
+ * Load N-Triples data with automatic schema extraction
475
+ */
476
+ loadNtriples(data, graphUri) {
477
+ const result = this._db.loadNtriples(data, graphUri)
478
+
479
+ // Mark data as modified
480
+ this._dataModified = true
481
+ this._schemaReady = false
482
+
483
+ this._triggerSchemaExtraction(true) // forceExtract = true
484
+ return result
485
+ }
486
+
487
+ // =========================================================================
488
+ // SCHEMA ACCESS METHODS
489
+ // =========================================================================
490
+
491
+ /**
492
+ * Get extracted schema (synchronous - returns cached or null)
493
+ * @returns {SchemaContext|null}
494
+ */
495
+ getSchema() {
496
+ return this._schema
497
+ }
498
+
499
+ /**
500
+ * Wait for schema extraction to complete
501
+ * @returns {Promise<SchemaContext>}
502
+ */
503
+ async getSchemaAsync() {
504
+ if (this._schemaReady && this._schema) {
505
+ return this._schema
506
+ }
507
+ if (this._schemaPromise) {
508
+ return this._schemaPromise
509
+ }
510
+ // Trigger extraction if not started
511
+ return this._triggerSchemaExtraction()
512
+ }
513
+
514
+ /**
515
+ * Check if schema is ready (non-blocking)
516
+ */
517
+ isSchemaReady() {
518
+ return this._schemaReady
519
+ }
520
+
521
+ /**
522
+ * Force schema refresh
523
+ */
524
+ async refreshSchema() {
525
+ SCHEMA_CACHE.invalidate(this._kgId)
526
+ this._schemaReady = false
527
+ this._schema = null
528
+ this._schemaPromise = null
529
+ return this._triggerSchemaExtraction()
530
+ }
531
+
532
+ // =========================================================================
533
+ // PASSTHROUGH METHODS - Delegate to underlying GraphDb
534
+ // =========================================================================
535
+
536
+ querySelect(sparql) {
537
+ return this._db.querySelect(sparql)
538
+ }
539
+
540
+ queryAsk(sparql) {
541
+ return this._db.queryAsk(sparql)
542
+ }
543
+
544
+ queryConstruct(sparql) {
545
+ return this._db.queryConstruct(sparql)
546
+ }
547
+
548
+ updateInsert(sparql) {
549
+ const result = this._db.updateInsert(sparql)
550
+ // Schema might change after INSERT - mark for lazy refresh
551
+ this._dataModified = true
552
+ this._schemaReady = false
553
+ // Don't trigger extraction immediately - wait until schema is actually needed
554
+ // This is more efficient for batch inserts
555
+ return result
556
+ }
557
+
558
+ updateDelete(sparql) {
559
+ const result = this._db.updateDelete(sparql)
560
+ // Schema might change after DELETE (properties/classes removed)
561
+ this._dataModified = true
562
+ this._schemaReady = false
563
+ return result
564
+ }
565
+
566
+ count() {
567
+ return this._db.count()
568
+ }
569
+
570
+ countTriples() {
571
+ return this._db.countTriples ? this._db.countTriples() : this._db.count()
572
+ }
573
+
574
+ clear() {
575
+ const result = this._db.clear()
576
+ // Clear schema cache too
577
+ SCHEMA_CACHE.invalidate(this._kgId)
578
+ this._schema = null
579
+ this._schemaReady = false
580
+ return result
581
+ }
582
+
583
+ getVersion() {
584
+ return this._db.getVersion ? this._db.getVersion() : 'unknown'
585
+ }
586
+
587
+ getGraphUri() {
588
+ return this._db.getGraphUri ? this._db.getGraphUri() : this._baseUri
589
+ }
590
+
591
+ /**
592
+ * Get underlying native GraphDb instance
593
+ */
594
+ getNativeDb() {
595
+ return this._db
596
+ }
597
+
598
+ /**
599
+ * Get KG identifier (for cache key)
600
+ */
601
+ getKgId() {
602
+ return this._kgId
603
+ }
604
+ }
605
+
606
+ /**
607
+ * Factory function to create schema-aware GraphDB
608
+ *
609
+ * Usage:
610
+ * ```javascript
611
+ * const db = createSchemaAwareGraphDB('http://example.org/', {
612
+ * ontology: insuranceOntologyTtl, // Optional: BYOO
613
+ * autoExtract: true // Default: true
614
+ * })
615
+ * ```
616
+ */
617
+ function createSchemaAwareGraphDB(baseUri, options = {}) {
618
+ return new SchemaAwareGraphDB(baseUri, options)
619
+ }
620
+
621
+ /**
622
+ * Wrap existing GraphDb with schema awareness
623
+ *
624
+ * Usage:
625
+ * ```javascript
626
+ * const nativeDb = new GraphDb('http://example.org/')
627
+ * const smartDb = wrapWithSchemaAwareness(nativeDb, { kgId: 'my-kg' })
628
+ * ```
629
+ */
630
+ function wrapWithSchemaAwareness(nativeDb, options = {}) {
631
+ return new SchemaAwareGraphDB(nativeDb, options)
632
+ }
633
+
17
634
  // ============================================================================
18
635
  // TYPE SYSTEM (Hindley-Milner + Refinement Types)
19
636
  // ============================================================================
20
637
 
21
- /**
22
- * TypeId - Complete type system ensuring no hallucination
23
- * Every value has a proof of its type correctness
24
- */
25
- const TypeId = {
26
- // Base types
27
- String: 'String',
28
- Int64: 'Int64',
29
- Float64: 'Float64',
30
- Bool: 'Bool',
31
- Unit: 'Unit',
638
+ /**
639
+ * TypeId - Complete type system ensuring no hallucination
640
+ * Every value has a proof of its type correctness
641
+ */
642
+ const TypeId = {
643
+ // Base types
644
+ String: 'String',
645
+ Int64: 'Int64',
646
+ Float64: 'Float64',
647
+ Bool: 'Bool',
648
+ Unit: 'Unit',
649
+
650
+ // RDF-native types (knowledge graph first-class citizens)
651
+ Node: 'Node',
652
+ Triple: 'Triple',
653
+ Quad: 'Quad',
654
+ BindingSet: 'BindingSet',
655
+
656
+ // Compound types (higher-kinded)
657
+ List: (t) => `List<${t}>`,
658
+ Option: (t) => `Option<${t}>`,
659
+ Result: (t, e) => `Result<${t}, ${e}>`,
660
+ Map: (k, v) => `Map<${k}, ${v}>`,
661
+
662
+ // Refinement types (business domain values with constraints)
663
+ RiskScore: 'RiskScore', // Float64 where 0.0 <= x <= 1.0
664
+ PolicyNumber: 'PolicyNumber', // String matching /^POL-\d{4}-\d{4}$/
665
+ ClaimAmount: 'ClaimAmount', // Currency where amount > 0
666
+ ClaimId: 'ClaimId', // String matching /^CLM-\d{4}-\d+$/
667
+ CreditScore: 'CreditScore', // Int64 where 300 <= x <= 850
668
+ ConfidenceScore: 'ConfidenceScore', // Float64 where 0.0 <= x <= 1.0
669
+
670
+ // Schema types (for type-safe graph queries)
671
+ SchemaType: (name) => `Schema<${name}>`,
672
+
673
+ // Type checking utilities
674
+ isCompatible: (output, input) => {
675
+ if (output === input) return true
676
+ if (output === 'BindingSet' && input === 'String') return true
677
+ if (output.startsWith && output.startsWith('List<') && input === 'String') return true
678
+ return false
679
+ }
680
+ }
681
+
682
+ // ============================================================================
683
+ // CONTEXT THEORY - Type-theoretic foundations for SPARQL validation
684
+ // ============================================================================
685
+
686
+ /**
687
+ * SchemaContext (Γ) - Type-theoretic context for knowledge graph schema
688
+ *
689
+ * Mathematical Foundation (David Spivak's Ologs + Functorial Data Migration):
690
+ * - Schema S is a category where Objects = Classes, Morphisms = Properties
691
+ * - Context Γ = (Classes, Properties, Domains, Ranges, Constraints)
692
+ * - Type Judgment: Γ ⊢ e : τ ("in context Γ, expression e has type τ")
693
+ *
694
+ * References:
695
+ * - Spivak & Kent, "Ologs: A Categorical Framework for Knowledge Representation" (2012)
696
+ * - Spivak, "Functorial Data Migration" (2012)
697
+ * - TypeQL: "A Type-Theoretic & Polymorphic Query Language" (2024)
698
+ */
699
+ class SchemaContext {
700
+ constructor() {
701
+ // Classes (objects in schema category)
702
+ this.classes = new Map() // className → { uri, superclasses, constraints }
703
+
704
+ // Properties (morphisms in schema category)
705
+ this.properties = new Map() // propName → { uri, domain, range, functional, inverse }
706
+
707
+ // Variable bindings (typing context Γ)
708
+ this.bindings = new Map() // ?var → Type
709
+
710
+ // Path equations (functorial constraints)
711
+ this.pathEquations = [] // [{ lhs: [p1, p2], rhs: [p3] }] meaning p1;p2 = p3
712
+
713
+ // Schema signature hash (for determinism)
714
+ this._signatureHash = null
715
+ }
716
+
717
+ /**
718
+ * Build context from knowledge graph schema (Functorial extraction)
719
+ *
720
+ * Design: Schema is derived from KG, not hardcoded.
721
+ * This implements Spivak's Ologs: KG Instance → Schema Category
722
+ *
723
+ * Research-backed scalability for Enterprise KGs (ISWC 2024, ABSTAT-HD):
724
+ * 1. VoID-first: Try VoID descriptions (O(1) if available)
725
+ * 2. RDFS/OWL metadata: Extract explicit schema declarations
726
+ * 3. Frequency-based sampling: For very large KGs, sample by predicate frequency
727
+ * 4. ShEx generation: Human-readable schema for LLM consumption
728
+ *
729
+ * References:
730
+ * - VoID: https://www.w3.org/TR/void/
731
+ * - ABSTAT-HD: Scalable KG profiling
732
+ * - sparql-llm: RAG over SPARQL endpoints (2024)
733
+ */
734
+ static async fromKG(kg, options = {}) {
735
+ const ctx = new SchemaContext()
736
+
737
+ if (!kg) return ctx
738
+
739
+ // Merge options with CONFIG (allows override for enterprise scale)
740
+ const config = {
741
+ maxClasses: options.maxClasses || CONFIG.schema.maxClasses,
742
+ maxProperties: options.maxProperties || CONFIG.schema.maxProperties,
743
+ fallbackLimit: options.fallbackLimit || CONFIG.schema.fallbackLimit,
744
+ sampleSize: options.sampleSize || CONFIG.schema.maxSamples,
745
+ useExplicitSchemaOnly: options.useExplicitSchemaOnly || false,
746
+ useVoID: options.useVoID !== false // Try VoID by default
747
+ }
748
+
749
+ try {
750
+ // STRATEGY 1: Try VoID descriptions first (research-backed best practice)
751
+ // VoID provides schema metadata in O(1) if available
752
+ if (config.useVoID) {
753
+ const voidQuery = `
754
+ PREFIX void: <http://rdfs.org/ns/void#>
755
+ PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
756
+ SELECT DISTINCT ?prop ?class WHERE {
757
+ { [] void:property ?prop }
758
+ UNION
759
+ { [] void:class ?class }
760
+ UNION
761
+ { [] void:classPartition [ void:class ?class ] }
762
+ UNION
763
+ { [] void:propertyPartition [ void:property ?prop ] }
764
+ } LIMIT ${config.maxProperties}
765
+ `
766
+ try {
767
+ const voidResults = kg.querySelect(voidQuery)
768
+ for (const r of voidResults) {
769
+ const prop = r.bindings?.prop || r.prop
770
+ const cls = r.bindings?.class || r.class
771
+ if (prop) ctx.properties.set(prop, { uri: prop, domain: null, range: null, functional: false, source: 'void' })
772
+ if (cls) ctx.classes.set(cls, { uri: cls, superclasses: [], constraints: [], source: 'void' })
773
+ }
774
+ } catch (e) {
775
+ // VoID not available, continue with other strategies
776
+ }
777
+ }
778
+
779
+ // STRATEGY 2: Extract RDFS/OWL explicit schema (if VoID incomplete)
780
+ if (ctx.classes.size < 10) {
781
+ const classQuery = `
782
+ PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
783
+ PREFIX owl: <http://www.w3.org/2002/07/owl#>
784
+ SELECT DISTINCT ?class ?super ?label WHERE {
785
+ { ?class a rdfs:Class } UNION { ?class a owl:Class }
786
+ OPTIONAL { ?class rdfs:subClassOf ?super }
787
+ OPTIONAL { ?class rdfs:label ?label }
788
+ } LIMIT ${config.maxClasses}
789
+ `
790
+ const classResults = kg.querySelect(classQuery)
791
+ for (const r of classResults) {
792
+ const cls = r.bindings?.class || r.class
793
+ const sup = r.bindings?.super || r.super
794
+ const label = r.bindings?.label || r.label
795
+ if (cls && !ctx.classes.has(cls)) {
796
+ ctx.classes.set(cls, { uri: cls, label, superclasses: sup ? [sup] : [], constraints: [], source: 'rdfs' })
797
+ }
798
+ }
799
+ }
800
+
801
+ // STRATEGY 3: Extract property morphisms with domain/range
802
+ if (ctx.properties.size < 10) {
803
+ const propQuery = `
804
+ PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
805
+ PREFIX owl: <http://www.w3.org/2002/07/owl#>
806
+ SELECT DISTINCT ?prop ?domain ?range ?label WHERE {
807
+ { ?prop a rdf:Property } UNION { ?prop a owl:ObjectProperty } UNION { ?prop a owl:DatatypeProperty }
808
+ OPTIONAL { ?prop rdfs:domain ?domain }
809
+ OPTIONAL { ?prop rdfs:range ?range }
810
+ OPTIONAL { ?prop rdfs:label ?label }
811
+ } LIMIT ${config.maxProperties}
812
+ `
813
+ const propResults = kg.querySelect(propQuery)
814
+ for (const r of propResults) {
815
+ const prop = r.bindings?.prop || r.prop
816
+ if (prop && !ctx.properties.has(prop)) {
817
+ ctx.properties.set(prop, {
818
+ uri: prop,
819
+ label: r.bindings?.label || r.label || null,
820
+ domain: r.bindings?.domain || r.domain || null,
821
+ range: r.bindings?.range || r.range || null,
822
+ functional: false,
823
+ source: 'rdfs'
824
+ })
825
+ }
826
+ }
827
+ }
828
+
829
+ // STRATEGY 4: Frequency-based sampling (for large KGs without explicit schema)
830
+ // This is O(sample_size), not O(total_triples) - ABSTAT-HD approach
831
+ if (ctx.properties.size === 0 && !config.useExplicitSchemaOnly) {
832
+ const instanceQuery = `SELECT DISTINCT ?p WHERE { ?s ?p ?o } LIMIT ${config.fallbackLimit}`
833
+ const instResults = kg.querySelect(instanceQuery)
834
+ for (const r of instResults) {
835
+ const prop = r.bindings?.p || r.p
836
+ if (prop) ctx.properties.set(prop, { uri: prop, domain: null, range: null, functional: false, source: 'instance' })
837
+ }
838
+ }
839
+
840
+ // STRATEGY 5: Infer classes from rdf:type usage (statistical sampling)
841
+ if (ctx.classes.size === 0 && !config.useExplicitSchemaOnly) {
842
+ const typeQuery = `
843
+ PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
844
+ SELECT DISTINCT ?type WHERE { ?s rdf:type ?type } LIMIT ${config.fallbackLimit}
845
+ `
846
+ const typeResults = kg.querySelect(typeQuery)
847
+ for (const r of typeResults) {
848
+ const cls = r.bindings?.type || r.type
849
+ if (cls) ctx.classes.set(cls, { uri: cls, superclasses: [], constraints: [], source: 'instance' })
850
+ }
851
+ }
852
+
853
+ ctx._computeSignature()
854
+ } catch (err) {
855
+ // Schema extraction failed - return empty context
856
+ }
857
+
858
+ return ctx
859
+ }
860
+
861
+ /**
862
+ * Build context from existing ontology (Bring Your Own Ontology - BYOO)
863
+ *
864
+ * For enterprise organizations with dedicated ontology teams,
865
+ * this allows importing pre-built ontologies rather than deriving from KG.
866
+ *
867
+ * Supported formats:
868
+ * - TTL (Turtle) - Most common for ontologies
869
+ * - OWL/RDF/XML via KG loader
870
+ * - ShEx/SHACL shapes
871
+ *
872
+ * Design: Ontology-first approach aligns with enterprise data governance
873
+ * where schema is controlled and versioned separately from instance data.
874
+ *
875
+ * Mathematical Foundation (Spivak Ologs):
876
+ * - Classes map to Objects in schema category
877
+ * - Properties map to Morphisms with domain/range
878
+ * - Subclass relations map to functorial embeddings
879
+ *
880
+ * @param {Object} kg - GraphDB instance to load ontology into (optional)
881
+ * @param {string} ontologyTtl - Ontology in TTL format
882
+ * @param {Object} options - Configuration options
883
+ * @returns {SchemaContext} Populated schema context
884
+ */
885
  static fromOntology(kg, ontologyTtl, options = {}) {
    const ctx = new SchemaContext()

    // Guard: a missing or non-string ontology yields an empty (but valid) context.
    if (!ontologyTtl || typeof ontologyTtl !== 'string') {
      return ctx
    }

    // Source marker for provenance (stamped on every class/property entry)
    const source = options.source || 'ontology'
    const namespace = options.namespace || 'http://example.org/'

    // If KG provided, load ontology into a named graph for querying
    let loadedKg = kg
    if (kg && typeof kg.loadTtl === 'function') {
      try {
        const graphUri = options.graphUri || 'http://hypermind.ai/ontology/'
        kg.loadTtl(ontologyTtl, graphUri)
        loadedKg = kg
      } catch (e) {
        // Fall back to regex parsing if KG load fails
        loadedKg = null
      }
    }

    // Strategy 1: Use KG SPARQL if loaded successfully
    if (loadedKg && typeof loadedKg.querySelect === 'function') {
      try {
        // Extract classes (Objects in schema category)
        const classQuery = `
          PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
          PREFIX owl: <http://www.w3.org/2002/07/owl#>
          PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
          SELECT DISTINCT ?class ?super ?label ?comment WHERE {
            { ?class a rdfs:Class }
            UNION { ?class a owl:Class }
            OPTIONAL { ?class rdfs:subClassOf ?super }
            OPTIONAL { ?class rdfs:label ?label }
            OPTIONAL { ?class rdfs:comment ?comment }
          } LIMIT ${CONFIG.schema.maxClasses}
        `
        const classResults = loadedKg.querySelect(classQuery)
        for (const r of classResults) {
          // Rows may arrive as { bindings: {...} } or as flat objects,
          // depending on the KG driver — both shapes are supported here.
          const cls = r.bindings?.class || r.class
          const sup = r.bindings?.super || r.super
          const label = r.bindings?.label || r.label
          const comment = r.bindings?.comment || r.comment
          if (cls) {
            // Merge with any row already seen for this class: the OPTIONALs
            // above can split one class across several result rows.
            // NOTE(review): repeated (class, super) combinations append
            // duplicate superclass entries — consider de-duplicating; verify
            // against the driver's actual row multiplicity.
            const existing = ctx.classes.get(cls)
            ctx.classes.set(cls, {
              uri: cls,
              label: label || existing?.label,
              comment: comment || existing?.comment,
              superclasses: sup ? [...(existing?.superclasses || []), sup] : (existing?.superclasses || []),
              constraints: existing?.constraints || [],
              source
            })
          }
        }

        // Extract properties (Morphisms with domain/range)
        const propQuery = `
          PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
          PREFIX owl: <http://www.w3.org/2002/07/owl#>
          PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
          SELECT DISTINCT ?prop ?domain ?range ?label ?functional WHERE {
            { ?prop a rdf:Property }
            UNION { ?prop a owl:ObjectProperty }
            UNION { ?prop a owl:DatatypeProperty }
            OPTIONAL { ?prop rdfs:domain ?domain }
            OPTIONAL { ?prop rdfs:range ?range }
            OPTIONAL { ?prop rdfs:label ?label }
            OPTIONAL { ?prop a owl:FunctionalProperty . BIND(true AS ?functional) }
          } LIMIT ${CONFIG.schema.maxProperties}
        `
        const propResults = loadedKg.querySelect(propQuery)
        for (const r of propResults) {
          const prop = r.bindings?.prop || r.prop
          const domain = r.bindings?.domain || r.domain
          const range = r.bindings?.range || r.range
          const label = r.bindings?.label || r.label
          const functional = r.bindings?.functional || r.functional
          if (prop) {
            // Unlike classes, property rows overwrite rather than merge.
            ctx.properties.set(prop, {
              uri: prop,
              domain: domain || null,
              range: range || null,
              label: label || null,
              functional: !!functional,
              source
            })
          }
        }

        // Extract inverse properties (category theory: adjoint functors)
        const inverseQuery = `
          PREFIX owl: <http://www.w3.org/2002/07/owl#>
          SELECT ?prop ?inverse WHERE {
            ?prop owl:inverseOf ?inverse
          }
        `
        try {
          const inverseResults = loadedKg.querySelect(inverseQuery)
          for (const r of inverseResults) {
            const prop = r.bindings?.prop || r.prop
            const inverse = r.bindings?.inverse || r.inverse
            // Only annotate properties already discovered above.
            if (prop && inverse && ctx.properties.has(prop)) {
              ctx.properties.get(prop).inverse = inverse
            }
          }
        } catch (e) {
          // Inverse query not supported - continue
        }

      } catch (e) {
        // SPARQL extraction failed - fall back to regex
        loadedKg = null
      }
    }

    // Strategy 2: Regex parsing (fallback for when no KG available, or when
    // SPARQL extraction produced no classes at all).
    if (!loadedKg || ctx.classes.size === 0) {
      // Parse classes: @prefix lines, rdfs:Class, owl:Class declarations.
      // The /g regexes are freshly created per call, so lastIndex state is safe.
      const classPatterns = [
        /<([^>]+)>\s+a\s+(rdfs:Class|owl:Class)/gi,
        /<([^>]+)>\s+rdf:type\s+(rdfs:Class|owl:Class)/gi,
        /:(\w+)\s+a\s+(rdfs:Class|owl:Class)/gi
      ]
      for (const pattern of classPatterns) {
        let match
        while ((match = pattern.exec(ontologyTtl)) !== null) {
          // Prefixed local names (no ':') are expanded against `namespace`.
          const uri = match[1].includes(':') ? match[1] : namespace + match[1]
          if (!ctx.classes.has(uri)) {
            ctx.classes.set(uri, { uri, superclasses: [], constraints: [], source })
          }
        }
      }

      // Parse properties: rdf:Property, owl:ObjectProperty, owl:DatatypeProperty
      const propPatterns = [
        /<([^>]+)>\s+a\s+(rdf:Property|owl:ObjectProperty|owl:DatatypeProperty)/gi,
        /:(\w+)\s+a\s+(rdf:Property|owl:ObjectProperty|owl:DatatypeProperty)/gi
      ]
      for (const pattern of propPatterns) {
        let match
        while ((match = pattern.exec(ontologyTtl)) !== null) {
          const uri = match[1].includes(':') ? match[1] : namespace + match[1]
          if (!ctx.properties.has(uri)) {
            ctx.properties.set(uri, { uri, domain: null, range: null, functional: false, source })
          }
        }
      }

      // Parse domain/range from TTL.
      // NOTE(review): these patterns only match full-IRI forms
      // (`<p> rdfs:domain <c>`), not prefixed names — confirm that is intended.
      const domainPattern = /<([^>]+)>\s+rdfs:domain\s+<([^>]+)>/gi
      let domainMatch
      while ((domainMatch = domainPattern.exec(ontologyTtl)) !== null) {
        const prop = domainMatch[1]
        const domain = domainMatch[2]
        if (ctx.properties.has(prop)) {
          ctx.properties.get(prop).domain = domain
        }
      }

      const rangePattern = /<([^>]+)>\s+rdfs:range\s+<([^>]+)>/gi
      let rangeMatch
      while ((rangeMatch = rangePattern.exec(ontologyTtl)) !== null) {
        const prop = rangeMatch[1]
        const range = rangeMatch[2]
        if (ctx.properties.has(prop)) {
          ctx.properties.get(prop).range = range
        }
      }
    }

    // Seal the context with a deterministic signature hash.
    ctx._computeSignature()
    return ctx
  }
1062
+
1063
+ /**
1064
+ * Create a merged context from multiple sources (KG + Ontology)
1065
+ *
1066
+ * For enterprise scenarios where:
1067
+ * 1. Core ontology is maintained by ontology team
1068
+ * 2. Extensions/instances are discovered from KG
1069
+ *
1070
+ * @param {SchemaContext[]} contexts - Array of contexts to merge
1071
+ * @returns {SchemaContext} Merged context
1072
+ */
1073
+ static merge(...contexts) {
1074
+ const merged = new SchemaContext()
1075
+
1076
+ for (const ctx of contexts) {
1077
+ if (!ctx) continue
1078
+
1079
+ // Merge classes (later contexts override earlier)
1080
+ for (const [uri, cls] of ctx.classes) {
1081
+ const existing = merged.classes.get(uri)
1082
+ merged.classes.set(uri, {
1083
+ ...cls,
1084
+ superclasses: [...new Set([...(existing?.superclasses || []), ...cls.superclasses])],
1085
+ source: existing ? `${existing.source}+${cls.source}` : cls.source
1086
+ })
1087
+ }
1088
+
1089
+ // Merge properties
1090
+ for (const [uri, prop] of ctx.properties) {
1091
+ const existing = merged.properties.get(uri)
1092
+ merged.properties.set(uri, {
1093
+ ...prop,
1094
+ domain: prop.domain || existing?.domain,
1095
+ range: prop.range || existing?.range,
1096
+ source: existing ? `${existing.source}+${prop.source}` : prop.source
1097
+ })
1098
+ }
1099
+
1100
+ // Merge bindings
1101
+ for (const [varName, type] of ctx.bindings) {
1102
+ merged.bindings.set(varName, type)
1103
+ }
1104
+
1105
+ // Merge path equations
1106
+ merged.pathEquations.push(...ctx.pathEquations)
1107
+ }
1108
+
1109
+ merged._computeSignature()
1110
+ return merged
1111
+ }
1112
+
1113
+ /**
1114
+ * Convert to simple schema format (backward compatibility)
1115
+ */
1116
+ toSimpleSchema() {
1117
+ return {
1118
+ predicates: Array.from(this.properties.keys()),
1119
+ classes: Array.from(this.classes.keys()),
1120
+ examples: [], // Derived on demand
1121
+ timestamp: new Date().toISOString()
1122
+ }
1123
+ }
1124
+
1125
+ /**
1126
+ * Compute deterministic signature hash for the schema
1127
+ * Same schema → same hash (ensures idempotent query generation)
1128
+ */
1129
+ _computeSignature() {
1130
+ const classKeys = Array.from(this.classes.keys()).sort()
1131
+ const propKeys = Array.from(this.properties.keys()).sort()
1132
+ const signature = JSON.stringify({ classes: classKeys, properties: propKeys })
1133
+
1134
+ // Simple hash function
1135
+ let hash = 0
1136
+ for (let i = 0; i < signature.length; i++) {
1137
+ const char = signature.charCodeAt(i)
1138
+ hash = ((hash << 5) - hash) + char
1139
+ hash = hash & hash
1140
+ }
1141
+ this._signatureHash = 'sig_' + Math.abs(hash).toString(16)
1142
+ }
1143
+
1144
+ /**
1145
+ * Introduce variable binding: Γ, ?x : T
1146
+ */
1147
+ bindVariable(varName, type) {
1148
+ const normalized = varName.startsWith('?') ? varName : '?' + varName
1149
+ this.bindings.set(normalized, type)
1150
+ return this
1151
+ }
1152
+
1153
+ /**
1154
+ * Type lookup: Γ ⊢ ?x : τ
1155
+ */
1156
+ getType(varName) {
1157
+ const normalized = varName.startsWith('?') ? varName : '?' + varName
1158
+ return this.bindings.get(normalized) || 'Any'
1159
+ }
1160
+
1161
+ /**
1162
+ * Check if property P has domain D: Γ contains (P : D → ?)
1163
+ */
1164
+ getDomain(propertyUri) {
1165
+ const prop = this.properties.get(propertyUri)
1166
+ return prop?.domain || null
1167
+ }
1168
+
1169
+ /**
1170
+ * Check if property P has range R: Γ contains (P : ? → R)
1171
+ */
1172
+ getRange(propertyUri) {
1173
+ const prop = this.properties.get(propertyUri)
1174
+ return prop?.range || null
1175
+ }
1176
+
1177
+ /**
1178
+ * Get all properties with given domain
1179
+ */
1180
+ getPropertiesForClass(classUri) {
1181
+ const result = []
1182
+ for (const [uri, prop] of this.properties) {
1183
+ if (prop.domain === classUri || prop.domain === null) {
1184
+ result.push(uri)
1185
+ }
1186
+ }
1187
+ return result
1188
+ }
1189
+
1190
+ /**
1191
+ * Serialize context for hashing (determinism)
1192
+ */
1193
+ toCanonical() {
1194
+ return {
1195
+ signature: this._signatureHash,
1196
+ classCount: this.classes.size,
1197
+ propertyCount: this.properties.size,
1198
+ bindings: Object.fromEntries(this.bindings)
1199
+ }
1200
+ }
1201
+ }
1202
+
1203
/**
 * TypeJudgment - Formal type judgment Γ ⊢ e : τ
 *
 * Based on Hindley-Milner type inference with extensions for:
 * - Dependent types (property domain/range)
 * - Refinement types (business constraints)
 */
class TypeJudgment {
  /**
   * @param {Object} context - Γ (SchemaContext, or any object with toCanonical())
   * @param {string} expression - e (SPARQL triple pattern or expression)
   * @param {string} type - τ (the derived type, e.g. 'Valid', 'A → B')
   * @param {string} rule - Derivation rule name (e.g. 'VAR-INTRO')
   */
  constructor(context, expression, type, rule) {
    this.context = context
    this.expression = expression
    this.type = type
    this.rule = rule
    this.premises = [] // sub-judgments forming the proof tree
    this.timestamp = Date.now()
  }

  /**
   * Add a premise (sub-proof).
   * @param {TypeJudgment} judgment
   * @returns {TypeJudgment} this, for chaining
   */
  addPremise(judgment) {
    this.premises.push(judgment)
    return this
  }

  /**
   * A judgment is valid when all of its premises are valid
   * (leaf judgments are vacuously valid).
   * @returns {boolean}
   */
  isValid() {
    if (this.premises.length === 0) return true
    return this.premises.every(p => p.isValid())
  }

  /**
   * Render this judgment and its premises as an indented proof tree.
   * @param {number} [indent=0] - Current indentation depth
   * @returns {string}
   */
  toProofTree(indent = 0) {
    const pad = '  '.repeat(indent)
    let result = `${pad}${this.rule}: ${this.expression} : ${this.type}\n`
    for (const premise of this.premises) {
      result += premise.toProofTree(indent + 1)
    }
    return result
  }

  /**
   * Deterministic hash of this judgment (context + expression + type + rule;
   * timestamp and premises are deliberately excluded).
   * @returns {string} 'judge_<hex>'
   */
  hash() {
    const content = JSON.stringify({
      ctx: this.context.toCanonical(),
      expr: this.expression,
      type: this.type,
      rule: this.rule
    })
    let hash = 0
    for (let i = 0; i < content.length; i++) {
      hash = ((hash << 5) - hash) + content.charCodeAt(i)
      hash = hash & hash
    }
    return 'judge_' + Math.abs(hash).toString(16)
  }
}

/**
 * QueryValidator - Validates SPARQL queries using type-theoretic derivation rules
 *
 * Derivation Rules (based on categorical semantics):
 *
 * 1. VAR-INTRO (Variable Introduction):
 *    ────────────────
 *    Γ ⊢ ?x : Fresh
 *
 * 2. TYPE-INTRO (Type Introduction via rdf:type):
 *    Γ ⊢ ?x rdf:type C : Valid
 *    ─────────────────────────
 *    Γ, ?x : C ⊢ ... : Valid
 *
 * 3. PROP-CHECK (Property Domain/Range Check):
 *    Γ ⊢ P : D → R    Γ ⊢ ?s : D    Γ ⊢ ?o : R
 *    ─────────────────────────────────────────
 *    Γ ⊢ (?s P ?o) : Valid
 *
 * 4. COMPOSE (Morphism Composition - Category Theory):
 *    Γ ⊢ P₁ : A → B    Γ ⊢ P₂ : B → C
 *    ─────────────────────────────────
 *    Γ ⊢ P₁ ; P₂ : A → C
 */
class QueryValidator {
  /**
   * @param {Object} context - Schema context (Γ) providing getDomain/getRange/
   *   getType/bindVariable and the classes/properties maps
   */
  constructor(context) {
    this.context = context
    this.derivations = [] // all judgments produced, in order
    this.errors = []      // accumulated across the validator's lifetime
    this.warnings = []
  }

  /**
   * Validate a SPARQL triple pattern and return a TypeJudgment with proof tree.
   *
   * FIX: a judgment's Valid/Invalid status is now decided by the errors raised
   * during THIS call (error-count delta), not by the validator's accumulated
   * error total — previously, one bad triple marked every later valid triple
   * as Invalid.
   * FIX: TYPE-INTRO judgments are now recorded in `this.derivations`, matching
   * PROP-CHECK, so getResult() proof trees no longer omit type introductions.
   *
   * @param {string} subject - Subject term (?var, IRI, or literal)
   * @param {string} predicate - Predicate IRI ('a'/'rdf:type' trigger TYPE-INTRO)
   * @param {string} object - Object term
   * @returns {TypeJudgment}
   */
  validateTriplePattern(subject, predicate, object) {
    // Snapshot so validity reflects only errors added by this pattern.
    const errorCountBefore = this.errors.length

    // Rule: VAR-INTRO for subject
    const subjectType = this._inferType(subject)
    const subjectJudgment = new TypeJudgment(
      this.context, subject, subjectType, 'VAR-INTRO'
    )

    // Rule: PROP-CHECK for predicate
    const domain = this.context.getDomain(predicate)
    const range = this.context.getRange(predicate)

    // If predicate not in schema, warn but allow
    if (!this.context.properties.has(predicate)) {
      this.warnings.push({
        code: 'UNKNOWN_PREDICATE',
        message: `Predicate not in schema: ${predicate}`,
        suggestion: this._suggestPredicate(predicate)
      })
    }

    // Rule: TYPE-INTRO if predicate is rdf:type
    if (predicate === 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type' ||
        predicate === 'rdf:type' || predicate === 'a') {
      this.context.bindVariable(subject, object)
      const typeIntro = new TypeJudgment(
        this.context,
        `${subject} rdf:type ${object}`,
        'Valid',
        'TYPE-INTRO'
      ).addPremise(subjectJudgment)
      this.derivations.push(typeIntro)
      return typeIntro
    }

    // Rule: PROP-CHECK with domain/range validation
    const objectType = this._inferType(object)
    const objectJudgment = new TypeJudgment(
      this.context, object, objectType, 'VAR-INTRO'
    )

    // Check domain compatibility ('Any' means unconstrained, so it passes)
    if (domain && subjectType !== 'Any' && subjectType !== domain) {
      this.errors.push({
        code: 'DOMAIN_MISMATCH',
        message: `Subject type ${subjectType} incompatible with property domain ${domain}`,
        expression: `${subject} ${predicate} ${object}`
      })
    }

    // Check range compatibility
    if (range && objectType !== 'Any' && objectType !== range) {
      this.errors.push({
        code: 'RANGE_MISMATCH',
        message: `Object type ${objectType} incompatible with property range ${range}`,
        expression: `${subject} ${predicate} ${object}`
      })
    }

    const judgment = new TypeJudgment(
      this.context,
      `${subject} ${predicate} ${object}`,
      this.errors.length === errorCountBefore ? 'Valid' : 'Invalid',
      'PROP-CHECK'
    ).addPremise(subjectJudgment).addPremise(objectJudgment)

    this.derivations.push(judgment)
    return judgment
  }

  /**
   * Validate morphism composition (property path) — the COMPOSE rule:
   * range of P1 must equal domain of P2.
   *
   * @param {string} property1 - First property URI
   * @param {string} property2 - Second property URI
   * @returns {TypeJudgment} 'Invalid' on mismatch, else typed 'A → B'
   */
  validateComposition(property1, property2) {
    const range1 = this.context.getRange(property1)
    const domain2 = this.context.getDomain(property2)

    // Check composition validity: range of P1 must match domain of P2
    if (range1 && domain2 && range1 !== domain2) {
      this.errors.push({
        code: 'COMPOSITION_INVALID',
        message: `Cannot compose ${property1} (range: ${range1}) with ${property2} (domain: ${domain2})`,
        rule: 'COMPOSE'
      })
      return new TypeJudgment(
        this.context,
        `${property1} ; ${property2}`,
        'Invalid',
        'COMPOSE'
      )
    }

    const domain1 = this.context.getDomain(property1)
    const range2 = this.context.getRange(property2)

    return new TypeJudgment(
      this.context,
      `${property1} ; ${property2}`,
      `${domain1 || 'Any'} → ${range2 || 'Any'}`,
      'COMPOSE'
    )
  }

  /**
   * Infer the type of an expression: variables resolve through the context,
   * literals through their ^^datatype (default xsd:string), known class IRIs
   * to 'Class', everything else to 'IRI'.
   * @param {string} expr
   * @returns {string}
   */
  _inferType(expr) {
    if (typeof expr !== 'string') return 'Any'

    // Variable: check context
    if (expr.startsWith('?')) {
      return this.context.getType(expr)
    }

    // Literal
    if (expr.startsWith('"') || expr.startsWith("'")) {
      if (expr.includes('^^')) {
        const datatypeMatch = expr.match(/\^\^<?([^>]+)>?$/)
        if (datatypeMatch) return datatypeMatch[1]
      }
      return 'xsd:string'
    }

    // IRI - check if it's a class
    if (this.context.classes.has(expr)) {
      return 'Class'
    }

    return 'IRI'
  }

  /**
   * Suggest the most similar known predicate (fuzzy matching on local names).
   * The 0.5 cutoff mirrors CONFIG.scoring.similarityThreshold.
   * @param {string} predicate - Unknown predicate URI
   * @returns {string|null} Best-matching schema predicate, or null
   */
  _suggestPredicate(predicate) {
    const predicates = Array.from(this.context.properties.keys())
    const localName = predicate.split(/[#/]/).pop().toLowerCase()

    let bestMatch = null
    let bestScore = 0

    for (const p of predicates) {
      const pLocal = p.split(/[#/]/).pop().toLowerCase()
      const score = this._similarityScore(localName, pLocal)
      if (score > bestScore && score > 0.5) {
        bestScore = score
        bestMatch = p
      }
    }

    return bestMatch
  }

  /**
   * Jaccard similarity on character bigrams; 1.0 for identical strings.
   * Single-character strings produce no bigrams and score 0 unless equal.
   * @param {string} a
   * @param {string} b
   * @returns {number} In [0, 1]
   */
  _similarityScore(a, b) {
    if (a === b) return 1.0
    const bigramsA = new Set()
    const bigramsB = new Set()
    for (let i = 0; i < a.length - 1; i++) bigramsA.add(a.slice(i, i + 2))
    for (let i = 0; i < b.length - 1; i++) bigramsB.add(b.slice(i, i + 2))
    const intersection = new Set([...bigramsA].filter(x => bigramsB.has(x)))
    const union = new Set([...bigramsA, ...bigramsB])
    return union.size > 0 ? intersection.size / union.size : 0
  }

  /**
   * Summarize validation: overall validity, errors/warnings, and all
   * derivations with their hashes and combined proof tree.
   * @returns {{valid: boolean, errors: Array, warnings: Array, derivations: Array, proofTree: string}}
   */
  getResult() {
    return {
      valid: this.errors.length === 0,
      errors: this.errors,
      warnings: this.warnings,
      derivations: this.derivations.map(d => ({
        expression: d.expression,
        type: d.type,
        rule: d.rule,
        hash: d.hash()
      })),
      proofTree: this.derivations.map(d => d.toProofTree()).join('\n')
    }
  }
}
1486
+
1487
/**
 * ProofDAG - Directed Acyclic Graph of reasoning steps (Curry-Howard)
 *
 * Every answer produced by the agent has a proof showing:
 * 1. What SPARQL queries were executed
 * 2. What rules were applied
 * 3. What intermediate results were derived
 * 4. Full chain from question to answer
 *
 * Based on Curry-Howard correspondence:
 * - Types ↔ Propositions
 * - Programs ↔ Proofs
 * - Tool executions ↔ Inference steps
 */
class ProofDAG {
  /**
   * @param {*} rootClaim - The final answer/claim the proof supports
   */
  constructor(rootClaim) {
    this.rootClaim = rootClaim // The final answer/claim
    this.nodes = new Map()     // nodeId → { id, claim, evidence, rule, children, timestamp }
    this.edges = []            // { from, to, relation }
    this._nodeCounter = 0

    // Create root node
    this.rootId = this._addNode(rootClaim, null, 'ROOT')
  }

  /**
   * Create a node and register it (not yet linked to any parent).
   * @returns {string} New node id ('node_<n>')
   */
  _addNode(claim, evidence, rule) {
    const nodeId = `node_${++this._nodeCounter}`
    this.nodes.set(nodeId, {
      id: nodeId,
      claim,
      evidence,
      rule,
      children: [],
      timestamp: Date.now()
    })
    return nodeId
  }

  /**
   * Add evidence (sub-proof) supporting a claim.
   *
   * NOTE: if `parentId` does not exist, the node is still created but left
   * unlinked (an orphan); verify() now reports such proofs as invalid.
   *
   * @param {string} parentId - Id of the node this evidence supports
   * @param {*} claim - Human-readable claim for the evidence node
   * @param {Object|null} evidence - Structured evidence payload
   * @param {string} rule - Inference-rule label (e.g. 'SPARQL_EXEC')
   * @returns {string} Id of the new evidence node
   */
  addEvidence(parentId, claim, evidence, rule) {
    const nodeId = this._addNode(claim, evidence, rule)
    const parent = this.nodes.get(parentId)
    if (parent) {
      parent.children.push(nodeId)
      this.edges.push({ from: parentId, to: nodeId, relation: 'supports' })
    }
    return nodeId
  }

  /**
   * Add SPARQL query execution as evidence.
   * @returns {string} New node id
   */
  addSparqlEvidence(parentId, sparql, bindings) {
    return this.addEvidence(
      parentId,
      `Query returned ${bindings.length} results`,
      { type: 'sparql', query: sparql, resultCount: bindings.length },
      'SPARQL_EXEC'
    )
  }

  /**
   * Add Datalog inference as evidence.
   * @returns {string} New node id
   */
  addDatalogEvidence(parentId, rules, inferredFacts) {
    return this.addEvidence(
      parentId,
      `Inferred ${inferredFacts.length} facts from ${rules.length} rules`,
      { type: 'datalog', rules, factCount: inferredFacts.length },
      'DATALOG_INFER'
    )
  }

  /**
   * Add embedding similarity as evidence.
   * @returns {string} New node id
   */
  addEmbeddingEvidence(parentId, entity, similar, threshold) {
    return this.addEvidence(
      parentId,
      `Found ${similar.length} entities similar to ${entity}`,
      { type: 'embedding', entity, similarCount: similar.length, threshold },
      'EMBEDDING_SEARCH'
    )
  }

  /**
   * Add memory retrieval as evidence.
   * @returns {string} New node id
   */
  addMemoryEvidence(parentId, episodes) {
    return this.addEvidence(
      parentId,
      `Retrieved ${episodes.length} relevant episodes from memory`,
      { type: 'memory', episodeCount: episodes.length },
      'MEMORY_RETRIEVAL'
    )
  }

  /**
   * Compute deterministic hash of entire proof.
   * Timestamps and evidence payloads are excluded, so the hash is stable
   * across runs for structurally identical proofs.
   * @returns {string} 'proof_<hex>'
   */
  computeHash() {
    const content = JSON.stringify({
      root: this.rootClaim,
      nodes: Array.from(this.nodes.values()).map(n => ({
        claim: n.claim,
        rule: n.rule,
        children: n.children
      }))
    })

    let hash = 0
    for (let i = 0; i < content.length; i++) {
      hash = ((hash << 5) - hash) + content.charCodeAt(i)
      hash = hash & hash
    }
    return 'proof_' + Math.abs(hash).toString(16)
  }

  /**
   * Verify proof integrity via BFS from the root:
   * - no node visited twice (cycle / diamond sharing),
   * - every referenced child exists,
   * - FIX: every registered node is reachable from the root (previously,
   *   addEvidence with a bad parentId created a silent orphan that still
   *   verified as valid).
   *
   * @returns {{valid: boolean, nodeCount?: number, error?: string}}
   */
  verify() {
    const visited = new Set()
    const queue = [this.rootId]

    while (queue.length > 0) {
      const nodeId = queue.shift()
      if (visited.has(nodeId)) {
        return { valid: false, error: `Cycle detected at ${nodeId}` }
      }
      visited.add(nodeId)

      const node = this.nodes.get(nodeId)
      if (!node) {
        return { valid: false, error: `Missing node ${nodeId}` }
      }

      queue.push(...node.children)
    }

    // Orphan detection: every node must be reachable from the root.
    if (visited.size !== this.nodes.size) {
      return {
        valid: false,
        error: `${this.nodes.size - visited.size} orphan node(s) not reachable from root`
      }
    }

    return { valid: true, nodeCount: visited.size }
  }

  /**
   * Serialize proof for storage/transmission (includes hash + verification).
   * @returns {Object}
   */
  serialize() {
    return {
      rootClaim: this.rootClaim,
      rootId: this.rootId,
      proofHash: this.computeHash(),
      nodes: Object.fromEntries(this.nodes),
      edges: this.edges,
      verification: this.verify()
    }
  }

  /**
   * Human-readable proof trace, indented by depth.
   * @param {string} [nodeId=this.rootId] - Subtree root
   * @param {number} [indent=0] - Current depth
   * @returns {string}
   */
  toExplanation(nodeId = this.rootId, indent = 0) {
    const node = this.nodes.get(nodeId)
    if (!node) return ''

    const pad = '  '.repeat(indent)
    let result = `${pad}[${node.rule}] ${node.claim}\n`

    if (node.evidence) {
      if (node.evidence.type === 'sparql') {
        result += `${pad}  Query: ${node.evidence.query.slice(0, 100)}...\n`
      } else if (node.evidence.type === 'datalog') {
        result += `${pad}  Applied ${node.evidence.rules.length} rules\n`
      } else if (node.evidence.type === 'embedding') {
        result += `${pad}  Similarity search for: ${node.evidence.entity}\n`
      } else if (node.evidence.type === 'memory') {
        result += `${pad}  From ${node.evidence.episodeCount} past episodes\n`
      }
    }

    for (const childId of node.children) {
      result += this.toExplanation(childId, indent + 1)
    }

    return result
  }
}
64
1678
 
@@ -157,59 +1771,523 @@ const TOOL_REGISTRY = {
157
1771
  // ============================================================================
158
1772
 
159
1773
  /**
160
- * LLMPlanner - Converts natural language prompts into validated execution plans
161
- * Uses type checking (Curry-Howard correspondence) to ensure correctness
1774
+ * LLMPlanner - Schema-aware planner with Context Theory validation
1775
+ *
1776
+ * Architecture (based on David Spivak's Ologs + Functorial Data Migration):
1777
+ * 1. Schema Extraction: Build SchemaContext (Γ) from KG
1778
+ * 2. Type-theoretic Validation: Validate queries using derivation rules
1779
+ * 3. Deterministic Generation: Same schema + same intent = same query
1780
+ * 4. LLM for Summarization Only: Not for critical reasoning paths
1781
+ * 5. Proof DAG: Every answer has verifiable reasoning chain
1782
+ *
1783
+ * Mathematical Foundation:
1784
+ * - Schema S is a category: Objects = Classes, Morphisms = Properties
1785
+ * - Context Γ = (Classes, Properties, Domains, Ranges, Constraints)
1786
+ * - Type Judgment: Γ ⊢ e : τ ensures query validity
1787
+ * - Derivation Rules: VAR-INTRO, TYPE-INTRO, PROP-CHECK, COMPOSE
1788
+ *
1789
+ * Three modes:
1790
+ * - Demo Mode: Pattern matching with hardcoded templates (no LLM)
1791
+ * - Validated Mode: Schema context + type-theoretic validation
1792
+ * - Production Mode: LLM for intent + context-validated SPARQL
162
1793
  */
163
1794
  class LLMPlanner {
164
- constructor(model, tools = TOOL_REGISTRY) {
165
- this.model = model
166
- this.tools = tools
1795
  /**
   * @param {Object} config - Planner configuration
   * @param {Object} config.kg - Knowledge graph instance (required for schema)
   * @param {string} config.model - LLM model name (e.g., 'claude-sonnet-4', 'gpt-4o')
   * @param {string} config.apiKey - API key for LLM provider
   * @param {Object} config.tools - Tool registry (defaults to TOOL_REGISTRY)
   * @param {string} [config.ontology] - Optional ontology TTL text (BYOO mode)
   * @param {string} [config.kgBaseUri] - Identifier used as the global schema-cache key
   */
  constructor(config = {}) {
    // NOTE: `||` (not `??`) is deliberate here — any falsy config value
    // normalizes to null / the default.
    this.kg = config.kg || null
    this.model = config.model || null
    this.apiKey = config.apiKey || null
    this.tools = config.tools || TOOL_REGISTRY

    // Bring Your Own Ontology (BYOO) support
    // For enterprise orgs with dedicated ontology teams
    this._ontologyTtl = config.ontology || null
    // Hash keys the global schema cache per-ontology (null when no ontology)
    this._ontologyHash = this._ontologyTtl ? this._computeOntologyHash(config.ontology) : null

    // Schema cache (simple schema for backward compat)
    this._schemaCache = null
    this._schemaCacheExpiry = 0

    // Context Theory: Type-theoretic schema context (Γ)
    // NOTE: Uses global SCHEMA_CACHE for cross-agent sharing
    this._schemaContext = null
    this._contextCacheExpiry = 0

    // KG identifier for cache key
    this._kgBaseUri = config.kgBaseUri || (config.kg?.baseUri) || 'default-kg'

    // Intent patterns (deterministic - not LLM dependent).
    // Keyword lists drive _analyzeIntent's substring matching.
    this.intentPatterns = {
      query: ['find', 'search', 'list', 'show', 'get', 'select'],
      infer: ['infer', 'deduce', 'derive', 'reason', 'conclude'],
      similar: ['similar', 'like', 'related', 'nearest', 'closest'],
      pattern: ['pattern', 'motif', 'circular', 'cycle', 'ring', 'fraud', 'suspicious'],
      rank: ['rank', 'important', 'pagerank', 'score', 'risk'],
      compliance: ['compliance', 'check', 'validate', 'verify'],
      aggregate: ['count', 'total', 'how many', 'sum', 'average']
    }

    // Query template registry (deterministic - schema-based)
    this._queryTemplates = new Map()
  }
+
1840
+ /**
1841
+ * Compute hash of ontology TTL for cache key
1842
+ */
1843
+ _computeOntologyHash(ttl) {
1844
+ if (!ttl) return null
1845
+ let hash = 0
1846
+ for (let i = 0; i < Math.min(ttl.length, 1000); i++) {
1847
+ hash = ((hash << 5) - hash) + ttl.charCodeAt(i)
1848
+ hash = hash & hash
1849
+ }
1850
+ return 'onto_' + Math.abs(hash).toString(16)
1851
+ }
1852
+
1853
+ /**
1854
+ * Build type-theoretic schema context (Γ) from KG or imported ontology
1855
+ *
1856
+ * Uses global SCHEMA_CACHE for cross-agent sharing:
1857
+ * - Same KG/ontology → same cached schema
1858
+ * - Multiple agents share schema (efficiency)
1859
+ * - TTL-based expiry (freshness)
1860
+ *
1861
+ * Schema Sources (in priority order):
1862
+ * 1. Imported ontology (BYOO) - for enterprise ontology teams
1863
+ * 2. KG-derived schema - extract from instance data
1864
+ * 3. Merged (ontology + KG extensions) - hybrid approach
1865
+ *
1866
+ * @param {boolean} forceRefresh - Force schema refresh
1867
+ * @returns {Promise<SchemaContext>}
1868
+ */
1869
+ async buildSchemaContext(forceRefresh = false) {
1870
+ // Try global cache first (cross-agent sharing)
1871
+ if (!forceRefresh) {
1872
+ const cached = SCHEMA_CACHE.get(this._kgBaseUri, this._ontologyHash)
1873
+ if (cached) {
1874
+ this._schemaContext = cached
1875
+ return cached
1876
+ }
1877
+ }
1878
+
1879
+ // Build schema from appropriate source
1880
+ let schemaContext
1881
+
1882
+ if (this._ontologyTtl) {
1883
+ // BYOO: Use imported ontology
1884
+ const ontologySchema = SchemaContext.fromOntology(this.kg, this._ontologyTtl, {
1885
+ source: 'ontology',
1886
+ graphUri: 'http://hypermind.ai/ontology/'
1887
+ })
1888
+
1889
+ // Optionally merge with KG-derived extensions
1890
+ if (this.kg) {
1891
+ const kgSchema = await SchemaContext.fromKG(this.kg, { useExplicitSchemaOnly: false })
1892
+ schemaContext = SchemaContext.merge(ontologySchema, kgSchema)
1893
+ } else {
1894
+ schemaContext = ontologySchema
1895
+ }
1896
+ } else if (this.kg) {
1897
+ // KG-derived schema only
1898
+ schemaContext = await SchemaContext.fromKG(this.kg)
1899
+ } else {
1900
+ // Empty schema
1901
+ schemaContext = new SchemaContext()
1902
+ }
1903
+
1904
+ // Store in global cache for cross-agent sharing
1905
+ SCHEMA_CACHE.set(this._kgBaseUri, schemaContext, this._ontologyHash)
1906
+
1907
+ // Also store local reference
1908
+ this._schemaContext = schemaContext
1909
+ this._contextCacheExpiry = Date.now() + CONFIG.schema.cacheExpiryMs
1910
+
1911
+ return schemaContext
1912
+ }
1913
+
1914
+ /**
1915
+ * Get schema cache statistics (for monitoring/debugging)
1916
+ */
1917
+ getSchemaCacheStats() {
1918
+ return SCHEMA_CACHE.getStats()
1919
+ }
1920
+
1921
+ /**
1922
+ * Invalidate schema cache (call when schema changes)
1923
+ */
1924
+ invalidateSchemaCache() {
1925
+ SCHEMA_CACHE.invalidate(this._kgBaseUri, this._ontologyHash)
1926
+ this._schemaContext = null
1927
+ this._contextCacheExpiry = 0
1928
+ }
1929
+
1930
+ /**
1931
+ * Validate SPARQL query using type-theoretic derivation rules
1932
+ * Returns validation result with proof tree
1933
+ */
1934
+ validateQuery(sparql, schemaContext) {
1935
+ const validator = new QueryValidator(schemaContext || this._schemaContext || new SchemaContext())
1936
+
1937
+ // Parse SPARQL and extract triple patterns (simplified)
1938
+ const triplePatterns = this._extractTriplePatterns(sparql)
1939
+
1940
+ for (const { s, p, o } of triplePatterns) {
1941
+ validator.validateTriplePattern(s, p, o)
1942
+ }
1943
+
1944
+ return validator.getResult()
1945
+ }
1946
+
1947
+ /**
1948
+ * Extract triple patterns from SPARQL query (simplified parser)
1949
+ */
1950
+ _extractTriplePatterns(sparql) {
1951
+ const patterns = []
1952
+ // Match triple patterns: ?var <uri> ?var or ?var prefix:local ?var
1953
+ const tripleRegex = /([?]\w+|<[^>]+>)\s+([?]\w+|<[^>]+>|[\w]+:[\w]+)\s+([?]\w+|<[^>]+>|"[^"]*")/g
1954
+ let match
1955
+ while ((match = tripleRegex.exec(sparql)) !== null) {
1956
+ patterns.push({ s: match[1], p: match[2], o: match[3] })
1957
+ }
1958
+ return patterns
1959
+ }
1960
+
1961
+ /**
1962
+ * Generate deterministic query hash for caching
1963
+ * Same schema + same intent = same hash
1964
+ */
1965
+ _computeQueryHash(intent, schemaContext) {
1966
+ const intentKey = Object.entries(intent).filter(([_, v]) => v).map(([k]) => k).sort().join(':')
1967
+ const schemaKey = schemaContext?.toCanonical?.()?.signature || 'no-schema'
1968
+ const content = `${intentKey}|${schemaKey}`
1969
+
1970
+ let hash = 0
1971
+ for (let i = 0; i < content.length; i++) {
1972
+ hash = ((hash << 5) - hash) + content.charCodeAt(i)
1973
+ hash = hash & hash
1974
+ }
1975
+ return 'qhash_' + Math.abs(hash).toString(16)
1976
+ }
1977
+
1978
+ /**
1979
+ * Extract schema from knowledge graph
1980
+ * @returns {Object} Schema with predicates, classes, examples
1981
+ */
1982
+ async extractSchema(forceRefresh = false) {
1983
+ if (!this.kg) return { predicates: [], classes: [], examples: [] }
1984
+
1985
+ const now = Date.now()
1986
+ if (!forceRefresh && this._schemaCache && now < this._schemaCacheExpiry) {
1987
+ return this._schemaCache
1988
+ }
1989
+
1990
+ const schema = { predicates: [], classes: [], examples: [], timestamp: new Date().toISOString() }
1991
+
1992
+ try {
1993
+ // Get unique predicates
1994
+ const predResults = this.kg.querySelect('SELECT DISTINCT ?p WHERE { ?s ?p ?o } LIMIT 200')
1995
+ schema.predicates = predResults.map(r => r.bindings?.p || r.p).filter(Boolean)
1996
+
1997
+ // Get RDF types
1998
+ const typeResults = this.kg.querySelect(`
1999
+ PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
2000
+ SELECT DISTINCT ?type WHERE { ?s rdf:type ?type } LIMIT 100
2001
+ `)
2002
+ schema.classes = typeResults.map(r => r.bindings?.type || r.type).filter(Boolean)
2003
+
2004
+ // Get sample triples
2005
+ const sampleResults = this.kg.querySelect('SELECT ?s ?p ?o WHERE { ?s ?p ?o } LIMIT 30')
2006
+ schema.examples = sampleResults.map(r => ({
2007
+ s: r.bindings?.s || r.s,
2008
+ p: r.bindings?.p || r.p,
2009
+ o: r.bindings?.o || r.o
2010
+ }))
2011
+ } catch (err) {
2012
+ schema.error = err.message
2013
+ }
2014
+
2015
+ this._schemaCache = schema
2016
+ this._schemaCacheExpiry = now + 5 * 60 * 1000 // 5 minute cache
2017
+ return schema
167
2018
  }
168
2019
 
169
2020
  /**
170
2021
  * Generate execution plan from natural language
2022
+ *
2023
+ * Context Theory Integration:
2024
+ * 1. Build SchemaContext (Γ) for type-theoretic validation
2025
+ * 2. Deterministic intent classification (not LLM dependent)
2026
+ * 3. Schema-validated SPARQL generation
2027
+ * 4. ProofDAG for verifiable reasoning chain
2028
+ * 5. LLM used ONLY for summarization (not query generation)
2029
+ *
2030
+ * Guarantees:
2031
+ * - Same input + same schema = same output (deterministic)
2032
+ * - All queries validated against schema context
2033
+ * - Full proof chain for every answer
2034
+ *
171
2035
  * @param {string} prompt - Natural language query
172
- * @param {Object} context - Optional context for planning
173
- * @returns {Promise<Object>} - Execution plan with typed steps
2036
+ * @param {Object} context - Optional context (memories, schema)
2037
+ * @returns {Object} Execution plan with typed steps and proof
174
2038
  */
175
2039
  async plan(prompt, context = {}) {
176
2040
  const planId = `plan-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`
177
2041
 
178
- // Analyze prompt to determine intent and required tools
2042
+ // STEP 1: Build type-theoretic schema context (Γ)
2043
+ const schemaContext = await this.buildSchemaContext()
2044
+ const schema = context.schema || await this.extractSchema()
2045
+
2046
+ // STEP 2: Deterministic intent classification (NOT LLM dependent)
2047
+ // This ensures same input → same intent (idempotent)
179
2048
  const intent = this._analyzeIntent(prompt)
180
2049
 
181
- // Generate typed steps based on intent
182
- const steps = this._generateSteps(intent, context)
2050
+ // STEP 3: Compute deterministic query hash
2051
+ // Same schema + same intent = same hash (for caching/reproducibility)
2052
+ const queryHash = this._computeQueryHash(intent, schemaContext)
2053
+
2054
+ // STEP 4: Generate steps using schema context
2055
+ const steps = this._generateSteps(intent, { ...context, schema, schemaContext })
2056
+
2057
+ // STEP 5: Extract and validate SPARQL queries
2058
+ const sparqlSteps = steps.filter(s => s.tool === 'kg.sparql.query')
2059
+ let validation = { valid: true, errors: [], warnings: [], derivations: [] }
2060
+
2061
+ if (sparqlSteps.length > 0 && sparqlSteps[0].args?.sparql) {
2062
+ validation = this.validateQuery(sparqlSteps[0].args.sparql, schemaContext)
2063
+ }
2064
+
2065
+ // STEP 6: Create ProofDAG for reasoning chain
2066
+ const proof = new ProofDAG(`Answer to: "${prompt.slice(0, 100)}"`)
2067
+ const planNode = proof.addEvidence(
2068
+ proof.rootId,
2069
+ `Plan generated with ${steps.length} steps`,
2070
+ { type: 'plan', stepCount: steps.length, intent },
2071
+ 'PLAN_GEN'
2072
+ )
2073
+
2074
+ // Add schema evidence
2075
+ proof.addEvidence(
2076
+ planNode,
2077
+ `Schema context: ${schemaContext.properties.size} properties, ${schemaContext.classes.size} classes`,
2078
+ { type: 'schema', signature: schemaContext._signatureHash },
2079
+ 'SCHEMA_EXTRACT'
2080
+ )
2081
+
2082
+ // Add validation evidence
2083
+ if (sparqlSteps.length > 0) {
2084
+ proof.addEvidence(
2085
+ planNode,
2086
+ validation.valid ? 'Query validated against schema' : `Validation errors: ${validation.errors.length}`,
2087
+ { type: 'validation', valid: validation.valid, errors: validation.errors },
2088
+ 'QUERY_VALIDATE'
2089
+ )
2090
+ }
183
2091
 
184
- // Build type chain for composition validation
185
- const typeChain = this._buildTypeChain(steps)
2092
+ // STEP 7: Optional LLM for summarization (NOT for query generation)
2093
+ let llmSummary = null
2094
+ if (this.model && this.apiKey && context.useLLMSummary) {
2095
+ llmSummary = await this._summarizeWithLLM(prompt, steps, validation)
2096
+ }
186
2097
 
187
2098
  return {
188
2099
  id: planId,
189
2100
  prompt,
190
2101
  intent,
191
2102
  steps,
192
- type_chain: typeChain,
193
- confidence: this._calculateConfidence(steps, intent),
194
- explanation: this._generateExplanation(steps, intent)
2103
+
2104
+ // Context Theory outputs
2105
+ schemaContext: schemaContext.toCanonical(),
2106
+ queryHash,
2107
+ validation,
2108
+
2109
+ // Proof chain
2110
+ proof: proof.serialize(),
2111
+ proofHash: proof.computeHash(),
2112
+
2113
+ // Metadata
2114
+ schema_used: !!schema.predicates.length,
2115
+ llm_used: !!llmSummary,
2116
+ type_chain: this._buildTypeChain(steps),
2117
+ confidence: validation.valid ? 0.95 : 0.6,
2118
+ explanation: llmSummary || this._generateExplanation(steps, intent)
195
2119
  }
196
2120
  }
197
2121
 
198
- _analyzeIntent(prompt) {
199
- const lowerPrompt = prompt.toLowerCase()
2122
  /**
   * LLM used ONLY for summarization, not for query generation.
   * This ensures deterministic queries while allowing natural language output.
   *
   * @param {string} prompt - Original user prompt (interpolated verbatim)
   * @param {Array} steps - Generated plan steps (only tool names are sent)
   * @param {Object} validation - Schema validation result (reported as PASSED/FAILED)
   * @returns {Promise<string|null>} One-sentence summary, or null when the
   *   LLM is not configured or the call fails (summary is best-effort)
   */
  async _summarizeWithLLM(prompt, steps, validation) {
    if (!this.model || !this.apiKey) return null

    const systemPrompt = `You are a summarizer. Given a query plan, produce a one-sentence summary.
Do NOT generate queries. Only summarize what the plan will do.`

    const userPrompt = `Plan for "${prompt}":
Steps: ${steps.map(s => s.tool).join(' ')}
Validation: ${validation.valid ? 'PASSED' : 'FAILED'}

Summarize in one sentence.`

    try {
      return await this._callLLM(systemPrompt, userPrompt)
    } catch (err) {
      // Deliberate swallow: a failed summary must not fail the plan
      return null
    }
  }
2144
+
2145
  /**
   * Full LLM planning path: build a schema-grounded system prompt and ask
   * the model for an intent classification plus SPARQL.
   *
   * @param {string} prompt - User's natural language query
   * @param {Object} schema - Extracted schema (classes/predicates/examples)
   * @param {Array} memories - Recent episodes included in the prompt
   * @returns {Promise<Object|null>} Parsed LLM result, or null when the LLM
   *   is unconfigured or the call/parse fails (caller falls back to
   *   deterministic pattern matching)
   */
  async _planWithLLM(prompt, schema, memories) {
    if (!this.model || !this.apiKey) return null

    const systemPrompt = this._buildSystemPrompt(schema, memories)
    const userPrompt = `User query: "${prompt}"\n\nGenerate intent classification and SPARQL query.`

    try {
      const response = await this._callLLM(systemPrompt, userPrompt)
      return this._parseLLMResponse(response)
    } catch (err) {
      // LLM call failed - fall back to pattern matching
      return null
    }
  }
2159
+
2160
  /**
   * Build the system prompt for LLM planning: schema grounding (classes,
   * predicates, sample triples, capped to keep the prompt small), optional
   * recent-episode memory, and hard rules against inventing predicates.
   * The model is instructed to answer in a fixed JSON shape that
   * _parseLLMResponse understands.
   *
   * @param {Object} schema - { classes, predicates, examples }
   * @param {Array} memories - Recent episodes (up to 5 shown)
   * @returns {string} Complete system prompt
   */
  _buildSystemPrompt(schema, memories) {
    let schemaText = '## Knowledge Graph Schema\n\n'

    // Caps (15/25/8) keep the schema section within prompt budget
    if (schema.classes.length > 0) {
      schemaText += '### Classes:\n' + schema.classes.slice(0, 15).map(c => `- ${c}`).join('\n') + '\n\n'
    }
    if (schema.predicates.length > 0) {
      schemaText += '### Predicates:\n' + schema.predicates.slice(0, 25).map(p => `- ${p}`).join('\n') + '\n\n'
    }
    if (schema.examples.length > 0) {
      schemaText += '### Sample Triples:\n' + schema.examples.slice(0, 8).map(t => `- <${t.s}> <${t.p}> ${t.o}`).join('\n') + '\n'
    }

    let memoryText = ''
    if (memories.length > 0) {
      // Episodes may be wrapped ({ episode: {...} }) or flat — handle both
      memoryText = '\n## Recent Episodes:\n' + memories.slice(0, 5).map((m, i) =>
        `${i + 1}. "${m.episode?.prompt || m.prompt}" (${m.episode?.success ?? m.success ? 'success' : 'failed'})`
      ).join('\n')
    }

    return `You are a knowledge graph query planner.

${schemaText}
${memoryText}

RULES:
- ONLY use predicates from the schema above
- NEVER invent predicate names
- If schema doesn't match user's request, set intent to "schema_mismatch"
- Use proper SPARQL syntax

Respond in JSON:
{
  "intent": "<type>",
  "sparql": "<query or null>",
  "confidence": <0.0-1.0>,
  "reasoning": "<explanation>"
}

Intent types: detect_fraud, find_similar, explain, find_patterns, aggregate, general_query, schema_mismatch`
  }
2201
+
2202
  /**
   * Dispatch a chat completion to Anthropic or OpenAI, chosen by model
   * name ('claude'/'anthropic' → Anthropic Messages API, anything else →
   * OpenAI chat completions). Returns the raw text of the first completion.
   *
   * @param {string} systemPrompt - System/instruction message
   * @param {string} userPrompt - User message
   * @returns {Promise<string>} Completion text
   * @throws {Error} `API error: <status>` on any non-2xx HTTP response
   */
  async _callLLM(systemPrompt, userPrompt) {
    const model = this.model.toLowerCase()
    const isAnthropic = model.includes('claude') || model.includes('anthropic')

    const endpoint = isAnthropic
      ? 'https://api.anthropic.com/v1/messages'
      : 'https://api.openai.com/v1/chat/completions'

    const headers = isAnthropic
      ? { 'Content-Type': 'application/json', 'x-api-key': this.apiKey, 'anthropic-version': '2023-06-01' }
      : { 'Content-Type': 'application/json', 'Authorization': `Bearer ${this.apiKey}` }

    // Anthropic takes the system prompt as a top-level field; OpenAI as a
    // 'system' message. Low temperature (0.1) keeps OpenAI output stable.
    const body = isAnthropic
      ? { model: this.model, max_tokens: 1024, system: systemPrompt, messages: [{ role: 'user', content: userPrompt }] }
      : { model: this.model, messages: [{ role: 'system', content: systemPrompt }, { role: 'user', content: userPrompt }], temperature: 0.1 }

    const response = await fetch(endpoint, { method: 'POST', headers, body: JSON.stringify(body) })
    if (!response.ok) throw new Error(`API error: ${response.status}`)

    const data = await response.json()
    // Response shapes differ: Anthropic content[0].text vs OpenAI choices[0].message.content
    return isAnthropic ? data.content[0].text : data.choices[0].message.content
  }
2224
+
2225
+ _parseLLMResponse(response) {
2226
+ try {
2227
+ let jsonStr = response
2228
+ const match = response.match(/```json\s*([\s\S]*?)\s*```/) || response.match(/\{[\s\S]*\}/)
2229
+ if (match) jsonStr = match[1] || match[0]
2230
+
2231
+ const parsed = JSON.parse(jsonStr)
2232
+ return {
2233
+ type: parsed.intent || 'general_query',
2234
+ sparql: parsed.sparql,
2235
+ confidence: parsed.confidence || 0.8,
2236
+ reasoning: parsed.reasoning,
2237
+ tools: this._getToolsForIntent(parsed.intent)
2238
+ }
2239
+ } catch (err) {
2240
+ return null
2241
+ }
2242
+ }
2243
+
2244
+ _getToolsForIntent(intent) {
2245
+ const toolMap = {
2246
+ 'detect_fraud': ['kg.sparql.query', 'kg.datalog.apply'],
2247
+ 'find_similar': ['kg.embeddings.search'],
2248
+ 'explain': ['kg.datalog.apply'],
2249
+ 'find_patterns': ['kg.motif.find'],
2250
+ 'aggregate': ['kg.sparql.query'],
2251
+ 'general_query': ['kg.sparql.query'],
2252
+ 'schema_mismatch': []
2253
+ }
2254
+ return toolMap[intent] || ['kg.sparql.query']
2255
+ }
2256
+
2257
+ _generateStepsFromLLM(llmResult, sparql) {
2258
+ const steps = []
2259
+ let stepId = 1
2260
+
2261
+ if (sparql) {
2262
+ steps.push({
2263
+ id: stepId++,
2264
+ tool: 'kg.sparql.query',
2265
+ input_type: 'Query',
2266
+ output_type: 'BindingSet',
2267
+ args: { sparql }
2268
+ })
209
2269
  }
210
2270
 
2271
+ // Add additional tools based on intent
2272
+ const additionalTools = llmResult.tools.filter(t => t !== 'kg.sparql.query')
2273
+ additionalTools.forEach(tool => {
2274
+ steps.push({
2275
+ id: stepId++,
2276
+ tool,
2277
+ input_type: this.tools[tool]?.input || 'Any',
2278
+ output_type: this.tools[tool]?.output || 'Any',
2279
+ args: {}
2280
+ })
2281
+ })
2282
+
2283
+ return steps
2284
+ }
2285
+
2286
+ _analyzeIntent(prompt) {
2287
+ const lowerPrompt = prompt.toLowerCase()
211
2288
  const detected = {}
212
- for (const [intentType, keywords] of Object.entries(intents)) {
2289
+
2290
+ for (const [intentType, keywords] of Object.entries(this.intentPatterns)) {
213
2291
  detected[intentType] = keywords.some(k => lowerPrompt.includes(k))
214
2292
  }
215
2293
 
@@ -219,19 +2297,20 @@ class LLMPlanner {
219
2297
  _generateSteps(intent, context) {
220
2298
  const steps = []
221
2299
  let stepId = 1
2300
+ const schema = context.schema || { predicates: [], classes: [] }
222
2301
 
223
- // Add SPARQL query step if query intent detected
224
- if (intent.query || intent.compliance) {
2302
+ // Generate SPARQL based on intent and schema
2303
+ if (intent.query || intent.compliance || intent.aggregate) {
2304
+ const sparql = this._generateSchemaSparql(intent, schema, context)
225
2305
  steps.push({
226
2306
  id: stepId++,
227
2307
  tool: 'kg.sparql.query',
228
2308
  input_type: 'Query',
229
2309
  output_type: 'BindingSet',
230
- args: { sparql: context.sparql || 'SELECT * WHERE { ?s ?p ?o } LIMIT 100' }
2310
+ args: { sparql }
231
2311
  })
232
2312
  }
233
2313
 
234
- // Add pattern finding if pattern intent detected
235
2314
  if (intent.pattern) {
236
2315
  steps.push({
237
2316
  id: stepId++,
@@ -242,7 +2321,6 @@ class LLMPlanner {
242
2321
  })
243
2322
  }
244
2323
 
245
- // Add inference if infer intent detected
246
2324
  if (intent.infer) {
247
2325
  steps.push({
248
2326
  id: stepId++,
@@ -253,7 +2331,6 @@ class LLMPlanner {
253
2331
  })
254
2332
  }
255
2333
 
256
- // Add similarity search if similar intent detected
257
2334
  if (intent.similar) {
258
2335
  steps.push({
259
2336
  id: stepId++,
@@ -264,7 +2341,6 @@ class LLMPlanner {
264
2341
  })
265
2342
  }
266
2343
 
267
- // Add ranking if rank intent detected
268
2344
  if (intent.rank) {
269
2345
  steps.push({
270
2346
  id: stepId++,
@@ -275,34 +2351,58 @@ class LLMPlanner {
275
2351
  })
276
2352
  }
277
2353
 
278
- // Default to SPARQL query if no specific intent detected
2354
+ // Default query if no steps
279
2355
  if (steps.length === 0) {
280
2356
  steps.push({
281
2357
  id: stepId++,
282
2358
  tool: 'kg.sparql.query',
283
2359
  input_type: 'Query',
284
2360
  output_type: 'BindingSet',
285
- args: { sparql: 'SELECT * WHERE { ?s ?p ?o } LIMIT 100' }
2361
+ args: { sparql: 'SELECT ?s ?p ?o WHERE { ?s ?p ?o } LIMIT 100' }
286
2362
  })
287
2363
  }
288
2364
 
289
2365
  return steps
290
2366
  }
291
2367
 
2368
+ _generateSchemaSparql(intent, schema, context) {
2369
+ // Use schema-aware SPARQL generation
2370
+ if (context.sparql) return context.sparql
2371
+
2372
+ // Check if schema has relevant predicates
2373
+ const predicates = schema.predicates || []
2374
+
2375
+ if (intent.aggregate) {
2376
+ return 'SELECT (COUNT(*) as ?count) WHERE { ?s ?p ?o }'
2377
+ }
2378
+
2379
+ // Try to match predicates based on intent
2380
+ const riskPreds = predicates.filter(p => p.toLowerCase().includes('risk') || p.toLowerCase().includes('score'))
2381
+ const typePreds = predicates.filter(p => p.includes('type') || p.includes('Type'))
2382
+
2383
+ if (intent.pattern || intent.rank) {
2384
+ if (riskPreds.length > 0) {
2385
+ return `SELECT ?s ?score WHERE { ?s <${riskPreds[0]}> ?score } ORDER BY DESC(?score) LIMIT 100`
2386
+ }
2387
+ }
2388
+
2389
+ // Default: return all triples
2390
+ return 'SELECT ?s ?p ?o WHERE { ?s ?p ?o } LIMIT 100'
2391
+ }
2392
+
292
2393
  _buildTypeChain(steps) {
293
2394
  return steps.map(s => `${s.input_type} → ${s.output_type}`).join(' ; ')
294
2395
  }
295
2396
 
296
2397
  _calculateConfidence(steps, intent) {
297
- // Higher confidence if intent matches tool selection
298
2398
  const matchedIntents = Object.values(intent).filter(v => v).length
299
2399
  return Math.min(0.95, 0.7 + (matchedIntents * 0.05))
300
2400
  }
301
2401
 
302
2402
  _generateExplanation(steps, intent) {
303
2403
  const toolNames = steps.map(s => s.tool).join(', ')
304
- return `Plan uses ${steps.length} tool(s): ${toolNames}. ` +
305
- `Detected intents: ${Object.entries(intent).filter(([_, v]) => v).map(([k]) => k).join(', ') || 'general query'}.`
2404
+ const detectedIntents = Object.entries(intent).filter(([_, v]) => v).map(([k]) => k).join(', ')
2405
+ return `Plan uses ${steps.length} tool(s): ${toolNames}. Detected intents: ${detectedIntents || 'general query'}.`
306
2406
  }
307
2407
  }
308
2408
 
@@ -658,7 +2758,15 @@ class MemoryManager {
658
2758
 
659
2759
  async _getEmbedding(text) {
660
2760
  if (!this.embeddingService) return null
661
- return this.embeddingService.embed(text)
2761
+ // EmbeddingService doesn't have embed() - it's for vector storage/search
2762
+ // For text embedding, we generate a simple deterministic hash-based embedding
2763
+ // In production, integrate with OpenAI/Anthropic embedding APIs
2764
+ const hash = text.split('').reduce((acc, char) => ((acc << 5) - acc) + char.charCodeAt(0), 0)
2765
+ const embedding = new Float32Array(384)
2766
+ for (let i = 0; i < 384; i++) {
2767
+ embedding[i] = Math.sin(hash * (i + 1) * 0.01) * 0.5
2768
+ }
2769
+ return Array.from(embedding)
662
2770
  }
663
2771
 
664
2772
  _episodeToTurtle(episode) {
@@ -796,14 +2904,31 @@ class HyperMindAgent {
796
2904
  this.memory = config.memory || new MemoryManager(config.kg, config.embeddings)
797
2905
  this.embeddings = config.embeddings || null
798
2906
  this.apiKey = config.apiKey || null
2907
+ this.model = config.model || null
799
2908
  this.rules = config.rules || new DatalogRuleSet()
800
2909
  this.sandbox = new WasmSandbox(config.sandbox || {})
801
2910
  this.name = config.name || 'hypermind-agent'
802
2911
 
803
- // Intent patterns for natural language -> tool mapping
2912
+ // LLMPlanner for schema-aware planning (delegates all LLM/schema logic)
2913
+ this.planner = new LLMPlanner({
2914
+ kg: config.kg,
2915
+ model: config.model,
2916
+ apiKey: config.apiKey,
2917
+ tools: TOOL_REGISTRY
2918
+ })
2919
+
2920
+ // Intent patterns for fallback mode
804
2921
  this.intentPatterns = this._buildIntentPatterns()
805
2922
  }
806
2923
 
2924
  /**
   * Extract schema from KG (delegates to planner, which caches results).
   * @param {boolean} forceRefresh - Bypass the planner's schema cache
   * @returns {Object} Schema with predicates, classes, examples
   */
  async extractSchema(forceRefresh = false) {
    return this.planner.extractSchema(forceRefresh)
  }
2931
+
807
2932
  /**
808
2933
  * Execute a natural language request
809
2934
  * Returns answer + full explainable AI output
@@ -1879,6 +4004,26 @@ module.exports = {
1879
4004
  LLMPlanner,
1880
4005
  TOOL_REGISTRY,
1881
4006
 
4007
+ // Context Theory (v0.6.11+) - Type-theoretic foundations for SPARQL validation
4008
+ // Based on: Spivak's Ologs, Functorial Data Migration, TypeQL
4009
+ SchemaContext, // Γ context with classes, properties, bindings
4010
+ TypeJudgment, // Γ ⊢ e : τ formal type judgment
4011
+ QueryValidator, // Validates SPARQL using derivation rules
4012
+ ProofDAG, // Curry-Howard proof of reasoning chain
4013
+
4014
+ // Schema Caching (v0.6.12+) - Cross-agent schema sharing
4015
+ SchemaCache, // Cache class for schema storage
4016
+ SCHEMA_CACHE, // Global singleton instance (shared across all agents)
4017
+
4018
+ // Schema-Aware GraphDB (v0.6.13+) - Auto schema extraction on load
4019
+ // Schema is extracted ONCE after data load (not on every access)
4020
+ SchemaAwareGraphDB, // Wrapper with auto schema extraction
4021
+ createSchemaAwareGraphDB, // Factory function
4022
+ wrapWithSchemaAwareness, // Wrap existing GraphDb
4023
+
4024
+ // Configuration (v0.6.11+) - Centralized tunable parameters
4025
+ CONFIG, // All CONFIG values (no hardcoding)
4026
+
1882
4027
  // Supporting Classes
1883
4028
  MemoryManager,
1884
4029
  DatalogRuleSet,