@soulcraft/cortex 1.3.1 → 1.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,515 @@
1
+ /**
2
+ * NativeHNSWWrapper — Thin TS wrapper around the Rust HNSW graph engine
3
+ *
4
+ * Bridges brainy's HNSWIndex API to the native Rust NativeHNSWIndex.
5
+ * The Rust engine handles all graph operations (insert, search, remove) in-memory
6
+ * while this wrapper handles async storage I/O for persistence.
7
+ *
8
+ * Key design:
9
+ * - Uses addItemFull() for single-FFI-call inserts (returns node + neighbors + system)
10
+ * - Persistence modes: 'immediate' (safe) or 'deferred' (30-50x faster for cloud)
11
+ * - COW via native fork() (Arc-based copy-on-write in Rust)
12
+ * - rebuild() restores pre-computed graph from storage (O(N) vs O(N log N) rebuild)
13
+ */
14
+ import { loadNativeModule } from '../native/index.js';
15
+ import { getGlobalCache, prodLog } from '@soulcraft/brainy/internals';
16
/**
 * Baseline HNSW parameters used when the caller does not supply a value.
 */
const DEFAULT_CONFIG = {
    M: 16,
    efConstruction: 200,
    efSearch: 50,
    ml: 16,
};
22
export class NativeHNSWWrapper {
    /** Handle to the native index created from `loadNativeModule().NativeHNSWIndex`. */
    native;
    /** Effective configuration: DEFAULT_CONFIG overlaid with the caller's config. */
    config;
    /** Distance function handed in by the caller; stored and returned by getDistanceFunction(). */
    distanceFunction;
    /** Storage adapter used for persistence, or null when none was configured. */
    storage;
    /** 'immediate' (persist on every mutation) or 'deferred' (persist on flush()). */
    persistMode;
    /** Ids of nodes changed since the last successful flush (deferred mode). */
    dirtyNodes = new Set();
    /** True when system data (entry point / max level) changed since the last successful flush. */
    dirtySystem = false;
    useParallelization = true;
    unifiedCache;
    // COW support
    cowEnabled = false;
    /**
     * @param {object} [config] - Overrides for DEFAULT_CONFIG (M, efConstruction, efSearch, ml).
     * @param {Function} [distanceFunction] - Stored as-is on the wrapper.
     * @param {object} [options]
     * @param {object} [options.storage] - Storage adapter; persistence is skipped when absent.
     * @param {string} [options.persistMode] - 'immediate' (default) or 'deferred'.
     */
    constructor(config = {}, distanceFunction, options = {}) {
        this.config = { ...DEFAULT_CONFIG, ...config };
        this.distanceFunction = distanceFunction;
        this.storage = options.storage || null;
        this.persistMode = options.persistMode || 'immediate';
        this.unifiedCache = getGlobalCache();
        const bindings = loadNativeModule();
        const nativeConfig = {
            m: this.config.M,
            efConstruction: this.config.efConstruction,
            efSearch: this.config.efSearch,
            ml: this.config.ml,
        };
        this.native = new bindings.NativeHNSWIndex(nativeConfig);
        prodLog.info(`NativeHNSWWrapper initialized (M=${this.config.M}, ef=${this.config.efSearch}, ` +
            `persist=${this.persistMode})`);
    }
    // ---------------------------------------------------------------------------
    // Core CRUD
    // ---------------------------------------------------------------------------
    /**
     * Insert an item into the native index and persist (or mark dirty) the affected nodes.
     *
     * In 'immediate' mode with a storage adapter, the new node, every modified
     * neighbor and the system data are written before returning. Write failures
     * are logged and do not reject. In 'deferred' mode the ids are recorded for
     * the next flush().
     *
     * @param {{id: string, vector: *}} item
     * @returns {Promise<string>} The id reported by the native insert.
     * @throws {Error} When `item` or `item.vector` is missing.
     */
    async addItem(item) {
        if (!item)
            throw new Error('Item is undefined or null');
        if (!item.vector)
            throw new Error('Vector is undefined or null');
        const { id, vector } = item;
        // Use addItemFull — single FFI call returns all persistence data
        const result = this.native.addItemFull(id, vector);
        if (this.persistMode === 'immediate' && this.storage) {
            // Persist new node
            await this.storage.saveHNSWData(result.id, result.nodeData).catch((e) => {
                console.error(`[HNSW native] Failed to persist node ${result.id}:`, e);
            });
            // Persist modified neighbors
            const neighborPromises = result.modifiedNeighbors.map(neighbor => this.storage.saveHNSWData(neighbor.id, {
                level: neighbor.level,
                connections: neighbor.connections,
            }).catch((e) => {
                console.error(`[HNSW native] Failed to persist neighbor ${neighbor.id}:`, e);
            }));
            await Promise.allSettled(neighborPromises);
            // Persist system data
            await this.storage.saveHNSWSystem(result.systemData).catch((e) => {
                console.error('[HNSW native] Failed to persist system data:', e);
            });
        }
        else if (this.persistMode === 'deferred') {
            this.dirtyNodes.add(result.id);
            for (const neighbor of result.modifiedNeighbors) {
                this.dirtyNodes.add(neighbor.id);
            }
            this.dirtySystem = true;
        }
        return result.id;
    }
    /**
     * Remove a node from the native index and persist (or mark dirty) its former neighbors.
     *
     * @param {string} id
     * @returns {Promise<boolean>} false when the node is unknown or the native removal
     *   reports failure, true otherwise.
     */
    async removeItem(id) {
        if (!this.native.hasNode(id))
            return false;
        // Collect neighbors before removal: their connection lists change once the node is gone.
        const nodeData = this.native.getNodeData(id);
        const neighborIds = new Set();
        if (nodeData) {
            for (const ids of Object.values(nodeData.connections)) {
                for (const nid of ids) {
                    neighborIds.add(nid);
                }
            }
        }
        const removed = this.native.removeItem(id);
        if (!removed)
            return false;
        if (this.persistMode === 'immediate' && this.storage) {
            // Persist updated neighbors (their connections changed)
            const promises = Array.from(neighborIds).map(async (nid) => {
                const nData = this.native.getNodeData(nid);
                if (nData) {
                    await this.storage.saveHNSWData(nid, nData).catch((e) => {
                        console.error(`[HNSW native] Failed to persist neighbor ${nid} after removal:`, e);
                    });
                }
            });
            await Promise.allSettled(promises);
            // Persist system data (entry point may have changed)
            const sysData = this.native.getSystemData();
            await this.storage.saveHNSWSystem(sysData).catch((e) => {
                console.error('[HNSW native] Failed to persist system data after removal:', e);
            });
        }
        else if (this.persistMode === 'deferred') {
            for (const nid of neighborIds) {
                this.dirtyNodes.add(nid);
            }
            // Remove the deleted node from dirty set (no need to persist it)
            this.dirtyNodes.delete(id);
            this.dirtySystem = true;
        }
        return true;
    }
    /**
     * Search the native index.
     *
     * Three paths, chosen in this order:
     *  1. `options.candidateIds` is a non-empty array: native searchWithCandidates().
     *  2. `filter` is given: fetch `k * 5` raw results and keep the first `k` for
     *     which `await filter(id)` is truthy, so fewer than `k` may be returned.
     *  3. Otherwise: plain native search.
     * An empty `candidateIds` array is treated like an absent one.
     *
     * @param {*} queryVector
     * @param {number} [k=10]
     * @param {Function} [filter] - `(id) => boolean | Promise<boolean>`.
     * @param {{candidateIds?: string[]}} [options]
     * @returns {Promise<Array<[string, number]>>} `[id, distance]` tuples.
     * @throws {Error} When the index is non-empty and `queryVector` is missing.
     */
    async search(queryVector, k = 10, filter, options) {
        if (this.native.size() === 0)
            return [];
        if (!queryVector)
            throw new Error('Query vector is undefined or null');
        let results;
        if (options?.candidateIds && options.candidateIds.length > 0) {
            // Pre-filtered search delegated to the native engine
            results = this.native.searchWithCandidates(queryVector, k, this.config.efSearch, options.candidateIds);
        }
        else if (filter) {
            // Fallback: search with larger k and post-filter asynchronously
            const overRetrieve = k * 5;
            const rawResults = this.native.search(queryVector, overRetrieve, this.config.efSearch);
            const filtered = [];
            for (const r of rawResults) {
                if (await filter(r.id)) {
                    filtered.push(r);
                    if (filtered.length >= k)
                        break;
                }
            }
            results = filtered;
        }
        else {
            results = this.native.search(queryVector, k, this.config.efSearch);
        }
        // Convert NativeSearchResult[] to [string, number][] tuples
        return results.map(r => [r.id, r.distance]);
    }
    // ---------------------------------------------------------------------------
    // Persistence
    // ---------------------------------------------------------------------------
    /**
     * Write all dirty nodes (in batches of 50) and, if needed, the system data to storage.
     *
     * A node is un-marked just before its write starts and re-marked if the
     * write fails, so failed nodes are retried by the next flush() and nodes
     * dirtied while a write is in flight are not lost. The system-data flag is
     * handled the same way. Failures are logged and do not reject.
     *
     * @returns {Promise<number>} Number of dirty nodes settled by this call
     *   (persisted, or no longer present in the index). 0 when there is no
     *   storage adapter or nothing is dirty.
     */
    async flush() {
        if (!this.storage)
            return 0;
        if (this.dirtyNodes.size === 0 && !this.dirtySystem)
            return 0;
        const startTime = Date.now();
        let flushedCount = 0;
        // Batch persist dirty nodes
        if (this.dirtyNodes.size > 0) {
            const batchSize = 50;
            // Snapshot: ids added to the set while we await are left for the next flush.
            const nodeIds = Array.from(this.dirtyNodes);
            for (let i = 0; i < nodeIds.length; i += batchSize) {
                const batch = nodeIds.slice(i, i + batchSize);
                const promises = batch.map(async (nodeId) => {
                    // Un-mark first; a mutation racing with the write will re-mark the node.
                    this.dirtyNodes.delete(nodeId);
                    try {
                        const nodeData = this.native.getNodeData(nodeId);
                        // No node data means the node was deleted; nothing to write.
                        if (nodeData) {
                            await this.storage.saveHNSWData(nodeId, nodeData);
                        }
                        flushedCount++;
                    }
                    catch (e) {
                        // Keep the node dirty so the next flush retries it.
                        this.dirtyNodes.add(nodeId);
                        console.error(`[HNSW native flush] Failed to persist node ${nodeId}:`, e);
                    }
                });
                await Promise.allSettled(promises);
            }
        }
        // Persist system data
        if (this.dirtySystem) {
            // Clear before the write so a concurrent mutation can set it again.
            this.dirtySystem = false;
            try {
                const sysData = this.native.getSystemData();
                await this.storage.saveHNSWSystem(sysData);
            }
            catch (e) {
                this.dirtySystem = true;
                console.error('[HNSW native flush] Failed to persist system data:', e);
            }
        }
        const duration = Date.now() - startTime;
        if (flushedCount > 0) {
            prodLog.info(`[HNSW native] Flushed ${flushedCount} dirty nodes in ${duration}ms`);
        }
        return flushedCount;
    }
    /** @returns {number} Number of node ids currently marked dirty. */
    getDirtyNodeCount() {
        return this.dirtyNodes.size;
    }
    /** @returns {string} The persistence mode chosen at construction. */
    getPersistMode() {
        return this.persistMode;
    }
    // ---------------------------------------------------------------------------
    // Rebuild from storage
    // ---------------------------------------------------------------------------
    /**
     * Clear the native index and restore it from the storage adapter.
     *
     * Storage adapters whose constructor name is FileSystemStorage, MemoryStorage,
     * OPFSStorage or MmapFileSystemStorage are read in a single request; all
     * others are read page by page using `options.batchSize`.
     *
     * @param {object} [options]
     * @param {number} [options.batchSize=1000] - Page size for paginated loading.
     * @param {Function} [options.onProgress] - Called as `(loadedCount, totalCount)`
     *   whenever a total count is known.
     * @returns {Promise<void>} Resolves without doing anything when no storage is configured.
     * @throws {Error} Wrapping the underlying failure (available as `cause`).
     */
    async rebuild(options = {}) {
        if (!this.storage) {
            prodLog.warn('NativeHNSWWrapper rebuild skipped: no storage adapter configured');
            return;
        }
        const batchSize = options.batchSize || 1000;
        try {
            // Step 1: Clear native index
            this.native.clear();
            // Step 2: Load system data
            const systemData = await this.storage.getHNSWSystem();
            if (systemData && systemData.entryPointId) {
                this.native.setSystemData(systemData.entryPointId, systemData.maxLevel);
            }
            // Step 3: Load nouns from storage and restore into native engine
            const storageType = this.storage?.constructor.name || '';
            const isLocalStorage = storageType === 'FileSystemStorage' ||
                storageType === 'MemoryStorage' ||
                storageType === 'OPFSStorage' ||
                storageType === 'MmapFileSystemStorage';
            let loadedCount = 0;
            let totalCount;
            if (isLocalStorage) {
                // Local: load all at once
                const result = await this.storage.getNounsWithPagination({
                    limit: 10000000
                });
                totalCount = result.totalCount || result.items.length;
                loadedCount += await restoreNodesFromStorage(this.native, this.storage, result.items);
                if (options.onProgress && totalCount !== undefined) {
                    options.onProgress(loadedCount, totalCount);
                }
                prodLog.info(`NativeHNSW: Loaded ${loadedCount.toLocaleString()} nodes at once (local storage)`);
            }
            else {
                // Cloud: paginated loading
                let hasMore = true;
                let offset = 0;
                while (hasMore) {
                    const result = await this.storage.getNounsWithPagination({
                        limit: batchSize,
                        offset,
                    });
                    if (totalCount === undefined && result.totalCount !== undefined) {
                        totalCount = result.totalCount;
                    }
                    loadedCount += await restoreNodesFromStorage(this.native, this.storage, result.items);
                    if (options.onProgress && totalCount !== undefined) {
                        options.onProgress(loadedCount, totalCount);
                    }
                    hasMore = result.hasMore;
                    offset += batchSize;
                }
            }
            // Step 4: Recover entry point if needed
            if (this.native.size() > 0 && !this.native.getEntryPointId()) {
                // Pick the node with the highest level as the new entry point.
                const allIds = this.native.getAllIds();
                let bestId = null;
                let bestLevel = -1;
                for (const nid of allIds) {
                    const nd = this.native.getNodeData(nid);
                    if (nd && nd.level > bestLevel) {
                        bestLevel = nd.level;
                        bestId = nid;
                    }
                }
                if (bestId) {
                    this.native.setSystemData(bestId, bestLevel);
                    prodLog.info(`NativeHNSW entry point recovered: ${bestId} at level ${bestLevel}`);
                }
            }
            prodLog.info(`NativeHNSW index rebuilt: ${loadedCount.toLocaleString()} entities, ` +
                `${this.native.getMaxLevel() + 1} levels, ` +
                `entry point: ${this.native.getEntryPointId() || 'none'}`);
        }
        catch (error) {
            prodLog.error('NativeHNSW rebuild failed:', error);
            // Preserve the original error (and its stack) for callers that inspect `cause`.
            throw new Error(`Failed to rebuild NativeHNSW index: ${error}`, { cause: error });
        }
    }
    // ---------------------------------------------------------------------------
    // COW
    // ---------------------------------------------------------------------------
    /**
     * Replace this wrapper's native index with `parent.native.fork()` and adopt the
     * parent's config, distance function, parallelization flag and cache.
     *
     * @param {NativeHNSWWrapper} parent
     */
    enableCOW(parent) {
        this.cowEnabled = true;
        this.native = parent.native.fork();
        this.config = parent.config;
        this.distanceFunction = parent.distanceFunction;
        this.useParallelization = parent.useParallelization;
        this.unifiedCache = parent.unifiedCache;
        prodLog.info(`NativeHNSW COW enabled: ${this.native.size()} nodes (Arc-based fork)`);
    }
    /** @param {boolean} useParallelization */
    setUseParallelization(useParallelization) {
        this.useParallelization = useParallelization;
    }
    /** @returns {boolean} */
    getUseParallelization() {
        return this.useParallelization;
    }
    // ---------------------------------------------------------------------------
    // Info / Introspection
    // ---------------------------------------------------------------------------
    /** @returns {number} Node count reported by the native index. */
    size() {
        return this.native.size();
    }
    /** Clear the native index and reset all dirty tracking. */
    clear() {
        this.native.clear();
        this.dirtyNodes.clear();
        this.dirtySystem = false;
    }
    /** @returns {string|null} Entry point id, or null when the native index reports none. */
    getEntryPointId() {
        return this.native.getEntryPointId() || null;
    }
    /** @returns {number} Max level reported by the native index. */
    getMaxLevel() {
        return this.native.getMaxLevel();
    }
    /** @returns {number|null} Dimension reported by the native index, or null when it is null/undefined. */
    getDimension() {
        return this.native.getDimension() ?? null;
    }
    /** @returns {object} Shallow copy of the effective configuration. */
    getConfig() {
        return { ...this.config };
    }
    /** @returns {Function} The distance function supplied at construction. */
    getDistanceFunction() {
        return this.distanceFunction;
    }
    /**
     * Get all nouns — builds HNSWNoun objects from native data.
     * Ids for which the native index returns no node data or no vector are skipped.
     * @deprecated Use getNounsPaginated() instead
     * @returns {Map<string, object>} id -> `{ id, vector, connections, level }`.
     */
    getNouns() {
        const result = new Map();
        for (const id of this.native.getAllIds()) {
            const noun = buildNounFromNative(this.native, id);
            if (noun) {
                result.set(id, noun);
            }
        }
        return result;
    }
    /**
     * Return one page of nouns, optionally filtered.
     *
     * `offset` and `limit` apply to the nouns that survive the filter (and have
     * data). `totalCount` is the number of ids in the native index, unfiltered.
     * `hasMore` is true only when at least one further matching noun exists
     * beyond this page.
     *
     * @param {object} [options]
     * @param {number} [options.offset=0]
     * @param {number} [options.limit=100]
     * @param {Function} [options.filter] - `(noun) => boolean`.
     * @returns {{items: Map<string, object>, totalCount: number, hasMore: boolean}}
     */
    getNounsPaginated(options = {}) {
        const offset = options.offset || 0;
        const limit = options.limit || 100;
        const allIds = this.native.getAllIds();
        const totalCount = allIds.length;
        // Build noun objects for the requested page
        const items = new Map();
        let added = 0;
        let skipped = 0;
        let hasMore = false;
        for (const id of allIds) {
            const noun = buildNounFromNative(this.native, id);
            if (!noun)
                continue;
            if (options.filter && !options.filter(noun))
                continue;
            if (skipped < offset) {
                skipped++;
                continue;
            }
            if (added >= limit) {
                // Page is full and one more match exists: that is what hasMore means.
                hasMore = true;
                break;
            }
            items.set(id, noun);
            added++;
        }
        return {
            items,
            totalCount,
            hasMore,
        };
    }
    /**
     * @param {number} level
     * @returns {object[]} Nouns for the ids the native index reports at `level`;
     *   ids without node data or vector are skipped.
     */
    getNodesAtLevel(level) {
        const nodes = [];
        for (const id of this.native.getNodesAtLevel(level)) {
            const noun = buildNounFromNative(this.native, id);
            if (noun) {
                nodes.push(noun);
            }
        }
        return nodes;
    }
    /** @returns {Array<{level: number, nodeCount: number, avgConnections: number}>} */
    getLevelStats() {
        const nativeStats = this.native.getLevelStats();
        return nativeStats.map(s => ({
            level: s.level,
            nodeCount: s.nodeCount,
            avgConnections: s.avgConnections,
        }));
    }
    /** @returns {{averageConnections: *, layerDistribution: *, maxLayer: *, totalNodes: *}} */
    getIndexHealth() {
        const health = this.native.getIndexHealth();
        return {
            averageConnections: health.averageConnections,
            layerDistribution: health.layerDistribution,
            maxLayer: health.maxLayer,
            totalNodes: health.totalNodes,
        };
    }
    /**
     * Summarize unified-cache statistics together with an estimate of vector memory.
     *
     * Vector memory is estimated as `size * dimension * 4` bytes, falling back to a
     * dimension of 384 when the native index reports none. The strategy is
     * 'preloaded' when that estimate is below 80% of the cache's max size, else
     * 'on-demand'.
     *
     * @returns {object} `{ cachingStrategy, autoDetection, unifiedCache, hnswCache, fairness, recommendations }`.
     */
    getCacheStats() {
        const cacheStats = this.unifiedCache.getStats();
        const entityCount = this.native.size();
        const vectorDimension = this.native.getDimension() || 384;
        const bytesPerVector = vectorDimension * 4;
        const estimatedVectorMemoryMB = (entityCount * bytesPerVector) / (1024 * 1024);
        const availableCacheMB = (cacheStats.maxSize * 0.8) / (1024 * 1024);
        const vectorsInCache = cacheStats.typeCounts.hnsw || 0;
        const hnswMemoryBytes = cacheStats.typeSizes.hnsw || 0;
        const hnswAccessCount = cacheStats.typeAccessCounts.hnsw || 0;
        const totalAccessCount = cacheStats.totalAccessCount;
        const hnswAccessPercent = totalAccessCount > 0 ? (hnswAccessCount / totalAccessCount) * 100 : 0;
        const hnswCachePercent = cacheStats.maxSize > 0 ? (hnswMemoryBytes / cacheStats.maxSize) * 100 : 0;
        // Flagged when HNSW holds >90% of the cache while producing <10% of accesses.
        const fairnessViolation = hnswCachePercent > 90 && hnswAccessPercent < 10;
        // `|| 0` also normalizes NaN/undefined hit rates.
        const hitRatePercent = (cacheStats.hitRate * 100) || 0;
        const cachingStrategy = estimatedVectorMemoryMB < availableCacheMB ? 'preloaded' : 'on-demand';
        const recommendations = [];
        if (cachingStrategy === 'on-demand' && hitRatePercent < 50) {
            recommendations.push(`Low cache hit rate (${hitRatePercent.toFixed(1)}%). Consider increasing UnifiedCache size`);
        }
        if (fairnessViolation) {
            recommendations.push(`Fairness violation: HNSW using ${hnswCachePercent.toFixed(1)}% cache with only ${hnswAccessPercent.toFixed(1)}% access`);
        }
        if (recommendations.length === 0) {
            recommendations.push('All metrics healthy - no action needed');
        }
        return {
            cachingStrategy,
            autoDetection: {
                entityCount,
                estimatedVectorMemoryMB: parseFloat(estimatedVectorMemoryMB.toFixed(2)),
                availableCacheMB: parseFloat(availableCacheMB.toFixed(2)),
                threshold: 0.8,
                rationale: cachingStrategy === 'preloaded'
                    ? `Vectors in native Rust memory (${estimatedVectorMemoryMB.toFixed(1)}MB)`
                    : `Adaptive on-demand loading (${estimatedVectorMemoryMB.toFixed(1)}MB > ${availableCacheMB.toFixed(1)}MB threshold)`
            },
            unifiedCache: {
                totalSize: cacheStats.totalSize,
                maxSize: cacheStats.maxSize,
                utilizationPercent: parseFloat((cacheStats.utilization * 100).toFixed(2)),
                itemCount: cacheStats.itemCount,
                hitRatePercent: parseFloat(hitRatePercent.toFixed(2)),
                totalAccessCount: cacheStats.totalAccessCount,
            },
            hnswCache: {
                vectorsInCache,
                cacheKeyPrefix: 'hnsw:vector:',
                estimatedMemoryMB: parseFloat((hnswMemoryBytes / (1024 * 1024)).toFixed(2)),
            },
            fairness: {
                hnswAccessCount,
                hnswAccessPercent: parseFloat(hnswAccessPercent.toFixed(2)),
                totalAccessCount,
                fairnessViolation,
            },
            recommendations,
        };
    }
}
/**
 * Build a noun object `{ id, vector, connections, level }` from the native index.
 * `connections` becomes a Map of numeric level -> Set of neighbor ids.
 * Returns null when the native index has no node data or no vector for `id`.
 */
function buildNounFromNative(native, id) {
    const nodeData = native.getNodeData(id);
    const vector = native.getVector(id);
    if (!nodeData || !vector)
        return null;
    const connections = new Map();
    for (const [levelStr, ids] of Object.entries(nodeData.connections)) {
        connections.set(parseInt(levelStr, 10), new Set(ids));
    }
    return { id, vector, connections, level: nodeData.level };
}
/**
 * Restore each noun in `items` into the native index using its stored HNSW data.
 * Nouns without stored HNSW data are skipped; per-noun failures are logged and skipped.
 * Returns the number of nodes restored.
 */
async function restoreNodesFromStorage(native, storage, items) {
    let restored = 0;
    for (const nounData of items) {
        try {
            const hnswData = await storage.getHNSWData(nounData.id);
            if (!hnswData)
                continue;
            native.restoreNode(nounData.id, nounData.vector, hnswData.level, hnswData.connections);
            restored++;
        }
        catch (error) {
            console.error(`Failed to rebuild HNSW data for ${nounData.id}:`, error);
        }
    }
    return restored;
}
515
+ //# sourceMappingURL=NativeHNSWWrapper.js.map
package/dist/plugin.d.ts CHANGED
@@ -8,13 +8,17 @@
8
8
  * can always access their data. Compute acceleration requires a valid license.
9
9
  *
10
10
  * Provider registration order:
11
- * 1. storage:mmap-filesystem — ALWAYS registered (data access)
11
+ * 1. storage:filesystem — mmap-enhanced FileSystemStorage (ALWAYS registered)
12
12
  * 2. distance — SIMD-accelerated cosine (licensed)
13
- * 3. metadataIndex — Native Rust query/mutation engine (licensed)
14
- * 4. graphIndex — Native 4 LSM-trees with verb tracking (licensed)
15
- * 5. embeddings — Candle ML native engine (CPU/CUDA/Metal) (licensed)
16
- * 6. roaring — CRoaring bitmap bindings (licensed)
17
- * 7. msgpack — Native encode/decode (licensed)
13
+ * 3. hnsw — Native Rust HNSW graph engine (licensed)
14
+ * 4. cache — Native Rust eviction engine with JS data storage (licensed)
15
+ * 5. metadataIndex — Native Rust query/mutation engine (licensed)
16
+ * 6. graphIndex — Native 4 LSM-trees with verb tracking (licensed)
17
+ * 7. embeddings — Candle ML native engine (CPU/CUDA/Metal) (licensed)
18
+ * 8. embedBatch — Native batch embedding (single forward pass) (licensed)
19
+ * 9. entityIdMapper — Native UUID ↔ integer mapping (licensed)
20
+ * 10. roaring — CRoaring bitmap bindings (licensed)
21
+ * 11. msgpack — Native encode/decode (licensed)
18
22
  */
19
23
  import type { BrainyPlugin } from '@soulcraft/brainy/plugin';
20
24
  declare const cortexPlugin: BrainyPlugin;
package/dist/plugin.js CHANGED
@@ -8,25 +8,33 @@
8
8
  * can always access their data. Compute acceleration requires a valid license.
9
9
  *
10
10
  * Provider registration order:
11
- * 1. storage:mmap-filesystem — ALWAYS registered (data access)
11
+ * 1. storage:filesystem — mmap-enhanced FileSystemStorage (ALWAYS registered)
12
12
  * 2. distance — SIMD-accelerated cosine (licensed)
13
- * 3. metadataIndex — Native Rust query/mutation engine (licensed)
14
- * 4. graphIndex — Native 4 LSM-trees with verb tracking (licensed)
15
- * 5. embeddings — Candle ML native engine (CPU/CUDA/Metal) (licensed)
16
- * 6. roaring — CRoaring bitmap bindings (licensed)
17
- * 7. msgpack — Native encode/decode (licensed)
13
+ * 3. hnsw — Native Rust HNSW graph engine (licensed)
14
+ * 4. cache — Native Rust eviction engine with JS data storage (licensed)
15
+ * 5. metadataIndex — Native Rust query/mutation engine (licensed)
16
+ * 6. graphIndex — Native 4 LSM-trees with verb tracking (licensed)
17
+ * 7. embeddings — Candle ML native engine (CPU/CUDA/Metal) (licensed)
18
+ * 8. embedBatch — Native batch embedding (single forward pass) (licensed)
19
+ * 9. entityIdMapper — Native UUID ↔ integer mapping (licensed)
20
+ * 10. roaring — CRoaring bitmap bindings (licensed)
21
+ * 11. msgpack — Native encode/decode (licensed)
18
22
  */
19
23
  import { loadNativeModule, isNativeAvailable } from './native/index.js';
20
24
  import { validateLicense } from './license.js';
21
25
  const cortexPlugin = {
22
26
  name: '@soulcraft/cortex',
23
27
  async activate(context) {
24
- // Storage adapters are ALWAYS registered — users must always be able
25
- // to access their data, even if the license expires. Brainy falls back
26
- // to JS compute but needs the storage adapter to read the files.
28
+ // Storage: Override built-in FileSystemStorage with mmap-enhanced version.
29
+ // MmapFileSystemStorage extends FileSystemStorage, adding binary blob methods
30
+ // (saveBinaryBlob/loadBinaryBlob/getBinaryBlobPath) that enable Rust native
31
+ // indexes to mmap SSTable files directly. Zero-config — users with
32
+ // type: 'filesystem' automatically get the enhanced version.
33
+ //
34
+ // ALWAYS registered (even without a license) so users can always access data.
27
35
  const { MmapFileSystemStorage } = await import('./storage/mmapFileSystemStorage.js');
28
- context.registerProvider('storage:mmap-filesystem', {
29
- name: 'mmap-filesystem',
36
+ context.registerProvider('storage:filesystem', {
37
+ name: 'filesystem',
30
38
  create: (config) => new MmapFileSystemStorage(config.rootDirectory, {
31
39
  compression: config.compression,
32
40
  compressionLevel: config.compressionLevel
@@ -70,6 +78,15 @@ const cortexPlugin = {
70
78
  });
71
79
  // Product Quantization: 16-32x compression for large datasets
72
80
  context.registerProvider('quantization:pq', native.NativePqCodebook);
81
+ // HNSW: Native Rust graph engine with SIMD distance and Arc-based COW
82
+ const { NativeHNSWWrapper } = await import('./hnsw/NativeHNSWWrapper.js');
83
+ context.registerProvider('hnsw', (config, distanceFn, options) => {
84
+ return new NativeHNSWWrapper(config, distanceFn, options);
85
+ });
86
+ // Cache: Native Rust eviction engine with JS data storage
87
+ // Registered before indexes so they use native cache for memory management
88
+ const { NativeUnifiedCacheWrapper } = await import('./utils/NativeUnifiedCache.js');
89
+ context.registerProvider('cache', new NativeUnifiedCacheWrapper());
73
90
  // Metadata index: Native Rust query/mutation engine
74
91
  const { MetadataIndexManager: NativeMetadataIndex } = await import('./utils/NativeMetadataIndex.js');
75
92
  context.registerProvider('metadataIndex', (storage) => new NativeMetadataIndex(storage));
@@ -88,6 +105,19 @@ const cortexPlugin = {
88
105
  }
89
106
  return engine.embed(text);
90
107
  });
108
+ // Batch embeddings: single Rust forward pass for multiple texts
109
+ // Brainy uses this for bulk operations (import, reindex, batch add)
110
+ // when available, bypassing the N-individual-calls path
111
+ context.registerProvider('embedBatch', async (texts) => {
112
+ if (!engine.isInitialized()) {
113
+ await engine.initialize();
114
+ }
115
+ return engine.embedBatch(texts);
116
+ });
117
+ // Entity ID mapper: Native UUID ↔ integer mapping in Rust (O(1) HashMap)
118
+ // Used by MetadataIndexManager for roaring bitmap integration
119
+ const { NativeEntityIdMapperWrapper } = await import('./utils/nativeEntityIdMapper.js');
120
+ context.registerProvider('entityIdMapper', (storage) => new NativeEntityIdMapperWrapper({ storage }));
91
121
  // Roaring bitmaps: CRoaring bindings (binary-compatible with roaring-wasm)
92
122
  const { RoaringBitmap32 } = await import('./native/NativeRoaringBitmap32.js');
93
123
  context.registerProvider('roaring', RoaringBitmap32);
@@ -879,7 +879,7 @@ export class MetadataIndexManager {
879
879
  catch { }
880
880
  // Adaptive rebuild strategy
881
881
  const storageType = this.storage.constructor.name;
882
- const isLocalStorage = storageType === 'FileSystemStorage' || storageType === 'MemoryStorage';
882
+ const isLocalStorage = storageType === 'FileSystemStorage' || storageType === 'MmapFileSystemStorage' || storageType === 'MemoryStorage';
883
883
  let totalNounsProcessed = 0;
884
884
  if (isLocalStorage) {
885
885
  const result = await this.storage.getNouns({