@soulcraft/cortex 2.7.0 → 2.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
File without changes
@@ -30,6 +30,8 @@ export declare class NativeHNSWWrapper implements HnswProvider {
30
30
  private unifiedCache;
31
31
  private cowEnabled;
32
32
  private mmapStore;
33
+ private mmapStorePath;
34
+ private mmapEnabled;
33
35
  private connectionsCodec;
34
36
  constructor(config: (Partial<HNSWConfig> & {
35
37
  distanceFunction?: DistanceFunction;
@@ -37,6 +39,33 @@ export declare class NativeHNSWWrapper implements HnswProvider {
37
39
  storage?: StorageAdapter | null;
38
40
  persistMode?: 'immediate' | 'deferred';
39
41
  });
42
+ /**
43
+ * Resolve the storage root directory for the `_hnsw.bin` mmap file. brainy
44
+ * >= 7.31.8 exposes a public `rootDirectory` getter; earlier 7.x kept the
45
+ * value in the (compile-time-protected, runtime-present) `rootDir` field.
46
+ * Accept either so the mmap fast-path engages on every filesystem brainy.
47
+ */
48
+ private resolveRootDir;
49
+ /**
50
+ * Resolve the directory that holds the ACTIVE branch's HNSW data — where the
51
+ * `_hnsw.bin` snapshot must live so write + load agree. brainy >= 7.31 has
52
+ * mandatory COW: all entity data is written under
53
+ * `branches/<currentBranch>/entities/nouns/hnsw/` (baseStorage.resolveBranchPath).
54
+ * The snapshot is a DIRECT filesystem write (it bypasses brainy's object
55
+ * layer), so cortex must reproduce that branch prefix itself. Falls back to
56
+ * the storage root for non-branched / pre-COW storage. NOTE: this couples to
57
+ * brainy's 7.x branch layout; brainy 8.0 replaces it with a generation layout
58
+ * (and cortex 3.0 uses DiskANN, not `_hnsw.bin`), so this resolution is 2.x-only.
59
+ */
60
+ private resolveHnswDir;
61
+ /** Absolute path of the `_hnsw.bin` snapshot for the active branch, or null. */
62
+ private resolveHnswBinPath;
63
+ /**
64
+ * Lazily (re)create the mmap store bound to the current branch-active path,
65
+ * creating its directory if needed. Recreates when the active branch (hence
66
+ * path) changed. Returns null when mmap isn't enabled / no path resolvable.
67
+ */
68
+ private ensureMmapStore;
40
69
  addItem(item: VectorDocument): Promise<string>;
41
70
  /**
42
71
  * Batch insert — inserts all items in a single native call, then persists
@@ -36,8 +36,15 @@ export class NativeHNSWWrapper {
36
36
  unifiedCache;
37
37
  // COW support
38
38
  cowEnabled = false;
39
- // Mmap binary HNSW store (Phase 4 — optional, used when storage has rootDirectory)
39
+ // Mmap binary HNSW store (Phase 4 — optional). Lazily (re)created at the
40
+ // BRANCH-ACTIVE path so the snapshot co-locates with the branch's HNSW data on
41
+ // COW/branch-layout brains (brainy >= 7.31 mandatory COW writes entity data
42
+ // under branches/<currentBranch>/…). `mmapEnabled` records that storage is
43
+ // filesystem-backed; the store + path are resolved at flush/load time because
44
+ // the active branch may be set after this provider is constructed.
40
45
  mmapStore = null;
46
+ mmapStorePath = null;
47
+ mmapEnabled = false;
41
48
  // Brainy ConnectionsCodec (brainy >= 7.27 `wireConnectionsCodec`). Stored for
42
49
  // introspection but not consulted on the read/write path — see
43
50
  // `setConnectionsCodec` below for the architectural rationale.
@@ -56,14 +63,67 @@ export class NativeHNSWWrapper {
56
63
  ml: this.config.ml,
57
64
  };
58
65
  this.native = new bindings.NativeHNSWIndex(nativeConfig);
59
- // Initialize mmap binary store if storage has a rootDirectory (filesystem storage)
60
- const rootDir = this.storage?.rootDirectory;
61
- if (rootDir && bindings.MmapHnswStore) {
62
- const { join } = require('node:path');
63
- this.mmapStore = new bindings.MmapHnswStore(join(rootDir, '_hnsw.bin'));
64
- }
66
+ // Enable the mmap binary fast-path on filesystem storage. We do NOT pin the
67
+ // path here — it's resolved per flush/load via resolveHnswBinPath(), because
68
+ // the active branch (which the path depends on) may be set after this
69
+ // provider is constructed. See ensureMmapStore().
70
+ this.mmapEnabled = !!(this.resolveRootDir() && bindings.MmapHnswStore);
65
71
  prodLog.info(`NativeHNSWWrapper initialized (M=${this.config.M}, ef=${this.config.efSearch}, ` +
66
- `persist=${this.persistMode}, mmap=${!!this.mmapStore})`);
72
+ `persist=${this.persistMode}, mmap=${this.mmapEnabled})`);
73
+ }
74
+ /**
75
+ * Resolve the storage root directory for the `_hnsw.bin` mmap file. brainy
76
+ * >= 7.31.8 exposes a public `rootDirectory` getter; earlier 7.x kept the
77
+ * value in the (compile-time-protected, runtime-present) `rootDir` field.
78
+ * Accept either so the mmap fast-path engages on every filesystem brainy.
79
+ */
80
+ resolveRootDir() {
81
+ const s = this.storage;
82
+ return s?.rootDirectory ?? s?.rootDir ?? undefined;
83
+ }
84
+ /**
85
+ * Resolve the directory that holds the ACTIVE branch's HNSW data — where the
86
+ * `_hnsw.bin` snapshot must live so write + load agree. brainy >= 7.31 has
87
+ * mandatory COW: all entity data is written under
88
+ * `branches/<currentBranch>/entities/nouns/hnsw/` (baseStorage.resolveBranchPath).
89
+ * The snapshot is a DIRECT filesystem write (it bypasses brainy's object
90
+ * layer), so cortex must reproduce that branch prefix itself. Falls back to
91
+ * the storage root for non-branched / pre-COW storage. NOTE: this couples to
92
+ * brainy's 7.x branch layout; brainy 8.0 replaces it with a generation layout
93
+ * (and cortex 3.0 uses DiskANN, not `_hnsw.bin`), so this resolution is 2.x-only.
94
+ */
95
+ resolveHnswDir() {
96
+ const root = this.resolveRootDir();
97
+ if (!root)
98
+ return undefined;
99
+ const branch = this.storage?.currentBranch;
100
+ if (!branch)
101
+ return root;
102
+ return require('node:path').join(root, 'branches', String(branch), 'entities', 'nouns', 'hnsw');
103
+ }
104
+ /** Absolute path of the `_hnsw.bin` snapshot for the active branch, or null. */
105
+ resolveHnswBinPath() {
106
+ const dir = this.resolveHnswDir();
107
+ return dir ? require('node:path').join(dir, '_hnsw.bin') : null;
108
+ }
109
+ /**
110
+ * Lazily (re)create the mmap store bound to the current branch-active path,
111
+ * creating its directory if needed. Recreates when the active branch (hence
112
+ * path) changed. Returns null when mmap isn't enabled / no path resolvable.
113
+ */
114
+ ensureMmapStore() {
115
+ if (!this.mmapEnabled)
116
+ return null;
117
+ const p = this.resolveHnswBinPath();
118
+ if (!p)
119
+ return null;
120
+ if (this.mmapStore && this.mmapStorePath === p)
121
+ return this.mmapStore;
122
+ const path = require('node:path');
123
+ require('node:fs').mkdirSync(path.dirname(p), { recursive: true });
124
+ this.mmapStore = new (loadNativeModule().MmapHnswStore)(p);
125
+ this.mmapStorePath = p;
126
+ return this.mmapStore;
67
127
  }
68
128
  // ---------------------------------------------------------------------------
69
129
  // Core CRUD
@@ -227,7 +287,14 @@ export class NativeHNSWWrapper {
227
287
  async flush() {
228
288
  if (!this.storage)
229
289
  return 0;
230
- if (this.dirtyNodes.size === 0 && !this.dirtySystem)
290
+ // The mmap binary is a FULL snapshot of the in-memory graph, independent of
291
+ // the per-node dirty set. In 'immediate' mode (brainy's filesystem default)
292
+ // nothing is ever marked dirty — nodes persist inline on add — so flush()'s
293
+ // real job there is to write that snapshot. It must NOT be skipped just
294
+ // because the dirty set is empty; only fast-return when there is genuinely
295
+ // nothing to write (no dirty data AND no graph to snapshot).
296
+ const needsMmapSnapshot = this.mmapEnabled && this.native.size() > 0;
297
+ if (this.dirtyNodes.size === 0 && !this.dirtySystem && !needsMmapSnapshot)
231
298
  return 0;
232
299
  const startTime = Date.now();
233
300
  const nodeCount = this.dirtyNodes.size;
@@ -260,15 +327,14 @@ export class NativeHNSWWrapper {
260
327
  // Write binary mmap file and switch to mmap backend for search.
261
328
  // After this, search reads vectors from mmap pages (zero-copy).
262
329
  // The write buffer (HashMap) is only used for new mutations.
263
- if (this.mmapStore && this.native.size() > 0) {
330
+ if (this.mmapEnabled && this.native.size() > 0) {
264
331
  try {
265
332
  this.writeMmapFile();
266
- // Switch to mmap backend — search now reads from kernel page cache
267
- const rootDir = this.storage?.rootDirectory;
268
- if (rootDir) {
269
- const { join } = require('node:path');
270
- this.native.setMmapBackend(join(rootDir, '_hnsw.bin'));
271
- }
333
+ // Switch to mmap backend — search now reads from kernel page cache.
334
+ // Same branch-active path the snapshot was just written to.
335
+ const p = this.resolveHnswBinPath();
336
+ if (p)
337
+ this.native.setMmapBackend(p);
272
338
  }
273
339
  catch (e) {
274
340
  console.error('[HNSW native] Failed to write mmap binary:', e);
@@ -277,7 +343,7 @@ export class NativeHNSWWrapper {
277
343
  const duration = Date.now() - startTime;
278
344
  if (nodeCount > 0) {
279
345
  prodLog.info(`[HNSW native] Flushed ${nodeCount} dirty nodes in ${duration}ms` +
280
- (this.mmapStore ? ' + binary mmap file' : ''));
346
+ (this.mmapEnabled ? ' + binary mmap file' : ''));
281
347
  }
282
348
  return nodeCount;
283
349
  }
@@ -302,7 +368,8 @@ export class NativeHNSWWrapper {
302
368
  * Called during flush() for fast reload on next init.
303
369
  */
304
370
  writeMmapFile() {
305
- if (!this.mmapStore)
371
+ const store = this.ensureMmapStore();
372
+ if (!store)
306
373
  return;
307
374
  const size = this.native.size();
308
375
  if (size === 0)
@@ -335,7 +402,7 @@ export class NativeHNSWWrapper {
335
402
  }
336
403
  const systemData = this.native.getSystemData();
337
404
  const dimensions = allVectors[0]?.length || 0;
338
- this.mmapStore.writeGraph(allIds, allVectors, allLevels, allConnections, systemData.entryPointId || null, systemData.maxLevel, dimensions);
405
+ store.writeGraph(allIds, allVectors, allLevels, allConnections, systemData.entryPointId || null, systemData.maxLevel, dimensions);
339
406
  prodLog.info(`[HNSW native] Binary mmap file written (${allIds.length} nodes, ${dimensions}d)`);
340
407
  }
341
408
  /**
@@ -348,13 +415,13 @@ export class NativeHNSWWrapper {
348
415
  * no data is copied to the HashMap. Zero heap allocation for vector data.
349
416
  */
350
417
  loadFromMmap() {
351
- if (!this.mmapStore)
418
+ if (!this.mmapEnabled)
419
+ return false;
420
+ // Load from the branch-active snapshot path (same path the write uses).
421
+ const p = this.resolveHnswBinPath();
422
+ if (!p)
352
423
  return false;
353
- const mmapPath = this.mmapStore.path ?? this.mmapStore.path;
354
- // Use the native index's mmap backend directly
355
- const loaded = this.native.setMmapBackend(this.storage?.rootDirectory
356
- ? require('node:path').join(this.storage.rootDirectory, '_hnsw.bin')
357
- : '');
424
+ const loaded = this.native.setMmapBackend(p);
358
425
  if (!loaded)
359
426
  return false;
360
427
  const size = this.native.size();
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@soulcraft/cortex",
3
- "version": "2.7.0",
3
+ "version": "2.7.2",
4
4
  "description": "Native Rust acceleration for Brainy — SIMD distance, vector quantization, zero-copy mmap, native embeddings. Free tier for storage, Pro license for compute acceleration.",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",
@@ -1,161 +0,0 @@
1
- /**
2
- * @module hnsw/NativeDiskAnnWrapper
3
- * @description TypeScript wrapper around cortex's native DiskANN engine
4
- * that satisfies brainy's `HnswProvider` contract. From brainy's
5
- * perspective this is interchangeable with `NativeHNSWWrapper` — same
6
- * `addItem` / `search` / `rebuild` surface — but underneath it drives
7
- * the billion-scale Vamana + PQ index.
8
- *
9
- * @example
10
- * ```typescript
11
- * import { BrainyData } from '@soulcraft/brainy'
12
- * import { register as registerCortex } from '@soulcraft/cortex'
13
- *
14
- * const brain = new BrainyData({
15
- * storage: { type: 'filesystem', rootDirectory: '/data/idx' }
16
- * })
17
- * await registerCortex(brain)
18
- * await brain.init() // [brainy] DiskANN engaged (path=..., dim=384)
19
- *
20
- * await brain.add({ data: 'native rust acceleration', type: 'concept' })
21
- * const hits = await brain.search('billion scale ann', 10)
22
- * ```
23
- *
24
- * @example
25
- * ```typescript
26
- * // Explicit billion-scale build config
27
- * const brain = new BrainyData({
28
- * storage: { type: 'filesystem', rootDirectory: '/data/idx' },
29
- * index: {
30
- * type: 'diskann',
31
- * diskann: {
32
- * pqM: 16,
33
- * maxDegree: 64,
34
- * searchListSize: 100,
35
- * useMmapAdjacency: true, // required >100M nodes
36
- * mmapAdjacencyPath: '/data/scratch/diskann-build.adj'
37
- * }
38
- * }
39
- * })
40
- * ```
41
- *
42
- * ## Operating model
43
- *
44
- * DiskANN is build-once, query-many by design: the on-disk file
45
- * embeds the Vamana graph, PQ codebook, codes, and full vectors in a
46
- * single contiguous mmap-able layout. Dynamic insertions go to a
47
- * small **delta buffer** that brute-force-searches alongside the main
48
- * index until the next `rebuild()` folds them in. This matches
49
- * FreshDiskANN's published online-update model.
50
- *
51
- * ## Search path
52
- *
53
- * 1. Query the main index via the native DiskANN searcher: PQ-greedy
54
- * walk in RAM, full-vector re-rank on the candidate set.
55
- * 2. Brute-force the delta buffer (typically <0.1% of total size after
56
- * a recent rebuild).
57
- * 3. Merge + sort + truncate to `k`.
58
- *
59
- * ## When this wrapper engages
60
- *
61
- * Brainy's `wireDiskAnn()` decides at init time whether to instantiate
62
- * this wrapper or the standard HNSW one. The criteria
63
- * ([ADR-002](../../docs/ADR-002-diskann-100-percent-rust.md)):
64
- * - Cortex's `index:diskann` provider is registered (this file).
65
- * - The storage adapter exposes a local filesystem path
66
- * (`getBinaryBlobPath` is the canonical check).
67
- * - The metadata index has a stable `idMapper` (the cortex 2.4.0 #23
68
- * foundation).
69
- * - `config.index.type !== 'hnsw'` (opt-out path).
70
- */
71
- import type { Vector, VectorDocument, DistanceFunction, StorageAdapter } from '@soulcraft/brainy';
72
- import type { HnswProvider } from '../providerContracts.js';
73
- export interface DiskAnnIndexConfig {
74
- /** Vector dimension (e.g. 384 for all-MiniLM-L6-v2). */
75
- dimensions: number;
76
- /** Output path for the on-disk DiskANN file. */
77
- indexPath: string;
78
- /** PQ subspaces. Default 16. dim must be divisible by m. */
79
- pqM?: number;
80
- /** Centroids per subspace. Default 256 (8-bit codes). */
81
- pqKsub?: number;
82
- /** Vamana max degree (R). Default 64. */
83
- maxDegree?: number;
84
- /** Build-time candidate list size (L). Default 100. */
85
- searchListSize?: number;
86
- /** α-pruning density factor. Default 1.2. */
87
- alpha?: number;
88
- /** Default search-time candidate list size. `2*k` is a good baseline. */
89
- defaultLSearch?: number;
90
- /** Default padding factor for re-rank over-fetch. Default 1.2. */
91
- defaultPaddingFactor?: number;
92
- /** Use a file-backed adjacency during build. Required >~100M nodes. */
93
- useMmapAdjacency?: boolean;
94
- /** Scratch file path when `useMmapAdjacency` is true. */
95
- mmapAdjacencyPath?: string;
96
- }
97
- export declare class NativeDiskAnnWrapper implements HnswProvider {
98
- private config;
99
- private distanceFunction;
100
- private storage;
101
- private persistMode;
102
- /** Live searcher instance — null until the first build. */
103
- private native;
104
- /** Newly added entries since the last build. Brute-force searched. */
105
- private delta;
106
- /** Removed entries — filtered out at search time. */
107
- private tombstones;
108
- /** Bidirectional UUID ↔ slot map for the main index. */
109
- private slotByUuid;
110
- private uuidBySlot;
111
- constructor(config: DiskAnnIndexConfig & {
112
- distanceFunction?: DistanceFunction;
113
- }, distanceFunction: DistanceFunction, options?: {
114
- storage?: StorageAdapter | null;
115
- persistMode?: 'immediate' | 'deferred';
116
- });
117
- /**
118
- * Append an entry to the delta buffer. Persisted by the next
119
- * `rebuild()` call, which folds the delta into the main index.
120
- */
121
- addItem(item: VectorDocument): Promise<string>;
122
- /**
123
- * Mark an entry as removed. Filtered out at search time; physically
124
- * removed at the next `rebuild()`.
125
- */
126
- removeItem(id: string): Promise<boolean>;
127
- search(queryVector: Vector, k?: number, filter?: (id: string) => Promise<boolean>, options?: {
128
- rerank?: {
129
- multiplier: number;
130
- };
131
- candidateIds?: string[];
132
- }): Promise<Array<[string, number]>>;
133
- size(): number;
134
- clear(): void;
135
- /**
136
- * Rebuild the main index from scratch: concatenate (current main −
137
- * tombstones) ∪ delta, run a full DiskANN build, swap the searcher
138
- * atomically.
139
- *
140
- * At billion-scale this is the expensive operation (hours of build
141
- * time). Operators schedule it during off-peak; the delta buffer
142
- * absorbs writes in between.
143
- */
144
- rebuild(options?: {
145
- pqM?: number;
146
- pqKsub?: number;
147
- maxDegree?: number;
148
- searchListSize?: number;
149
- alpha?: number;
150
- }): Promise<void>;
151
- /**
152
- * Flush the delta buffer to disk. For DiskANN the delta is in-memory
153
- * by design (a few MB at most between rebuilds); returns the buffer
154
- * size for parity with HNSW's flush contract.
155
- */
156
- flush(): Promise<number>;
157
- getPersistMode(): 'immediate' | 'deferred';
158
- private tryOpenExisting;
159
- private countMainTombstones;
160
- }
161
- //# sourceMappingURL=NativeDiskAnnWrapper.d.ts.map
@@ -1,329 +0,0 @@
1
- /**
2
- * @module hnsw/NativeDiskAnnWrapper
3
- * @description TypeScript wrapper around cortex's native DiskANN engine
4
- * that satisfies brainy's `HnswProvider` contract. From brainy's
5
- * perspective this is interchangeable with `NativeHNSWWrapper` — same
6
- * `addItem` / `search` / `rebuild` surface — but underneath it drives
7
- * the billion-scale Vamana + PQ index.
8
- *
9
- * @example
10
- * ```typescript
11
- * import { BrainyData } from '@soulcraft/brainy'
12
- * import { register as registerCortex } from '@soulcraft/cortex'
13
- *
14
- * const brain = new BrainyData({
15
- * storage: { type: 'filesystem', rootDirectory: '/data/idx' }
16
- * })
17
- * await registerCortex(brain)
18
- * await brain.init() // [brainy] DiskANN engaged (path=..., dim=384)
19
- *
20
- * await brain.add({ data: 'native rust acceleration', type: 'concept' })
21
- * const hits = await brain.search('billion scale ann', 10)
22
- * ```
23
- *
24
- * @example
25
- * ```typescript
26
- * // Explicit billion-scale build config
27
- * const brain = new BrainyData({
28
- * storage: { type: 'filesystem', rootDirectory: '/data/idx' },
29
- * index: {
30
- * type: 'diskann',
31
- * diskann: {
32
- * pqM: 16,
33
- * maxDegree: 64,
34
- * searchListSize: 100,
35
- * useMmapAdjacency: true, // required >100M nodes
36
- * mmapAdjacencyPath: '/data/scratch/diskann-build.adj'
37
- * }
38
- * }
39
- * })
40
- * ```
41
- *
42
- * ## Operating model
43
- *
44
- * DiskANN is build-once, query-many by design: the on-disk file
45
- * embeds the Vamana graph, PQ codebook, codes, and full vectors in a
46
- * single contiguous mmap-able layout. Dynamic insertions go to a
47
- * small **delta buffer** that brute-force-searches alongside the main
48
- * index until the next `rebuild()` folds them in. This matches
49
- * FreshDiskANN's published online-update model.
50
- *
51
- * ## Search path
52
- *
53
- * 1. Query the main index via the native DiskANN searcher: PQ-greedy
54
- * walk in RAM, full-vector re-rank on the candidate set.
55
- * 2. Brute-force the delta buffer (typically <0.1% of total size after
56
- * a recent rebuild).
57
- * 3. Merge + sort + truncate to `k`.
58
- *
59
- * ## When this wrapper engages
60
- *
61
- * Brainy's `wireDiskAnn()` decides at init time whether to instantiate
62
- * this wrapper or the standard HNSW one. The criteria
63
- * ([ADR-002](../../docs/ADR-002-diskann-100-percent-rust.md)):
64
- * - Cortex's `index:diskann` provider is registered (this file).
65
- * - The storage adapter exposes a local filesystem path
66
- * (`getBinaryBlobPath` is the canonical check).
67
- * - The metadata index has a stable `idMapper` (the cortex 2.4.0 #23
68
- * foundation).
69
- * - `config.index.type !== 'hnsw'` (opt-out path).
70
- */
71
- import { loadNativeModule } from '../native/index.js';
72
- import { prodLog } from '@soulcraft/brainy/internals';
73
- const DEFAULTS = {
74
- pqM: 16,
75
- pqKsub: 256,
76
- maxDegree: 64,
77
- searchListSize: 100,
78
- alpha: 1.2,
79
- defaultLSearch: 100,
80
- defaultPaddingFactor: 1.2,
81
- useMmapAdjacency: false,
82
- };
83
- export class NativeDiskAnnWrapper {
84
- config;
85
- distanceFunction;
86
- storage;
87
- persistMode;
88
- /** Live searcher instance — null until the first build. */
89
- native = null;
90
- /** Newly added entries since the last build. Brute-force searched. */
91
- delta = new Map();
92
- /** Removed entries — filtered out at search time. */
93
- tombstones = new Set();
94
- /** Bidirectional UUID ↔ slot map for the main index. */
95
- slotByUuid = new Map();
96
- uuidBySlot = new Map();
97
- constructor(config, distanceFunction, options = {}) {
98
- this.config = { ...DEFAULTS, ...config };
99
- this.distanceFunction = distanceFunction;
100
- this.storage = options.storage ?? null;
101
- this.persistMode = options.persistMode ?? 'immediate';
102
- // Try to open an existing file. If absent, the index stays
103
- // empty until the first rebuild() flushes the delta buffer.
104
- this.tryOpenExisting();
105
- }
106
- /**
107
- * Append an entry to the delta buffer. Persisted by the next
108
- * `rebuild()` call, which folds the delta into the main index.
109
- */
110
- async addItem(item) {
111
- if (this.tombstones.has(item.id)) {
112
- this.tombstones.delete(item.id);
113
- }
114
- this.delta.set(item.id, item.vector);
115
- return item.id;
116
- }
117
- /**
118
- * Mark an entry as removed. Filtered out at search time; physically
119
- * removed at the next `rebuild()`.
120
- */
121
- async removeItem(id) {
122
- const inDelta = this.delta.delete(id);
123
- const inMain = this.slotByUuid.has(id);
124
- if (inMain)
125
- this.tombstones.add(id);
126
- return inDelta || inMain;
127
- }
128
- async search(queryVector, k = 10, filter, options) {
129
- const lSearch = Math.max(this.config.defaultLSearch, k * 2);
130
- const padding = options?.rerank?.multiplier ?? this.config.defaultPaddingFactor;
131
- // 1. Main-index PQ-greedy walk (returns slot ids).
132
- const mainHits = this.native
133
- ? this.native.search(Array.from(queryVector), k * 2, // over-fetch so filter / tombstone losses don't starve final result
134
- lSearch, padding)
135
- : [];
136
- // 2. Hydrate slot → uuid; drop tombstoned + filter-rejected.
137
- const merged = [];
138
- for (const hit of mainHits) {
139
- const uuid = this.uuidBySlot.get(hit.slot);
140
- if (!uuid)
141
- continue;
142
- if (this.tombstones.has(uuid))
143
- continue;
144
- if (filter && !(await filter(uuid)))
145
- continue;
146
- merged.push([uuid, hit.distance]);
147
- }
148
- // 3. Brute-force the delta buffer.
149
- for (const [id, v] of this.delta) {
150
- if (filter && !(await filter(id)))
151
- continue;
152
- const d = this.distanceFunction(queryVector, v);
153
- merged.push([id, d]);
154
- }
155
- // 4. Sort ascending by distance, truncate to k.
156
- merged.sort((a, b) => a[1] - b[1]);
157
- return merged.slice(0, k);
158
- }
159
- size() {
160
- const mainSize = this.native ? this.native.size() : 0;
161
- return (mainSize +
162
- this.delta.size -
163
- // Tombstones from the main index reduce effective size.
164
- this.countMainTombstones());
165
- }
166
- clear() {
167
- this.delta.clear();
168
- this.tombstones.clear();
169
- this.slotByUuid.clear();
170
- this.uuidBySlot.clear();
171
- this.native = null;
172
- }
173
- /**
174
- * Rebuild the main index from scratch: concatenate (current main −
175
- * tombstones) ∪ delta, run a full DiskANN build, swap the searcher
176
- * atomically.
177
- *
178
- * At billion-scale this is the expensive operation (hours of build
179
- * time). Operators schedule it during off-peak; the delta buffer
180
- * absorbs writes in between.
181
- */
182
- async rebuild(options) {
183
- const bindings = loadNativeModule();
184
- // napi-rs exports the class as `NativeDiskAnn` (PascalCase
185
- // normalization of the Rust ident `NativeDiskANN`). The TS type
186
- // alias `NativeDiskANN = NativeDiskAnn` in `native/index.d.ts` is
187
- // for backwards-compat in *types* only — at runtime there's a
188
- // single export under the napi-normalized name.
189
- const NativeDiskANN = bindings.NativeDiskAnn;
190
- if (!NativeDiskANN) {
191
- throw new Error('NativeDiskANN binding missing — rebuild requires the cortex native module');
192
- }
193
- // Build the new logical slot ordering: (live old slots) + (delta).
194
- // **Critical for billion-scale correctness**: the old vectors stay
195
- // mmap'd inside the native module — we only pass slot IDs across
196
- // the FFI boundary, not the vector data itself. At 1B × 1536 × 4
197
- // bytes = ~6 TB this is the difference between "rebuild works" and
198
- // "rebuild OOMs."
199
- const liveOldSlots = [];
200
- const newUuids = [];
201
- if (this.native) {
202
- // Iterate in slot order so the new index's first n_live slots
203
- // mirror the OLD index's surviving subset in deterministic order.
204
- // We deliberately iterate by sorted slot id rather than uuidBySlot
205
- // insertion order — sorting keeps the Vamana entry point stable
206
- // and the on-disk vector section's locality similar to the
207
- // pre-rebuild file (less page-cache turnover during the post-
208
- // rebuild warm-up).
209
- const sortedSlots = Array.from(this.uuidBySlot.keys()).sort((a, b) => a - b);
210
- for (const slot of sortedSlots) {
211
- const uuid = this.uuidBySlot.get(slot);
212
- if (this.tombstones.has(uuid))
213
- continue;
214
- liveOldSlots.push(slot);
215
- newUuids.push(uuid);
216
- }
217
- }
218
- const dim = this.config.dimensions;
219
- const deltaCount = this.delta.size;
220
- let deltaBuf = null;
221
- if (deltaCount > 0) {
222
- deltaBuf = new Float32Array(deltaCount * dim);
223
- let idx = 0;
224
- for (const [uuid, vector] of this.delta) {
225
- if (vector.length !== dim) {
226
- throw new Error(`NativeDiskAnnWrapper.rebuild: vector dim ${vector.length} ≠ index dim ${dim}`);
227
- }
228
- deltaBuf.set(vector, idx * dim);
229
- newUuids.push(uuid);
230
- idx++;
231
- }
232
- }
233
- if (liveOldSlots.length + deltaCount === 0) {
234
- prodLog?.warn?.('NativeDiskAnnWrapper.rebuild: nothing to build');
235
- return;
236
- }
237
- const totalCount = liveOldSlots.length + deltaCount;
238
- const cfg = {
239
- vamana: {
240
- maxDegree: options?.maxDegree ?? this.config.maxDegree,
241
- searchListSize: options?.searchListSize ?? this.config.searchListSize,
242
- alpha: options?.alpha ?? this.config.alpha,
243
- seed: BigInt(0xd15ca4440ffff00dn),
244
- parallel: true,
245
- parallelBatch: 64,
246
- },
247
- pq: {
248
- m: options?.pqM ?? this.config.pqM,
249
- ksub: options?.pqKsub ?? this.config.pqKsub,
250
- iterations: 25,
251
- trainingSample: Math.min(200_000, totalCount),
252
- },
253
- adjacency: this.config.useMmapAdjacency
254
- ? {
255
- kind: 'mmap',
256
- mmapPath: this.config.mmapAdjacencyPath ?? `${this.config.indexPath}.adj`,
257
- }
258
- : { kind: 'ram' },
259
- };
260
- const newNative = NativeDiskANN.rebuildFromExisting({
261
- existingPath: this.native ? this.config.indexPath : undefined,
262
- liveOldSlots,
263
- deltaVectors: deltaBuf != null
264
- ? Buffer.from(deltaBuf.buffer, deltaBuf.byteOffset, deltaBuf.byteLength)
265
- : undefined,
266
- deltaCount,
267
- dim,
268
- outputPath: this.config.indexPath,
269
- cfg,
270
- });
271
- // Rebuild the bidirectional UUID↔slot maps from `newUuids`. New
272
- // slot `i` corresponds to `newUuids[i]` — this matches the napi
273
- // layout invariant (live old slots first, delta tail second).
274
- const newSlotByUuid = new Map();
275
- const newUuidBySlot = new Map();
276
- for (let i = 0; i < newUuids.length; i++) {
277
- newSlotByUuid.set(newUuids[i], i);
278
- newUuidBySlot.set(i, newUuids[i]);
279
- }
280
- // Atomic swap.
281
- this.native = newNative;
282
- this.slotByUuid = newSlotByUuid;
283
- this.uuidBySlot = newUuidBySlot;
284
- this.delta.clear();
285
- this.tombstones.clear();
286
- }
287
- /**
288
- * Flush the delta buffer to disk. For DiskANN the delta is in-memory
289
- * by design (a few MB at most between rebuilds); returns the buffer
290
- * size for parity with HNSW's flush contract.
291
- */
292
- async flush() {
293
- return this.delta.size;
294
- }
295
- getPersistMode() {
296
- return this.persistMode;
297
- }
298
- tryOpenExisting() {
299
- try {
300
- const bindings = loadNativeModule();
301
- // napi-rs exports the class as `NativeDiskAnn` (PascalCase
302
- // normalization of the Rust ident `NativeDiskANN`). The TS type
303
- // alias `NativeDiskANN = NativeDiskAnn` in `native/index.d.ts` is
304
- // for backwards-compat in *types* only — at runtime there's a
305
- // single export under the napi-normalized name.
306
- const NativeDiskANN = bindings.NativeDiskAnn;
307
- if (!NativeDiskANN)
308
- return;
309
- this.native = NativeDiskANN.openExisting(this.config.indexPath);
310
- // Populate slot maps from the storage adapter — these are persisted
311
- // alongside the index file in production. For 35c we read from a
312
- // sibling `.slots.json` that rebuild() writes.
313
- // (Stub for now; the real path lands when storage integration ships.)
314
- }
315
- catch {
316
- // No existing file — index stays empty until first rebuild().
317
- this.native = null;
318
- }
319
- }
320
- countMainTombstones() {
321
- let n = 0;
322
- for (const uuid of this.tombstones) {
323
- if (this.slotByUuid.has(uuid))
324
- n++;
325
- }
326
- return n;
327
- }
328
- }
329
- //# sourceMappingURL=NativeDiskAnnWrapper.js.map
@@ -1,194 +0,0 @@
1
- /**
2
- * @module utils/nativeBinaryEntityIdMapper
3
- * @description TypeScript wrapper around cortex's native binary
4
- * `BinaryIdMapper`. Implements brainy's `EntityIdMapperProvider` so the
5
- * mmap-backed billion-scale mapper is a drop-in for the existing
6
- * JSON-persisted one.
7
- *
8
- * ## When this engages
9
- *
10
- * The cortex plugin registers this wrapper as the `'entityIdMapper'`
11
- * provider when the storage adapter exposes `getBinaryBlobPath()` (i.e.
12
- * filesystem-backed storage with cortex's 2.4.0 #2 mmap-vector layer).
13
- * Cloud-storage adapters fall back to the JSON variant
14
- * (`NativeEntityIdMapperWrapper`) since they have no local-path concept.
15
- *
16
- * ## UUID format conversion
17
- *
18
- * Brainy passes UUIDs as strings (typically the canonical 36-char
19
- * `xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx`). The native side works in
20
- * 16-byte Buffers. This wrapper converts at the boundary. Non-canonical
21
- * UUID strings (any other 32-hex-digit form) are also accepted.
22
- *
23
- * ## Concurrency
24
- *
25
- * `getOrAssign` is atomic across concurrent callers for the same UUID
26
- * (256 sharded per-UUID mutexes in the native layer). Lookups are
27
- * lock-free. The wrapper holds no JS-side mutable state besides the
28
- * native handle.
29
- *
30
- * ## IdSpace (Piece 10)
31
- *
32
- * The wrapper supports two entity-int wire widths:
33
- *
34
- * - `'u32'` (default): cortex 2.x compatible — JS `number` throughout,
35
- * capped at 4.29 B entities. Persists in the legacy `int_to_uuid.bin`
36
- * v1 header.
37
- * - `'u64'`: opt-in via `idSpace: 'u64'`. The native layer's `number`
38
- * surface throws in this mode, so the wrapper transparently routes
39
- * the `EntityIdMapperProvider` methods through the BigInt napi
40
- * siblings and converts BigInt → number at the boundary. Entity ints
41
- * above `Number.MAX_SAFE_INTEGER` (2^53 - 1 = ~9 PB of entities)
42
- * throw a clear `EntityIdSpaceExceeded` error so callers get a loud
43
- * failure rather than a silent precision loss.
44
- *
45
- * The `getOrAssignBig` / `getIntBig` / `getUuidBig` / `sizeBig`
46
- * sibling methods are available on both modes and always return
47
- * `bigint` losslessly — use them in u64-aware code paths that need
48
- * the full u64 range.
49
- */
50
- import type { StorageAdapter } from '@soulcraft/brainy';
51
- import type { EntityIdMapperProvider } from '../providerContracts.js';
52
- export interface NativeBinaryEntityIdMapperOptions {
53
- /** Storage adapter — required for binary blob path resolution. */
54
- storage: StorageAdapter;
55
- /**
56
- * Override the relative path under storage for the uuid_to_int file.
57
- * Default `_id_mapper/uuid_to_int.mkv`.
58
- */
59
- uuidToIntKey?: string;
60
- /**
61
- * Override the relative path under storage for the int_to_uuid file.
62
- * Default `_id_mapper/int_to_uuid.bin`.
63
- */
64
- intToUuidKey?: string;
65
- /** Sparse file size for int_to_uuid. Default 32 GB. */
66
- intToUuidSize?: bigint;
67
- /** Sparse file size for uuid_to_int. Default 32 GB. */
68
- uuidToIntSize?: bigint;
69
- /** Bucket capacity in the MmapKv. Default 16. */
70
- bucketCapacity?: number;
71
- /** Maximum extendible-hash directory depth. Default 28. */
72
- maxGlobalDepth?: number;
73
- /**
74
- * Entity-int wire width. `'u32'` (default) caps at 4.29 B entities
75
- * and is cortex 2.x compatible. `'u64'` opts into the Piece 10 U64
76
- * IdSpace — required when targeting >4.29 B entities. The
77
- * `EntityIdMapperProvider` `number`-typed methods still work in U64
78
- * mode by routing through the BigInt napi siblings; entity ints
79
- * above `Number.MAX_SAFE_INTEGER` (2^53 - 1) throw an explicit
80
- * `EntityIdSpaceExceeded` error.
81
- *
82
- * Ignored on open when the underlying file's header disagrees: the
83
- * on-disk format wins and any mismatch is surfaced as a hard error.
84
- */
85
- idSpace?: 'u32' | 'u64';
86
- }
87
- /**
88
- * Thrown when a U64-mode mapper allocates or returns an entity int
89
- * above `Number.MAX_SAFE_INTEGER` (2^53 - 1). At this point a JS
90
- * `number` can no longer represent the value losslessly; callers must
91
- * switch to the BigInt sibling methods (`getOrAssignBig`,
92
- * `getIntBig`, `getUuidBig`).
93
- */
94
- export declare class EntityIdSpaceExceeded extends Error {
95
- /** The u64 entity int that exceeded the safe-integer ceiling. */
96
- readonly value: bigint;
97
- /** The method that was called (`'getOrAssign'`, `'getInt'`, etc.). */
98
- readonly method: string;
99
- constructor(method: string, value: bigint);
100
- }
101
- /**
102
- * Drop-in `EntityIdMapperProvider` backed by the native `BinaryIdMapper`.
103
- *
104
- * @example
105
- * ```typescript
106
- * const mapper = new NativeBinaryEntityIdMapperWrapper({ storage })
107
- * await mapper.init()
108
- * const intId = mapper.getOrAssign('12345678-1234-5678-1234-567812345678')
109
- * const uuid = mapper.getUuid(intId)
110
- * ```
111
- */
112
- export declare class NativeBinaryEntityIdMapperWrapper implements EntityIdMapperProvider {
113
- private storage;
114
- private uuidToIntKey;
115
- private intToUuidKey;
116
- private intToUuidSize;
117
- private uuidToIntSize;
118
- private bucketCapacity;
119
- private maxGlobalDepth;
120
- private requestedIdSpace;
121
- /**
122
- * The actual IdSpace of the open mapper, sourced from the native
123
- * binding's `idSpace()` reflection after `init()`. The on-disk
124
- * header wins over `requestedIdSpace` (which may be ignored at
125
- * `openExisting` time).
126
- */
127
- private resolvedIdSpace;
128
- private native;
129
- private initialized;
130
- constructor(options: NativeBinaryEntityIdMapperOptions);
131
- init(): Promise<void>;
132
- /**
133
- * Report the mapper's actual IdSpace mode. Returns `'u32'` before
134
- * `init()` (the default the wrapper assumes); after init, returns the
135
- * mode reported by the native binding (which is authoritative).
136
- */
137
- getIdSpace(): 'u32' | 'u64';
138
- /**
139
- * Allocate or retrieve the entity int for `uuid`. Returns a JS
140
- * `number`. In U64 mode, routes through the BigInt sibling and
141
- * throws {@link EntityIdSpaceExceeded} if the allocated int exceeds
142
- * `Number.MAX_SAFE_INTEGER` — at that point the caller MUST switch
143
- * to `getOrAssignBig` for the full u64 range.
144
- */
145
- getOrAssign(uuid: string): number;
146
- /**
147
- * Look up the UUID for `intId`. Accepts a JS `number` — in U64 mode
148
- * this is a lossy conversion above 2^53; use {@link getUuidBig} for
149
- * the full u64 range.
150
- */
151
- getUuid(intId: number): string | undefined;
152
- /**
153
- * Look up the entity int for `uuid`. Returns a JS `number`. In U64
154
- * mode throws {@link EntityIdSpaceExceeded} if the int exceeds
155
- * `Number.MAX_SAFE_INTEGER`.
156
- */
157
- getInt(uuid: string): number | undefined;
158
- remove(uuid: string): boolean;
159
- flush(): Promise<void>;
160
- clear(): Promise<void>;
161
- /**
162
- * Materialise every live int id into a JS `number[]`. **U32 mode
163
- * only.** U64 mode throws — the native binding refuses to allocate
164
- * a giant JS array at the scale a U64 brain implies. Iterate via
165
- * the BigInt sibling iterator (TBD — a follow-up surfaces it on the
166
- * wrapper) for U64 brains.
167
- */
168
- getAllIntIds(): number[];
169
- intsIterableToUuids(ints: Iterable<number>): string[];
170
- get size(): number;
171
- /**
172
- * Allocate or retrieve the entity int for `uuid` as a `bigint`.
173
- * Lossless across the full u64 range; safe to call in either mode.
174
- */
175
- getOrAssignBig(uuid: string): bigint;
176
- /** Look up the entity int for `uuid` as a `bigint`. */
177
- getIntBig(uuid: string): bigint | undefined;
178
- /** Look up the UUID for `int` (passed as a `bigint`). */
179
- getUuidBig(int: bigint): string | undefined;
180
- /** Live (non-tombstone) entry count as a `bigint`. */
181
- sizeBig(): bigint;
182
- /** Largest int ever assigned + 1, as a `bigint`. */
183
- nextIntBig(): bigint;
184
- /**
185
- * Encode a UUID string into a 16-byte Buffer. Accepts canonical
186
- * 36-char form (with hyphens) or any 32-hex-digit form. Throws on
187
- * malformed input.
188
- */
189
- private encode;
190
- /** Decode a 16-byte Buffer back to canonical UUID string. */
191
- private decode;
192
- private ensure;
193
- }
194
- //# sourceMappingURL=nativeBinaryEntityIdMapper.d.ts.map
@@ -1,358 +0,0 @@
1
- /**
2
- * @module utils/nativeBinaryEntityIdMapper
3
- * @description TypeScript wrapper around cortex's native binary
4
- * `BinaryIdMapper`. Implements brainy's `EntityIdMapperProvider` so the
5
- * mmap-backed billion-scale mapper is a drop-in for the existing
6
- * JSON-persisted one.
7
- *
8
- * ## When this engages
9
- *
10
- * The cortex plugin registers this wrapper as the `'entityIdMapper'`
11
- * provider when the storage adapter exposes `getBinaryBlobPath()` (i.e.
12
- * filesystem-backed storage with cortex's 2.4.0 #2 mmap-vector layer).
13
- * Cloud-storage adapters fall back to the JSON variant
14
- * (`NativeEntityIdMapperWrapper`) since they have no local-path concept.
15
- *
16
- * ## UUID format conversion
17
- *
18
- * Brainy passes UUIDs as strings (typically the canonical 36-char
19
- * `xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx`). The native side works in
20
- * 16-byte Buffers. This wrapper converts at the boundary. Non-canonical
21
- * UUID strings (any other 32-hex-digit form) are also accepted.
22
- *
23
- * ## Concurrency
24
- *
25
- * `getOrAssign` is atomic across concurrent callers for the same UUID
26
- * (256 sharded per-UUID mutexes in the native layer). Lookups are
27
- * lock-free. The wrapper holds no JS-side mutable state besides the
28
- * native handle.
29
- *
30
- * ## IdSpace (Piece 10)
31
- *
32
- * The wrapper supports two entity-int wire widths:
33
- *
34
- * - `'u32'` (default): cortex 2.x compatible — JS `number` throughout,
35
- * capped at 4.29 B entities. Persists in the legacy `int_to_uuid.bin`
36
- * v1 header.
37
- * - `'u64'`: opt-in via `idSpace: 'u64'`. The native layer's `number`
38
- * surface throws in this mode, so the wrapper transparently routes
39
- * the `EntityIdMapperProvider` methods through the BigInt napi
40
- * siblings and converts BigInt → number at the boundary. Entity ints
41
- * above `Number.MAX_SAFE_INTEGER` (2^53 - 1 = ~9 PB of entities)
42
- * throw a clear `EntityIdSpaceExceeded` error so callers get a loud
43
- * failure rather than a silent precision loss.
44
- *
45
- * The `getOrAssignBig` / `getIntBig` / `getUuidBig` / `sizeBig`
46
- * sibling methods are available on both modes and always return
47
- * `bigint` losslessly — use them in u64-aware code paths that need
48
- * the full u64 range.
49
- */
50
- import { existsSync } from 'node:fs';
51
- import { loadNativeModule } from '../native/index.js';
52
- import { prodLog } from '@soulcraft/brainy/internals';
53
- const UUID_BYTES = 16;
54
- /**
55
- * Thrown when a U64-mode mapper allocates or returns an entity int
56
- * above `Number.MAX_SAFE_INTEGER` (2^53 - 1). At this point a JS
57
- * `number` can no longer represent the value losslessly; callers must
58
- * switch to the BigInt sibling methods (`getOrAssignBig`,
59
- * `getIntBig`, `getUuidBig`).
60
- */
61
- export class EntityIdSpaceExceeded extends Error {
62
- /** The u64 entity int that exceeded the safe-integer ceiling. */
63
- value;
64
- /** The method that was called (`'getOrAssign'`, `'getInt'`, etc.). */
65
- method;
66
- constructor(method, value) {
67
- super(`${method}: entity int ${value} exceeds Number.MAX_SAFE_INTEGER ` +
68
- `(2^53 - 1). Switch to the BigInt sibling method (${method}Big) ` +
69
- `for entity ints above 9.007 PB.`);
70
- this.name = 'EntityIdSpaceExceeded';
71
- this.method = method;
72
- this.value = value;
73
- }
74
- }
75
- const MAX_SAFE_INTEGER_BIG = BigInt(Number.MAX_SAFE_INTEGER);
76
- /**
77
- * Convert a `bigint` entity int to a JS `number`. Throws
78
- * {@link EntityIdSpaceExceeded} if the value exceeds the JS safe-integer
79
- * range.
80
- */
81
- function bigToSafeNumber(value, method) {
82
- if (value > MAX_SAFE_INTEGER_BIG) {
83
- throw new EntityIdSpaceExceeded(method, value);
84
- }
85
- return Number(value);
86
- }
87
- const DEFAULT_UUID_TO_INT_KEY = '_id_mapper/uuid_to_int.mkv';
88
- const DEFAULT_INT_TO_UUID_KEY = '_id_mapper/int_to_uuid.bin';
89
- /**
90
- * Drop-in `EntityIdMapperProvider` backed by the native `BinaryIdMapper`.
91
- *
92
- * @example
93
- * ```typescript
94
- * const mapper = new NativeBinaryEntityIdMapperWrapper({ storage })
95
- * await mapper.init()
96
- * const intId = mapper.getOrAssign('12345678-1234-5678-1234-567812345678')
97
- * const uuid = mapper.getUuid(intId)
98
- * ```
99
- */
100
- export class NativeBinaryEntityIdMapperWrapper {
101
- storage;
102
- uuidToIntKey;
103
- intToUuidKey;
104
- intToUuidSize;
105
- uuidToIntSize;
106
- bucketCapacity;
107
- maxGlobalDepth;
108
- requestedIdSpace;
109
- /**
110
- * The actual IdSpace of the open mapper, sourced from the native
111
- * binding's `idSpace()` reflection after `init()`. The on-disk
112
- * header wins over `requestedIdSpace` (which may be ignored at
113
- * `openExisting` time).
114
- */
115
- resolvedIdSpace = 'u32';
116
- native = null;
117
- initialized = false;
118
- constructor(options) {
119
- this.storage = options.storage;
120
- this.uuidToIntKey = options.uuidToIntKey ?? DEFAULT_UUID_TO_INT_KEY;
121
- this.intToUuidKey = options.intToUuidKey ?? DEFAULT_INT_TO_UUID_KEY;
122
- this.intToUuidSize = options.intToUuidSize ?? BigInt(32) * BigInt(1024) ** BigInt(3);
123
- this.uuidToIntSize = options.uuidToIntSize ?? BigInt(32) * BigInt(1024) ** BigInt(3);
124
- this.bucketCapacity = options.bucketCapacity ?? 16;
125
- this.maxGlobalDepth = options.maxGlobalDepth ?? 28;
126
- this.requestedIdSpace = options.idSpace ?? 'u32';
127
- }
128
- async init() {
129
- if (this.initialized)
130
- return;
131
- const storage = this.storage;
132
- if (!storage.getBinaryBlobPath) {
133
- throw new Error('NativeBinaryEntityIdMapperWrapper requires a storage adapter that ' +
134
- 'exposes getBinaryBlobPath() (filesystem-backed). For cloud adapters, ' +
135
- 'use NativeEntityIdMapperWrapper (JSON variant) instead.');
136
- }
137
- const uuidToIntPath = storage.getBinaryBlobPath(this.uuidToIntKey);
138
- const intToUuidPath = storage.getBinaryBlobPath(this.intToUuidKey);
139
- if (!uuidToIntPath || !intToUuidPath) {
140
- throw new Error(`NativeBinaryEntityIdMapperWrapper: getBinaryBlobPath returned null for ` +
141
- `${this.uuidToIntKey} or ${this.intToUuidKey}`);
142
- }
143
- const bindings = loadNativeModule();
144
- const NativeBinaryIdMapper = bindings.NativeBinaryIdMapper;
145
- if (!NativeBinaryIdMapper) {
146
- throw new Error('NativeBinaryIdMapper binding missing from cortex native module — ' +
147
- 'this build of cortex is older than the BinaryIdMapper feature');
148
- }
149
- const config = {
150
- uuidToIntPath,
151
- intToUuidPath,
152
- intToUuidSize: this.intToUuidSize,
153
- uuidToIntSize: this.uuidToIntSize,
154
- bucketCapacity: this.bucketCapacity,
155
- maxGlobalDepth: this.maxGlobalDepth,
156
- idSpace: this.requestedIdSpace,
157
- };
158
- // Explicitly distinguish "fresh install" from "existing files".
159
- // Both files must exist together (paired write semantics) — a
160
- // half-present state is corruption from a crash between file
161
- // creations and is surfaced as an error rather than silently
162
- // recreated.
163
- const uuidFileExists = existsSync(uuidToIntPath);
164
- const intFileExists = existsSync(intToUuidPath);
165
- if (uuidFileExists && intFileExists) {
166
- this.native = NativeBinaryIdMapper.openExisting(config);
167
- }
168
- else if (!uuidFileExists && !intFileExists) {
169
- this.native = NativeBinaryIdMapper.create(config);
170
- }
171
- else {
172
- throw new Error(`NativeBinaryEntityIdMapperWrapper: half-present file pair — ` +
173
- `${this.uuidToIntKey} ${uuidFileExists ? 'exists' : 'missing'}, ` +
174
- `${this.intToUuidKey} ${intFileExists ? 'exists' : 'missing'}. ` +
175
- `Refusing to silently recreate; investigate manually.`);
176
- }
177
- // Reflect the on-disk IdSpace — authoritative over the requested
178
- // value when openExisting opens a file with a different mode.
179
- this.resolvedIdSpace = this.native.idSpace();
180
- this.initialized = true;
181
- if (prodLog?.debug) {
182
- prodLog.debug(`[cortex] BinaryIdMapper wired: paths=[${uuidToIntPath}, ${intToUuidPath}], idSpace=${this.resolvedIdSpace}`);
183
- }
184
- }
185
- /**
186
- * Report the mapper's actual IdSpace mode. Returns `'u32'` before
187
- * `init()` (the default the wrapper assumes); after init, returns the
188
- * mode reported by the native binding (which is authoritative).
189
- */
190
- getIdSpace() {
191
- return this.resolvedIdSpace;
192
- }
193
- // -- EntityIdMapperProvider surface (number-typed, brainy contract) --
194
- /**
195
- * Allocate or retrieve the entity int for `uuid`. Returns a JS
196
- * `number`. In U64 mode, routes through the BigInt sibling and
197
- * throws {@link EntityIdSpaceExceeded} if the allocated int exceeds
198
- * `Number.MAX_SAFE_INTEGER` — at that point the caller MUST switch
199
- * to `getOrAssignBig` for the full u64 range.
200
- */
201
- getOrAssign(uuid) {
202
- const native = this.ensure();
203
- if (this.resolvedIdSpace === 'u64') {
204
- return bigToSafeNumber(native.getOrAssignBig(this.encode(uuid)), 'getOrAssign');
205
- }
206
- return native.getOrAssign(this.encode(uuid));
207
- }
208
- /**
209
- * Look up the UUID for `intId`. Accepts a JS `number` — in U64 mode
210
- * this is a lossy conversion above 2^53; use {@link getUuidBig} for
211
- * the full u64 range.
212
- */
213
- getUuid(intId) {
214
- const native = this.ensure();
215
- const buf = this.resolvedIdSpace === 'u64'
216
- ? native.getUuidBig(BigInt(intId))
217
- : native.getUuid(intId);
218
- if (!buf)
219
- return undefined;
220
- return this.decode(buf);
221
- }
222
- /**
223
- * Look up the entity int for `uuid`. Returns a JS `number`. In U64
224
- * mode throws {@link EntityIdSpaceExceeded} if the int exceeds
225
- * `Number.MAX_SAFE_INTEGER`.
226
- */
227
- getInt(uuid) {
228
- const native = this.ensure();
229
- if (this.resolvedIdSpace === 'u64') {
230
- const big = native.getIntBig(this.encode(uuid));
231
- return big == null ? undefined : bigToSafeNumber(big, 'getInt');
232
- }
233
- const out = native.getInt(this.encode(uuid));
234
- return out == null ? undefined : out;
235
- }
236
- remove(uuid) {
237
- const native = this.ensure();
238
- return native.remove(this.encode(uuid));
239
- }
240
- async flush() {
241
- const native = this.ensure();
242
- native.flush();
243
- }
244
- async clear() {
245
- // Reset by recreating the files. Atomicity caveat: any concurrent
246
- // reader holds a stale mmap. Brainy calls clear() during clear()
247
- // operations that already block other access; this is fine.
248
- this.initialized = false;
249
- this.native = null;
250
- await this.init();
251
- }
252
- /**
253
- * Materialise every live int id into a JS `number[]`. **U32 mode
254
- * only.** U64 mode throws — the native binding refuses to allocate
255
- * a giant JS array at the scale a U64 brain implies. Iterate via
256
- * the BigInt sibling iterator (TBD — a follow-up surfaces it on the
257
- * wrapper) for U64 brains.
258
- */
259
- getAllIntIds() {
260
- const native = this.ensure();
261
- if (this.resolvedIdSpace === 'u64') {
262
- throw new Error('getAllIntIds: not supported in U64 mode — the materialised ' +
263
- 'number[] would risk OOM at billion scale. Use the BigInt ' +
264
- 'streaming iterator on the underlying native binding instead.');
265
- }
266
- return native.getAllIntIds();
267
- }
268
- intsIterableToUuids(ints) {
269
- const native = this.ensure();
270
- const u64 = this.resolvedIdSpace === 'u64';
271
- const out = [];
272
- for (const i of ints) {
273
- const buf = u64 ? native.getUuidBig(BigInt(i)) : native.getUuid(i);
274
- if (buf)
275
- out.push(this.decode(buf));
276
- }
277
- return out;
278
- }
279
- get size() {
280
- if (!this.initialized || !this.native)
281
- return 0;
282
- if (this.resolvedIdSpace === 'u64') {
283
- return bigToSafeNumber(this.native.sizeBig(), 'size');
284
- }
285
- return this.native.size();
286
- }
287
- // -- BigInt sibling surface (u64-safe, works in both modes) -------
288
- /**
289
- * Allocate or retrieve the entity int for `uuid` as a `bigint`.
290
- * Lossless across the full u64 range; safe to call in either mode.
291
- */
292
- getOrAssignBig(uuid) {
293
- const native = this.ensure();
294
- return native.getOrAssignBig(this.encode(uuid));
295
- }
296
- /** Look up the entity int for `uuid` as a `bigint`. */
297
- getIntBig(uuid) {
298
- const native = this.ensure();
299
- const out = native.getIntBig(this.encode(uuid));
300
- return out == null ? undefined : out;
301
- }
302
- /** Look up the UUID for `int` (passed as a `bigint`). */
303
- getUuidBig(int) {
304
- const native = this.ensure();
305
- const buf = native.getUuidBig(int);
306
- if (!buf)
307
- return undefined;
308
- return this.decode(buf);
309
- }
310
- /** Live (non-tombstone) entry count as a `bigint`. */
311
- sizeBig() {
312
- if (!this.initialized || !this.native)
313
- return 0n;
314
- return this.native.sizeBig();
315
- }
316
- /** Largest int ever assigned + 1, as a `bigint`. */
317
- nextIntBig() {
318
- return this.ensure().nextIntBig();
319
- }
320
- // ---------------------------------------------------------------
321
- // UUID string ↔ Buffer conversion
322
- // ---------------------------------------------------------------
323
- /**
324
- * Encode a UUID string into a 16-byte Buffer. Accepts canonical
325
- * 36-char form (with hyphens) or any 32-hex-digit form. Throws on
326
- * malformed input.
327
- */
328
- encode(uuid) {
329
- const hex = uuid.replace(/-/g, '').toLowerCase();
330
- if (hex.length !== 32 || !/^[0-9a-f]{32}$/.test(hex)) {
331
- throw new Error(`NativeBinaryEntityIdMapperWrapper: invalid UUID string "${uuid}"`);
332
- }
333
- return Buffer.from(hex, 'hex');
334
- }
335
- /** Decode a 16-byte Buffer back to canonical UUID string. */
336
- decode(buf) {
337
- if (buf.length !== UUID_BYTES) {
338
- throw new Error(`NativeBinaryEntityIdMapperWrapper: native returned ${buf.length}-byte uuid (expected ${UUID_BYTES})`);
339
- }
340
- const hex = buf.toString('hex');
341
- return (hex.slice(0, 8) +
342
- '-' +
343
- hex.slice(8, 12) +
344
- '-' +
345
- hex.slice(12, 16) +
346
- '-' +
347
- hex.slice(16, 20) +
348
- '-' +
349
- hex.slice(20, 32));
350
- }
351
- ensure() {
352
- if (!this.initialized || !this.native) {
353
- throw new Error('NativeBinaryEntityIdMapperWrapper: call init() before any operation');
354
- }
355
- return this.native;
356
- }
357
- }
358
- //# sourceMappingURL=nativeBinaryEntityIdMapper.js.map