@soulcraft/cortex 2.4.0 → 2.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,161 @@
1
+ /**
2
+ * @module hnsw/NativeDiskAnnWrapper
3
+ * @description TypeScript wrapper around cortex's native DiskANN engine
4
+ * that satisfies brainy's `HnswProvider` contract. From brainy's
5
+ * perspective this is interchangeable with `NativeHNSWWrapper` — same
6
+ * `addItem` / `search` / `rebuild` surface — but underneath it drives
7
+ * the billion-scale Vamana + PQ index.
8
+ *
9
+ * @example
10
+ * ```typescript
11
+ * import { BrainyData } from '@soulcraft/brainy'
12
+ * import { register as registerCortex } from '@soulcraft/cortex'
13
+ *
14
+ * const brain = new BrainyData({
15
+ * storage: { type: 'filesystem', rootDirectory: '/data/idx' }
16
+ * })
17
+ * await registerCortex(brain)
18
+ * await brain.init() // [brainy] DiskANN engaged (path=..., dim=384)
19
+ *
20
+ * await brain.add({ data: 'native rust acceleration', type: 'concept' })
21
+ * const hits = await brain.search('billion scale ann', 10)
22
+ * ```
23
+ *
24
+ * @example
25
+ * ```typescript
26
+ * // Explicit billion-scale build config
27
+ * const brain = new BrainyData({
28
+ * storage: { type: 'filesystem', rootDirectory: '/data/idx' },
29
+ * index: {
30
+ * type: 'diskann',
31
+ * diskann: {
32
+ * pqM: 16,
33
+ * maxDegree: 64,
34
+ * searchListSize: 100,
35
+ * useMmapAdjacency: true, // required >100M nodes
36
+ * mmapAdjacencyPath: '/data/scratch/diskann-build.adj'
37
+ * }
38
+ * }
39
+ * })
40
+ * ```
41
+ *
42
+ * ## Operating model
43
+ *
44
+ * DiskANN is build-once, query-many by design: the on-disk file
45
+ * embeds the Vamana graph, PQ codebook, codes, and full vectors in a
46
+ * single contiguous mmap-able layout. Dynamic insertions go to a
47
+ * small **delta buffer** that brute-force-searches alongside the main
48
+ * index until the next `rebuild()` folds them in. This matches
49
+ * FreshDiskANN's published online-update model.
50
+ *
51
+ * ## Search path
52
+ *
53
+ * 1. Query the main index via the native DiskANN searcher: PQ-greedy
54
+ * walk in RAM, full-vector re-rank on the candidate set.
55
+ * 2. Brute-force the delta buffer (typically <0.1% of total size after
56
+ * a recent rebuild).
57
+ * 3. Merge + sort + truncate to `k`.
58
+ *
59
+ * ## When this wrapper engages
60
+ *
61
+ * Brainy's `wireDiskAnn()` decides at init time whether to instantiate
62
+ * this wrapper or the standard HNSW one. The criteria
63
+ * ([ADR-002](../../docs/ADR-002-diskann-100-percent-rust.md)):
64
+ * - Cortex's `index:diskann` provider is registered (this file).
65
+ * - The storage adapter exposes a local filesystem path
66
+ * (`getBinaryBlobPath` is the canonical check).
67
+ * - The metadata index has a stable `idMapper` (the cortex 2.4.0 #23
68
+ * foundation).
69
+ * - `config.index.type !== 'hnsw'` (opt-out path).
70
+ */
71
+ import type { Vector, VectorDocument, DistanceFunction, StorageAdapter } from '@soulcraft/brainy';
72
+ import type { HnswProvider } from '../providerContracts.js';
73
+ export interface DiskAnnIndexConfig {
74
+ /** Vector dimension (e.g. 384 for all-MiniLM-L6-v2). `rebuild()` rejects buffered vectors of any other length. */
75
+ dimensions: number;
76
+ /** Path of the on-disk DiskANN file; handed to the native `openExisting` at construction and to the native `build` by `rebuild()`. */
77
+ indexPath: string;
78
+ /** PQ subspaces (m). Default 16. `dimensions` must be divisible by `pqM`. */
79
+ pqM?: number;
80
+ /** Centroids per subspace. Default 256 (8-bit codes). */
81
+ pqKsub?: number;
82
+ /** Vamana max degree (R). Default 64. */
83
+ maxDegree?: number;
84
+ /** Build-time candidate list size (L). Default 100. */
85
+ searchListSize?: number;
86
+ /** α-pruning density factor. Default 1.2. */
87
+ alpha?: number;
88
+ /** Floor for the search-time candidate list size. Default 100; `search()` uses `max(defaultLSearch, 2 * k)`. */
89
+ defaultLSearch?: number;
90
+ /** Default padding factor for re-rank over-fetch. Default 1.2. A per-call `options.rerank.multiplier` takes precedence. */
91
+ defaultPaddingFactor?: number;
92
+ /** Use a file-backed adjacency during build. Required >~100M nodes. Default false. */
93
+ useMmapAdjacency?: boolean;
94
+ /** Scratch file path when `useMmapAdjacency` is true. Defaults to `${indexPath}.adj`. */
95
+ mmapAdjacencyPath?: string;
96
+ }
97
+ export declare class NativeDiskAnnWrapper implements HnswProvider {
98
+ private config;
99
+ private distanceFunction;
100
+ private storage;
101
+ private persistMode;
102
+ /** Live searcher instance — null until an existing index file is opened or the first build completes. */
103
+ private native;
104
+ /** Newly added entries since the last build. Brute-force searched. */
105
+ private delta;
106
+ /** Removed main-index entries — filtered out at search time. */
107
+ private tombstones;
108
+ /** Bidirectional UUID ↔ slot map for the main index. */
109
+ private slotByUuid;
110
+ private uuidBySlot;
111
+ constructor(config: DiskAnnIndexConfig & {
112
+ distanceFunction?: DistanceFunction;
113
+ }, distanceFunction: DistanceFunction, options?: {
114
+ storage?: StorageAdapter | null;
115
+ persistMode?: 'immediate' | 'deferred';
116
+ });
117
+ /**
118
+ * Append an entry to the delta buffer (clearing any tombstone for the
119
+ * same id) and resolve to the item's id. The entry is only in memory until a `rebuild()` builds it into the main index.
120
+ */
121
+ addItem(item: VectorDocument): Promise<string>;
122
+ /**
123
+ * Mark an entry as removed. Filtered out at search time; physically
124
+ * removed at the next `rebuild()`. Resolves to true when the id was in the delta buffer or the main index.
125
+ */
126
+ removeItem(id: string): Promise<boolean>;
127
+ /**
+ * Merge main-index hits (tombstoned and filter-rejected ids dropped) with a
+ * brute-force scan of the delta buffer; resolves to at most `k` (default 10)
+ * `[id, distance]` pairs sorted by ascending distance. `options.rerank.multiplier`
+ * overrides the default padding factor. NOTE(review): `options.candidateIds`
+ * is accepted but not consulted by the implementation.
+ */
+ search(queryVector: Vector, k?: number, filter?: (id: string) => Promise<boolean>, options?: {
128
+ rerank?: {
129
+ multiplier: number;
130
+ };
131
+ candidateIds?: string[];
132
+ }): Promise<Array<[string, number]>>;
133
+ size(): number; // live entry count across the main index and the delta buffer
134
+ clear(): void; // drops the in-memory state: delta, tombstones, slot maps and the searcher handle
135
+ /**
136
+ * Rebuild the main index with a full DiskANN build, then swap in the
137
+ * new searcher and slot maps and clear the delta buffer and tombstones.
138
+ * The intended input is (current main − tombstones) ∪ delta, but the
+ * implementation currently builds from the delta buffer ONLY (the
+ * native side exposes no vector iterator yet), so entries that exist
+ * solely in the previous main index are not carried over.
139
+ *
140
+ * At billion-scale this is the expensive operation (hours of build
141
+ * time). Operators schedule it during off-peak; the delta buffer
142
+ * absorbs writes in between.
143
+ */
144
+ rebuild(options?: {
145
+ pqM?: number;
146
+ pqKsub?: number;
147
+ maxDegree?: number;
148
+ searchListSize?: number;
149
+ alpha?: number;
150
+ }): Promise<void>;
151
+ /**
152
+ * Flush hook kept for parity with HNSW's flush contract. Nothing is
153
+ * written: the delta buffer is in-memory by design (a few MB at most
154
+ * between rebuilds). Resolves to the current delta buffer size.
155
+ */
156
+ flush(): Promise<number>;
157
+ getPersistMode(): 'immediate' | 'deferred';
158
+ private tryOpenExisting;
159
+ private countMainTombstones;
160
+ }
161
+ //# sourceMappingURL=NativeDiskAnnWrapper.d.ts.map
@@ -0,0 +1,286 @@
1
+ /**
2
+ * @module hnsw/NativeDiskAnnWrapper
3
+ * @description TypeScript wrapper around cortex's native DiskANN engine
4
+ * that satisfies brainy's `HnswProvider` contract. From brainy's
5
+ * perspective this is interchangeable with `NativeHNSWWrapper` — same
6
+ * `addItem` / `search` / `rebuild` surface — but underneath it drives
7
+ * the billion-scale Vamana + PQ index.
8
+ *
9
+ * @example
10
+ * ```typescript
11
+ * import { BrainyData } from '@soulcraft/brainy'
12
+ * import { register as registerCortex } from '@soulcraft/cortex'
13
+ *
14
+ * const brain = new BrainyData({
15
+ * storage: { type: 'filesystem', rootDirectory: '/data/idx' }
16
+ * })
17
+ * await registerCortex(brain)
18
+ * await brain.init() // [brainy] DiskANN engaged (path=..., dim=384)
19
+ *
20
+ * await brain.add({ data: 'native rust acceleration', type: 'concept' })
21
+ * const hits = await brain.search('billion scale ann', 10)
22
+ * ```
23
+ *
24
+ * @example
25
+ * ```typescript
26
+ * // Explicit billion-scale build config
27
+ * const brain = new BrainyData({
28
+ * storage: { type: 'filesystem', rootDirectory: '/data/idx' },
29
+ * index: {
30
+ * type: 'diskann',
31
+ * diskann: {
32
+ * pqM: 16,
33
+ * maxDegree: 64,
34
+ * searchListSize: 100,
35
+ * useMmapAdjacency: true, // required >100M nodes
36
+ * mmapAdjacencyPath: '/data/scratch/diskann-build.adj'
37
+ * }
38
+ * }
39
+ * })
40
+ * ```
41
+ *
42
+ * ## Operating model
43
+ *
44
+ * DiskANN is build-once, query-many by design: the on-disk file
45
+ * embeds the Vamana graph, PQ codebook, codes, and full vectors in a
46
+ * single contiguous mmap-able layout. Dynamic insertions go to a
47
+ * small **delta buffer** that brute-force-searches alongside the main
48
+ * index until the next `rebuild()` folds them in. This matches
49
+ * FreshDiskANN's published online-update model.
50
+ *
51
+ * ## Search path
52
+ *
53
+ * 1. Query the main index via the native DiskANN searcher: PQ-greedy
54
+ * walk in RAM, full-vector re-rank on the candidate set.
55
+ * 2. Brute-force the delta buffer (typically <0.1% of total size after
56
+ * a recent rebuild).
57
+ * 3. Merge + sort + truncate to `k`.
58
+ *
59
+ * ## When this wrapper engages
60
+ *
61
+ * Brainy's `wireDiskAnn()` decides at init time whether to instantiate
62
+ * this wrapper or the standard HNSW one. The criteria
63
+ * ([ADR-002](../../docs/ADR-002-diskann-100-percent-rust.md)):
64
+ * - Cortex's `index:diskann` provider is registered (this file).
65
+ * - The storage adapter exposes a local filesystem path
66
+ * (`getBinaryBlobPath` is the canonical check).
67
+ * - The metadata index has a stable `idMapper` (the cortex 2.4.0 #23
68
+ * foundation).
69
+ * - `config.index.type !== 'hnsw'` (opt-out path).
70
+ */
71
+ import { loadNativeModule } from '../native/index.js';
72
+ import { prodLog } from '@soulcraft/brainy/internals';
73
+ const DEFAULTS = { // fallback values; the constructor spreads the caller's config over these
74
+ pqM: 16, // PQ subspaces
75
+ pqKsub: 256, // centroids per subspace (8-bit codes)
76
+ maxDegree: 64, // Vamana max degree (R)
77
+ searchListSize: 100, // build-time candidate list size (L)
78
+ alpha: 1.2, // α-pruning density factor
79
+ defaultLSearch: 100, // floor for the search-time candidate list size
80
+ defaultPaddingFactor: 1.2, // re-rank over-fetch factor when no per-call multiplier is given
81
+ useMmapAdjacency: false, // build adjacency is kept in RAM unless enabled
82
+ };
83
/**
 * `HnswProvider`-shaped wrapper around the native DiskANN searcher.
 *
 * State model:
 * - `native` + `slotByUuid`/`uuidBySlot` describe the built ("main") index.
 * - `delta` holds vectors added since the last build; it is brute-force
 *   searched with `distanceFunction`.
 * - `tombstones` holds ids removed from the main index; they are filtered
 *   out of search results.
 *
 * An id present in both the main index and the delta buffer (re-added after
 * being built) is treated as superseded in the main index: the delta copy is
 * the one searched and counted.
 */
export class NativeDiskAnnWrapper {
    /** Effective configuration: `DEFAULTS` overlaid with the caller's defined values. */
    config;
    /** Distance function used for the brute-force delta scan. */
    distanceFunction;
    /** Storage adapter supplied by the caller, or null. Stored only; not read by this class yet. */
    storage;
    /** Value reported by `getPersistMode()`; 'immediate' unless overridden. */
    persistMode;
    /** Live searcher instance — null until an existing file is opened or the first build completes. */
    native = null;
    /** Newly added entries (id → vector) since the last build. Brute-force searched. */
    delta = new Map();
    /** Removed main-index entries — filtered out at search time. */
    tombstones = new Set();
    /** Bidirectional UUID ↔ slot map for the main index. */
    slotByUuid = new Map();
    uuidBySlot = new Map();

    /**
     * @param config - Index configuration; keys whose value is `undefined`
     *   fall back to `DEFAULTS` instead of overriding them.
     * @param distanceFunction - Distance used to score delta-buffer entries.
     * @param options - Optional `storage` adapter and `persistMode`.
     */
    constructor(config, distanceFunction, options = {}) {
        // A plain spread would let `{ pqM: undefined }` clobber the default
        // (and e.g. turn `Math.max(defaultLSearch, …)` into NaN), so strip
        // undefined values before merging.
        const provided = Object.fromEntries(Object.entries(config ?? {}).filter(([, value]) => value !== undefined));
        this.config = { ...DEFAULTS, ...provided };
        this.distanceFunction = distanceFunction;
        this.storage = options.storage ?? null;
        this.persistMode = options.persistMode ?? 'immediate';
        // Try to open an existing file. If absent, the index stays
        // empty until the first rebuild() builds the delta buffer.
        this.tryOpenExisting();
    }

    /**
     * Append an entry to the delta buffer, reviving it if it was tombstoned.
     * The entry lives in memory only until the next `rebuild()`.
     *
     * @param item - Document carrying `id` and `vector`.
     * @returns The item's id.
     */
    async addItem(item) {
        this.tombstones.delete(item.id);
        this.delta.set(item.id, item.vector);
        return item.id;
    }

    /**
     * Mark an entry as removed. Delta entries are dropped immediately;
     * main-index entries are tombstoned and filtered at search time.
     *
     * @param id - Entry id.
     * @returns true when the id was in the delta buffer or the main index.
     */
    async removeItem(id) {
        const inDelta = this.delta.delete(id);
        const inMain = this.slotByUuid.has(id);
        if (inMain) {
            this.tombstones.add(id);
        }
        return inDelta || inMain;
    }

    /**
     * Search the main index and the delta buffer and merge the results.
     *
     * @param queryVector - Query vector.
     * @param k - Maximum number of results (default 10). A non-positive `k`
     *   yields an empty result.
     * @param filter - Optional async predicate; ids it rejects are dropped.
     * @param options - `rerank.multiplier` overrides the default padding
     *   factor. NOTE(review): `candidateIds` is accepted for signature parity
     *   but is not consulted here — confirm whether callers rely on it.
     * @returns At most `k` `[id, distance]` pairs, ascending by distance,
     *   with each id appearing at most once.
     */
    async search(queryVector, k = 10, filter, options) {
        // `slice(0, k)` would read a negative k as "all but the last |k|".
        if (k <= 0) {
            return [];
        }
        const lSearch = Math.max(this.config.defaultLSearch, k * 2);
        const padding = options?.rerank?.multiplier ?? this.config.defaultPaddingFactor;
        // 1. Main-index walk (returns slot ids). Over-fetch 2k so filter /
        //    tombstone losses don't starve the final result.
        const mainHits = this.native
            ? this.native.search(Array.from(queryVector), k * 2, lSearch, padding)
            : [];
        // 2. Hydrate slot → uuid; drop unknown, tombstoned, superseded and
        //    filter-rejected hits.
        const merged = [];
        for (const hit of mainHits) {
            const uuid = this.uuidBySlot.get(hit.slot);
            if (!uuid) {
                continue;
            }
            if (this.tombstones.has(uuid)) {
                continue;
            }
            // The delta buffer holds a newer vector for this id; it is scored
            // in step 3, so emitting the main hit would duplicate the id with
            // a stale distance.
            if (this.delta.has(uuid)) {
                continue;
            }
            if (filter && !(await filter(uuid))) {
                continue;
            }
            merged.push([uuid, hit.distance]);
        }
        // 3. Brute-force the delta buffer.
        for (const [id, vector] of this.delta) {
            if (filter && !(await filter(id))) {
                continue;
            }
            merged.push([id, this.distanceFunction(queryVector, vector)]);
        }
        // 4. Sort ascending by distance, truncate to k.
        merged.sort((a, b) => a[1] - b[1]);
        return merged.slice(0, k);
    }

    /**
     * Live entry count: main index size plus delta size, minus main entries
     * that are tombstoned or superseded by a delta entry with the same id.
     */
    size() {
        const mainSize = this.native ? this.native.size() : 0;
        return mainSize + this.delta.size - this.countMainTombstones() - this.countSupersededMain();
    }

    /** Drop all in-memory state: delta, tombstones, slot maps and the searcher handle. */
    clear() {
        this.delta.clear();
        this.tombstones.clear();
        this.slotByUuid.clear();
        this.uuidBySlot.clear();
        this.native = null;
    }

    /**
     * Run a full DiskANN build and swap in the new searcher and slot maps.
     *
     * Current limitation: the native side does not expose a vector iterator
     * yet, so the build input is the delta buffer ONLY. Live entries that
     * exist solely in the previous main index are not carried over; a
     * warning is logged with their count when that happens.
     * TODO: once `NativeDiskANN.iterAll()` lands, fold (main − tombstones)
     * into the build input.
     *
     * At billion-scale this is the expensive operation (hours of build
     * time). Operators schedule it during off-peak; the delta buffer
     * absorbs writes in between.
     *
     * @param options - Per-build overrides for the PQ / Vamana parameters.
     * @throws Error when the native binding is missing or a buffered vector's
     *   length differs from `config.dimensions` (state is left untouched).
     */
    async rebuild(options) {
        const { NativeDiskANN } = loadNativeModule();
        if (!NativeDiskANN) {
            throw new Error('NativeDiskANN binding missing — rebuild requires the cortex native module');
        }
        const entries = Array.from(this.delta, ([id, vector]) => ({ id, vector }));
        if (entries.length === 0) {
            prodLog?.warn?.('NativeDiskAnnWrapper.rebuild: nothing to build');
            return;
        }
        const dim = this.config.dimensions;
        // Validate every vector before allocating the (potentially huge) build buffer.
        for (const { vector } of entries) {
            if (vector.length !== dim) {
                throw new Error(`NativeDiskAnnWrapper.rebuild: vector dim ${vector.length} ≠ index dim ${dim}`);
            }
        }
        // Live main entries that are neither tombstoned nor re-added: these
        // cannot be rebuilt from here and will be absent from the new index.
        const orphaned = this.native ? this.size() - this.delta.size : 0;
        const buf = new Float32Array(entries.length * dim);
        const newSlotByUuid = new Map();
        const newUuidBySlot = new Map();
        entries.forEach(({ id, vector }, slot) => {
            buf.set(vector, slot * dim);
            newSlotByUuid.set(id, slot);
            newUuidBySlot.set(slot, id);
        });
        const cfg = {
            vamana: {
                maxDegree: options?.maxDegree ?? this.config.maxDegree,
                searchListSize: options?.searchListSize ?? this.config.searchListSize,
                alpha: options?.alpha ?? this.config.alpha,
                // Fixed seed (already a bigint literal; no BigInt() wrapper needed).
                seed: 0xd15ca4440ffff00dn,
                parallel: true,
                parallelBatch: 64,
            },
            pq: {
                m: options?.pqM ?? this.config.pqM,
                ksub: options?.pqKsub ?? this.config.pqKsub,
                iterations: 25,
                trainingSample: Math.min(200_000, entries.length),
            },
            adjacency: this.config.useMmapAdjacency
                ? {
                    kind: 'mmap',
                    mmapPath: this.config.mmapAdjacencyPath ?? `${this.config.indexPath}.adj`,
                }
                : { kind: 'ram' },
        };
        const newNative = NativeDiskANN.build(Buffer.from(buf.buffer, buf.byteOffset, buf.byteLength), dim, this.config.indexPath, cfg);
        if (orphaned > 0) {
            prodLog?.warn?.(`NativeDiskAnnWrapper.rebuild: ${orphaned} main-index entries were not carried into the rebuilt index (native vector iteration is not available yet)`);
        }
        // Swap the searcher + the slot maps together. Tombstones refer to the
        // previous main index, which the new one replaces, so they are dropped.
        this.native = newNative;
        this.slotByUuid = newSlotByUuid;
        this.uuidBySlot = newUuidBySlot;
        this.delta.clear();
        this.tombstones.clear();
    }

    /**
     * Flush hook kept for parity with HNSW's flush contract. Nothing is
     * written: the delta buffer is in-memory by design.
     *
     * @returns The current delta buffer size.
     */
    async flush() {
        return this.delta.size;
    }

    /** @returns The persist mode chosen at construction. */
    getPersistMode() {
        return this.persistMode;
    }

    /**
     * Best-effort open of an existing index file at `config.indexPath`.
     * Any failure (native module unavailable, file absent, open error)
     * leaves the index empty until the first `rebuild()`.
     *
     * NOTE(review): on success the slot maps are still empty (their
     * persistence is a stub), so main-index hits cannot be hydrated to ids
     * by `search()` until a rebuild repopulates the maps.
     */
    tryOpenExisting() {
        try {
            const { NativeDiskANN } = loadNativeModule();
            if (!NativeDiskANN) {
                return;
            }
            this.native = NativeDiskANN.openExisting(this.config.indexPath);
        }
        catch {
            // Deliberately silent: a missing file is the normal first-run case.
            this.native = null;
        }
    }

    /** Count tombstones that refer to an entry of the current main index. */
    countMainTombstones() {
        let count = 0;
        for (const uuid of this.tombstones) {
            if (this.slotByUuid.has(uuid)) {
                count++;
            }
        }
        return count;
    }

    /** Count main-index entries whose id has a newer vector in the delta buffer. */
    countSupersededMain() {
        let count = 0;
        for (const id of this.delta.keys()) {
            if (this.slotByUuid.has(id)) {
                count++;
            }
        }
        return count;
    }
}
286
+ //# sourceMappingURL=NativeDiskAnnWrapper.js.map
@@ -30,6 +30,7 @@ export declare class NativeHNSWWrapper implements HnswProvider {
30
30
  private unifiedCache;
31
31
  private cowEnabled;
32
32
  private mmapStore;
33
+ private connectionsCodec;
33
34
  constructor(config: (Partial<HNSWConfig> & {
34
35
  distanceFunction?: DistanceFunction;
35
36
  }) | undefined, distanceFunction: DistanceFunction, options?: {
@@ -83,6 +84,35 @@ export declare class NativeHNSWWrapper implements HnswProvider {
83
84
  enableCOW(parent: NativeHNSWWrapper): void;
84
85
  setUseParallelization(useParallelization: boolean): void;
85
86
  getUseParallelization(): boolean;
87
+ /**
88
+ * @description Accept (or detach) the brainy `ConnectionsCodec`. Brainy 7.27+
89
+ * calls this during init from `wireConnectionsCodec()` whenever
90
+ * the `graph:compression` provider is registered (which cortex always
91
+ * supplies via `native.encodeConnections`/`decodeConnections`).
92
+ *
93
+ * Cortex's native HNSW serializes connections through its own path —
94
+ * `addItemFull` returns `nodeData` written directly via `storage.saveHNSWData`
95
+ * (and the mmap binary backend when available). It never routes through
96
+ * brainy's JS-side `persistNodeConnections`/`restoreNodeConnections`, which
97
+ * is where the codec is consumed. The codec is therefore unreachable from
98
+ * this wrapper.
99
+ *
100
+ * We accept the call (so brainy's init succeeds) and store the reference for
101
+ * introspection/parity. We do NOT re-encode connections through the codec on
102
+ * top of the native format — that would double-encode (waste CPU) or replace
103
+ * the native format with a strictly less efficient one (waste perf). Brainy
104
+ * treats the method as feature-detected/optional on third-party providers,
105
+ * so a storing acceptor is the contract-correct behaviour.
106
+ *
107
+ * @param codec - The `ConnectionsCodec` instance, or `null` to detach. Typed `unknown` because this wrapper only stores the reference and never calls into it.
108
+ */
109
+ setConnectionsCodec(codec: unknown): void;
110
+ /**
111
+ * @description Read back the currently-attached `ConnectionsCodec`, or null.
112
+ * Exposed for parity tests + future inspection; cortex itself does not
113
+ * consult this value on the read/write path.
+ *
+ * @returns The reference last passed to `setConnectionsCodec`, or `null` when none has been attached.
114
+ */
115
+ getConnectionsCodec(): unknown;
86
116
  size(): number;
87
117
  clear(): void;
88
118
  getEntryPointId(): string | null;
@@ -38,6 +38,10 @@ export class NativeHNSWWrapper {
38
38
  cowEnabled = false;
39
39
  // Mmap binary HNSW store (Phase 4 — optional, used when storage has rootDirectory)
40
40
  mmapStore = null;
41
+ // Brainy ConnectionsCodec (brainy >= 7.27 `wireConnectionsCodec`). Stored for
42
+ // introspection but not consulted on the read/write path — see
43
+ // `setConnectionsCodec` below for the architectural rationale.
44
+ connectionsCodec = null;
41
45
  constructor(config = {}, distanceFunction, options = {}) {
42
46
  this.config = { ...DEFAULT_CONFIG, ...config };
43
47
  this.distanceFunction = distanceFunction;
@@ -485,6 +489,39 @@ export class NativeHNSWWrapper {
485
489
  getUseParallelization() {
486
490
  return this.useParallelization;
487
491
  }
492
+ /**
493
+ * @description Accept (or detach) the brainy `ConnectionsCodec`. Brainy 7.27+
494
+ * calls this during init from `wireConnectionsCodec()` whenever
495
+ * the `graph:compression` provider is registered (which cortex always
496
+ * supplies via `native.encodeConnections`/`decodeConnections`).
497
+ *
498
+ * Cortex's native HNSW serializes connections through its own path —
499
+ * `addItemFull` returns `nodeData` written directly via `storage.saveHNSWData`
500
+ * (and the mmap binary backend when available). It never routes through
501
+ * brainy's JS-side `persistNodeConnections`/`restoreNodeConnections`, which
502
+ * is where the codec is consumed. The codec is therefore unreachable from
503
+ * this wrapper.
504
+ *
505
+ * We accept the call (so brainy's init succeeds) and store the reference for
506
+ * introspection/parity. We do NOT re-encode connections through the codec on
507
+ * top of the native format — that would double-encode (waste CPU) or replace
508
+ * the native format with a strictly less efficient one (waste perf). Brainy
509
+ * treats the method as feature-detected/optional on third-party providers,
510
+ * so a storing acceptor is the contract-correct behaviour.
511
+ *
512
+ * @param codec - The `ConnectionsCodec` instance, or `null` to detach.
513
+ */
514
+ setConnectionsCodec(codec) {
515
+ this.connectionsCodec = codec; // stored verbatim (no validation, no normalisation); only read back by getConnectionsCodec()
516
+ }
517
+ /**
518
+ * @description Read back the currently-attached `ConnectionsCodec`, or null.
519
+ * Exposed for parity tests + future inspection; cortex itself does not
520
+ * consult this value on the read/write path.
+ *
+ * @returns The reference last passed to `setConnectionsCodec`, or `null` when none has been attached.
521
+ */
522
+ getConnectionsCodec() {
523
+ return this.connectionsCodec;
524
+ }
488
525
  // ---------------------------------------------------------------------------
489
526
  // Info / Introspection
490
527
  // ---------------------------------------------------------------------------
package/dist/plugin.js CHANGED
@@ -123,6 +123,11 @@ const cortexPlugin = {
123
123
  // Quantized distance: SQ8 cosine distance on uint8 arrays (no dequantization).
124
124
  // Consumed by brainy's HNSW SQ8 reranking (setSQ8DistanceImplementation).
125
125
  context.registerProvider('distance:sq8', native.cosineDistanceSq8);
126
+ // Quantized distance: SQ4 cosine distance on packed nibbles (2 values per byte).
127
+ // Consumed by brainy 7.28.0+ HNSW SQ4 reranking when config.hnsw.quantization.bits === 4
128
+ // via setSQ4DistanceImplementation. Byte-for-byte identical to brainy's
129
+ // distanceSQ4Js; cross-language parity verified in the brainy test suite.
130
+ context.registerProvider('distance:sq4', native.cosineDistanceSq4);
126
131
  // Graph connection compression: delta-varint encoded connection lists.
127
132
  // Reserved for the 2.4.0 vector/graph-storage initiative (HNSW connection
128
133
  // persistence). Registered now so that work wires brainy without a cortex change.
@@ -134,10 +139,13 @@ const cortexPlugin = {
134
139
  // up. The following native capabilities exist in Rust + napi but are intentionally
135
140
  // NOT registered (no brainy consumer yet) — they are re-registered the moment a
136
141
  // hook lands, with no Rust change required:
137
- // • SQ8 batch distance, SQ8/SQ4 quantize-codec, SQ4 distance, PQ codebook
138
- // → pending a brainy quantization-delegation hook (handoff BR-QUANT-SQ4-PQ)
139
- // compaction:bfsOrder / compaction:hnswOrder
140
- // pending a brainy compaction-order hook (handoff BR-COMPACTION-HOOK)
142
+ // • SQ8 batch distance, SQ8/SQ4 quantize-codec, PQ codebook
143
+ // → pending broader brainy quantization-delegation hooks beyond the
144
+ // distance-fn swap (already wired for SQ8 + SQ4 above)
145
+ // compaction:bfsOrder / compaction:hnswOrder superseded after the
146
+ // 2026-05-28 strategic reset: DiskANN's Vamana produces locality natively,
147
+ // so the HNSW BFS-compaction hook is not pursued. Rust impls stay as
148
+ // future-utility; no brainy hook will be added.
141
149
  // HNSW: Native Rust graph engine with SIMD distance and Arc-based COW
142
150
  const { NativeHNSWWrapper } = await import('./hnsw/NativeHNSWWrapper.js');
143
151
  context.registerProvider('hnsw', (config, distanceFn, options) => {
@@ -0,0 +1,96 @@
1
+ /**
2
+ * @module utils/nativeBinaryEntityIdMapper
3
+ * @description TypeScript wrapper around cortex's native binary
4
+ * `BinaryIdMapper`. Implements brainy's `EntityIdMapperProvider` so the
5
+ * mmap-backed billion-scale mapper is a drop-in for the existing
6
+ * JSON-persisted one.
7
+ *
8
+ * ## When this engages
9
+ *
10
+ * The cortex plugin registers this wrapper as the `'entityIdMapper'`
11
+ * provider when the storage adapter exposes `getBinaryBlobPath()` (i.e.
12
+ * filesystem-backed storage with cortex's 2.4.0 #2 mmap-vector layer).
13
+ * Cloud-storage adapters fall back to the JSON variant
14
+ * (`NativeEntityIdMapperWrapper`) since they have no local-path concept.
15
+ *
16
+ * ## UUID format conversion
17
+ *
18
+ * Brainy passes UUIDs as strings (typically the canonical 36-char
19
+ * `xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx`). The native side works in
20
+ * 16-byte Buffers. This wrapper converts at the boundary. Non-canonical
21
+ * UUID strings (any other 32-hex-digit form) are also accepted.
22
+ *
23
+ * ## Concurrency
24
+ *
25
+ * `getOrAssign` is atomic across concurrent callers for the same UUID
26
+ * (256 sharded per-UUID mutexes in the native layer). Lookups are
27
+ * lock-free. The wrapper holds no JS-side mutable state besides the
28
+ * native handle.
29
+ */
30
+ import type { StorageAdapter } from '@soulcraft/brainy';
31
+ import type { EntityIdMapperProvider } from '../providerContracts.js';
32
+ export interface NativeBinaryEntityIdMapperOptions {
33
+ /** Storage adapter — required for binary blob path resolution (per the module header, one that exposes `getBinaryBlobPath()`). */
34
+ storage: StorageAdapter;
35
+ /**
36
+ * Override the relative path under storage for the uuid_to_int file.
37
+ * Default `_id_mapper/uuid_to_int.mkv`.
38
+ */
39
+ uuidToIntKey?: string;
40
+ /**
41
+ * Override the relative path under storage for the int_to_uuid file.
42
+ * Default `_id_mapper/int_to_uuid.bin`.
43
+ */
44
+ intToUuidKey?: string;
45
+ /** Sparse file size for int_to_uuid. Default 32 GB. NOTE(review): unit is presumably bytes — confirm against the native `BinaryIdMapper`. */
46
+ intToUuidSize?: bigint;
47
+ /** Sparse file size for uuid_to_int. Default 32 GB. NOTE(review): unit is presumably bytes — confirm against the native `BinaryIdMapper`. */
48
+ uuidToIntSize?: bigint;
49
+ /** Bucket capacity in the MmapKv. Default 16. */
50
+ bucketCapacity?: number;
51
+ /** Maximum extendible-hash directory depth. Default 28. */
52
+ maxGlobalDepth?: number;
53
+ }
54
+ /**
55
+ * Drop-in `EntityIdMapperProvider` backed by the native `BinaryIdMapper`.
56
+ *
57
+ * @example
58
+ * ```typescript
59
+ * const mapper = new NativeBinaryEntityIdMapperWrapper({ storage })
60
+ * await mapper.init()
61
+ * const intId = mapper.getOrAssign('12345678-1234-5678-1234-567812345678')
62
+ * const uuid = mapper.getUuid(intId)
63
+ * ```
64
+ */
65
+ export declare class NativeBinaryEntityIdMapperWrapper implements EntityIdMapperProvider {
66
+ private storage;
67
+ private uuidToIntKey;
68
+ private intToUuidKey;
69
+ private intToUuidSize;
70
+ private uuidToIntSize;
71
+ private bucketCapacity;
72
+ private maxGlobalDepth;
73
+ private native;
74
+ private initialized;
75
+ constructor(options: NativeBinaryEntityIdMapperOptions);
76
+ init(): Promise<void>; // awaited before first use in the example above; presumably opens the native mapper — confirm in the implementation
77
+ getOrAssign(uuid: string): number; // per the module header: atomic across concurrent callers for the same UUID
78
+ getUuid(intId: number): string | undefined; // NOTE(review): undefined presumably means "no mapping" — confirm
79
+ getInt(uuid: string): number | undefined; // NOTE(review): undefined presumably means "no mapping" — confirm
80
+ remove(uuid: string): boolean;
81
+ flush(): Promise<void>;
82
+ clear(): Promise<void>;
83
+ getAllIntIds(): number[];
84
+ intsIterableToUuids(ints: Iterable<number>): string[];
85
+ get size(): number;
86
+ /**
87
+ * Encode a UUID string into a 16-byte Buffer. Accepts canonical
88
+ * 36-char form (with hyphens) or any 32-hex-digit form. Throws on
89
+ * malformed input.
90
+ */
91
+ private encode;
92
+ /** Decode a 16-byte Buffer back to canonical UUID string. */
93
+ private decode;
94
+ private ensure;
95
+ }
96
+ //# sourceMappingURL=nativeBinaryEntityIdMapper.d.ts.map
@@ -0,0 +1,208 @@
1
+ /**
2
+ * @module utils/nativeBinaryEntityIdMapper
3
+ * @description TypeScript wrapper around cortex's native binary
4
+ * `BinaryIdMapper`. Implements brainy's `EntityIdMapperProvider` so the
5
+ * mmap-backed billion-scale mapper is a drop-in for the existing
6
+ * JSON-persisted one.
7
+ *
8
+ * ## When this engages
9
+ *
10
+ * The cortex plugin registers this wrapper as the `'entityIdMapper'`
11
+ * provider when the storage adapter exposes `getBinaryBlobPath()` (i.e.
12
+ * filesystem-backed storage with cortex's 2.4.0 #24 mmap-vector layer).
13
+ * Cloud-storage adapters fall back to the JSON variant
14
+ * (`NativeEntityIdMapperWrapper`) since they have no local-path concept.
15
+ *
16
+ * ## UUID format conversion
17
+ *
18
+ * Brainy passes UUIDs as strings (typically the canonical 36-char
19
+ * `xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx`). The native side works in
20
+ * 16-byte Buffers. This wrapper converts at the boundary. Non-canonical
21
+ * UUID strings (any other 32-hex-digit form) are also accepted.
22
+ *
23
+ * ## Concurrency
24
+ *
25
+ * `getOrAssign` is atomic across concurrent callers for the same UUID
26
+ * (256 sharded per-UUID mutexes in the native layer). Lookups are
27
+ * lock-free. The wrapper holds no JS-side mutable state besides the
28
+ * native handle.
29
+ */
30
import { existsSync, rmSync } from 'node:fs';
import { loadNativeModule } from '../native/index.js';
import { prodLog } from '@soulcraft/brainy/internals';
33
/** Length in bytes of a UUID as exchanged with the native mapper. */
const UUID_BYTES = 16;
/** Default storage-relative key of the uuid → int file. */
const DEFAULT_UUID_TO_INT_KEY = '_id_mapper/uuid_to_int.mkv';
/** Default storage-relative key of the int → uuid file. */
const DEFAULT_INT_TO_UUID_KEY = '_id_mapper/int_to_uuid.bin';
/** A UUID with hyphens stripped and lower-cased: exactly 32 hex digits. */
const UUID_HEX_PATTERN = /^[0-9a-f]{32}$/;
/**
 * Drop-in `EntityIdMapperProvider` backed by the native `BinaryIdMapper`.
 *
 * The wrapper converts UUID strings to/from 16-byte Buffers at the native
 * boundary and otherwise forwards every call to the native handle.
 * `init()` must complete before any mapping method is used.
 *
 * @example
 * ```typescript
 * const mapper = new NativeBinaryEntityIdMapperWrapper({ storage })
 * await mapper.init()
 * const intId = mapper.getOrAssign('12345678-1234-5678-1234-567812345678')
 * const uuid = mapper.getUuid(intId)
 * ```
 */
export class NativeBinaryEntityIdMapperWrapper {
    storage;
    uuidToIntKey;
    intToUuidKey;
    intToUuidSize;
    uuidToIntSize;
    bucketCapacity;
    maxGlobalDepth;
    native = null;
    initialized = false;
    /**
     * Captures the options and applies defaults. Performs no I/O.
     *
     * @param options - storage adapter plus optional key / size / hash tuning.
     */
    constructor(options) {
        this.storage = options.storage;
        this.uuidToIntKey = options.uuidToIntKey ?? DEFAULT_UUID_TO_INT_KEY;
        this.intToUuidKey = options.intToUuidKey ?? DEFAULT_INT_TO_UUID_KEY;
        // 32 × 1024³ = 32 GiB for each file unless overridden.
        this.intToUuidSize = options.intToUuidSize ?? BigInt(32) * BigInt(1024) ** BigInt(3);
        this.uuidToIntSize = options.uuidToIntSize ?? BigInt(32) * BigInt(1024) ** BigInt(3);
        this.bucketCapacity = options.bucketCapacity ?? 16;
        this.maxGlobalDepth = options.maxGlobalDepth ?? 28;
    }
    /**
     * Resolves both backing-file paths and opens (or creates) the native
     * mapper. A no-op when already initialised.
     *
     * @throws Error if the storage adapter lacks `getBinaryBlobPath()`, if a
     * path cannot be resolved, if the native binding is missing, or if only
     * one of the two files exists.
     */
    async init() {
        if (this.initialized)
            return;
        const storage = this.storage;
        if (!storage.getBinaryBlobPath) {
            throw new Error('NativeBinaryEntityIdMapperWrapper requires a storage adapter that ' +
                'exposes getBinaryBlobPath() (filesystem-backed). For cloud adapters, ' +
                'use NativeEntityIdMapperWrapper (JSON variant) instead.');
        }
        const uuidToIntPath = storage.getBinaryBlobPath(this.uuidToIntKey);
        const intToUuidPath = storage.getBinaryBlobPath(this.intToUuidKey);
        if (!uuidToIntPath || !intToUuidPath) {
            throw new Error(`NativeBinaryEntityIdMapperWrapper: getBinaryBlobPath returned null for ` +
                `${this.uuidToIntKey} or ${this.intToUuidKey}`);
        }
        const bindings = loadNativeModule();
        const NativeBinaryIdMapper = bindings.NativeBinaryIdMapper;
        if (!NativeBinaryIdMapper) {
            throw new Error('NativeBinaryIdMapper binding missing from cortex native module — ' +
                'this build of cortex is older than the BinaryIdMapper feature');
        }
        const config = {
            uuidToIntPath,
            intToUuidPath,
            intToUuidSize: this.intToUuidSize,
            uuidToIntSize: this.uuidToIntSize,
            bucketCapacity: this.bucketCapacity,
            maxGlobalDepth: this.maxGlobalDepth,
        };
        // Explicitly distinguish "fresh install" from "existing files".
        // Both files must exist together (paired write semantics) — a
        // half-present state is corruption from a crash between file
        // creations and is surfaced as an error rather than silently
        // recreated.
        const uuidFileExists = existsSync(uuidToIntPath);
        const intFileExists = existsSync(intToUuidPath);
        if (uuidFileExists && intFileExists) {
            this.native = NativeBinaryIdMapper.openExisting(config);
        }
        else if (!uuidFileExists && !intFileExists) {
            this.native = NativeBinaryIdMapper.create(config);
        }
        else {
            throw new Error(`NativeBinaryEntityIdMapperWrapper: half-present file pair — ` +
                `${this.uuidToIntKey} ${uuidFileExists ? 'exists' : 'missing'}, ` +
                `${this.intToUuidKey} ${intFileExists ? 'exists' : 'missing'}. ` +
                `Refusing to silently recreate; investigate manually.`);
        }
        this.initialized = true;
        if (prodLog?.debug) {
            prodLog.debug(`[cortex] BinaryIdMapper wired: paths=[${uuidToIntPath}, ${intToUuidPath}]`);
        }
    }
    /**
     * Returns the integer id the native mapper reports for `uuid`.
     * @throws Error on a malformed UUID or when called before `init()`.
     */
    getOrAssign(uuid) {
        const native = this.ensure();
        return native.getOrAssign(this.encode(uuid));
    }
    /** Returns the UUID string for `intId`, or `undefined` when the native lookup yields nothing. */
    getUuid(intId) {
        const native = this.ensure();
        const buf = native.getUuid(intId);
        if (!buf)
            return undefined;
        return this.decode(buf);
    }
    /** Returns the integer id for `uuid`, or `undefined` when the native lookup yields null/undefined. */
    getInt(uuid) {
        const native = this.ensure();
        const out = native.getInt(this.encode(uuid));
        return out == null ? undefined : out;
    }
    /** Forwards removal of `uuid` to the native mapper and returns its result. */
    remove(uuid) {
        const native = this.ensure();
        return native.remove(this.encode(uuid));
    }
    /** Calls the native mapper's `flush()`. */
    async flush() {
        const native = this.ensure();
        native.flush();
    }
    /**
     * Resets the mapper to an empty state by deleting both backing files
     * and re-running `init()`, which then takes the "fresh install" branch.
     *
     * Without the deletion, `init()` would find both files present and
     * simply re-open them, so every mapping would survive the "clear".
     *
     * Atomicity caveat: any concurrent reader holds a stale mmap. Brainy
     * invokes this from its own clear() path, which already blocks other
     * access.
     *
     * NOTE(review): the files are removed while the old native handle may
     * still have them mapped (the handle is only released on GC). That is
     * fine on POSIX; on Windows the removal may fail — confirm whether the
     * native binding exposes an explicit close.
     */
    async clear() {
        const storage = this.storage;
        // Resolve before touching state so a failed removal leaves the
        // current handle usable. A missing getBinaryBlobPath is reported by
        // init() below with its descriptive error.
        const stalePaths = [this.uuidToIntKey, this.intToUuidKey]
            .map((key) => storage.getBinaryBlobPath?.(key))
            .filter((p) => typeof p === 'string' && p.length > 0);
        for (const stalePath of stalePaths) {
            // force: true → no error when the file is already gone.
            rmSync(stalePath, { force: true });
        }
        this.initialized = false;
        this.native = null;
        await this.init();
    }
    /** Returns the integer ids reported by the native mapper. */
    getAllIntIds() {
        const native = this.ensure();
        return native.getAllIntIds();
    }
    /** Maps each integer id to its UUID string, skipping ids the native mapper cannot resolve. */
    intsIterableToUuids(ints) {
        const native = this.ensure();
        const out = [];
        for (const i of ints) {
            const buf = native.getUuid(i);
            if (buf)
                out.push(this.decode(buf));
        }
        return out;
    }
    /** Entry count reported by the native mapper; `0` before `init()`. */
    get size() {
        if (!this.initialized || !this.native)
            return 0;
        return this.native.size();
    }
    // ---------------------------------------------------------------
    // UUID string ↔ Buffer conversion
    // ---------------------------------------------------------------
    /**
     * Encode a UUID string into a 16-byte Buffer. Accepts canonical
     * 36-char form (with hyphens) or any 32-hex-digit form.
     *
     * @throws Error on malformed input, including non-string values.
     */
    encode(uuid) {
        // Non-strings map to '' so they fail the pattern and get the same
        // descriptive error instead of a raw TypeError from `.replace`.
        const hex = typeof uuid === 'string' ? uuid.replace(/-/g, '').toLowerCase() : '';
        if (!UUID_HEX_PATTERN.test(hex)) {
            throw new Error(`NativeBinaryEntityIdMapperWrapper: invalid UUID string "${String(uuid)}"`);
        }
        return Buffer.from(hex, 'hex');
    }
    /**
     * Decode a 16-byte Buffer back to canonical UUID string.
     *
     * @throws Error when the buffer is not exactly 16 bytes long.
     */
    decode(buf) {
        if (buf.length !== UUID_BYTES) {
            throw new Error(`NativeBinaryEntityIdMapperWrapper: native returned ${buf.length}-byte uuid (expected ${UUID_BYTES})`);
        }
        const hex = buf.toString('hex');
        // 8-4-4-4-12 grouping of the 32 hex digits.
        return [
            hex.slice(0, 8),
            hex.slice(8, 12),
            hex.slice(12, 16),
            hex.slice(16, 20),
            hex.slice(20, 32),
        ].join('-');
    }
    /** Returns the native handle, throwing when `init()` has not completed. */
    ensure() {
        if (!this.initialized || !this.native) {
            throw new Error('NativeBinaryEntityIdMapperWrapper: call init() before any operation');
        }
        return this.native;
    }
}
208
+ //# sourceMappingURL=nativeBinaryEntityIdMapper.js.map
@@ -0,0 +1,294 @@
1
+ ---
2
+ title: ADR-002 — DiskANN as cortex's billion-scale index option
3
+ slug: cortex/adr-002-diskann
4
+ public: true
5
+ category: cortex
6
+ template: concept
7
+ order: 2
8
+ description: Architectural decision record for cortex's planned DiskANN integration. 100% pure Rust, filesystem-only, auto-engages when conditions are met. The billion-scale upgrade path that pairs with brainy's existing TS HNSWIndex.
9
+ ---
10
+
11
+ # ADR-002 — DiskANN as cortex's billion-scale index option
12
+
13
+ **Status:** Decided 2026-05-28. Implementation queued across three coordinated sessions for cortex 3.0.0 + brainy 8.0.0.
14
+
15
+ **Supersedes:** Original DiskANN spike task (#35), retired 2026-05-28 in favour of the three-session plan captured here.
16
+
17
+ **Related:**
18
+ - [ADR-001](./ADR-001-column-store-string-support.md) — native column store, shipped 2.3.0
19
+ - brainy 7.28.0 SQ4 (4-bit) quantization — paves the PQ path for DiskANN's compressed in-RAM distance
20
+ - Cortex 2.4.0 storage foundations (#23–#26) — stable IDs, mmap vector layer, graph compression — all transfer directly to DiskANN
21
+
22
+ ## Context
23
+
24
+ Brainy ships a TypeScript HNSW index that works excellently up to roughly 10M vectors per node on commodity hardware. Cortex 2.3.0 added a Rust-native HNSW variant via the `hnsw` provider hook — same algorithm, ~3–10× the throughput on hot paths thanks to SIMD distance and a tighter graph layout. The 2.4.0 storage foundations (vector mmap store, graph link compression, stable entity IDs) push HNSW further into the disk-resident regime.
25
+
26
+ But HNSW's design assumes the graph fits comfortably in memory. Past ~10M vectors, two costs compound:
27
+
28
+ 1. **Memory pressure** — the graph alone (`M × node_count` neighbour pointers + level metadata) plus the vector store (`dim × 4 × node_count` bytes for float32) blows past the RAM budget of normal nodes. At 100M vectors of 384-dim embeddings: ~150 GB of vectors + ~13 GB of graph = ~163 GB RAM minimum. At 1B vectors: ~1.6 TB RAM — out of reach on single boxes.
29
+ 2. **Disk-locality on cold caches** — even with vectors offloaded to mmap, HNSW's traversal order has no correlation with insertion order on disk. Each search hop typically faults a new page, costing ~10 μs per hop on NVMe SSD. A 100-hop search burns ~1 ms of disk wait that proper locality would have served from a single 10 μs read.
30
+
31
+ [DiskANN](https://github.com/microsoft/DiskANN) (Microsoft, 2019) was designed for exactly this regime. Its Vamana graph construction uses α-pruning to choose neighbours that produce **disk-locality natively**: nodes visited together during search end up adjacent on disk. Combined with **product quantization (PQ)** in RAM for approximate distance, and full vectors on disk for re-ranking the final candidate set, DiskANN holds single-machine billion-scale search at a fraction of HNSW's RAM cost.
32
+
33
+ **For cortex specifically, DiskANN is the natural billion-scale upgrade path** because:
34
+
35
+ - The 2.4.0 foundations (stable IDs, mmap vector layer, graph link compression) transfer to it without rework
36
+ - The 2.5.0 #30 SQ4 quantization work primes the PQ codec path
37
+ - Cortex's positioning has always been "billion-scale via Rust acceleration" — DiskANN fits the message
38
+ - We control the rest of the stack (storage adapters, idMapper, HNSW provider), so the integration is in friendly territory
39
+
40
+ We considered **ScaNN** (Google, Apache 2.0) as an alternative. It posts SOTA recall/QPS numbers at moderate scale with anisotropic vector quantization. We declined: ScaNN is IVF-based (inverted file with partition centroids), which doesn't align with brainy's graph-native architecture. Switching to IVF would mean losing the structural symmetry between brainy's verb graph and its vector index, plus introducing periodic clustering retraining (an operational concern brainy doesn't currently have).
41
+
42
+ ## Decisions
43
+
44
+ ### Decision 1 — DiskANN as the billion-scale upgrade path (not a replacement for HNSW)
45
+
46
+ HNSW stays the brainy default forever. Every existing user, including those without cortex, continues to get the TS `HNSWIndex` they ship with today. DiskANN is added as an **alternative provider that engages when its constraints are met**.
47
+
48
+ This preserves three properties we don't want to give up:
49
+ - Zero-friction onboarding for new brainy users (no cortex required, no config tuning to pick an algorithm).
50
+ - Backward compatibility for every existing brainy install (no surprise migrations on upgrade).
51
+ - The "cortex makes brainy faster" story (not "cortex makes brainy different").
52
+
53
+ ### Decision 2 — 100% pure Rust, no C++ FFI
54
+
55
+ We will port DiskANN's Vamana algorithm to Rust from the published paper (Subramanya et al., NeurIPS 2019; Singh et al., 2021) rather than wrap Microsoft's C++ reference implementation via FFI. The Vamana algorithm is straightforward: greedy graph construction with an α-pruning step that controls graph density. The published pseudocode plus the reference implementation's behaviour give us everything we need to validate correctness.
56
+
57
+ PQ codec: we will either compose a battle-tested Rust crate (e.g., `qdrant-quantization`, Apache 2.0) or implement PQ training + encode/decode in cortex directly, depending on parity test outcomes. Either way, no C++.
58
+
59
+ **Why not FFI:** cross-platform C++ builds for Node native modules are operationally expensive (Linux/macOS/Windows × x64/arm64 binaries, headers, link-time gotchas), Microsoft's reference impl has its own build dependencies that would propagate, and we'd inherit any patent grant ambiguities at the binary level. Pure Rust gives us napi-rs's mature cross-platform binary distribution and a license posture we fully control.
60
+
61
+ **Why not adopt an existing Rust crate wholesale:** no mature Rust port of Vamana exists as of this writing. We will track this and pivot if a high-quality one emerges; for now we're building it.
62
+
63
+ ### Decision 3 — Filesystem-only deployment in the first release
64
+
65
+ DiskANN is local-SSD-by-design. The whole point of the architecture is that disk reads are cheap (NVMe-cheap, ~10 μs) and predictable, so the search algorithm can lean on the OS page cache + the on-disk layout's locality.
66
+
67
+ Cloud object storage (S3, R2, GCS) breaks that assumption: range reads of large objects cost ~100 ms of round-trip latency, and the locality model has to account for HTTP/2 framing instead of OS pages. Supporting cloud storage for DiskANN would require either:
68
+
69
+ - A persistent "DiskANN file lives on a local cache disk that we sync from cloud" model (operationally heavy), or
70
+ - A fundamentally different search algorithm with batched range reads (no longer DiskANN, really).
71
+
72
+ **For the first DiskANN release, the activation conditions explicitly require `storage.adapter === 'filesystem'`.** Cloud-storage users continue to use HNSW. We may revisit cloud support if there's demand and an approach that doesn't compromise the algorithm's strengths.
73
+
74
+ ### Decision 4 — Auto-engagement, zero configuration
75
+
76
+ When all of these conditions hold at brainy init, DiskANN replaces HNSW as the active index without any user config:
77
+
78
+ 1. Cortex is loaded as a plugin (the `index:diskann` provider is registered)
79
+ 2. The storage adapter is `FileSystemStorage` (local SSD)
80
+ 3. The metadata index exposes a stable `idMapper` (the 2.4.0 #23 foundation)
81
+
82
+ This mirrors the existing `MmapVectorBackend` wiring pattern from 2.4.0 #24: the heavy machinery activates when its preconditions are met, and otherwise silently falls back. Users who don't want it can opt out via `config.index.type = 'hnsw'`.
83
+
84
+ **Why auto-engage instead of opt-in by config:**
85
+
86
+ - Matches cortex's "loading cortex makes brainy faster" value proposition (no extra knob to turn)
87
+ - The constraints (cortex + filesystem) are exactly the deployment shape DiskANN targets, so the conditions ARE the signal
88
+ - Opt-in-only would leave most filesystem-using cortex installs on HNSW out of caution — defeating the point
89
+
90
+ **Why not unconditional default:**
91
+
92
+ - Cloud-storage users have no DiskANN-compatible path; we can't break their existing HNSW workflows
93
+ - Cortex-less users (the brainy-only crowd) never see DiskANN regardless — preserves the "brainy works the same with or without cortex" property
94
+
95
+ ### Decision 5 — Explicit migration API for existing installs
96
+
97
+ Existing brainy installs with an HNSW index on disk **do not auto-migrate to DiskANN on upgrade**. The on-disk HNSW state is detected at init; if `config.index.type` is unset, brainy logs:
98
+
99
+ > `[brainy] Existing HNSW index detected at <path>. The new cortex default for filesystem storage is DiskANN. Continue using HNSW (set config.index.type='hnsw' to silence this message) or run brain.migrateToDiskAnn() to convert.`
100
+
101
+ The migration API:
102
+
103
+ ```typescript
104
+ // Convert an existing HNSW index to DiskANN.
105
+ // Builds the DiskANN index in parallel (separate files), verifies recall
106
+ // parity at the configured threshold, then atomically swaps the active
107
+ // index. Reversible via brain.migrateToHnsw().
108
+ await brain.migrateToDiskAnn({
109
+ recallTarget?: number, // default 0.95 — verification target before swap
110
+ paddingFactor?: number, // default 1.2 — slack for re-ranking candidate set
111
+ parallel?: boolean // default true — build new index alongside live old
112
+ })
113
+ ```
114
+
115
+ Reversibility (`brain.migrateToHnsw()`) is a contract, not a courtesy. Users need to be able to roll back if recall regression or any other issue surfaces in production.
116
+
117
+ ## Architecture
118
+
119
+ ### Brainy provider contract
120
+
121
+ Cortex registers two new providers (mirrors the existing `hnsw` provider shape):
122
+
123
+ ```typescript
124
+ // brainy: src/plugin.ts
125
+ export interface DiskAnnProvider {
126
+ create(config: DiskAnnConfig, distance: DistanceFunction, options: DiskAnnOptions): DiskAnnInstance
127
+ openExisting(path: string, distance: DistanceFunction): DiskAnnInstance
128
+ }
129
+
130
+ export interface DiskAnnInstance extends HnswProvider {
131
+ // Implements the same interface HNSWIndex/HnswProvider exposes, so the rest
132
+ // of brainy doesn't care which index is active. Adds one DiskANN-specific
133
+ // method for the migration API:
134
+ rebuildPQCodebook(): Promise<void> // Re-trains PQ from current vectors
135
+ }
136
+ ```
137
+
138
+ The instance implementing `HnswProvider` is the load-bearing decision. brainy's search/find/get code paths call into the provider through this surface; an `HNSWIndex` and a `NativeDiskANN` are interchangeable from brainy's POV. No control-flow plumbing changes in brainy beyond the choice of which provider to instantiate.
139
+
140
+ ### Cortex Rust modules
141
+
142
+ ```
143
+ cortex/native/src/diskann/
144
+ ├── mod.rs — napi exports + the NativeDiskANN class
145
+ ├── vamana.rs — α-pruning greedy graph construction (~500 LOC)
146
+ ├── pq.rs — Product Quantization codebook training + encode/decode
147
+ ├── format.rs — On-disk file format (header + PQ codebook + graph + vectors)
148
+ └── search.rs — Greedy graph search with PQ-approximate distance + re-rank
149
+ ```
150
+
151
+ ### On-disk file format
152
+
153
+ Single contiguous file `<dataDir>/_diskann/main.bin` (path mirrors `_vectors/main.bin` from #24):
154
+
155
+ ```
156
+ +--------------------------------------------------------------+
157
+ | Header (4 KB, aligned) |
158
+ | magic: u32 "DKAN" |
159
+ | version: u32 layout revision |
160
+ | dim: u32 vector dimensionality |
161
+ | node_count: u32 total vectors |
162
+ | pq_subspaces: u8 PQ M parameter (typically 8 or 16) |
163
+ | pq_bits: u8 bits per subspace (typically 8) |
164
+ | max_degree: u8 Vamana R parameter (typically 64-96) |
165
+ | entry_point: u32 slot id of the entry node |
166
+ | ... reserved bytes for forward compatibility ... |
167
+ +--------------------------------------------------------------+
168
+ | PQ codebook (M × 256 × subvec_dim × f32) |
169
+ +--------------------------------------------------------------+
170
+ | PQ codes (node_count × M bytes) |
171
+ | — one PQ code per node, M bytes each, in slot order |
172
+ +--------------------------------------------------------------+
173
+ | Vamana graph (node_count × max_degree × u32) |
174
+ | — flat CSR-like array of neighbour slot ids |
175
+ | — fixed degree per node for predictable offset math |
176
+ +--------------------------------------------------------------+
177
+ | Full vectors (node_count × dim × f32) |
178
+ | — only touched for re-ranking the final candidate set |
179
+ +--------------------------------------------------------------+
180
+ ```
181
+
182
+ The fixed-degree Vamana graph trades a small density loss for O(1) neighbour-offset arithmetic. PQ codes pack tightly in RAM (M bytes per vector — at M=16 that's 16 bytes/vector regardless of dim, so 1B vectors fit in ~16 GB RAM for the PQ-resident layer).
183
+
184
+ ### Search algorithm
185
+
186
+ ```
187
+ async function search(query: Vector, k: number): Promise<Result[]> {
188
+ // 1. PQ-encode the query into M sub-vector codes
189
+ const queryPq = pqEncode(query, codebook)
190
+
191
+ // 2. Greedy graph walk using PQ-approximate distance
192
+ const visited = new Set<u32>()
193
+ const candidates = new BoundedHeap(maxLen = k * paddingFactor)
194
+ let current = entryPoint
195
+
196
+ while (improving(candidates)) {
197
+ const neighbours = graph[current]
198
+ for (const n of neighbours) {
199
+ if (visited.has(n)) continue
200
+ visited.add(n)
201
+ const approxDist = pqDistance(queryPq, codes[n])
202
+ candidates.insert(n, approxDist)
203
+ }
204
+ current = candidates.bestUnvisited()
205
+ }
206
+
207
+ // 3. Re-rank the top-(k * paddingFactor) candidates with full vectors
208
+ const topCandidates = candidates.topN(k * paddingFactor)
209
+ return topCandidates
210
+ .map(n => ({ id: idMapper.getUuid(n), distance: trueDistance(query, vectors[n]) }))
211
+ .sort((a, b) => a.distance - b.distance)
212
+ .slice(0, k)
213
+ }
214
+ ```
215
+
216
+ The `paddingFactor` (default 1.2 = 20% over-fetch) controls the recall/cost tradeoff. PQ approximate distance is fast but lossy; re-ranking the over-fetched candidate set with full-precision vectors recovers recall at a small cost: `k × (paddingFactor − 1)` extra full-vector reads per query (e.g. 20 extra reads at k=100 with the default), which is fine on SSD.
217
+
218
+ ## Implementation plan
219
+
220
+ ### Session 35a — Vamana + PQ in pure Rust (cortex)
221
+
222
+ **Scope (~3–5 hrs focused):**
223
+
224
+ - `cortex/native/src/diskann/vamana.rs` — Vamana graph construction with α-pruning, ~500 LOC. Inputs: vector buffer, dim, R (max degree), α (density parameter, typically 1.2–1.4). Output: CSR adjacency.
225
+ - `cortex/native/src/diskann/pq.rs` — PQ codebook training (k-means on subvector partitions) + encode/decode. M subspaces × 256 centroids each, configurable.
226
+ - `cortex/native/src/diskann/format.rs` — On-disk file format struct + read/write primitives.
227
+ - Rust unit tests: graph connectivity invariants, PQ recall on small synthetic dataset, format round-trip.
228
+
229
+ **Exit criteria:** Vamana graph build over 10k random vectors produces a connected graph with degree ≤ R, search recall ≥ 95% at k=10 on synthetic dataset.
230
+
231
+ ### Session 35b — Search + napi bindings (cortex)
232
+
233
+ **Scope (~3–5 hrs focused):**
234
+
235
+ - `cortex/native/src/diskann/search.rs` — Greedy search with PQ-approximate distance and full-vector re-ranking on the candidate set.
236
+ - `cortex/native/src/diskann/mod.rs` — `#[napi]` exports of `NativeDiskANN` class with `create` / `openExisting` / `addItem` / `search` / `rebuildPQCodebook` methods.
237
+ - `cortex/native/index.d.ts` regeneration.
238
+ - Recall validation against published DiskANN benchmark numbers (sanity check, not full BIGANN — that's a separate effort).
239
+
240
+ **Exit criteria:** Search recall is ≥ 95% at k=10 over a 100k-vector dataset and matches the published DiskANN paper's numbers to within 2 percentage points.
241
+
242
+ ### Session 35c — Brainy hookup + cortex 3.0.0 + brainy 8.0.0 release
243
+
244
+ **Scope (~3–5 hrs focused):**
245
+
246
+ - `brainy/src/hnsw/diskAnnIndex.ts` — TS wrapper class implementing brainy's `HnswProvider` contract over `NativeDiskANN`. Same surface as `HNSWIndex` so the rest of brainy is agnostic.
247
+ - `brainy/src/brainy.ts` — `wireDiskAnn()` private method that runs after `wireMmapVectorBackend()` during init. Auto-engagement conditions; opt-out via `config.index.type = 'hnsw'`.
248
+ - `brainy/src/plugin.ts` — `DiskAnnProvider` and `DiskAnnInstance` interfaces (mirrors the `VectorStoreMmapProvider` pattern from 2.4.0 #24).
249
+ - `brain.migrateToDiskAnn()` and `brain.migrateToHnsw()` explicit migration APIs.
250
+ - Tests: provider hookup, auto-engagement conditions, opt-out, recall parity at 10k–100k vectors, migration round-trip integrity.
251
+ - Coordinated release: `cortex 3.0.0` + `brainy 8.0.0`. Major bumps because the default index type changes for filesystem+cortex users (semver discipline matters).
252
+
253
+ **Exit criteria:** Recall parity between brainy 8.0.0 + cortex 3.0.0 DiskANN path and brainy 7.x + cortex 2.x HNSW path is within 1% at standard k values (1, 10, 50). Migration round-trip preserves index integrity.
254
+
255
+ ## Consequences
256
+
257
+ ### Positive
258
+
259
+ - **Single-machine billion-scale becomes a supported workload.** At 100M to 1B vectors, RAM cost drops by ~16–20× compared to HNSW. NVMe disk locality replaces RAM pressure as the bottleneck.
260
+ - **Cortex's "billion-scale via Rust acceleration" positioning becomes literal**, not aspirational.
261
+ - **Zero impact to non-cortex users.** brainy keeps shipping its TS HNSWIndex; no API change, no behaviour change for them.
262
+ - **Foundations carry forward.** The 2.4.0 storage work (stable IDs, mmap layer, graph compression) and 2.5.0 #30 (SQ4 quantization, the PQ precursor) all transfer.
263
+ - **License posture is clean.** Pure Rust port from a published algorithm + permissive (MIT/Apache 2.0) Rust deps. No C++ FFI license entanglement.
264
+ - **Future-utility carry.** The cortex Rust compaction primitives (`compute_bfs_order`, `compute_hnsw_traversal_order`) stay in the codebase; if HNSW's disk locality ever becomes interesting again, the math is already there.
265
+
266
+ ### Negative / Tradeoffs
267
+
268
+ - **Build cost.** DiskANN graph construction is slower than HNSW because Vamana's α-pruning requires examining more candidate neighbours per node. On 100M vectors this is hours, not minutes. Acceptable for once-per-deployment cost.
269
+ - **PQ recall ceiling.** Product Quantization is lossy. Recall maxes out around 95–98% on typical embedding workloads; HNSW with full precision can reach 99%+. The re-ranking step recovers most of the gap. Users with extreme recall requirements (e.g., legal-discovery search) may want to stay on HNSW.
270
+ - **Filesystem-only constraint.** Cloud-storage users get no benefit from DiskANN in the first release. We've accepted this; cloud DiskANN is a future investigation, not a commitment.
271
+ - **Major version bump.** Auto-engagement changing the default index type for filesystem+cortex users is a semver-major event. brainy 8.0.0 and cortex 3.0.0 must coordinate. Some communication overhead at release time.
272
+
273
+ ### Risks
274
+
275
+ - **Correctness drift from the reference implementation.** Vamana has subtle algorithmic choices (the α-pruning order, the entry-point selection strategy) that affect recall by small but real amounts. Mitigation: explicit recall validation against the published numbers + reference implementations in 35a and 35b's exit criteria.
276
+ - **Brainy provider contract surface mismatch.** The `HnswProvider` interface was designed for HNSW; DiskANN may surface operations (codebook retraining, segment-level compaction) that don't fit cleanly. Mitigation: keep `DiskAnnInstance` as an extension of `HnswProvider` plus DiskANN-specific methods; never narrow the parent interface.
277
+ - **Migration API regressions.** `migrateToDiskAnn` runs over potentially billions of vectors. A bug here could mean hours of wasted compute or, worse, an inconsistent index. Mitigation: parallel build (the old HNSW stays serving until the new DiskANN is validated), explicit recall verification before the atomic swap, fully reversible via `migrateToHnsw`.
278
+ - **Long-running PQ codebook drift.** As vectors are added over time, the original PQ codebook can drift away from the data distribution, eroding recall. Mitigation: expose `rebuildPQCodebook()` for explicit retrains; document the operational guideline (retrain after the dataset doubles, or after a measurable recall regression).
279
+
280
+ ## Open questions
281
+
282
+ 1. **PQ codebook strategy at scale.** Do we train PQ once on a sample of the data, or use online/streaming PQ updates? Tradeoff: simpler vs. better recall over time. Lean toward sample-once-with-explicit-retrain to keep the operational model simple.
283
+ 2. **Vamana parameters as runtime config vs. baked into the file format.** R (max degree), α (density), the search candidate set padding factor — how much do we expose to users? Lean toward fixed-good-defaults in 3.0.0, expose later if a workload demands it.
284
+ 3. **Filtered search support.** brainy's `find({ where, ... })` interacts with HNSW via a filter callback. DiskANN's PQ-distance loop needs different filter integration. Plan to defer — initial release supports unfiltered top-K search; filtered search is a follow-up.
285
+ 4. **Multi-shard / single-node-of-cluster deployments.** Cortex isn't a cluster engine, but some users run multiple cortex+brainy nodes behind a load balancer. Does each node need its own DiskANN file, or can they share one? Plan to defer — start with per-node files.
286
+
287
+ ## References
288
+
289
+ - Subramanya et al., *DiskANN: Fast Accurate Billion-Point Nearest Neighbor Search on a Single Node*, NeurIPS 2019. [arXiv:1907.07574](https://arxiv.org/abs/1907.07574)
290
+ - Singh et al., *FreshDiskANN: A Fast and Accurate Graph-Based ANN Index for Streaming Similarity Search*, 2021. [arXiv:2105.09613](https://arxiv.org/abs/2105.09613)
291
+ - Microsoft DiskANN open-source reference implementation: [github.com/microsoft/DiskANN](https://github.com/microsoft/DiskANN) (MIT licensed)
292
+ - ADR-001 — Native column store with raw mmap segments (the same architectural pattern of "cortex registers a provider, brainy consumes when present")
293
+ - Brainy 7.28.0 SQ4 quantization (the PQ precursor — scalar quantization scoped to a single vector; PQ extends the same idea to subvector partitions with learned codebooks)
294
+ - Cortex 2.4.0 storage foundations: stable EntityIdMapper (#23), mmap vector backend (#24), graph link compression (#25), column-store interchange (#26)
Binary file
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@soulcraft/cortex",
3
- "version": "2.4.0",
3
+ "version": "2.5.1",
4
4
  "description": "Native Rust acceleration for Brainy — SIMD distance, vector quantization, zero-copy mmap, native embeddings. Free tier for storage, Pro license for compute acceleration.",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",
@@ -66,11 +66,11 @@
66
66
  "LICENSE"
67
67
  ],
68
68
  "peerDependencies": {
69
- "@soulcraft/brainy": ">=7.26.0"
69
+ "@soulcraft/brainy": ">=7.28.0"
70
70
  },
71
71
  "devDependencies": {
72
72
  "@napi-rs/cli": "^3.0.0",
73
- "@soulcraft/brainy": "^7.26.0",
73
+ "@soulcraft/brainy": "^7.28.0",
74
74
  "@types/node": "^22.0.0",
75
75
  "tsx": "^4.21.0",
76
76
  "typescript": "^5.9.3",