@soulcraft/cortex 2.7.2 → 2.7.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +0 -0
- package/dist/hnsw/AdaptiveDiskAnnModeSelector.d.ts +168 -0
- package/dist/hnsw/AdaptiveDiskAnnModeSelector.js +276 -0
- package/dist/hnsw/NativeDiskAnnWrapper.d.ts +284 -0
- package/dist/hnsw/NativeDiskAnnWrapper.js +738 -0
- package/dist/hnsw/NativeHNSWWrapper.js +34 -2
- package/dist/legacyLayoutGuard.d.ts +61 -0
- package/dist/legacyLayoutGuard.js +187 -0
- package/dist/resource/OsMemoryProbe.d.ts +175 -0
- package/dist/resource/OsMemoryProbe.js +206 -0
- package/dist/utils/nativeBinaryEntityIdMapper.d.ts +199 -0
- package/dist/utils/nativeBinaryEntityIdMapper.js +358 -0
- package/native/brainy-native.node +0 -0
- package/package.json +1 -1
|
@@ -0,0 +1,738 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @module hnsw/NativeDiskAnnWrapper
|
|
3
|
+
* @description TypeScript wrapper around cor's native **Adaptive
|
|
4
|
+
* DiskANN** engine. Satisfies brainy 8.0's `VectorIndexProvider`
|
|
5
|
+
* contract (see [`providerContracts.ts`](../providerContracts.ts))
|
|
6
|
+
* so brainy treats it as the standard vector index slot —
|
|
7
|
+
* `addItem` / `search` / `rebuild` / `flush` / ... Underneath,
|
|
8
|
+
* every operation routes through cor's pure-Rust DiskANN engine
|
|
9
|
+
* (`native/diskann/`).
|
|
10
|
+
*
|
|
11
|
+
* @example
|
|
12
|
+
* ```typescript
|
|
13
|
+
* import { Brainy } from '@soulcraft/brainy'
|
|
14
|
+
*
|
|
15
|
+
* // Cor is auto-detected by brainy's plugin system on init.
|
|
16
|
+
* const brain = new Brainy({
|
|
17
|
+
* storage: { type: 'filesystem', rootDirectory: '/data/idx' }
|
|
18
|
+
* })
|
|
19
|
+
* await brain.init() // [cor] Adaptive DiskANN engaged (mode auto)
|
|
20
|
+
*
|
|
21
|
+
* await brain.add({ data: 'native rust acceleration', type: 'concept' })
|
|
22
|
+
* const hits = await brain.search('billion scale ann', 10)
|
|
23
|
+
* ```
|
|
24
|
+
*
|
|
25
|
+
* @example
|
|
26
|
+
* ```typescript
|
|
27
|
+
* // Default zero-config — the adaptive selector picks Mode 1 / 2 / 3
|
|
28
|
+
* // from observed memory pressure. Most users only set `recall`.
|
|
29
|
+
* const brain = new Brainy({
|
|
30
|
+
* storage: { type: 'filesystem', rootDirectory: '/data/idx' },
|
|
31
|
+
* vector: {
|
|
32
|
+
* recall: 'balanced', // 'fast' | 'balanced' | 'accurate'
|
|
33
|
+
* }
|
|
34
|
+
* })
|
|
35
|
+
* ```
|
|
36
|
+
*
|
|
37
|
+
* ## Operating model
|
|
38
|
+
*
|
|
39
|
+
* Adaptive DiskANN is build-once, query-many: the on-disk file
|
|
40
|
+
* embeds the Vamana graph plus — depending on the build mode — an
|
|
41
|
+
* optional PQ codebook + codes section + the full vectors, all in
|
|
42
|
+
* a single contiguous mmap-able layout (see `native/diskann/src/format.rs`).
|
|
43
|
+
* Dynamic insertions go to a small in-memory **delta buffer** that
|
|
44
|
+
* brute-force-searches alongside the main index until the next
|
|
45
|
+
* `rebuild()` folds them in. This matches FreshDiskANN's published
|
|
46
|
+
* online-update model.
|
|
47
|
+
*
|
|
48
|
+
* The wrapper picks an operating mode (in-memory / hybrid /
|
|
49
|
+
* on-disk) at build time and open time via the
|
|
50
|
+
* {@link AdaptiveDiskAnnModeSelector}; see that module for the
|
|
51
|
+
* decision tree. Operators set nothing — observed memory drives
|
|
52
|
+
* the choice.
|
|
53
|
+
*
|
|
54
|
+
* ## Search path
|
|
55
|
+
*
|
|
56
|
+
* 1. Query the main index via the native DiskANN searcher:
|
|
57
|
+
* - **Mode 1**: exact-distance walk over full vectors (no PQ,
|
|
58
|
+
* no rerank — the walk's top-L is the exact top-L).
|
|
59
|
+
* - **Mode 2/3**: PQ-greedy walk in RAM (codes pinned or paged
|
|
60
|
+
* depending on mode), full-vector re-rank on the top
|
|
61
|
+
* `k × paddingFactor` candidates.
|
|
62
|
+
* 2. Brute-force the delta buffer (typically <0.1% of total
|
|
63
|
+
* size after a recent rebuild).
|
|
64
|
+
* 3. Merge + sort + truncate to `k`.
|
|
65
|
+
*
|
|
66
|
+
* ## When this wrapper engages
|
|
67
|
+
*
|
|
68
|
+
* The cor plugin (`src/plugin.ts`) registers this wrapper under
|
|
69
|
+
* brainy 8.0's canonical `'vector'` provider key. Whenever cor is
|
|
70
|
+
* installed, brainy's vector index slot resolves to Adaptive DiskANN.
|
|
71
|
+
*/
|
|
72
|
+
import { loadNativeModule } from '../native/index.js';
|
|
73
|
+
import { prodLog } from '@soulcraft/brainy/internals';
|
|
74
|
+
import { mkdirSync, writeFileSync, renameSync, existsSync, readFileSync } from 'node:fs';
|
|
75
|
+
import { dirname } from 'node:path';
|
|
76
|
+
import { autoModeForHeader, selectModeFromResourceManager, } from './AdaptiveDiskAnnModeSelector.js';
|
|
77
|
+
/**
|
|
78
|
+
* Node-count threshold above which `useMmapAdjacency: 'auto'`
|
|
79
|
+
* resolves to file-backed. Below this, the in-RAM build adjacency
|
|
80
|
+
* fits comfortably in heap; above, the per-row mutex table starts
|
|
81
|
+
* to dominate. 100 M matches the published DiskANN guidance and
|
|
82
|
+
* the comment that lives next to `AdjacencyBackend::Mmap` in the
|
|
83
|
+
* Rust crate.
|
|
84
|
+
*/
|
|
85
|
+
const MMAP_ADJACENCY_AUTO_THRESHOLD = 1e8; // 100 M nodes
|
|
86
|
+
/**
|
|
87
|
+
* **Canonical .dkann path convention** (brainy 8.0 + cor 3.0
|
|
88
|
+
* lockstep). When `DiskAnnIndexConfig.indexPath` is omitted, the
|
|
89
|
+
* wrapper derives the path by passing this key to the storage
|
|
90
|
+
* adapter's `getBinaryBlobPath`. Living under `_system/` puts
|
|
91
|
+
* the DiskANN file inside brainy's existing backup-by-default
|
|
92
|
+
* tree — no special inclusion rule needed in `db.persist` /
|
|
93
|
+
* `gsutil` / `rclone` configurations.
|
|
94
|
+
*
|
|
95
|
+
* Sharded deployments override `indexPath` explicitly with per-
|
|
96
|
+
* shard suffixes (e.g. `_system/vector-index/shard-7.dkann`).
|
|
97
|
+
*/
|
|
98
|
+
// Storage-relative key, not an absolute path: it is handed to the storage
// adapter's `getBinaryBlobPath`, which returns the real location.
const DEFAULT_INDEX_PATH_KEY = '_system/vector-index/main.dkann';
|
|
99
|
+
/**
|
|
100
|
+
* Derive the canonical `.dkann` path from a storage adapter, or
|
|
101
|
+
* return `null` if the adapter doesn't expose `getBinaryBlobPath`
|
|
102
|
+
* (cloud adapters that don't support binary blobs, or no adapter
|
|
103
|
+
* supplied). The caller decides whether `null` is a hard error or
|
|
104
|
+
* a graceful fallback.
|
|
105
|
+
*/
|
|
106
|
+
function deriveIndexPath(storage) {
    // A missing adapter and an adapter without the hook are handled alike:
    // both leave `resolver` as a non-function.
    const resolver = storage ? storage.getBinaryBlobPath : undefined;
    if (typeof resolver !== 'function') {
        return null;
    }
    // Invoke with the adapter as receiver so implementations may use `this`.
    const candidate = resolver.call(storage, DEFAULT_INDEX_PATH_KEY);
    // Anything other than a non-empty string counts as "no path available".
    if (typeof candidate !== 'string' || candidate.length === 0) {
        return null;
    }
    return candidate;
}
|
|
115
|
+
/**
|
|
116
|
+
* **Recall presets** (brainy 8.0 lockstep). One knob (`recall`)
|
|
117
|
+
* maps to the search-time `defaultLSearch` + `defaultPaddingFactor`
|
|
118
|
+
* pair. The `'balanced'` preset reproduces brainy 7.x's defaults so
|
|
119
|
+
* existing brains don't experience a recall shift on upgrade.
|
|
120
|
+
*
|
|
121
|
+
* Brainy 8.0's JS HNSW path exposes the same preset names with
|
|
122
|
+
* algorithm-appropriate values (`{m, efConstruction, efSearch}`
|
|
123
|
+
* tuples); see `brainy/.strategy/COR-3.0-INTEGRATION.md`.
|
|
124
|
+
*/
|
|
125
|
+
// Frozen (including each tier) so a stray write anywhere in the module cannot
// silently change recall behaviour for every wrapper instance.
const RECALL_PRESETS = Object.freeze({
    fast: Object.freeze({ defaultLSearch: 50, defaultPaddingFactor: 1.0 }),
    balanced: Object.freeze({ defaultLSearch: 100, defaultPaddingFactor: 1.2 }),
    accurate: Object.freeze({ defaultLSearch: 200, defaultPaddingFactor: 1.5 }),
});
// Baseline configuration applied before any preset or caller override.
const DEFAULTS = Object.freeze({
    // Product-quantisation shape, forwarded to the native build as `pq.m` / `pq.ksub`.
    pqM: 16,
    pqKsub: 256,
    // Vamana graph build parameters.
    maxDegree: 64,
    searchListSize: 100,
    alpha: 1.2,
    // Search-time knobs are taken from the 'balanced' preset itself so the
    // baseline and that preset cannot drift apart.
    ...RECALL_PRESETS.balanced,
    useMmapAdjacency: 'auto',
    mode: 'auto',
});
|
|
141
|
+
/**
|
|
142
|
+
* Predicate-pushdown thresholds (#10). When `find()` supplies a
|
|
143
|
+
* metadata∩graph `allowedIds` universe, the wrapper pushes it INTO the native
|
|
144
|
+
* beam walk instead of post-filtering — but only when the predicate is
|
|
145
|
+
* selective enough that post-filtering would lose recall. At or below
|
|
146
|
+
* `PUSHDOWN_MAX_SELECTIVITY` (allowed/total) the unfiltered top-(k×2) rarely
|
|
147
|
+
* holds k allowed hits, so pushdown wins; above it, post-filtering already
|
|
148
|
+
* keeps plenty and is cheaper (no slot translation of a huge set).
|
|
149
|
+
* `PUSHDOWN_MAX_LSEARCH` is the walk↔brute-force crossover: when the walk
|
|
150
|
+
* width the selectivity demands (~k/selectivity) would exceed it, the walk
|
|
151
|
+
* is BOTH slow (the bounded candidate list is O(L) per insert) and low-recall
|
|
152
|
+
* (allowed points sit outside the frontier), so pushdown switches to an exact
|
|
153
|
+
* scan of the allowed set instead. Below it, the walk runs at its natural,
|
|
154
|
+
* never-capped width. Zero-config: both are derived defaults, not user knobs.
|
|
155
|
+
*/
|
|
156
|
+
const PUSHDOWN_MAX_SELECTIVITY = 1 / 10; // allowed/total ratio, i.e. 10 %
|
|
157
|
+
const PUSHDOWN_MAX_LSEARCH = 4 * 1024; // widest filtered walk before switching to exact scan
|
|
158
|
+
/**
|
|
159
|
+
* Allowed-set size at or below which pushdown exact-scans the set rather than
|
|
160
|
+
* graph-walking it. Measured crossover (SIFT/128-d on the reference host): an
|
|
161
|
+
* exact scan of ≤ ~32k vectors (≈4M float ops, low single-digit ms) is both
|
|
162
|
+
* faster than the filtered walk's floor AND exact (recall 1.0), because the
|
|
163
|
+
* walk carries fixed traversal + re-rank overhead and its bounded candidate
|
|
164
|
+
* list is O(L) per insert. Above this the walk is the fast-approximate path
|
|
165
|
+
* for large allowed sets at higher selectivity. Zero-config default; a future
|
|
166
|
+
* O(log L) candidate list would raise the walk's competitiveness and shift
|
|
167
|
+
* this crossover up.
|
|
168
|
+
*
|
|
169
|
+
* Measured (SIFT 1M, 128-d, bxl9000): exact scan of 65,536 vectors ≈ 4–6 ms
|
|
170
|
+
* and recall 1.0; at that count the filtered walk is ~equal latency but only
|
|
171
|
+
* ~0.73 recall, so the exact scan is the better pick. Past ~65k the scan
|
|
172
|
+
* dominates query time and the walk (small L at the implied higher
|
|
173
|
+
* selectivity) wins — e.g. 5% of a 10M index (500k) correctly routes to it.
|
|
174
|
+
*/
|
|
175
|
+
const PUSHDOWN_BRUTE_MAX_ALLOWED = 64 * 1024; // allowed-set size up to which the exact scan is used
|
|
176
|
+
export class NativeDiskAnnWrapper {
|
|
177
|
+
/** Effective configuration: DEFAULTS, then the recall preset, then caller overrides, plus the resolved `indexPath`. */
config;
/** Distance callback `(query, vector)` used to brute-force the delta buffer. */
distanceFunction;
/** Storage adapter from `options.storage`, or `null` when none was supplied. */
storage;
/** Value reported by `getPersistMode()`; set from `options.persistMode`, defaulting to `'immediate'`. */
persistMode;
/** Live searcher instance — null until the first build. */
native = null;
/** Newly added entries since the last build (id → vector). Brute-force searched. */
delta = new Map();
/** Removed entries — filtered out at search time. */
tombstones = new Set();
/** Bidirectional UUID ↔ slot map for the main index. */
slotByUuid = new Map();
uuidBySlot = new Map();
/**
 * **Piece I — Adaptive DiskANN.** The selector's most recent
 * decision, exposed via {@link getLastModeSelection} for telemetry
 * and tests. `null` until the first build or open.
 */
lastSelection = null;
|
|
196
|
+
constructor(config, distanceFunction, options = {}) {
|
|
197
|
+
// Merge order matters:
|
|
198
|
+
// 1. DEFAULTS — baseline (matches the 'balanced' preset).
|
|
199
|
+
// 2. recall preset — overrides DEFAULTS for the chosen tier
|
|
200
|
+
// WHEN the user supplied `recall`.
|
|
201
|
+
// 3. config — user overrides win over both. So
|
|
202
|
+
// `recall: 'fast', defaultLSearch: 200`
|
|
203
|
+
// gives lSearch=200, not the fast preset's
|
|
204
|
+
// 50.
|
|
205
|
+
const preset = config.recall ? RECALL_PRESETS[config.recall] : {};
|
|
206
|
+
const merged = { ...DEFAULTS, ...preset, ...config };
|
|
207
|
+
// Path resolution: explicit `indexPath` wins; otherwise derive
|
|
208
|
+
// from the storage adapter using the canonical convention
|
|
209
|
+
// (`_system/vector-index/main.dkann`). If neither is available
|
|
210
|
+
// the constructor throws — without a path, the wrapper has no
|
|
211
|
+
// anchor for build, open, or rebuild.
|
|
212
|
+
const resolvedIndexPath = merged.indexPath ?? deriveIndexPath(options.storage);
|
|
213
|
+
if (!resolvedIndexPath) {
|
|
214
|
+
throw new Error('NativeDiskAnnWrapper: indexPath is required when no ' +
|
|
215
|
+
'storage adapter (with getBinaryBlobPath) is provided. ' +
|
|
216
|
+
'Pass `indexPath` explicitly or supply a filesystem-backed ' +
|
|
217
|
+
'storage in `options.storage` so the wrapper can derive ' +
|
|
218
|
+
`the canonical path \`{root}/${DEFAULT_INDEX_PATH_KEY}\`.`);
|
|
219
|
+
}
|
|
220
|
+
this.config = { ...merged, indexPath: resolvedIndexPath };
|
|
221
|
+
this.distanceFunction = distanceFunction;
|
|
222
|
+
this.storage = options.storage ?? null;
|
|
223
|
+
this.persistMode = options.persistMode ?? 'immediate';
|
|
224
|
+
// Try to open an existing file. If absent, the index stays
|
|
225
|
+
// empty until the first rebuild() flushes the delta buffer.
|
|
226
|
+
this.tryOpenExisting();
|
|
227
|
+
}
|
|
228
|
+
/**
|
|
229
|
+
* Append an entry to the delta buffer. Persisted by the next
|
|
230
|
+
* `rebuild()` call, which folds the delta into the main index.
|
|
231
|
+
*/
|
|
232
|
+
async addItem(item) {
|
|
233
|
+
if (this.tombstones.has(item.id)) {
|
|
234
|
+
this.tombstones.delete(item.id);
|
|
235
|
+
}
|
|
236
|
+
this.delta.set(item.id, item.vector);
|
|
237
|
+
return item.id;
|
|
238
|
+
}
|
|
239
|
+
/**
|
|
240
|
+
* Mark an entry as removed. Filtered out at search time; physically
|
|
241
|
+
* removed at the next `rebuild()`.
|
|
242
|
+
*/
|
|
243
|
+
async removeItem(id) {
|
|
244
|
+
const inDelta = this.delta.delete(id);
|
|
245
|
+
const inMain = this.slotByUuid.has(id);
|
|
246
|
+
if (inMain)
|
|
247
|
+
this.tombstones.add(id);
|
|
248
|
+
return inDelta || inMain;
|
|
249
|
+
}
|
|
250
|
+
async search(queryVector, k = 10, filter, options) {
|
|
251
|
+
// Predicate pushdown: a SELECTIVE metadata∩graph universe is evaluated
|
|
252
|
+
// INSIDE the native walk rather than dropped afterwards.
|
|
253
|
+
const allowedIds = options?.allowedIds;
|
|
254
|
+
if (allowedIds && this.native && allowedIds.size > 0) {
|
|
255
|
+
const mainSize = this.native.size();
|
|
256
|
+
const selectivity = mainSize > 0 ? allowedIds.size / mainSize : 1;
|
|
257
|
+
if (selectivity <= PUSHDOWN_MAX_SELECTIVITY) {
|
|
258
|
+
return this.searchPushdown(queryVector, k, allowedIds, filter, options);
|
|
259
|
+
}
|
|
260
|
+
// Not selective enough to push down — fold allowedIds into the
|
|
261
|
+
// post-filter predicate so the result semantics are identical.
|
|
262
|
+
const inner = filter;
|
|
263
|
+
filter = async (id) => {
|
|
264
|
+
if (!allowedIds.has(id))
|
|
265
|
+
return false;
|
|
266
|
+
return inner ? inner(id) : true;
|
|
267
|
+
};
|
|
268
|
+
}
|
|
269
|
+
const lSearch = Math.max(this.config.defaultLSearch, k * 2);
|
|
270
|
+
const padding = options?.rerank?.multiplier ?? this.config.defaultPaddingFactor;
|
|
271
|
+
// 1. Main-index PQ-greedy walk (returns slot ids).
|
|
272
|
+
const mainHits = this.native
|
|
273
|
+
? this.native.search(Array.from(queryVector), k * 2, // over-fetch so filter / tombstone losses don't starve final result
|
|
274
|
+
lSearch, padding)
|
|
275
|
+
: [];
|
|
276
|
+
// 2. Hydrate slot → uuid; drop tombstoned + filter-rejected.
|
|
277
|
+
const merged = [];
|
|
278
|
+
for (const hit of mainHits) {
|
|
279
|
+
const uuid = this.uuidBySlot.get(hit.slot);
|
|
280
|
+
if (!uuid)
|
|
281
|
+
continue;
|
|
282
|
+
if (this.tombstones.has(uuid))
|
|
283
|
+
continue;
|
|
284
|
+
if (filter && !(await filter(uuid)))
|
|
285
|
+
continue;
|
|
286
|
+
merged.push([uuid, hit.distance]);
|
|
287
|
+
}
|
|
288
|
+
// 3. Brute-force the delta buffer.
|
|
289
|
+
for (const [id, v] of this.delta) {
|
|
290
|
+
if (filter && !(await filter(id)))
|
|
291
|
+
continue;
|
|
292
|
+
const d = this.distanceFunction(queryVector, v);
|
|
293
|
+
merged.push([id, d]);
|
|
294
|
+
}
|
|
295
|
+
// 4. Sort ascending by distance, truncate to k.
|
|
296
|
+
merged.sort((a, b) => a[1] - b[1]);
|
|
297
|
+
return merged.slice(0, k);
|
|
298
|
+
}
|
|
299
|
+
/**
|
|
300
|
+
* Predicate-pushdown search (#10). Translates the metadata∩graph entity
|
|
301
|
+
* universe to main-index slots, pushes them into the native beam walk
|
|
302
|
+
* (which traverses *through* all nodes but only collects allowed slots),
|
|
303
|
+
* and inflates `l_search` by ~1/selectivity so a selective predicate still
|
|
304
|
+
* surfaces k allowed neighbours. Delta-buffer entries (no slot yet) are
|
|
305
|
+
* brute-forced. This is the high-recall path for selective filtered
|
|
306
|
+
* `find()` — post-filtering the unfiltered top-k loses recall when few of
|
|
307
|
+
* those candidates pass the predicate.
|
|
308
|
+
*
|
|
309
|
+
* @param queryVector - The query embedding.
|
|
310
|
+
* @param k - Number of nearest allowed neighbours to return.
|
|
311
|
+
* @param allowedIds - Entity UUIDs permitted in the result (the find() universe).
|
|
312
|
+
* @param filter - Optional additional async predicate, composed with allowedIds.
|
|
313
|
+
* @param options - Optional rerank multiplier override.
|
|
314
|
+
* @returns Up to k `[uuid, distance]` pairs, ascending by distance.
|
|
315
|
+
*/
|
|
316
|
+
async searchPushdown(queryVector, k, allowedIds, filter, options) {
|
|
317
|
+
const padding = options?.rerank?.multiplier ?? this.config.defaultPaddingFactor;
|
|
318
|
+
const merged = [];
|
|
319
|
+
// 1. Native filtered walk over the main index (allowed slots only).
|
|
320
|
+
if (this.native) {
|
|
321
|
+
const allowedSlots = [];
|
|
322
|
+
for (const id of allowedIds) {
|
|
323
|
+
const slot = this.slotByUuid.get(id);
|
|
324
|
+
if (slot !== undefined)
|
|
325
|
+
allowedSlots.push(slot);
|
|
326
|
+
}
|
|
327
|
+
if (allowedSlots.length > 0) {
|
|
328
|
+
const mainSize = this.native.size();
|
|
329
|
+
const selectivity = allowedSlots.length / Math.max(1, mainSize);
|
|
330
|
+
// Over-fetch so tombstone + extra-filter losses don't starve k.
|
|
331
|
+
const nativeK = Math.min(allowedSlots.length, Math.max(k * 2, k));
|
|
332
|
+
// Walk width needed to surface k allowed neighbours (~k/selectivity).
|
|
333
|
+
const idealL = Math.ceil((k / Math.max(selectivity, 1e-9)) * 1.5);
|
|
334
|
+
// Exact-scan when the allowed set is small enough to scan cheaply, OR
|
|
335
|
+
// when the walk would be capped (extreme selectivity). Both regimes:
|
|
336
|
+
// brute is faster than the walk AND exact (recall 1.0). The walk only
|
|
337
|
+
// wins for large allowed sets at higher selectivity, where its L stays
|
|
338
|
+
// small. (Measured on SIFT: brute beats the walk up to ~32k allowed.)
|
|
339
|
+
const useBrute = allowedSlots.length <= PUSHDOWN_BRUTE_MAX_ALLOWED || idealL > PUSHDOWN_MAX_LSEARCH;
|
|
340
|
+
let hits;
|
|
341
|
+
if (useBrute) {
|
|
342
|
+
// Exact scan of the allowed set: recall 1.0, O(|allowed|·dim).
|
|
343
|
+
hits = this.native.searchExactSlots(Array.from(queryVector), nativeK, allowedSlots);
|
|
344
|
+
}
|
|
345
|
+
else {
|
|
346
|
+
// Medium selectivity: filtered graph walk with selectivity-tuned L
|
|
347
|
+
// (never capped here, so the bounded candidate list stays cheap).
|
|
348
|
+
const lSearch = Math.max(this.config.defaultLSearch, k * 2, idealL);
|
|
349
|
+
hits = this.native.searchFiltered(Array.from(queryVector), nativeK, allowedSlots, lSearch, padding);
|
|
350
|
+
}
|
|
351
|
+
for (const hit of hits) {
|
|
352
|
+
const uuid = this.uuidBySlot.get(hit.slot);
|
|
353
|
+
if (!uuid)
|
|
354
|
+
continue;
|
|
355
|
+
if (this.tombstones.has(uuid))
|
|
356
|
+
continue;
|
|
357
|
+
if (filter && !(await filter(uuid)))
|
|
358
|
+
continue;
|
|
359
|
+
merged.push([uuid, hit.distance]);
|
|
360
|
+
}
|
|
361
|
+
}
|
|
362
|
+
}
|
|
363
|
+
// 2. Brute-force the delta buffer (allowed entries only).
|
|
364
|
+
for (const [id, v] of this.delta) {
|
|
365
|
+
if (!allowedIds.has(id))
|
|
366
|
+
continue;
|
|
367
|
+
if (filter && !(await filter(id)))
|
|
368
|
+
continue;
|
|
369
|
+
const d = this.distanceFunction(queryVector, v);
|
|
370
|
+
merged.push([id, d]);
|
|
371
|
+
}
|
|
372
|
+
// 3. Sort ascending by distance, truncate to k.
|
|
373
|
+
merged.sort((a, b) => a[1] - b[1]);
|
|
374
|
+
return merged.slice(0, k);
|
|
375
|
+
}
|
|
376
|
+
size() {
|
|
377
|
+
const mainSize = this.native ? this.native.size() : 0;
|
|
378
|
+
return (mainSize +
|
|
379
|
+
this.delta.size -
|
|
380
|
+
// Tombstones from the main index reduce effective size.
|
|
381
|
+
this.countMainTombstones());
|
|
382
|
+
}
|
|
383
|
+
clear() {
|
|
384
|
+
this.delta.clear();
|
|
385
|
+
this.tombstones.clear();
|
|
386
|
+
this.slotByUuid.clear();
|
|
387
|
+
this.uuidBySlot.clear();
|
|
388
|
+
this.native = null;
|
|
389
|
+
}
|
|
390
|
+
/**
|
|
391
|
+
* Rebuild the main index from scratch: concatenate (current main −
|
|
392
|
+
* tombstones) ∪ delta, run a full DiskANN build, swap the searcher
|
|
393
|
+
* atomically.
|
|
394
|
+
*
|
|
395
|
+
* At billion-scale this is the expensive operation (hours of build
|
|
396
|
+
* time). Operators schedule it during off-peak; the delta buffer
|
|
397
|
+
* absorbs writes in between.
|
|
398
|
+
*
|
|
399
|
+
* The optional `recall` argument overrides the wrapper's
|
|
400
|
+
 * construction-time recall preset from this rebuild onward — useful
|
|
401
|
+
* when an operator wants to ship a higher-quality index after a
|
|
402
|
+
* data-quality push but kept the brain on `'balanced'` originally.
|
|
403
|
+
* Omit to keep the wrapper's current preset.
|
|
404
|
+
*/
|
|
405
|
+
async rebuild(options) {
|
|
406
|
+
const bindings = loadNativeModule();
|
|
407
|
+
// napi-rs exports the class as `NativeDiskAnn` (PascalCase
|
|
408
|
+
// normalization of the Rust ident `NativeDiskANN`). The TS type
|
|
409
|
+
// alias `NativeDiskANN = NativeDiskAnn` in `native/index.d.ts` is
|
|
410
|
+
// for backwards-compat in *types* only — at runtime there's a
|
|
411
|
+
// single export under the napi-normalized name.
|
|
412
|
+
const NativeDiskANN = bindings.NativeDiskAnn;
|
|
413
|
+
if (!NativeDiskANN) {
|
|
414
|
+
throw new Error('NativeDiskANN binding missing — rebuild requires the cor native module');
|
|
415
|
+
}
|
|
416
|
+
// Build the new logical slot ordering: (live old slots) + (delta).
|
|
417
|
+
// **Critical for billion-scale correctness**: the old vectors stay
|
|
418
|
+
// mmap'd inside the native module — we only pass slot IDs across
|
|
419
|
+
// the FFI boundary, not the vector data itself. At 1B × 1536 × 4
|
|
420
|
+
// bytes = ~6 TB this is the difference between "rebuild works" and
|
|
421
|
+
// "rebuild OOMs."
|
|
422
|
+
const liveOldSlots = [];
|
|
423
|
+
const newUuids = [];
|
|
424
|
+
if (this.native) {
|
|
425
|
+
// Iterate in slot order so the new index's first n_live slots
|
|
426
|
+
// mirror the OLD index's surviving subset in deterministic order.
|
|
427
|
+
// We deliberately iterate by sorted slot id rather than uuidBySlot
|
|
428
|
+
// insertion order — sorting keeps the Vamana entry point stable
|
|
429
|
+
// and the on-disk vector section's locality similar to the
|
|
430
|
+
// pre-rebuild file (less page-cache turnover during the post-
|
|
431
|
+
// rebuild warm-up).
|
|
432
|
+
const sortedSlots = Array.from(this.uuidBySlot.keys()).sort((a, b) => a - b);
|
|
433
|
+
for (const slot of sortedSlots) {
|
|
434
|
+
const uuid = this.uuidBySlot.get(slot);
|
|
435
|
+
if (this.tombstones.has(uuid))
|
|
436
|
+
continue;
|
|
437
|
+
liveOldSlots.push(slot);
|
|
438
|
+
newUuids.push(uuid);
|
|
439
|
+
}
|
|
440
|
+
}
|
|
441
|
+
const dim = this.config.dimensions;
|
|
442
|
+
const deltaCount = this.delta.size;
|
|
443
|
+
let deltaBuf = null;
|
|
444
|
+
if (deltaCount > 0) {
|
|
445
|
+
deltaBuf = new Float32Array(deltaCount * dim);
|
|
446
|
+
let idx = 0;
|
|
447
|
+
for (const [uuid, vector] of this.delta) {
|
|
448
|
+
if (vector.length !== dim) {
|
|
449
|
+
throw new Error(`NativeDiskAnnWrapper.rebuild: vector dim ${vector.length} ≠ index dim ${dim}`);
|
|
450
|
+
}
|
|
451
|
+
deltaBuf.set(vector, idx * dim);
|
|
452
|
+
newUuids.push(uuid);
|
|
453
|
+
idx++;
|
|
454
|
+
}
|
|
455
|
+
}
|
|
456
|
+
if (liveOldSlots.length + deltaCount === 0) {
|
|
457
|
+
prodLog?.warn?.('NativeDiskAnnWrapper.rebuild: nothing to build');
|
|
458
|
+
return;
|
|
459
|
+
}
|
|
460
|
+
const totalCount = liveOldSlots.length + deltaCount;
|
|
461
|
+
// Apply optional recall override for this rebuild only.
|
|
462
|
+
if (options?.recall) {
|
|
463
|
+
const preset = RECALL_PRESETS[options.recall];
|
|
464
|
+
this.config = { ...this.config, ...preset };
|
|
465
|
+
}
|
|
466
|
+
// **Adaptive DiskANN mode selection (Piece I)** — pick the
|
|
467
|
+
// build-time mode based on observed memory unless the caller
|
|
468
|
+
// has explicitly pinned it on the config. The selector
|
|
469
|
+
// chooses "in-memory" for brains that fit in RAM (no PQ,
|
|
470
|
+
// sub-ms search), "hybrid" for medium scale, "on-disk" when
|
|
471
|
+
// memory is tight or many brains share the box.
|
|
472
|
+
const maxDegree = this.config.maxDegree;
|
|
473
|
+
const buildMode = this.resolveBuildMode({
|
|
474
|
+
nodeCount: totalCount,
|
|
475
|
+
dim,
|
|
476
|
+
maxDegree,
|
|
477
|
+
});
|
|
478
|
+
this.lastSelection = buildMode.selection;
|
|
479
|
+
const cfg = {
|
|
480
|
+
mode: buildMode.mode,
|
|
481
|
+
vamana: {
|
|
482
|
+
maxDegree,
|
|
483
|
+
searchListSize: this.config.searchListSize,
|
|
484
|
+
alpha: this.config.alpha,
|
|
485
|
+
seed: BigInt(0xd15ca4440ffff00dn),
|
|
486
|
+
parallel: true,
|
|
487
|
+
parallelBatch: 64,
|
|
488
|
+
},
|
|
489
|
+
// pq config is read only when mode is "hybrid" or "on-disk";
|
|
490
|
+
// the napi surface ignores it for "in-memory". The wrapper's
|
|
491
|
+
// internal DEFAULTS supply pqM / pqKsub for the PQ modes.
|
|
492
|
+
pq: {
|
|
493
|
+
m: this.config.pqM,
|
|
494
|
+
ksub: this.config.pqKsub,
|
|
495
|
+
iterations: 25,
|
|
496
|
+
trainingSample: Math.min(200_000, totalCount),
|
|
497
|
+
},
|
|
498
|
+
adjacency: this.resolveAdjacencyBackend(totalCount),
|
|
499
|
+
};
|
|
500
|
+
// Ensure the .dkann's parent dir exists — the native build writes straight
|
|
501
|
+
// to outputPath and won't create intermediate dirs (e.g. _system/vector-index).
|
|
502
|
+
mkdirSync(dirname(this.config.indexPath), { recursive: true });
|
|
503
|
+
const newNative = NativeDiskANN.rebuildFromExisting({
|
|
504
|
+
existingPath: this.native ? this.config.indexPath : undefined,
|
|
505
|
+
liveOldSlots,
|
|
506
|
+
deltaVectors: deltaBuf != null
|
|
507
|
+
? Buffer.from(deltaBuf.buffer, deltaBuf.byteOffset, deltaBuf.byteLength)
|
|
508
|
+
: undefined,
|
|
509
|
+
deltaCount,
|
|
510
|
+
dim,
|
|
511
|
+
outputPath: this.config.indexPath,
|
|
512
|
+
cfg,
|
|
513
|
+
});
|
|
514
|
+
// Rebuild the bidirectional UUID↔slot maps from `newUuids`. New
|
|
515
|
+
// slot `i` corresponds to `newUuids[i]` — this matches the napi
|
|
516
|
+
// layout invariant (live old slots first, delta tail second).
|
|
517
|
+
const newSlotByUuid = new Map();
|
|
518
|
+
const newUuidBySlot = new Map();
|
|
519
|
+
for (let i = 0; i < newUuids.length; i++) {
|
|
520
|
+
newSlotByUuid.set(newUuids[i], i);
|
|
521
|
+
newUuidBySlot.set(i, newUuids[i]);
|
|
522
|
+
}
|
|
523
|
+
// Atomic swap.
|
|
524
|
+
this.native = newNative;
|
|
525
|
+
this.slotByUuid = newSlotByUuid;
|
|
526
|
+
this.uuidBySlot = newUuidBySlot;
|
|
527
|
+
this.delta.clear();
|
|
528
|
+
this.tombstones.clear();
|
|
529
|
+
// Persist the slot map (slot i → uuid) next to the .dkann so a cold restart
|
|
530
|
+
// can rehydrate it (tryOpenExisting/loadSlots). Without this the reopened
|
|
531
|
+
// index can't map results to entity IDs → vector search returns empty.
|
|
532
|
+
this.persistSlots(newUuids);
|
|
533
|
+
}
|
|
534
|
+
/**
|
|
535
|
+
 * Flush hook — nothing is written to disk. For DiskANN the delta is in-memory
|
|
536
|
+
* by design (a few MB at most between rebuilds); returns the buffer
|
|
537
|
+
* size for parity with HNSW's flush contract.
|
|
538
|
+
*/
|
|
539
|
+
async flush() {
|
|
540
|
+
return this.delta.size;
|
|
541
|
+
}
|
|
542
|
+
getPersistMode() {
|
|
543
|
+
return this.persistMode;
|
|
544
|
+
}
|
|
545
|
+
tryOpenExisting() {
|
|
546
|
+
try {
|
|
547
|
+
const bindings = loadNativeModule();
|
|
548
|
+
// napi-rs exports the class as `NativeDiskAnn` (PascalCase
|
|
549
|
+
// normalization of the Rust ident `NativeDiskANN`). The TS
|
|
550
|
+
// type alias `NativeDiskANN = NativeDiskAnn` in
|
|
551
|
+
// `native/index.d.ts` is for backwards-compat in *types*
|
|
552
|
+
// only — at runtime there's a single export under the
|
|
553
|
+
// napi-normalized name.
|
|
554
|
+
const NativeDiskANN = bindings.NativeDiskAnn;
|
|
555
|
+
if (!NativeDiskANN)
|
|
556
|
+
return;
|
|
557
|
+
// **Adaptive DiskANN open-mode selection (Piece I).** Open
|
|
558
|
+
// with Auto first to read the header; the selector then
|
|
559
|
+
// decides whether the file should be reopened with a
|
|
560
|
+
// different mode. Common case (Auto's resolution matches the
|
|
561
|
+
// selector's pick) is a single open; only mismatches pay the
|
|
562
|
+
// reopen cost.
|
|
563
|
+
let native = NativeDiskANN.openExisting(this.config.indexPath);
|
|
564
|
+
const header = native.header();
|
|
565
|
+
const stats = {
|
|
566
|
+
nodeCount: header.nodeCount,
|
|
567
|
+
dim: header.dim,
|
|
568
|
+
maxDegree: header.maxDegree,
|
|
569
|
+
pqM: header.pqM,
|
|
570
|
+
};
|
|
571
|
+
const selection = this.resolveOpenMode(stats);
|
|
572
|
+
this.lastSelection = selection.selection;
|
|
573
|
+
const autoMode = autoModeForHeader({ pqM: header.pqM });
|
|
574
|
+
if (selection.mode !== autoMode) {
|
|
575
|
+
native = NativeDiskANN.openExisting(this.config.indexPath, selection.mode);
|
|
576
|
+
}
|
|
577
|
+
this.native = native;
|
|
578
|
+
// Rehydrate the persisted slot map (slot i → uuid) that rebuild() writes
|
|
579
|
+
// next to the .dkann. The native index stores vectors by slot only, so
|
|
580
|
+
// without this map search() can't map hits back to entity IDs (returns
|
|
581
|
+
// empty on a cold restart) and rebuild() would enumerate zero existing
|
|
582
|
+
// entries and drop the persisted index. If the map is missing/corrupt the
|
|
583
|
+
// loaded index is unusable on its own — discard it so the next rebuild()
|
|
584
|
+
// regenerates both consistently.
|
|
585
|
+
if (!this.loadSlots()) {
|
|
586
|
+
this.native = null;
|
|
587
|
+
}
|
|
588
|
+
}
|
|
589
|
+
catch {
|
|
590
|
+
// No existing file — index stays empty until first rebuild().
|
|
591
|
+
this.native = null;
|
|
592
|
+
}
|
|
593
|
+
}
|
|
594
|
+
/** Sibling path for the persisted slot map, next to the `.dkann`. */
|
|
595
|
+
slotsPath() {
|
|
596
|
+
return this.config.indexPath.replace(/\.dkann$/, '.slots.json');
|
|
597
|
+
}
|
|
598
|
+
/**
|
|
599
|
+
* Persist the slot map (slot i → `uuids[i]`) next to the `.dkann` so a cold
|
|
600
|
+
* restart can rehydrate it (see {@link tryOpenExisting}). Atomic tmp+rename.
|
|
601
|
+
* Lives under the same `_system/` tree as the index, so it travels with
|
|
602
|
+
* `db.persist` and bypasses branch scoping just like the `.dkann`.
|
|
603
|
+
*/
|
|
604
|
+
persistSlots(uuids) {
|
|
605
|
+
try {
|
|
606
|
+
const p = this.slotsPath();
|
|
607
|
+
mkdirSync(dirname(p), { recursive: true });
|
|
608
|
+
const tmp = `${p}.tmp`;
|
|
609
|
+
writeFileSync(tmp, JSON.stringify(uuids));
|
|
610
|
+
renameSync(tmp, p);
|
|
611
|
+
}
|
|
612
|
+
catch (e) {
|
|
613
|
+
prodLog?.warn?.(`NativeDiskAnnWrapper: failed to persist slot map: ${String(e)}`);
|
|
614
|
+
}
|
|
615
|
+
}
|
|
616
|
+
/**
|
|
617
|
+
* Rehydrate `slotByUuid`/`uuidBySlot` from the persisted slot map. Returns
|
|
618
|
+
* `false` (caller discards the loaded index) when the map is absent or
|
|
619
|
+
* unreadable — the native index alone cannot map slots → entity IDs.
|
|
620
|
+
*/
|
|
621
|
+
loadSlots() {
|
|
622
|
+
try {
|
|
623
|
+
const p = this.slotsPath();
|
|
624
|
+
if (!existsSync(p))
|
|
625
|
+
return false;
|
|
626
|
+
const uuids = JSON.parse(readFileSync(p, 'utf8'));
|
|
627
|
+
if (!Array.isArray(uuids))
|
|
628
|
+
return false;
|
|
629
|
+
const slotByUuid = new Map();
|
|
630
|
+
const uuidBySlot = new Map();
|
|
631
|
+
for (let i = 0; i < uuids.length; i++) {
|
|
632
|
+
slotByUuid.set(uuids[i], i);
|
|
633
|
+
uuidBySlot.set(i, uuids[i]);
|
|
634
|
+
}
|
|
635
|
+
this.slotByUuid = slotByUuid;
|
|
636
|
+
this.uuidBySlot = uuidBySlot;
|
|
637
|
+
return true;
|
|
638
|
+
}
|
|
639
|
+
catch {
|
|
640
|
+
return false;
|
|
641
|
+
}
|
|
642
|
+
}
|
|
643
|
+
/**
|
|
644
|
+
* **Piece I.** Resolve the open-time mode for an existing file.
|
|
645
|
+
* Respects an explicit `config.mode` override; otherwise consults
|
|
646
|
+
* the adaptive selector.
|
|
647
|
+
*/
|
|
648
|
+
resolveOpenMode(stats) {
|
|
649
|
+
const explicit = this.config.mode;
|
|
650
|
+
if (explicit !== 'auto') {
|
|
651
|
+
// Synthesise a minimal ModeSelection so telemetry callers
|
|
652
|
+
// still see a populated lastSelection. estimatedBytes left
|
|
653
|
+
// at 0 — the override skipped the cost calc.
|
|
654
|
+
return {
|
|
655
|
+
mode: explicit,
|
|
656
|
+
selection: {
|
|
657
|
+
mode: explicit,
|
|
658
|
+
reason: 'no-pq-file',
|
|
659
|
+
estimatedBytes: 0,
|
|
660
|
+
perBrainAvailable: 0,
|
|
661
|
+
},
|
|
662
|
+
};
|
|
663
|
+
}
|
|
664
|
+
const selection = selectModeFromResourceManager(stats);
|
|
665
|
+
return { mode: selection.mode, selection };
|
|
666
|
+
}
|
|
667
|
+
/**
|
|
668
|
+
* **Piece I.** Resolve the build-time mode. Same override-respecting
|
|
669
|
+
* shape as {@link resolveOpenMode}; the build-time decision
|
|
670
|
+
* additionally lets the selector choose Mode 1 (no PQ) when the
|
|
671
|
+
* brain fits in RAM.
|
|
672
|
+
*/
|
|
673
|
+
resolveBuildMode(stats) {
|
|
674
|
+
const explicit = this.config.mode;
|
|
675
|
+
if (explicit !== 'auto') {
|
|
676
|
+
return {
|
|
677
|
+
mode: explicit,
|
|
678
|
+
selection: {
|
|
679
|
+
mode: explicit,
|
|
680
|
+
reason: explicit === 'in-memory' ? 'fits-in-memory' : 'codes-pinned',
|
|
681
|
+
estimatedBytes: 0,
|
|
682
|
+
perBrainAvailable: 0,
|
|
683
|
+
},
|
|
684
|
+
};
|
|
685
|
+
}
|
|
686
|
+
const selection = selectModeFromResourceManager(stats);
|
|
687
|
+
return { mode: selection.mode, selection };
|
|
688
|
+
}
|
|
689
|
+
/**
|
|
690
|
+
* **Piece I.** The selector's most recent decision (either at the
|
|
691
|
+
* last `tryOpenExisting` or `rebuild`). Returns `null` if no
|
|
692
|
+
* brain has been opened or built yet. Surface for telemetry and
|
|
693
|
+
* tests.
|
|
694
|
+
*/
|
|
695
|
+
getLastModeSelection() {
|
|
696
|
+
return this.lastSelection;
|
|
697
|
+
}
|
|
698
|
+
/**
|
|
699
|
+
* **Test-only hook** for exercising the build-adjacency resolver
|
|
700
|
+
* without spinning up a real build. Mirrors the
|
|
701
|
+
* `_testInjectOsMemoryProbe` pattern (Piece J); not part of the
|
|
702
|
+
* public contract.
|
|
703
|
+
* @internal
|
|
704
|
+
*/
|
|
705
|
+
_testResolveAdjacencyBackend(nodeCount) {
|
|
706
|
+
return this.resolveAdjacencyBackend(nodeCount);
|
|
707
|
+
}
|
|
708
|
+
/**
|
|
709
|
+
* Resolve the build-time adjacency backend from the config plus
|
|
710
|
+
* the actual node count. `useMmapAdjacency: 'auto'` (the default)
|
|
711
|
+
* picks file-backed once `nodeCount > MMAP_ADJACENCY_AUTO_THRESHOLD`;
|
|
712
|
+
* explicit `true` / `false` always wins. Per the zero-config
|
|
713
|
+
* principle, operators don't have to know the 100 M threshold —
|
|
714
|
+
* the wrapper observes `totalCount` and flips automatically.
|
|
715
|
+
*/
|
|
716
|
+
resolveAdjacencyBackend(nodeCount) {
|
|
717
|
+
const setting = this.config.useMmapAdjacency;
|
|
718
|
+
const useMmap = setting === 'auto'
|
|
719
|
+
? nodeCount > MMAP_ADJACENCY_AUTO_THRESHOLD
|
|
720
|
+
: setting === true;
|
|
721
|
+
if (useMmap) {
|
|
722
|
+
return {
|
|
723
|
+
kind: 'mmap',
|
|
724
|
+
mmapPath: this.config.mmapAdjacencyPath ?? `${this.config.indexPath}.adj`,
|
|
725
|
+
};
|
|
726
|
+
}
|
|
727
|
+
return { kind: 'ram' };
|
|
728
|
+
}
|
|
729
|
+
countMainTombstones() {
|
|
730
|
+
let n = 0;
|
|
731
|
+
for (const uuid of this.tombstones) {
|
|
732
|
+
if (this.slotByUuid.has(uuid))
|
|
733
|
+
n++;
|
|
734
|
+
}
|
|
735
|
+
return n;
|
|
736
|
+
}
|
|
737
|
+
}
|
|
738
|
+
//# sourceMappingURL=NativeDiskAnnWrapper.js.map
|