@soulcraft/cortex 2.5.1 → 2.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -181,40 +181,60 @@ export class NativeDiskAnnWrapper {
|
|
|
181
181
|
*/
|
|
182
182
|
async rebuild(options) {
|
|
183
183
|
const bindings = loadNativeModule();
|
|
184
|
-
|
|
184
|
+
// napi-rs exports the class as `NativeDiskAnn` (PascalCase
|
|
185
|
+
// normalization of the Rust ident `NativeDiskANN`). The TS type
|
|
186
|
+
// alias `NativeDiskANN = NativeDiskAnn` in `native/index.d.ts` is
|
|
187
|
+
// for backwards-compat in *types* only — at runtime there's a
|
|
188
|
+
// single export under the napi-normalized name.
|
|
189
|
+
const NativeDiskANN = bindings.NativeDiskAnn;
|
|
185
190
|
if (!NativeDiskANN) {
|
|
186
191
|
throw new Error('NativeDiskANN binding missing — rebuild requires the cortex native module');
|
|
187
192
|
}
|
|
188
|
-
//
|
|
189
|
-
|
|
193
|
+
// Build the new logical slot ordering: (live old slots) + (delta).
|
|
194
|
+
// **Critical for billion-scale correctness**: the old vectors stay
|
|
195
|
+
// mmap'd inside the native module — we only pass slot IDs across
|
|
196
|
+
// the FFI boundary, not the vector data itself. At 1B × 1536 × 4
|
|
197
|
+
// bytes = ~6 TB this is the difference between "rebuild works" and
|
|
198
|
+
// "rebuild OOMs."
|
|
199
|
+
const liveOldSlots = [];
|
|
200
|
+
const newUuids = [];
|
|
190
201
|
if (this.native) {
|
|
191
|
-
// Iterate
|
|
192
|
-
//
|
|
193
|
-
//
|
|
194
|
-
//
|
|
195
|
-
//
|
|
196
|
-
//
|
|
202
|
+
// Iterate in slot order so the new index's first n_live slots
|
|
203
|
+
// mirror the OLD index's surviving subset in deterministic order.
|
|
204
|
+
// We deliberately iterate by sorted slot id rather than uuidBySlot
|
|
205
|
+
// insertion order — sorting keeps the Vamana entry point stable
|
|
206
|
+
// and the on-disk vector section's locality similar to the
|
|
207
|
+
// pre-rebuild file (less page-cache turnover during the post-
|
|
208
|
+
// rebuild warm-up).
|
|
209
|
+
const sortedSlots = Array.from(this.uuidBySlot.keys()).sort((a, b) => a - b);
|
|
210
|
+
for (const slot of sortedSlots) {
|
|
211
|
+
const uuid = this.uuidBySlot.get(slot);
|
|
212
|
+
if (this.tombstones.has(uuid))
|
|
213
|
+
continue;
|
|
214
|
+
liveOldSlots.push(slot);
|
|
215
|
+
newUuids.push(uuid);
|
|
216
|
+
}
|
|
197
217
|
}
|
|
198
|
-
|
|
199
|
-
|
|
218
|
+
const dim = this.config.dimensions;
|
|
219
|
+
const deltaCount = this.delta.size;
|
|
220
|
+
let deltaBuf = null;
|
|
221
|
+
if (deltaCount > 0) {
|
|
222
|
+
deltaBuf = new Float32Array(deltaCount * dim);
|
|
223
|
+
let idx = 0;
|
|
224
|
+
for (const [uuid, vector] of this.delta) {
|
|
225
|
+
if (vector.length !== dim) {
|
|
226
|
+
throw new Error(`NativeDiskAnnWrapper.rebuild: vector dim ${vector.length} ≠ index dim ${dim}`);
|
|
227
|
+
}
|
|
228
|
+
deltaBuf.set(vector, idx * dim);
|
|
229
|
+
newUuids.push(uuid);
|
|
230
|
+
idx++;
|
|
231
|
+
}
|
|
200
232
|
}
|
|
201
|
-
if (
|
|
233
|
+
if (liveOldSlots.length + deltaCount === 0) {
|
|
202
234
|
prodLog?.warn?.('NativeDiskAnnWrapper.rebuild: nothing to build');
|
|
203
235
|
return;
|
|
204
236
|
}
|
|
205
|
-
const
|
|
206
|
-
const buf = new Float32Array(allVectors.length * dim);
|
|
207
|
-
const newSlotByUuid = new Map();
|
|
208
|
-
const newUuidBySlot = new Map();
|
|
209
|
-
for (let i = 0; i < allVectors.length; i++) {
|
|
210
|
-
const v = allVectors[i].vector;
|
|
211
|
-
if (v.length !== dim) {
|
|
212
|
-
throw new Error(`NativeDiskAnnWrapper.rebuild: vector dim ${v.length} ≠ index dim ${dim}`);
|
|
213
|
-
}
|
|
214
|
-
buf.set(v, i * dim);
|
|
215
|
-
newSlotByUuid.set(allVectors[i].id, i);
|
|
216
|
-
newUuidBySlot.set(i, allVectors[i].id);
|
|
217
|
-
}
|
|
237
|
+
const totalCount = liveOldSlots.length + deltaCount;
|
|
218
238
|
const cfg = {
|
|
219
239
|
vamana: {
|
|
220
240
|
maxDegree: options?.maxDegree ?? this.config.maxDegree,
|
|
@@ -228,7 +248,7 @@ export class NativeDiskAnnWrapper {
|
|
|
228
248
|
m: options?.pqM ?? this.config.pqM,
|
|
229
249
|
ksub: options?.pqKsub ?? this.config.pqKsub,
|
|
230
250
|
iterations: 25,
|
|
231
|
-
trainingSample: Math.min(200_000,
|
|
251
|
+
trainingSample: Math.min(200_000, totalCount),
|
|
232
252
|
},
|
|
233
253
|
adjacency: this.config.useMmapAdjacency
|
|
234
254
|
? {
|
|
@@ -237,9 +257,27 @@ export class NativeDiskAnnWrapper {
|
|
|
237
257
|
}
|
|
238
258
|
: { kind: 'ram' },
|
|
239
259
|
};
|
|
240
|
-
const newNative = NativeDiskANN.
|
|
241
|
-
|
|
242
|
-
|
|
260
|
+
const newNative = NativeDiskANN.rebuildFromExisting({
|
|
261
|
+
existingPath: this.native ? this.config.indexPath : undefined,
|
|
262
|
+
liveOldSlots,
|
|
263
|
+
deltaVectors: deltaBuf != null
|
|
264
|
+
? Buffer.from(deltaBuf.buffer, deltaBuf.byteOffset, deltaBuf.byteLength)
|
|
265
|
+
: undefined,
|
|
266
|
+
deltaCount,
|
|
267
|
+
dim,
|
|
268
|
+
outputPath: this.config.indexPath,
|
|
269
|
+
cfg,
|
|
270
|
+
});
|
|
271
|
+
// Rebuild the bidirectional UUID↔slot maps from `newUuids`. New
|
|
272
|
+
// slot `i` corresponds to `newUuids[i]` — this matches the napi
|
|
273
|
+
// layout invariant (live old slots first, delta tail second).
|
|
274
|
+
const newSlotByUuid = new Map();
|
|
275
|
+
const newUuidBySlot = new Map();
|
|
276
|
+
for (let i = 0; i < newUuids.length; i++) {
|
|
277
|
+
newSlotByUuid.set(newUuids[i], i);
|
|
278
|
+
newUuidBySlot.set(i, newUuids[i]);
|
|
279
|
+
}
|
|
280
|
+
// Atomic swap.
|
|
243
281
|
this.native = newNative;
|
|
244
282
|
this.slotByUuid = newSlotByUuid;
|
|
245
283
|
this.uuidBySlot = newUuidBySlot;
|
|
@@ -260,7 +298,12 @@ export class NativeDiskAnnWrapper {
|
|
|
260
298
|
tryOpenExisting() {
|
|
261
299
|
try {
|
|
262
300
|
const bindings = loadNativeModule();
|
|
263
|
-
|
|
301
|
+
// napi-rs exports the class as `NativeDiskAnn` (PascalCase
|
|
302
|
+
// normalization of the Rust ident `NativeDiskANN`). The TS type
|
|
303
|
+
// alias `NativeDiskANN = NativeDiskAnn` in `native/index.d.ts` is
|
|
304
|
+
// for backwards-compat in *types* only — at runtime there's a
|
|
305
|
+
// single export under the napi-normalized name.
|
|
306
|
+
const NativeDiskANN = bindings.NativeDiskAnn;
|
|
264
307
|
if (!NativeDiskANN)
|
|
265
308
|
return;
|
|
266
309
|
this.native = NativeDiskANN.openExisting(this.config.indexPath);
|
|
@@ -26,6 +26,26 @@
|
|
|
26
26
|
* (256 sharded per-UUID mutexes in the native layer). Lookups are
|
|
27
27
|
* lock-free. The wrapper holds no JS-side mutable state besides the
|
|
28
28
|
* native handle.
|
|
29
|
+
*
|
|
30
|
+
* ## IdSpace (Piece 10)
|
|
31
|
+
*
|
|
32
|
+
* The wrapper supports two entity-int wire widths:
|
|
33
|
+
*
|
|
34
|
+
* - `'u32'` (default): cortex 2.x compatible — JS `number` throughout,
|
|
35
|
+
* capped at 4.29 B entities. Persists in the legacy `int_to_uuid.bin`
|
|
36
|
+
* v1 header.
|
|
37
|
+
* - `'u64'`: opt-in via `idSpace: 'u64'`. The native layer's `number`
|
|
38
|
+
* surface throws in this mode, so the wrapper transparently routes
|
|
39
|
+
* the `EntityIdMapperProvider` methods through the BigInt napi
|
|
40
|
+
* siblings and converts BigInt → number at the boundary. Entity ints
|
|
41
|
+
* above `Number.MAX_SAFE_INTEGER` (2^53 - 1 ≈ 9.007 × 10^15 entities)
|
|
42
|
+
* throw a clear `EntityIdSpaceExceeded` error so callers get a loud
|
|
43
|
+
* failure rather than a silent precision loss.
|
|
44
|
+
*
|
|
45
|
+
* The `getOrAssignBig` / `getIntBig` / `getUuidBig` / `sizeBig`
|
|
46
|
+
* sibling methods are available on both modes and always return
|
|
47
|
+
* `bigint` losslessly — use them in u64-aware code paths that need
|
|
48
|
+
* the full u64 range.
|
|
29
49
|
*/
|
|
30
50
|
import type { StorageAdapter } from '@soulcraft/brainy';
|
|
31
51
|
import type { EntityIdMapperProvider } from '../providerContracts.js';
|
|
@@ -50,6 +70,33 @@ export interface NativeBinaryEntityIdMapperOptions {
|
|
|
50
70
|
bucketCapacity?: number;
|
|
51
71
|
/** Maximum extendible-hash directory depth. Default 28. */
|
|
52
72
|
maxGlobalDepth?: number;
|
|
73
|
+
/**
|
|
74
|
+
* Entity-int wire width. `'u32'` (default) caps at 4.29 B entities
|
|
75
|
+
* and is cortex 2.x compatible. `'u64'` opts into the Piece 10 U64
|
|
76
|
+
* IdSpace — required when targeting >4.29 B entities. The
|
|
77
|
+
* `EntityIdMapperProvider` `number`-typed methods still work in U64
|
|
78
|
+
* mode by routing through the BigInt napi siblings; entity ints
|
|
79
|
+
* above `Number.MAX_SAFE_INTEGER` (2^53 - 1) throw an explicit
|
|
80
|
+
* `EntityIdSpaceExceeded` error.
|
|
81
|
+
*
|
|
82
|
+
* Ignored on open when the underlying file's header disagrees: the
|
|
83
|
+
* on-disk format wins and any mismatch is surfaced as a hard error.
|
|
84
|
+
*/
|
|
85
|
+
idSpace?: 'u32' | 'u64';
|
|
86
|
+
}
|
|
87
|
+
/**
|
|
88
|
+
* Thrown when a U64-mode mapper allocates or returns an entity int
|
|
89
|
+
* above `Number.MAX_SAFE_INTEGER` (2^53 - 1). At this point a JS
|
|
90
|
+
* `number` can no longer represent the value losslessly; callers must
|
|
91
|
+
* switch to the BigInt sibling methods (`getOrAssignBig`,
|
|
92
|
+
* `getIntBig`, `getUuidBig`).
|
|
93
|
+
*/
|
|
94
|
+
export declare class EntityIdSpaceExceeded extends Error {
|
|
95
|
+
/** The u64 entity int that exceeded the safe-integer ceiling. */
|
|
96
|
+
readonly value: bigint;
|
|
97
|
+
/** The method that was called (`'getOrAssign'`, `'getInt'`, etc.). */
|
|
98
|
+
readonly method: string;
|
|
99
|
+
constructor(method: string, value: bigint);
|
|
53
100
|
}
|
|
54
101
|
/**
|
|
55
102
|
* Drop-in `EntityIdMapperProvider` backed by the native `BinaryIdMapper`.
|
|
@@ -70,19 +117,70 @@ export declare class NativeBinaryEntityIdMapperWrapper implements EntityIdMapper
|
|
|
70
117
|
private uuidToIntSize;
|
|
71
118
|
private bucketCapacity;
|
|
72
119
|
private maxGlobalDepth;
|
|
120
|
+
private requestedIdSpace;
|
|
121
|
+
/**
|
|
122
|
+
* The actual IdSpace of the open mapper, sourced from the native
|
|
123
|
+
* binding's `idSpace()` reflection after `init()`. The on-disk
|
|
124
|
+
* header wins over `requestedIdSpace` (which may be ignored at
|
|
125
|
+
* `openExisting` time).
|
|
126
|
+
*/
|
|
127
|
+
private resolvedIdSpace;
|
|
73
128
|
private native;
|
|
74
129
|
private initialized;
|
|
75
130
|
constructor(options: NativeBinaryEntityIdMapperOptions);
|
|
76
131
|
init(): Promise<void>;
|
|
132
|
+
/**
|
|
133
|
+
* Report the mapper's actual IdSpace mode. Returns `'u32'` before
|
|
134
|
+
* `init()` (the default the wrapper assumes); after init, returns the
|
|
135
|
+
* mode reported by the native binding (which is authoritative).
|
|
136
|
+
*/
|
|
137
|
+
getIdSpace(): 'u32' | 'u64';
|
|
138
|
+
/**
|
|
139
|
+
* Allocate or retrieve the entity int for `uuid`. Returns a JS
|
|
140
|
+
* `number`. In U64 mode, routes through the BigInt sibling and
|
|
141
|
+
* throws {@link EntityIdSpaceExceeded} if the allocated int exceeds
|
|
142
|
+
* `Number.MAX_SAFE_INTEGER` — at that point the caller MUST switch
|
|
143
|
+
* to `getOrAssignBig` for the full u64 range.
|
|
144
|
+
*/
|
|
77
145
|
getOrAssign(uuid: string): number;
|
|
146
|
+
/**
|
|
147
|
+
* Look up the UUID for `intId`. Accepts a JS `number` — in U64 mode
|
|
148
|
+
* this is a lossy conversion above 2^53; use {@link getUuidBig} for
|
|
149
|
+
* the full u64 range.
|
|
150
|
+
*/
|
|
78
151
|
getUuid(intId: number): string | undefined;
|
|
152
|
+
/**
|
|
153
|
+
* Look up the entity int for `uuid`. Returns a JS `number`. In U64
|
|
154
|
+
* mode throws {@link EntityIdSpaceExceeded} if the int exceeds
|
|
155
|
+
* `Number.MAX_SAFE_INTEGER`.
|
|
156
|
+
*/
|
|
79
157
|
getInt(uuid: string): number | undefined;
|
|
80
158
|
remove(uuid: string): boolean;
|
|
81
159
|
flush(): Promise<void>;
|
|
82
160
|
clear(): Promise<void>;
|
|
161
|
+
/**
|
|
162
|
+
* Materialise every live int id into a JS `number[]`. **U32 mode
|
|
163
|
+
* only.** U64 mode throws — the native binding refuses to allocate
|
|
164
|
+
* a giant JS array at the scale a U64 brain implies. Iterate via
|
|
165
|
+
* the BigInt sibling iterator (TBD — a follow-up surfaces it on the
|
|
166
|
+
* wrapper) for U64 brains.
|
|
167
|
+
*/
|
|
83
168
|
getAllIntIds(): number[];
|
|
84
169
|
intsIterableToUuids(ints: Iterable<number>): string[];
|
|
85
170
|
get size(): number;
|
|
171
|
+
/**
|
|
172
|
+
* Allocate or retrieve the entity int for `uuid` as a `bigint`.
|
|
173
|
+
* Lossless across the full u64 range; safe to call in either mode.
|
|
174
|
+
*/
|
|
175
|
+
getOrAssignBig(uuid: string): bigint;
|
|
176
|
+
/** Look up the entity int for `uuid` as a `bigint`. */
|
|
177
|
+
getIntBig(uuid: string): bigint | undefined;
|
|
178
|
+
/** Look up the UUID for `int` (passed as a `bigint`). */
|
|
179
|
+
getUuidBig(int: bigint): string | undefined;
|
|
180
|
+
/** Live (non-tombstone) entry count as a `bigint`. */
|
|
181
|
+
sizeBig(): bigint;
|
|
182
|
+
/** Largest int ever assigned + 1, as a `bigint`. */
|
|
183
|
+
nextIntBig(): bigint;
|
|
86
184
|
/**
|
|
87
185
|
* Encode a UUID string into a 16-byte Buffer. Accepts canonical
|
|
88
186
|
* 36-char form (with hyphens) or any 32-hex-digit form. Throws on
|
|
@@ -26,11 +26,64 @@
|
|
|
26
26
|
* (256 sharded per-UUID mutexes in the native layer). Lookups are
|
|
27
27
|
* lock-free. The wrapper holds no JS-side mutable state besides the
|
|
28
28
|
* native handle.
|
|
29
|
+
*
|
|
30
|
+
* ## IdSpace (Piece 10)
|
|
31
|
+
*
|
|
32
|
+
* The wrapper supports two entity-int wire widths:
|
|
33
|
+
*
|
|
34
|
+
* - `'u32'` (default): cortex 2.x compatible — JS `number` throughout,
|
|
35
|
+
* capped at 4.29 B entities. Persists in the legacy `int_to_uuid.bin`
|
|
36
|
+
* v1 header.
|
|
37
|
+
* - `'u64'`: opt-in via `idSpace: 'u64'`. The native layer's `number`
|
|
38
|
+
* surface throws in this mode, so the wrapper transparently routes
|
|
39
|
+
* the `EntityIdMapperProvider` methods through the BigInt napi
|
|
40
|
+
* siblings and converts BigInt → number at the boundary. Entity ints
|
|
41
|
+
* above `Number.MAX_SAFE_INTEGER` (2^53 - 1 ≈ 9.007 × 10^15 entities)
|
|
42
|
+
* throw a clear `EntityIdSpaceExceeded` error so callers get a loud
|
|
43
|
+
* failure rather than a silent precision loss.
|
|
44
|
+
*
|
|
45
|
+
* The `getOrAssignBig` / `getIntBig` / `getUuidBig` / `sizeBig`
|
|
46
|
+
* sibling methods are available on both modes and always return
|
|
47
|
+
* `bigint` losslessly — use them in u64-aware code paths that need
|
|
48
|
+
* the full u64 range.
|
|
29
49
|
*/
|
|
30
50
|
import { existsSync } from 'node:fs';
|
|
31
51
|
import { loadNativeModule } from '../native/index.js';
|
|
32
52
|
import { prodLog } from '@soulcraft/brainy/internals';
|
|
33
53
|
const UUID_BYTES = 16;
|
|
54
|
+
/**
|
|
55
|
+
* Thrown when a U64-mode mapper allocates or returns an entity int
|
|
56
|
+
* above `Number.MAX_SAFE_INTEGER` (2^53 - 1). At this point a JS
|
|
57
|
+
* `number` can no longer represent the value losslessly; callers must
|
|
58
|
+
* switch to the BigInt sibling methods (`getOrAssignBig`,
|
|
59
|
+
* `getIntBig`, `getUuidBig`).
|
|
60
|
+
*/
|
|
61
|
+
export class EntityIdSpaceExceeded extends Error {
|
|
62
|
+
/** The u64 entity int that exceeded the safe-integer ceiling. */
|
|
63
|
+
value;
|
|
64
|
+
/** The method that was called (`'getOrAssign'`, `'getInt'`, etc.). */
|
|
65
|
+
method;
|
|
66
|
+
constructor(method, value) {
|
|
67
|
+
super(`${method}: entity int ${value} exceeds Number.MAX_SAFE_INTEGER ` +
|
|
68
|
+
`(2^53 - 1). Switch to the BigInt sibling method (${method}Big) ` +
|
|
69
|
+
`for entity ints above 9.007 PB.`);
|
|
70
|
+
this.name = 'EntityIdSpaceExceeded';
|
|
71
|
+
this.method = method;
|
|
72
|
+
this.value = value;
|
|
73
|
+
}
|
|
74
|
+
}
|
|
75
|
+
const MAX_SAFE_INTEGER_BIG = BigInt(Number.MAX_SAFE_INTEGER);
|
|
76
|
+
/**
|
|
77
|
+
* Convert a `bigint` entity int to a JS `number`. Throws
|
|
78
|
+
* {@link EntityIdSpaceExceeded} if the value exceeds the JS safe-integer
|
|
79
|
+
* range.
|
|
80
|
+
*/
|
|
81
|
+
function bigToSafeNumber(value, method) {
|
|
82
|
+
if (value > MAX_SAFE_INTEGER_BIG) {
|
|
83
|
+
throw new EntityIdSpaceExceeded(method, value);
|
|
84
|
+
}
|
|
85
|
+
return Number(value);
|
|
86
|
+
}
|
|
34
87
|
const DEFAULT_UUID_TO_INT_KEY = '_id_mapper/uuid_to_int.mkv';
|
|
35
88
|
const DEFAULT_INT_TO_UUID_KEY = '_id_mapper/int_to_uuid.bin';
|
|
36
89
|
/**
|
|
@@ -52,6 +105,14 @@ export class NativeBinaryEntityIdMapperWrapper {
|
|
|
52
105
|
uuidToIntSize;
|
|
53
106
|
bucketCapacity;
|
|
54
107
|
maxGlobalDepth;
|
|
108
|
+
requestedIdSpace;
|
|
109
|
+
/**
|
|
110
|
+
* The actual IdSpace of the open mapper, sourced from the native
|
|
111
|
+
* binding's `idSpace()` reflection after `init()`. The on-disk
|
|
112
|
+
* header wins over `requestedIdSpace` (which may be ignored at
|
|
113
|
+
* `openExisting` time).
|
|
114
|
+
*/
|
|
115
|
+
resolvedIdSpace = 'u32';
|
|
55
116
|
native = null;
|
|
56
117
|
initialized = false;
|
|
57
118
|
constructor(options) {
|
|
@@ -62,6 +123,7 @@ export class NativeBinaryEntityIdMapperWrapper {
|
|
|
62
123
|
this.uuidToIntSize = options.uuidToIntSize ?? BigInt(32) * BigInt(1024) ** BigInt(3);
|
|
63
124
|
this.bucketCapacity = options.bucketCapacity ?? 16;
|
|
64
125
|
this.maxGlobalDepth = options.maxGlobalDepth ?? 28;
|
|
126
|
+
this.requestedIdSpace = options.idSpace ?? 'u32';
|
|
65
127
|
}
|
|
66
128
|
async init() {
|
|
67
129
|
if (this.initialized)
|
|
@@ -91,6 +153,7 @@ export class NativeBinaryEntityIdMapperWrapper {
|
|
|
91
153
|
uuidToIntSize: this.uuidToIntSize,
|
|
92
154
|
bucketCapacity: this.bucketCapacity,
|
|
93
155
|
maxGlobalDepth: this.maxGlobalDepth,
|
|
156
|
+
idSpace: this.requestedIdSpace,
|
|
94
157
|
};
|
|
95
158
|
// Explicitly distinguish "fresh install" from "existing files".
|
|
96
159
|
// Both files must exist together (paired write semantics) — a
|
|
@@ -111,24 +174,62 @@ export class NativeBinaryEntityIdMapperWrapper {
|
|
|
111
174
|
`${this.intToUuidKey} ${intFileExists ? 'exists' : 'missing'}. ` +
|
|
112
175
|
`Refusing to silently recreate; investigate manually.`);
|
|
113
176
|
}
|
|
177
|
+
// Reflect the on-disk IdSpace — authoritative over the requested
|
|
178
|
+
// value when openExisting opens a file with a different mode.
|
|
179
|
+
this.resolvedIdSpace = this.native.idSpace();
|
|
114
180
|
this.initialized = true;
|
|
115
181
|
if (prodLog?.debug) {
|
|
116
|
-
prodLog.debug(`[cortex] BinaryIdMapper wired: paths=[${uuidToIntPath}, ${intToUuidPath}]`);
|
|
182
|
+
prodLog.debug(`[cortex] BinaryIdMapper wired: paths=[${uuidToIntPath}, ${intToUuidPath}], idSpace=${this.resolvedIdSpace}`);
|
|
117
183
|
}
|
|
118
184
|
}
|
|
185
|
+
/**
|
|
186
|
+
* Report the mapper's actual IdSpace mode. Returns `'u32'` before
|
|
187
|
+
* `init()` (the default the wrapper assumes); after init, returns the
|
|
188
|
+
* mode reported by the native binding (which is authoritative).
|
|
189
|
+
*/
|
|
190
|
+
getIdSpace() {
|
|
191
|
+
return this.resolvedIdSpace;
|
|
192
|
+
}
|
|
193
|
+
// -- EntityIdMapperProvider surface (number-typed, brainy contract) --
|
|
194
|
+
/**
|
|
195
|
+
* Allocate or retrieve the entity int for `uuid`. Returns a JS
|
|
196
|
+
* `number`. In U64 mode, routes through the BigInt sibling and
|
|
197
|
+
* throws {@link EntityIdSpaceExceeded} if the allocated int exceeds
|
|
198
|
+
* `Number.MAX_SAFE_INTEGER` — at that point the caller MUST switch
|
|
199
|
+
* to `getOrAssignBig` for the full u64 range.
|
|
200
|
+
*/
|
|
119
201
|
getOrAssign(uuid) {
|
|
120
202
|
const native = this.ensure();
|
|
203
|
+
if (this.resolvedIdSpace === 'u64') {
|
|
204
|
+
return bigToSafeNumber(native.getOrAssignBig(this.encode(uuid)), 'getOrAssign');
|
|
205
|
+
}
|
|
121
206
|
return native.getOrAssign(this.encode(uuid));
|
|
122
207
|
}
|
|
208
|
+
/**
|
|
209
|
+
* Look up the UUID for `intId`. Accepts a JS `number` — in U64 mode
|
|
210
|
+
* this is a lossy conversion above 2^53; use {@link getUuidBig} for
|
|
211
|
+
* the full u64 range.
|
|
212
|
+
*/
|
|
123
213
|
getUuid(intId) {
|
|
124
214
|
const native = this.ensure();
|
|
125
|
-
const buf =
|
|
215
|
+
const buf = this.resolvedIdSpace === 'u64'
|
|
216
|
+
? native.getUuidBig(BigInt(intId))
|
|
217
|
+
: native.getUuid(intId);
|
|
126
218
|
if (!buf)
|
|
127
219
|
return undefined;
|
|
128
220
|
return this.decode(buf);
|
|
129
221
|
}
|
|
222
|
+
/**
|
|
223
|
+
* Look up the entity int for `uuid`. Returns a JS `number`. In U64
|
|
224
|
+
* mode throws {@link EntityIdSpaceExceeded} if the int exceeds
|
|
225
|
+
* `Number.MAX_SAFE_INTEGER`.
|
|
226
|
+
*/
|
|
130
227
|
getInt(uuid) {
|
|
131
228
|
const native = this.ensure();
|
|
229
|
+
if (this.resolvedIdSpace === 'u64') {
|
|
230
|
+
const big = native.getIntBig(this.encode(uuid));
|
|
231
|
+
return big == null ? undefined : bigToSafeNumber(big, 'getInt');
|
|
232
|
+
}
|
|
132
233
|
const out = native.getInt(this.encode(uuid));
|
|
133
234
|
return out == null ? undefined : out;
|
|
134
235
|
}
|
|
@@ -148,15 +249,28 @@ export class NativeBinaryEntityIdMapperWrapper {
|
|
|
148
249
|
this.native = null;
|
|
149
250
|
await this.init();
|
|
150
251
|
}
|
|
252
|
+
/**
|
|
253
|
+
* Materialise every live int id into a JS `number[]`. **U32 mode
|
|
254
|
+
* only.** U64 mode throws — the native binding refuses to allocate
|
|
255
|
+
* a giant JS array at the scale a U64 brain implies. Iterate via
|
|
256
|
+
* the BigInt sibling iterator (TBD — a follow-up surfaces it on the
|
|
257
|
+
* wrapper) for U64 brains.
|
|
258
|
+
*/
|
|
151
259
|
getAllIntIds() {
|
|
152
260
|
const native = this.ensure();
|
|
261
|
+
if (this.resolvedIdSpace === 'u64') {
|
|
262
|
+
throw new Error('getAllIntIds: not supported in U64 mode — the materialised ' +
|
|
263
|
+
'number[] would risk OOM at billion scale. Use the BigInt ' +
|
|
264
|
+
'streaming iterator on the underlying native binding instead.');
|
|
265
|
+
}
|
|
153
266
|
return native.getAllIntIds();
|
|
154
267
|
}
|
|
155
268
|
intsIterableToUuids(ints) {
|
|
156
269
|
const native = this.ensure();
|
|
270
|
+
const u64 = this.resolvedIdSpace === 'u64';
|
|
157
271
|
const out = [];
|
|
158
272
|
for (const i of ints) {
|
|
159
|
-
const buf = native.getUuid(i);
|
|
273
|
+
const buf = u64 ? native.getUuidBig(BigInt(i)) : native.getUuid(i);
|
|
160
274
|
if (buf)
|
|
161
275
|
out.push(this.decode(buf));
|
|
162
276
|
}
|
|
@@ -165,8 +279,44 @@ export class NativeBinaryEntityIdMapperWrapper {
|
|
|
165
279
|
get size() {
|
|
166
280
|
if (!this.initialized || !this.native)
|
|
167
281
|
return 0;
|
|
282
|
+
if (this.resolvedIdSpace === 'u64') {
|
|
283
|
+
return bigToSafeNumber(this.native.sizeBig(), 'size');
|
|
284
|
+
}
|
|
168
285
|
return this.native.size();
|
|
169
286
|
}
|
|
287
|
+
// -- BigInt sibling surface (u64-safe, works in both modes) -------
|
|
288
|
+
/**
|
|
289
|
+
* Allocate or retrieve the entity int for `uuid` as a `bigint`.
|
|
290
|
+
* Lossless across the full u64 range; safe to call in either mode.
|
|
291
|
+
*/
|
|
292
|
+
getOrAssignBig(uuid) {
|
|
293
|
+
const native = this.ensure();
|
|
294
|
+
return native.getOrAssignBig(this.encode(uuid));
|
|
295
|
+
}
|
|
296
|
+
/** Look up the entity int for `uuid` as a `bigint`. */
|
|
297
|
+
getIntBig(uuid) {
|
|
298
|
+
const native = this.ensure();
|
|
299
|
+
const out = native.getIntBig(this.encode(uuid));
|
|
300
|
+
return out == null ? undefined : out;
|
|
301
|
+
}
|
|
302
|
+
/** Look up the UUID for `int` (passed as a `bigint`). */
|
|
303
|
+
getUuidBig(int) {
|
|
304
|
+
const native = this.ensure();
|
|
305
|
+
const buf = native.getUuidBig(int);
|
|
306
|
+
if (!buf)
|
|
307
|
+
return undefined;
|
|
308
|
+
return this.decode(buf);
|
|
309
|
+
}
|
|
310
|
+
/** Live (non-tombstone) entry count as a `bigint`. */
|
|
311
|
+
sizeBig() {
|
|
312
|
+
if (!this.initialized || !this.native)
|
|
313
|
+
return 0n;
|
|
314
|
+
return this.native.sizeBig();
|
|
315
|
+
}
|
|
316
|
+
/** Largest int ever assigned + 1, as a `bigint`. */
|
|
317
|
+
nextIntBig() {
|
|
318
|
+
return this.ensure().nextIntBig();
|
|
319
|
+
}
|
|
170
320
|
// ---------------------------------------------------------------
|
|
171
321
|
// UUID string ↔ Buffer conversion
|
|
172
322
|
// ---------------------------------------------------------------
|
|
Binary file
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@soulcraft/cortex",
|
|
3
|
-
"version": "2.
|
|
3
|
+
"version": "2.6.0",
|
|
4
4
|
"description": "Native Rust acceleration for Brainy — SIMD distance, vector quantization, zero-copy mmap, native embeddings. Free tier for storage, Pro license for compute acceleration.",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"types": "dist/index.d.ts",
|
|
@@ -66,11 +66,11 @@
|
|
|
66
66
|
"LICENSE"
|
|
67
67
|
],
|
|
68
68
|
"peerDependencies": {
|
|
69
|
-
"@soulcraft/brainy": ">=7.
|
|
69
|
+
"@soulcraft/brainy": ">=7.29.0"
|
|
70
70
|
},
|
|
71
71
|
"devDependencies": {
|
|
72
72
|
"@napi-rs/cli": "^3.0.0",
|
|
73
|
-
"@soulcraft/brainy": "
|
|
73
|
+
"@soulcraft/brainy": "7.29.0",
|
|
74
74
|
"@types/node": "^22.0.0",
|
|
75
75
|
"tsx": "^4.21.0",
|
|
76
76
|
"typescript": "^5.9.3",
|