@soulcraft/cortex 2.7.2 → 2.7.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +0 -0
- package/dist/hnsw/AdaptiveDiskAnnModeSelector.d.ts +168 -0
- package/dist/hnsw/AdaptiveDiskAnnModeSelector.js +276 -0
- package/dist/hnsw/NativeDiskAnnWrapper.d.ts +284 -0
- package/dist/hnsw/NativeDiskAnnWrapper.js +738 -0
- package/dist/hnsw/NativeHNSWWrapper.js +34 -2
- package/dist/legacyLayoutGuard.d.ts +61 -0
- package/dist/legacyLayoutGuard.js +187 -0
- package/dist/resource/OsMemoryProbe.d.ts +175 -0
- package/dist/resource/OsMemoryProbe.js +206 -0
- package/dist/utils/nativeBinaryEntityIdMapper.d.ts +199 -0
- package/dist/utils/nativeBinaryEntityIdMapper.js +358 -0
- package/native/brainy-native.node +0 -0
- package/package.json +1 -1
|
@@ -0,0 +1,199 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @module utils/nativeBinaryEntityIdMapper
|
|
3
|
+
* @description TypeScript wrapper around cor's native binary
|
|
4
|
+
* `BinaryIdMapper`. Implements brainy's `EntityIdMapperProvider` so the
|
|
5
|
+
* mmap-backed billion-scale mapper is a drop-in for the existing
|
|
6
|
+
* JSON-persisted one.
|
|
7
|
+
*
|
|
8
|
+
* ## When this engages
|
|
9
|
+
*
|
|
10
|
+
* The cor plugin registers this wrapper as the `'entityIdMapper'`
|
|
11
|
+
* provider when the storage adapter exposes `getBinaryBlobPath()` (i.e.
|
|
12
|
+
* filesystem-backed storage with cor's 2.4.0 #2 mmap-vector layer).
|
|
13
|
+
* Cloud-storage adapters fall back to the JSON variant
|
|
14
|
+
* (`NativeEntityIdMapperWrapper`) since they have no local-path concept.
|
|
15
|
+
*
|
|
16
|
+
* ## UUID format conversion
|
|
17
|
+
*
|
|
18
|
+
* Brainy passes UUIDs as strings (typically the canonical 36-char
|
|
19
|
+
* `xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx`). The native side works in
|
|
20
|
+
* 16-byte Buffers. This wrapper converts at the boundary. Non-canonical
|
|
21
|
+
* UUID strings (any other 32-hex-digit form) are also accepted.
|
|
22
|
+
*
|
|
23
|
+
* ## Concurrency
|
|
24
|
+
*
|
|
25
|
+
* `getOrAssign` is atomic across concurrent callers for the same UUID
|
|
26
|
+
* (256 sharded per-UUID mutexes in the native layer). Lookups are
|
|
27
|
+
* lock-free. The wrapper holds no JS-side mutable state besides the
|
|
28
|
+
* native handle.
|
|
29
|
+
*
|
|
30
|
+
* ## IdSpace (Piece 10)
|
|
31
|
+
*
|
|
32
|
+
* The wrapper supports two entity-int wire widths:
|
|
33
|
+
*
|
|
34
|
+
* - `'u32'`: opt-in via `idSpace: 'u32'`; cortex 2.x compatible — JS `number` throughout,
|
|
35
|
+
* capped at 4.29 B entities. Persists in the legacy `int_to_uuid.bin`
|
|
36
|
+
* v1 header.
|
|
37
|
+
* - `'u64'` (default when `idSpace` is omitted): the native layer's `number`
|
|
38
|
+
* surface throws in this mode, so the wrapper transparently routes
|
|
39
|
+
* the `EntityIdMapperProvider` methods through the BigInt napi
|
|
40
|
+
* siblings and converts BigInt → number at the boundary. Entity ints
|
|
41
|
+
* above `Number.MAX_SAFE_INTEGER` (2^53 - 1 ≈ 9.007 × 10^15 entities)
|
|
42
|
+
* throw a clear `EntityIdSpaceExceeded` error so callers get a loud
|
|
43
|
+
* failure rather than a silent precision loss.
|
|
44
|
+
*
|
|
45
|
+
* The `getOrAssignBig` / `getIntBig` / `getUuidBig` / `sizeBig`
|
|
46
|
+
* sibling methods are available on both modes and always return
|
|
47
|
+
* `bigint` losslessly — use them in u64-aware code paths that need
|
|
48
|
+
* the full u64 range.
|
|
49
|
+
*/
|
|
50
|
+
import type { StorageAdapter } from '@soulcraft/brainy';
|
|
51
|
+
import type { EntityIdMapperProvider } from '../providerContracts.js';
|
|
52
|
+
/** Construction options for `NativeBinaryEntityIdMapperWrapper`; every field except `storage` is optional. */
export interface NativeBinaryEntityIdMapperOptions {
|
|
53
|
+
/** Storage adapter — required for binary blob path resolution (`init()` throws unless it exposes `getBinaryBlobPath()`). */
|
|
54
|
+
storage: StorageAdapter;
|
|
55
|
+
/**
|
|
56
|
+
* Override the relative path under storage for the uuid_to_int file.
|
|
57
|
+
* Default `_id_mapper/uuid_to_int.mkv`.
|
|
58
|
+
*/
|
|
59
|
+
uuidToIntKey?: string;
|
|
60
|
+
/**
|
|
61
|
+
* Override the relative path under storage for the int_to_uuid file.
|
|
62
|
+
* Default `_id_mapper/int_to_uuid.bin`.
|
|
63
|
+
*/
|
|
64
|
+
intToUuidKey?: string;
|
|
65
|
+
/** Sparse file size for int_to_uuid. Default 32 GiB (32 × 1024^3). */
|
|
66
|
+
intToUuidSize?: bigint;
|
|
67
|
+
/** Sparse file size for uuid_to_int. Default 32 GiB (32 × 1024^3). */
|
|
68
|
+
uuidToIntSize?: bigint;
|
|
69
|
+
/** Bucket capacity in the MmapKv. Default 16. */
|
|
70
|
+
bucketCapacity?: number;
|
|
71
|
+
/** Maximum extendible-hash directory depth. Default 28. */
|
|
72
|
+
maxGlobalDepth?: number;
|
|
73
|
+
/**
|
|
74
|
+
* Entity-int wire width. **`'u64'` is the default** for cor 3.0 —
|
|
75
|
+
* supports `u64::MAX - 1` entities with no measurable perf cost
|
|
76
|
+
* (audit: 4 extra bytes per int_to_uuid slot, no impact on the hot
|
|
77
|
+
* paths per `findPerfGates.test.ts`). `'u32'` is available for the
|
|
78
|
+
* brainy 7.x-compatible cap at 4.29 B entities but is no longer the
|
|
79
|
+
* default — cor 3.0's design floor of 1 B+ entities makes u32
|
|
80
|
+
* the wrong default for production.
|
|
81
|
+
*
|
|
82
|
+
* The `EntityIdMapperProvider` `number`-typed methods still work in
|
|
83
|
+
* U64 mode by routing through the BigInt napi siblings; entity ints
|
|
84
|
+
* above `Number.MAX_SAFE_INTEGER` (2^53 - 1) throw an explicit
|
|
85
|
+
* `EntityIdSpaceExceeded` error.
|
|
86
|
+
*
|
|
87
|
+
* When an existing file pair is opened, the mode recorded on disk is what
|
|
88
|
+
* `getIdSpace()` reports afterwards. NOTE(review): whether a mismatch with this option is adopted silently or raised as an error is decided by the native layer — confirm.
|
|
89
|
+
*/
|
|
90
|
+
idSpace?: 'u32' | 'u64';
|
|
91
|
+
}
|
|
92
|
+
/**
|
|
93
|
+
* Thrown when a U64-mode mapper allocates or returns an entity int
|
|
94
|
+
* above `Number.MAX_SAFE_INTEGER` (2^53 - 1). At this point a JS
|
|
95
|
+
* `number` can no longer represent the value losslessly; callers must
|
|
96
|
+
* switch to the BigInt sibling methods (`getOrAssignBig`,
|
|
97
|
+
* `getIntBig`, `getUuidBig`, or `sizeBig` for the `size` getter).
|
|
98
|
+
*/
|
|
99
|
+
export declare class EntityIdSpaceExceeded extends Error {
|
|
100
|
+
/** The u64 entity int that exceeded the safe-integer ceiling. */
|
|
101
|
+
readonly value: bigint;
|
|
102
|
+
/** The method that was called (`'getOrAssign'`, `'getInt'`, `'size'`). */
|
|
103
|
+
readonly method: string;
|
|
104
|
+
/** Builds the message from `method` and `value` and stores both on the instance. */
constructor(method: string, value: bigint);
|
|
105
|
+
}
|
|
106
|
+
/**
|
|
107
|
+
* Drop-in `EntityIdMapperProvider` backed by the native `BinaryIdMapper`.
|
|
108
|
+
*
|
|
109
|
+
* @example
|
|
110
|
+
* ```typescript
|
|
111
|
+
* const mapper = new NativeBinaryEntityIdMapperWrapper({ storage })
|
|
112
|
+
* await mapper.init()
|
|
113
|
+
* const intId = mapper.getOrAssign('12345678-1234-5678-1234-567812345678')
|
|
114
|
+
* const uuid = mapper.getUuid(intId)
|
|
115
|
+
* ```
|
|
116
|
+
*/
|
|
117
|
+
export declare class NativeBinaryEntityIdMapperWrapper implements EntityIdMapperProvider {
|
|
118
|
+
private storage;
|
|
119
|
+
private uuidToIntKey;
|
|
120
|
+
private intToUuidKey;
|
|
121
|
+
private intToUuidSize;
|
|
122
|
+
private uuidToIntSize;
|
|
123
|
+
private bucketCapacity;
|
|
124
|
+
private maxGlobalDepth;
|
|
125
|
+
/** The `idSpace` option as passed to the constructor (`'u64'` when omitted). */
private requestedIdSpace;
|
|
126
|
+
/**
|
|
127
|
+
* The actual IdSpace of the open mapper, sourced from the native
|
|
128
|
+
* binding's `idSpace()` reflection after `init()`. The on-disk
|
|
129
|
+
* header wins over `requestedIdSpace` (which may be ignored at
|
|
130
|
+
* `openExisting` time).
|
|
131
|
+
*/
|
|
132
|
+
private resolvedIdSpace;
|
|
133
|
+
/** Native mapper handle; `null` until `init()` has opened or created the file pair. */
private native;
|
|
134
|
+
private initialized;
|
|
135
|
+
/** Stores the options and applies defaults; performs no I/O until `init()`. */
constructor(options: NativeBinaryEntityIdMapperOptions);
|
|
136
|
+
/**
 * Resolve both blob paths through the storage adapter, then open the
 * existing file pair or create a fresh one. No-op when already
 * initialized. Throws when the adapter lacks `getBinaryBlobPath()`,
 * when the native binding is missing, or when exactly one of the two
 * files exists.
 */
init(): Promise<void>;
|
|
137
|
+
/**
|
|
138
|
+
* Report the mapper's actual IdSpace mode. Returns `'u32'` before
|
|
139
|
+
* `init()` (a fixed placeholder — not the requested `idSpace` option); after init, returns the
|
|
140
|
+
* mode reported by the native binding (which is authoritative).
|
|
141
|
+
*/
|
|
142
|
+
getIdSpace(): 'u32' | 'u64';
|
|
143
|
+
/**
|
|
144
|
+
* Allocate or retrieve the entity int for `uuid`. Returns a JS
|
|
145
|
+
* `number`. In U64 mode, routes through the BigInt sibling and
|
|
146
|
+
* throws {@link EntityIdSpaceExceeded} if the allocated int exceeds
|
|
147
|
+
* `Number.MAX_SAFE_INTEGER` — at that point the caller MUST switch
|
|
148
|
+
* to `getOrAssignBig` for the full u64 range.
|
|
149
|
+
*/
|
|
150
|
+
getOrAssign(uuid: string): number;
|
|
151
|
+
/**
|
|
152
|
+
* Look up the UUID for `intId`. Accepts a JS `number` — in U64 mode
|
|
153
|
+
* this is a lossy conversion above 2^53; use {@link getUuidBig} for
|
|
154
|
+
* the full u64 range.
|
|
155
|
+
*/
|
|
156
|
+
getUuid(intId: number): string | undefined;
|
|
157
|
+
/**
|
|
158
|
+
* Look up the entity int for `uuid`. Returns a JS `number`. In U64
|
|
159
|
+
* mode throws {@link EntityIdSpaceExceeded} if the int exceeds
|
|
160
|
+
* `Number.MAX_SAFE_INTEGER`.
|
|
161
|
+
*/
|
|
162
|
+
getInt(uuid: string): number | undefined;
|
|
163
|
+
/** Remove the mapping for `uuid`; returns the native binding's boolean result. */
remove(uuid: string): boolean;
|
|
164
|
+
/** Invoke the native `flush()`; the promise resolves once that call returns. */
flush(): Promise<void>;
|
|
165
|
+
/** Discard the current native handle and re-run `init()`. Note that `init()` reopens the backing files whenever both are still on disk. */
clear(): Promise<void>;
|
|
166
|
+
/**
|
|
167
|
+
* Materialise every live int id into a JS `number[]`. **U32 mode
|
|
168
|
+
* only.** U64 mode throws — the wrapper refuses to materialise
|
|
169
|
+
* a giant JS array at the scale a U64 brain implies. Iterate via
|
|
170
|
+
* the BigInt sibling iterator (TBD — a follow-up surfaces it on the
|
|
171
|
+
* wrapper) for U64 brains.
|
|
172
|
+
*/
|
|
173
|
+
getAllIntIds(): number[];
|
|
174
|
+
/** Translate each int to its UUID string. Ints with no mapping are skipped, so the result may be shorter than the input. */
intsIterableToUuids(ints: Iterable<number>): string[];
|
|
175
|
+
/** Entry count as a JS `number`; `0` before `init()`. In U64 mode throws {@link EntityIdSpaceExceeded} above `Number.MAX_SAFE_INTEGER`. */
get size(): number;
|
|
176
|
+
/**
|
|
177
|
+
* Allocate or retrieve the entity int for `uuid` as a `bigint`.
|
|
178
|
+
* Lossless across the full u64 range; safe to call in either mode.
|
|
179
|
+
*/
|
|
180
|
+
getOrAssignBig(uuid: string): bigint;
|
|
181
|
+
/** Look up the entity int for `uuid` as a `bigint`. */
|
|
182
|
+
getIntBig(uuid: string): bigint | undefined;
|
|
183
|
+
/** Look up the UUID for `int` (passed as a `bigint`). */
|
|
184
|
+
getUuidBig(int: bigint): string | undefined;
|
|
185
|
+
/** Live (non-tombstone) entry count as a `bigint`. */
|
|
186
|
+
sizeBig(): bigint;
|
|
187
|
+
/** Largest int ever assigned + 1, as a `bigint`. */
|
|
188
|
+
nextIntBig(): bigint;
|
|
189
|
+
/**
|
|
190
|
+
* Encode a UUID string into a 16-byte Buffer. Accepts canonical
|
|
191
|
+
* 36-char form (with hyphens) or any 32-hex-digit form. Throws on
|
|
192
|
+
* malformed input.
|
|
193
|
+
*/
|
|
194
|
+
private encode;
|
|
195
|
+
/** Decode a 16-byte Buffer back to canonical UUID string. */
|
|
196
|
+
private decode;
|
|
197
|
+
/** Return the native handle, or throw if `init()` has not completed. */
private ensure;
|
|
198
|
+
}
|
|
199
|
+
//# sourceMappingURL=nativeBinaryEntityIdMapper.d.ts.map
|
|
@@ -0,0 +1,358 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @module utils/nativeBinaryEntityIdMapper
|
|
3
|
+
* @description TypeScript wrapper around cor's native binary
|
|
4
|
+
* `BinaryIdMapper`. Implements brainy's `EntityIdMapperProvider` so the
|
|
5
|
+
* mmap-backed billion-scale mapper is a drop-in for the existing
|
|
6
|
+
* JSON-persisted one.
|
|
7
|
+
*
|
|
8
|
+
* ## When this engages
|
|
9
|
+
*
|
|
10
|
+
* The cor plugin registers this wrapper as the `'entityIdMapper'`
|
|
11
|
+
* provider when the storage adapter exposes `getBinaryBlobPath()` (i.e.
|
|
12
|
+
* filesystem-backed storage with cor's 2.4.0 #2 mmap-vector layer).
|
|
13
|
+
* Cloud-storage adapters fall back to the JSON variant
|
|
14
|
+
* (`NativeEntityIdMapperWrapper`) since they have no local-path concept.
|
|
15
|
+
*
|
|
16
|
+
* ## UUID format conversion
|
|
17
|
+
*
|
|
18
|
+
* Brainy passes UUIDs as strings (typically the canonical 36-char
|
|
19
|
+
* `xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx`). The native side works in
|
|
20
|
+
* 16-byte Buffers. This wrapper converts at the boundary. Non-canonical
|
|
21
|
+
* UUID strings (any other 32-hex-digit form) are also accepted.
|
|
22
|
+
*
|
|
23
|
+
* ## Concurrency
|
|
24
|
+
*
|
|
25
|
+
* `getOrAssign` is atomic across concurrent callers for the same UUID
|
|
26
|
+
* (256 sharded per-UUID mutexes in the native layer). Lookups are
|
|
27
|
+
* lock-free. The wrapper holds no JS-side mutable state besides the
|
|
28
|
+
* native handle.
|
|
29
|
+
*
|
|
30
|
+
* ## IdSpace (Piece 10)
|
|
31
|
+
*
|
|
32
|
+
* The wrapper supports two entity-int wire widths:
|
|
33
|
+
*
|
|
34
|
+
* - `'u32'`: opt-in via `idSpace: 'u32'`; cortex 2.x compatible — JS `number` throughout,
|
|
35
|
+
* capped at 4.29 B entities. Persists in the legacy `int_to_uuid.bin`
|
|
36
|
+
* v1 header.
|
|
37
|
+
* - `'u64'` (default when `idSpace` is omitted): the native layer's `number`
|
|
38
|
+
* surface throws in this mode, so the wrapper transparently routes
|
|
39
|
+
* the `EntityIdMapperProvider` methods through the BigInt napi
|
|
40
|
+
* siblings and converts BigInt → number at the boundary. Entity ints
|
|
41
|
+
* above `Number.MAX_SAFE_INTEGER` (2^53 - 1 ≈ 9.007 × 10^15 entities)
|
|
42
|
+
* throw a clear `EntityIdSpaceExceeded` error so callers get a loud
|
|
43
|
+
* failure rather than a silent precision loss.
|
|
44
|
+
*
|
|
45
|
+
* The `getOrAssignBig` / `getIntBig` / `getUuidBig` / `sizeBig`
|
|
46
|
+
* sibling methods are available on both modes and always return
|
|
47
|
+
* `bigint` losslessly — use them in u64-aware code paths that need
|
|
48
|
+
* the full u64 range.
|
|
49
|
+
*/
|
|
50
|
+
import { existsSync } from 'node:fs';
|
|
51
|
+
import { loadNativeModule } from '../native/index.js';
|
|
52
|
+
import { prodLog } from '@soulcraft/brainy/internals';
|
|
53
|
+
const UUID_BYTES = 16;
|
|
54
|
+
/**
|
|
55
|
+
* Thrown when a U64-mode mapper allocates or returns an entity int
|
|
56
|
+
* above `Number.MAX_SAFE_INTEGER` (2^53 - 1). At this point a JS
|
|
57
|
+
* `number` can no longer represent the value losslessly; callers must
|
|
58
|
+
* switch to the BigInt sibling methods (`getOrAssignBig`,
|
|
59
|
+
* `getIntBig`, `getUuidBig`).
|
|
60
|
+
*/
|
|
61
|
+
export class EntityIdSpaceExceeded extends Error {
|
|
62
|
+
/** The u64 entity int that exceeded the safe-integer ceiling. */
|
|
63
|
+
value;
|
|
64
|
+
/** The method that was called (`'getOrAssign'`, `'getInt'`, etc.). */
|
|
65
|
+
method;
|
|
66
|
+
constructor(method, value) {
|
|
67
|
+
super(`${method}: entity int ${value} exceeds Number.MAX_SAFE_INTEGER ` +
|
|
68
|
+
`(2^53 - 1). Switch to the BigInt sibling method (${method}Big) ` +
|
|
69
|
+
`for entity ints above 9.007 PB.`);
|
|
70
|
+
this.name = 'EntityIdSpaceExceeded';
|
|
71
|
+
this.method = method;
|
|
72
|
+
this.value = value;
|
|
73
|
+
}
|
|
74
|
+
}
|
|
75
|
+
const MAX_SAFE_INTEGER_BIG = BigInt(Number.MAX_SAFE_INTEGER);
|
|
76
|
+
/**
|
|
77
|
+
* Convert a `bigint` entity int to a JS `number`. Throws
|
|
78
|
+
* {@link EntityIdSpaceExceeded} if the value exceeds the JS safe-integer
|
|
79
|
+
* range.
|
|
80
|
+
*/
|
|
81
|
+
function bigToSafeNumber(value, method) {
|
|
82
|
+
if (value > MAX_SAFE_INTEGER_BIG) {
|
|
83
|
+
throw new EntityIdSpaceExceeded(method, value);
|
|
84
|
+
}
|
|
85
|
+
return Number(value);
|
|
86
|
+
}
|
|
87
|
+
const DEFAULT_UUID_TO_INT_KEY = '_id_mapper/uuid_to_int.mkv';
|
|
88
|
+
const DEFAULT_INT_TO_UUID_KEY = '_id_mapper/int_to_uuid.bin';
|
|
89
|
+
/**
|
|
90
|
+
* Drop-in `EntityIdMapperProvider` backed by the native `BinaryIdMapper`.
|
|
91
|
+
*
|
|
92
|
+
* @example
|
|
93
|
+
* ```typescript
|
|
94
|
+
* const mapper = new NativeBinaryEntityIdMapperWrapper({ storage })
|
|
95
|
+
* await mapper.init()
|
|
96
|
+
* const intId = mapper.getOrAssign('12345678-1234-5678-1234-567812345678')
|
|
97
|
+
* const uuid = mapper.getUuid(intId)
|
|
98
|
+
* ```
|
|
99
|
+
*/
|
|
100
|
+
export class NativeBinaryEntityIdMapperWrapper {
|
|
101
|
+
storage;
|
|
102
|
+
uuidToIntKey;
|
|
103
|
+
intToUuidKey;
|
|
104
|
+
intToUuidSize;
|
|
105
|
+
uuidToIntSize;
|
|
106
|
+
bucketCapacity;
|
|
107
|
+
maxGlobalDepth;
|
|
108
|
+
requestedIdSpace;
|
|
109
|
+
/**
|
|
110
|
+
* The actual IdSpace of the open mapper, sourced from the native
|
|
111
|
+
* binding's `idSpace()` reflection after `init()`. The on-disk
|
|
112
|
+
* header wins over `requestedIdSpace` (which may be ignored at
|
|
113
|
+
* `openExisting` time).
|
|
114
|
+
*/
|
|
115
|
+
resolvedIdSpace = 'u32';
|
|
116
|
+
native = null;
|
|
117
|
+
initialized = false;
|
|
118
|
+
constructor(options) {
|
|
119
|
+
this.storage = options.storage;
|
|
120
|
+
this.uuidToIntKey = options.uuidToIntKey ?? DEFAULT_UUID_TO_INT_KEY;
|
|
121
|
+
this.intToUuidKey = options.intToUuidKey ?? DEFAULT_INT_TO_UUID_KEY;
|
|
122
|
+
this.intToUuidSize = options.intToUuidSize ?? BigInt(32) * BigInt(1024) ** BigInt(3);
|
|
123
|
+
this.uuidToIntSize = options.uuidToIntSize ?? BigInt(32) * BigInt(1024) ** BigInt(3);
|
|
124
|
+
this.bucketCapacity = options.bucketCapacity ?? 16;
|
|
125
|
+
this.maxGlobalDepth = options.maxGlobalDepth ?? 28;
|
|
126
|
+
this.requestedIdSpace = options.idSpace ?? 'u64';
|
|
127
|
+
}
|
|
128
|
+
async init() {
|
|
129
|
+
if (this.initialized)
|
|
130
|
+
return;
|
|
131
|
+
const storage = this.storage;
|
|
132
|
+
if (!storage.getBinaryBlobPath) {
|
|
133
|
+
throw new Error('NativeBinaryEntityIdMapperWrapper requires a storage adapter that ' +
|
|
134
|
+
'exposes getBinaryBlobPath() (filesystem-backed). For cloud adapters, ' +
|
|
135
|
+
'use NativeEntityIdMapperWrapper (JSON variant) instead.');
|
|
136
|
+
}
|
|
137
|
+
const uuidToIntPath = storage.getBinaryBlobPath(this.uuidToIntKey);
|
|
138
|
+
const intToUuidPath = storage.getBinaryBlobPath(this.intToUuidKey);
|
|
139
|
+
if (!uuidToIntPath || !intToUuidPath) {
|
|
140
|
+
throw new Error(`NativeBinaryEntityIdMapperWrapper: getBinaryBlobPath returned null for ` +
|
|
141
|
+
`${this.uuidToIntKey} or ${this.intToUuidKey}`);
|
|
142
|
+
}
|
|
143
|
+
const bindings = loadNativeModule();
|
|
144
|
+
const NativeBinaryIdMapper = bindings.NativeBinaryIdMapper;
|
|
145
|
+
if (!NativeBinaryIdMapper) {
|
|
146
|
+
throw new Error('NativeBinaryIdMapper binding missing from cor native module — ' +
|
|
147
|
+
'this build of cor is older than the BinaryIdMapper feature');
|
|
148
|
+
}
|
|
149
|
+
const config = {
|
|
150
|
+
uuidToIntPath,
|
|
151
|
+
intToUuidPath,
|
|
152
|
+
intToUuidSize: this.intToUuidSize,
|
|
153
|
+
uuidToIntSize: this.uuidToIntSize,
|
|
154
|
+
bucketCapacity: this.bucketCapacity,
|
|
155
|
+
maxGlobalDepth: this.maxGlobalDepth,
|
|
156
|
+
idSpace: this.requestedIdSpace,
|
|
157
|
+
};
|
|
158
|
+
// Explicitly distinguish "fresh install" from "existing files".
|
|
159
|
+
// Both files must exist together (paired write semantics) — a
|
|
160
|
+
// half-present state is corruption from a crash between file
|
|
161
|
+
// creations and is surfaced as an error rather than silently
|
|
162
|
+
// recreated.
|
|
163
|
+
const uuidFileExists = existsSync(uuidToIntPath);
|
|
164
|
+
const intFileExists = existsSync(intToUuidPath);
|
|
165
|
+
if (uuidFileExists && intFileExists) {
|
|
166
|
+
this.native = NativeBinaryIdMapper.openExisting(config);
|
|
167
|
+
}
|
|
168
|
+
else if (!uuidFileExists && !intFileExists) {
|
|
169
|
+
this.native = NativeBinaryIdMapper.create(config);
|
|
170
|
+
}
|
|
171
|
+
else {
|
|
172
|
+
throw new Error(`NativeBinaryEntityIdMapperWrapper: half-present file pair — ` +
|
|
173
|
+
`${this.uuidToIntKey} ${uuidFileExists ? 'exists' : 'missing'}, ` +
|
|
174
|
+
`${this.intToUuidKey} ${intFileExists ? 'exists' : 'missing'}. ` +
|
|
175
|
+
`Refusing to silently recreate; investigate manually.`);
|
|
176
|
+
}
|
|
177
|
+
// Reflect the on-disk IdSpace — authoritative over the requested
|
|
178
|
+
// value when openExisting opens a file with a different mode.
|
|
179
|
+
this.resolvedIdSpace = this.native.idSpace();
|
|
180
|
+
this.initialized = true;
|
|
181
|
+
if (prodLog?.debug) {
|
|
182
|
+
prodLog.debug(`[cor] BinaryIdMapper wired: paths=[${uuidToIntPath}, ${intToUuidPath}], idSpace=${this.resolvedIdSpace}`);
|
|
183
|
+
}
|
|
184
|
+
}
|
|
185
|
+
/**
|
|
186
|
+
* Report the mapper's actual IdSpace mode. Returns `'u32'` before
|
|
187
|
+
* `init()` (the default the wrapper assumes); after init, returns the
|
|
188
|
+
* mode reported by the native binding (which is authoritative).
|
|
189
|
+
*/
|
|
190
|
+
getIdSpace() {
|
|
191
|
+
return this.resolvedIdSpace;
|
|
192
|
+
}
|
|
193
|
+
// -- EntityIdMapperProvider surface (number-typed, brainy contract) --
|
|
194
|
+
/**
|
|
195
|
+
* Allocate or retrieve the entity int for `uuid`. Returns a JS
|
|
196
|
+
* `number`. In U64 mode, routes through the BigInt sibling and
|
|
197
|
+
* throws {@link EntityIdSpaceExceeded} if the allocated int exceeds
|
|
198
|
+
* `Number.MAX_SAFE_INTEGER` — at that point the caller MUST switch
|
|
199
|
+
* to `getOrAssignBig` for the full u64 range.
|
|
200
|
+
*/
|
|
201
|
+
getOrAssign(uuid) {
|
|
202
|
+
const native = this.ensure();
|
|
203
|
+
if (this.resolvedIdSpace === 'u64') {
|
|
204
|
+
return bigToSafeNumber(native.getOrAssignBig(this.encode(uuid)), 'getOrAssign');
|
|
205
|
+
}
|
|
206
|
+
return native.getOrAssign(this.encode(uuid));
|
|
207
|
+
}
|
|
208
|
+
/**
|
|
209
|
+
* Look up the UUID for `intId`. Accepts a JS `number` — in U64 mode
|
|
210
|
+
* this is a lossy conversion above 2^53; use {@link getUuidBig} for
|
|
211
|
+
* the full u64 range.
|
|
212
|
+
*/
|
|
213
|
+
getUuid(intId) {
|
|
214
|
+
const native = this.ensure();
|
|
215
|
+
const buf = this.resolvedIdSpace === 'u64'
|
|
216
|
+
? native.getUuidBig(BigInt(intId))
|
|
217
|
+
: native.getUuid(intId);
|
|
218
|
+
if (!buf)
|
|
219
|
+
return undefined;
|
|
220
|
+
return this.decode(buf);
|
|
221
|
+
}
|
|
222
|
+
/**
|
|
223
|
+
* Look up the entity int for `uuid`. Returns a JS `number`. In U64
|
|
224
|
+
* mode throws {@link EntityIdSpaceExceeded} if the int exceeds
|
|
225
|
+
* `Number.MAX_SAFE_INTEGER`.
|
|
226
|
+
*/
|
|
227
|
+
getInt(uuid) {
|
|
228
|
+
const native = this.ensure();
|
|
229
|
+
if (this.resolvedIdSpace === 'u64') {
|
|
230
|
+
const big = native.getIntBig(this.encode(uuid));
|
|
231
|
+
return big == null ? undefined : bigToSafeNumber(big, 'getInt');
|
|
232
|
+
}
|
|
233
|
+
const out = native.getInt(this.encode(uuid));
|
|
234
|
+
return out == null ? undefined : out;
|
|
235
|
+
}
|
|
236
|
+
remove(uuid) {
|
|
237
|
+
const native = this.ensure();
|
|
238
|
+
return native.remove(this.encode(uuid));
|
|
239
|
+
}
|
|
240
|
+
async flush() {
|
|
241
|
+
const native = this.ensure();
|
|
242
|
+
native.flush();
|
|
243
|
+
}
|
|
244
|
+
async clear() {
|
|
245
|
+
// Reset by recreating the files. Atomicity caveat: any concurrent
|
|
246
|
+
// reader holds a stale mmap. Brainy calls clear() during clear()
|
|
247
|
+
// operations that already block other access; this is fine.
|
|
248
|
+
this.initialized = false;
|
|
249
|
+
this.native = null;
|
|
250
|
+
await this.init();
|
|
251
|
+
}
|
|
252
|
+
/**
|
|
253
|
+
* Materialise every live int id into a JS `number[]`. **U32 mode
|
|
254
|
+
* only.** U64 mode throws — the native binding refuses to allocate
|
|
255
|
+
* a giant JS array at the scale a U64 brain implies. Iterate via
|
|
256
|
+
* the BigInt sibling iterator (TBD — a follow-up surfaces it on the
|
|
257
|
+
* wrapper) for U64 brains.
|
|
258
|
+
*/
|
|
259
|
+
getAllIntIds() {
|
|
260
|
+
const native = this.ensure();
|
|
261
|
+
if (this.resolvedIdSpace === 'u64') {
|
|
262
|
+
throw new Error('getAllIntIds: not supported in U64 mode — the materialised ' +
|
|
263
|
+
'number[] would risk OOM at billion scale. Use the BigInt ' +
|
|
264
|
+
'streaming iterator on the underlying native binding instead.');
|
|
265
|
+
}
|
|
266
|
+
return native.getAllIntIds();
|
|
267
|
+
}
|
|
268
|
+
intsIterableToUuids(ints) {
|
|
269
|
+
const native = this.ensure();
|
|
270
|
+
const u64 = this.resolvedIdSpace === 'u64';
|
|
271
|
+
const out = [];
|
|
272
|
+
for (const i of ints) {
|
|
273
|
+
const buf = u64 ? native.getUuidBig(BigInt(i)) : native.getUuid(i);
|
|
274
|
+
if (buf)
|
|
275
|
+
out.push(this.decode(buf));
|
|
276
|
+
}
|
|
277
|
+
return out;
|
|
278
|
+
}
|
|
279
|
+
get size() {
|
|
280
|
+
if (!this.initialized || !this.native)
|
|
281
|
+
return 0;
|
|
282
|
+
if (this.resolvedIdSpace === 'u64') {
|
|
283
|
+
return bigToSafeNumber(this.native.sizeBig(), 'size');
|
|
284
|
+
}
|
|
285
|
+
return this.native.size();
|
|
286
|
+
}
|
|
287
|
+
// -- BigInt sibling surface (u64-safe, works in both modes) -------
|
|
288
|
+
/**
|
|
289
|
+
* Allocate or retrieve the entity int for `uuid` as a `bigint`.
|
|
290
|
+
* Lossless across the full u64 range; safe to call in either mode.
|
|
291
|
+
*/
|
|
292
|
+
getOrAssignBig(uuid) {
|
|
293
|
+
const native = this.ensure();
|
|
294
|
+
return native.getOrAssignBig(this.encode(uuid));
|
|
295
|
+
}
|
|
296
|
+
/** Look up the entity int for `uuid` as a `bigint`. */
|
|
297
|
+
getIntBig(uuid) {
|
|
298
|
+
const native = this.ensure();
|
|
299
|
+
const out = native.getIntBig(this.encode(uuid));
|
|
300
|
+
return out == null ? undefined : out;
|
|
301
|
+
}
|
|
302
|
+
/** Look up the UUID for `int` (passed as a `bigint`). */
|
|
303
|
+
getUuidBig(int) {
|
|
304
|
+
const native = this.ensure();
|
|
305
|
+
const buf = native.getUuidBig(int);
|
|
306
|
+
if (!buf)
|
|
307
|
+
return undefined;
|
|
308
|
+
return this.decode(buf);
|
|
309
|
+
}
|
|
310
|
+
/** Live (non-tombstone) entry count as a `bigint`. */
|
|
311
|
+
sizeBig() {
|
|
312
|
+
if (!this.initialized || !this.native)
|
|
313
|
+
return 0n;
|
|
314
|
+
return this.native.sizeBig();
|
|
315
|
+
}
|
|
316
|
+
/** Largest int ever assigned + 1, as a `bigint`. */
|
|
317
|
+
nextIntBig() {
|
|
318
|
+
return this.ensure().nextIntBig();
|
|
319
|
+
}
|
|
320
|
+
// ---------------------------------------------------------------
|
|
321
|
+
// UUID string ↔ Buffer conversion
|
|
322
|
+
// ---------------------------------------------------------------
|
|
323
|
+
/**
|
|
324
|
+
* Encode a UUID string into a 16-byte Buffer. Accepts canonical
|
|
325
|
+
* 36-char form (with hyphens) or any 32-hex-digit form. Throws on
|
|
326
|
+
* malformed input.
|
|
327
|
+
*/
|
|
328
|
+
encode(uuid) {
|
|
329
|
+
const hex = uuid.replace(/-/g, '').toLowerCase();
|
|
330
|
+
if (hex.length !== 32 || !/^[0-9a-f]{32}$/.test(hex)) {
|
|
331
|
+
throw new Error(`NativeBinaryEntityIdMapperWrapper: invalid UUID string "${uuid}"`);
|
|
332
|
+
}
|
|
333
|
+
return Buffer.from(hex, 'hex');
|
|
334
|
+
}
|
|
335
|
+
/** Decode a 16-byte Buffer back to canonical UUID string. */
|
|
336
|
+
decode(buf) {
|
|
337
|
+
if (buf.length !== UUID_BYTES) {
|
|
338
|
+
throw new Error(`NativeBinaryEntityIdMapperWrapper: native returned ${buf.length}-byte uuid (expected ${UUID_BYTES})`);
|
|
339
|
+
}
|
|
340
|
+
const hex = buf.toString('hex');
|
|
341
|
+
return (hex.slice(0, 8) +
|
|
342
|
+
'-' +
|
|
343
|
+
hex.slice(8, 12) +
|
|
344
|
+
'-' +
|
|
345
|
+
hex.slice(12, 16) +
|
|
346
|
+
'-' +
|
|
347
|
+
hex.slice(16, 20) +
|
|
348
|
+
'-' +
|
|
349
|
+
hex.slice(20, 32));
|
|
350
|
+
}
|
|
351
|
+
ensure() {
|
|
352
|
+
if (!this.initialized || !this.native) {
|
|
353
|
+
throw new Error('NativeBinaryEntityIdMapperWrapper: call init() before any operation');
|
|
354
|
+
}
|
|
355
|
+
return this.native;
|
|
356
|
+
}
|
|
357
|
+
}
|
|
358
|
+
//# sourceMappingURL=nativeBinaryEntityIdMapper.js.map
|
|
Binary file
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@soulcraft/cortex",
|
|
3
|
-
"version": "2.7.2",
|
|
3
|
+
"version": "2.7.4",
|
|
4
4
|
"description": "Native Rust acceleration for Brainy — SIMD distance, vector quantization, zero-copy mmap, native embeddings. Free tier for storage, Pro license for compute acceleration.",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"types": "dist/index.d.ts",
|