@soulcraft/cortex 2.7.2 → 2.7.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
File without changes
@@ -0,0 +1,168 @@
1
+ /**
2
+ * @module hnsw/AdaptiveDiskAnnModeSelector
3
+ * @description **Adaptive DiskANN** mode selector — Piece I of the
4
+ * cor 3.0 plan. Closes the loop on Track 2: the previous pieces
5
+ * built the *capability* (Piece G — Mode 1 no-PQ; Piece H — open-time
6
+ * madvise profile; Piece J — `/proc/meminfo` observation), and this
7
+ * module is the *decision* that wires those signals together into a
8
+ * single mode choice at build / open time.
9
+ *
10
+ * ## What this picks
11
+ *
12
+ * - **`"in-memory"`** — Mode 1. Build path: skip PQ entirely; vectors
13
+ * stay RAM-resident; the walk is exact (sub-ms latency). Open
14
+ * path: only valid when the on-disk header already records
15
+ * `pq_m=0` — otherwise a PQ-enabled file would force the
16
+ * `search_pq` path regardless of the open-time hint, so "in-memory"
17
+ * open mode on a PQ file would only waste RAM on vectors the walk
18
+ * doesn't read.
19
+ * - **`"hybrid"`** — Mode 2. PQ codes pinned in RAM via `MADV_WILLNEED`;
20
+ * vectors paged on demand for rerank. The default for medium-scale
21
+ * brains.
22
+ * - **`"on-disk"`** — Mode 3. PQ codes can page out under memory
23
+ * pressure. The choice for memory-constrained or multi-tenant
24
+ * deployments where many brainy+cor instances share a box.
25
+ *
26
+ * ## Algorithm
27
+ *
28
+ * Per-brain RAM budget = `0.5 × (availableBytes / max(1, activeBrains))`.
29
+ * The 0.5 headroom factor leaves the other half for OS slop, query
30
+ * intermediates, and the brain's other subsystems (metadata LSM,
31
+ * embeddings, etc.).
32
+ *
33
+ * 1. If the input names a Mode 1 file (`pqM === 0` from the header),
34
+ * return `"in-memory"` — no other open mode is valid for a no-PQ
35
+ * file.
36
+ * 2. At BUILD time (`pqM` undefined), try Mode 1 first: if the
37
+ * vectors + graph cost fits the RAM budget AND the build cost
38
+ * (`N × dim`) is below the dim-aware ceiling from
39
+ * {@link mode1BuildCostCeiling}, build no-PQ. Above the ceiling,
40
+ * Mode 1's `O(N · dim)` build becomes the limiting factor —
41
+ * Mode 2's PQ-coded distance compute is 5-10× faster at the same
42
+ * N because it replaces a `dim`-wide dot product with a
43
+ * `m=16`-wide table lookup. Falling through to PQ at large N is
44
+ * the right call even when vectors fit in RAM.
45
+ * 3. For PQ-enabled files (or build-time-too-big cases), compare the
46
+ * codes-section + codebook RAM cost against the budget:
47
+ * fits → `"hybrid"`; doesn't fit → `"on-disk"`.
48
+ * 4. If V8 heap pressure is `"critical"`, demote `"hybrid"` →
49
+ * `"on-disk"` — we don't want to pile JS-heap pressure on top of
50
+ * OS-RAM pressure when GC thrash will cost more than the SSD
51
+ * page-in.
52
+ *
53
+ * ## Decision granularity
54
+ *
55
+ * Brain-open-time decision; not re-evaluated mid-flight. Mode
56
+ * switches mid-flight would require a rebuild (Mode 1 → Mode 2 means
57
+ * training a PQ codebook; Mode 2 → Mode 1 means evicting codes +
58
+ * pinning vectors), so the selector picks once and the brain runs in
59
+ * the chosen profile until restart. A future piece may explore
60
+ * triggered rebuilds when sustained pressure crosses a threshold;
61
+ * for now the choice is sticky.
62
+ */
63
+ import { ResourceManager } from '../resource/ResourceManager.js';
/**
 * Operating modes the Adaptive DiskANN selector can choose.
 *
 * These literals use the same vocabulary the napi surface accepts on
 * `DiskAnnConfig.mode` (Piece G) and `NativeDiskAnn.openExisting`
 * (Piece H), so a selector result can be handed to the Rust layer
 * without any translation step.
 */
export type AdaptiveDiskAnnMode =
    | 'in-memory'
    | 'hybrid'
    | 'on-disk';
/**
 * Shape of one brain's index as seen by the selector. When OPENING an
 * existing index the values are read from the file header; when
 * BUILDING, the caller passes the expected shape and leaves `pqM`
 * unset to signal a build-time decision.
 */
export interface BrainStats {
    /** Vector count; for a new build, the expected total. */
    nodeCount: number;
    /** Dimensionality of each vector. */
    dim: number;
    /** Maximum degree (R) of the Vamana graph. */
    maxDegree: number;
    /**
     * Number of PQ subspaces. A value of `0` marks a Mode 1 (no-PQ)
     * file, for which `"in-memory"` is the only valid open mode.
     * Leave it out (or pass `undefined`) at build time so the selector
     * may choose between Mode 1 (no PQ) and Modes 2/3 (PQ-enabled).
     */
    pqM?: number;
}
/**
 * Snapshot of the machine's resources at the moment the decision is
 * made. Production code derives it from the {@link ResourceManager};
 * tests build the object by hand.
 */
export interface SystemMemoryState {
    /**
     * Primary RAM signal for the selector: the kernel-reported
     * `MemAvailable` on Linux, or the `os.freemem()` fallback.
     */
    availableBytes: number;
    /**
     * How many brainy instances the ResourceManager already tracks.
     * `availableBytes` is divided by this count to obtain a per-brain
     * share.
     */
    activeBrains: number;
    /**
     * Classification of V8 heap pressure, using the same three levels
     * as the system-RSS pressure. A value of `"critical"` demotes
     * Hybrid to OnDisk. Optional: leave unset when the caller does not
     * observe the V8 heap.
     */
    v8Pressure?: 'normal' | 'elevated' | 'critical';
}
/**
 * Machine-readable explanation for the selected mode. Intended for
 * production diagnostics: the single code in a log line is enough to
 * reconstruct which branch of the selector produced the result.
 */
export type ModeSelectionReason =
    | 'no-pq-file'
    | 'fits-in-memory'
    | 'mode1-build-too-large'
    | 'codes-pinned'
    | 'codes-paged'
    | 'v8-pressure-demote';
/**
 * Result returned by the selector. Besides the mode itself it carries
 * the inputs to the decision, so that logging `mode`, `reason`, and
 * `perBrainAvailable` (roughly 50 bytes) lets an operator work out
 * exactly why a brain ended up in its current profile.
 */
export interface ModeSelection {
    /** The operating mode that was chosen. */
    mode: AdaptiveDiskAnnMode;
    /** Which selector branch produced {@link ModeSelection.mode}. */
    reason: ModeSelectionReason;
    /** Estimated RAM cost, in bytes, of running in the chosen mode. */
    estimatedBytes: number;
    /** The per-brain available memory figure the calculation used. */
    perBrainAvailable: number;
}
/**
 * Looks up the Mode 1 build-cost ceiling that applies at the given
 * dimension. A build whose cost exceeds the returned value falls
 * through from Mode 1 to Mode 2.
 *
 * Exported so tests can assert the boundary calibration directly.
 *
 * @param dim Vector dimension.
 * @returns The ceiling for that dimension.
 */
export declare function mode1BuildCostCeiling(dim: number): number;
/**
 * The selector itself, as a pure function of its two arguments.
 * Wrapper code passes concrete inputs; tests build
 * {@link BrainStats} and {@link SystemMemoryState} by hand so each
 * branch can be asserted in isolation.
 *
 * @param stats Shape of the brain's index.
 * @param sys Resource situation at decision time.
 * @returns The chosen mode together with the reasoning fields.
 */
export declare function pickMode(stats: BrainStats, sys: SystemMemoryState): ModeSelection;
/**
 * Shortcut that reads the selector inputs from a
 * {@link ResourceManager} (the singleton when none is supplied) and
 * returns the resulting choice. `NativeDiskAnnWrapper` uses it in
 * production at build and open time; tests usually call
 * {@link pickMode} instead so the system state can be pinned without
 * touching the singleton.
 *
 * @param stats Shape of the brain's index.
 * @param rm Resource manager to query; optional.
 * @returns The selector's decision.
 */
export declare function selectModeFromResourceManager(stats: BrainStats, rm?: ResourceManager): ModeSelection;
/**
 * Mirrors what `MappedIndex::open_with_mode(path, Auto)` resolves to
 * for a given header (Piece H semantics). Having the answer on the
 * JS side lets the wrapper skip a reopen whenever the selector agrees
 * with Auto, so the file is not opened twice on the cold path.
 *
 * @param header Header fields of the index file; only `pqM` is read.
 * @returns The mode Auto would pick for that header.
 */
export declare function autoModeForHeader(header: {
    pqM: number;
}): AdaptiveDiskAnnMode;
168
+ //# sourceMappingURL=AdaptiveDiskAnnModeSelector.d.ts.map
@@ -0,0 +1,276 @@
1
+ /**
2
+ * @module hnsw/AdaptiveDiskAnnModeSelector
3
+ * @description **Adaptive DiskANN** mode selector — Piece I of the
4
+ * cor 3.0 plan. Closes the loop on Track 2: the previous pieces
5
+ * built the *capability* (Piece G — Mode 1 no-PQ; Piece H — open-time
6
+ * madvise profile; Piece J — `/proc/meminfo` observation), and this
7
+ * module is the *decision* that wires those signals together into a
8
+ * single mode choice at build / open time.
9
+ *
10
+ * ## What this picks
11
+ *
12
+ * - **`"in-memory"`** — Mode 1. Build path: skip PQ entirely; vectors
13
+ * stay RAM-resident; the walk is exact (sub-ms latency). Open
14
+ * path: only valid when the on-disk header already records
15
+ * `pq_m=0` — otherwise a PQ-enabled file would force the
16
+ * `search_pq` path regardless of the open-time hint, so "in-memory"
17
+ * open mode on a PQ file would only waste RAM on vectors the walk
18
+ * doesn't read.
19
+ * - **`"hybrid"`** — Mode 2. PQ codes pinned in RAM via `MADV_WILLNEED`;
20
+ * vectors paged on demand for rerank. The default for medium-scale
21
+ * brains.
22
+ * - **`"on-disk"`** — Mode 3. PQ codes can page out under memory
23
+ * pressure. The choice for memory-constrained or multi-tenant
24
+ * deployments where many brainy+cor instances share a box.
25
+ *
26
+ * ## Algorithm
27
+ *
28
+ * Per-brain RAM budget = `0.5 × (availableBytes / max(1, activeBrains))`.
29
+ * The 0.5 headroom factor leaves the other half for OS slop, query
30
+ * intermediates, and the brain's other subsystems (metadata LSM,
31
+ * embeddings, etc.).
32
+ *
33
+ * 1. If the input names a Mode 1 file (`pqM === 0` from the header),
34
+ * return `"in-memory"` — no other open mode is valid for a no-PQ
35
+ * file.
36
+ * 2. At BUILD time (`pqM` undefined), try Mode 1 first: if the
37
+ * vectors + graph cost fits the RAM budget AND the build cost
38
+ * (`N × dim`) is below the dim-aware ceiling from
39
+ * {@link mode1BuildCostCeiling}, build no-PQ. Above the ceiling,
40
+ * Mode 1's `O(N · dim)` build becomes the limiting factor —
41
+ * Mode 2's PQ-coded distance compute is 5-10× faster at the same
42
+ * N because it replaces a `dim`-wide dot product with a
43
+ * `m=16`-wide table lookup. Falling through to PQ at large N is
44
+ * the right call even when vectors fit in RAM.
45
+ * 3. For PQ-enabled files (or build-time-too-big cases), compare the
46
+ * codes-section + codebook RAM cost against the budget:
47
+ * fits → `"hybrid"`; doesn't fit → `"on-disk"`.
48
+ * 4. If V8 heap pressure is `"critical"`, demote `"hybrid"` →
49
+ * `"on-disk"` — we don't want to pile JS-heap pressure on top of
50
+ * OS-RAM pressure when GC thrash will cost more than the SSD
51
+ * page-in.
52
+ *
53
+ * ## Decision granularity
54
+ *
55
+ * Brain-open-time decision; not re-evaluated mid-flight. Mode
56
+ * switches mid-flight would require a rebuild (Mode 1 → Mode 2 means
57
+ * training a PQ codebook; Mode 2 → Mode 1 means evicting codes +
58
+ * pinning vectors), so the selector picks once and the brain runs in
59
+ * the chosen profile until restart. A future piece may explore
60
+ * triggered rebuilds when sustained pressure crosses a threshold;
61
+ * for now the choice is sticky.
62
+ */
63
+ import { ResourceManager, } from '../resource/ResourceManager.js';
/**
 * Share of the per-brain available memory that the selected mode's
 * working set is allowed to occupy. The other half is reserved for OS
 * slop, query intermediates, and the brain's other subsystems
 * (metadata LSM, embeddings, page-cache headroom).
 */
const HEADROOM_FACTOR = 0.5;
/**
 * PQ subspace count assumed when estimating costs at BUILD time.
 * Mirrors the {@link NativeDiskAnnWrapper} default and is consulted
 * only when the caller has not pinned `pqM`; an existing file's header
 * always supplies the real value.
 */
const DEFAULT_PQ_M_FOR_ESTIMATE = 16;
/**
 * Codebook ksub (centroids per subspace) assumed for estimation: 256,
 * the 8-bit-code published default used by the build path (Piece 13).
 */
const DEFAULT_KSUB = 256;
/**
 * Ceilings on the Mode 1 build cost, measured in cost-units of
 * `N × dim`. Above the applicable ceiling the `O(N · dim)` build time
 * decides the selector outcome even if vectors + graph fit in RAM:
 * Mode 2's PQ-coded distance compute is 5-10× faster (a 16-byte code
 * lookup instead of a `dim`-wide dot product), so a brain that needs
 * ~50 minutes to build under Mode 1 needs ~5-10 minutes under Mode 2,
 * paying ~3× query latency for it (sub-ms → ~3 ms p50).
 *
 * Two ceilings exist because wall-clock per cost-unit depends heavily
 * on dim: SIMD width, cache footprint, and vectorised distance compute
 * all favour smaller dims by roughly an order of magnitude per
 * cost-unit, so one dim-blind value would be miscalibrated for one
 * regime or the other. `MODE1_DIM_BOUNDARY` is the dim at which the
 * selector switches from the small-dim to the large-dim ceiling.
 *
 * Anchor measurements (bxl9000, 32-core Zen 4, AVX-512 dispatched
 * kernel from `e6d3756`):
 *
 * | Workload | N×dim | Build (Mode 1) | μs/cost-unit |
 * |---|---:|---:|---:|
 * | SIFT1M (dim=128) | 128M | 83 s | 0.65 |
 * | SIFT10M (dim=128) | 1.28B | 20 min | 0.93 |
 * | 1M × 384 | 384M | 17 min | 2.65 |
 * | 1M × 384 (pre-SIMD) | 384M | 47 min | 7.33 |
 *
 * Result files: `docs/verification/result-sift1m-auto.json`,
 * `result-sift10m-in-memory.json`, `result-1m-avx512.json`,
 * `result-1m-auto.json`.
 *
 * Calibration: the target wall-clock budget is ≤ 24 min (the
 * "acceptable one-time build" boundary). For dim ≤ 256 the time per
 * cost-unit stays under ~1 μs, putting 2 B cost-units at ≈ 33 min
 * worst case. For dim > 256 the time per cost-unit rises into the
 * 2-7 μs range, which caps the budget near 400 M cost-units.
 * NOTE(review): the ≈ 33 min worst case exceeds the stated ≤ 24 min
 * target — confirm which figure the small-dim ceiling is meant to
 * honour.
 *
 * These values are internal calibration, not a user-facing knob.
 * Callers who need a particular mode bypass the selector entirely
 * through the existing escape hatch,
 * `DiskAnnConfig.mode = "in-memory" | "hybrid" | "on-disk"`.
 */
const MODE1_BUILD_COST_CEILING_SMALL_DIM = 2_000_000_000;
const MODE1_BUILD_COST_CEILING_LARGE_DIM = 400_000_000;
const MODE1_DIM_BOUNDARY = 256;
+ /**
127
+ * Returns the build-cost ceiling above which Mode 1 falls through to
128
+ * Mode 2 at this dim. See {@link MODE1_BUILD_COST_CEILING_SMALL_DIM}.
129
+ *
130
+ * Exposed for tests that need to assert the boundary calibration
131
+ * directly; not exported beyond the module.
132
+ */
133
+ export function mode1BuildCostCeiling(dim) {
134
+ return dim <= MODE1_DIM_BOUNDARY
135
+ ? MODE1_BUILD_COST_CEILING_SMALL_DIM
136
+ : MODE1_BUILD_COST_CEILING_LARGE_DIM;
137
+ }
/**
 * Pure selector. The wrapper code calls this with concrete inputs;
 * tests construct {@link BrainStats} + {@link SystemMemoryState}
 * directly to assert each branch.
 *
 * Decision order:
 *  1. `pqM === 0` (Mode 1 file): always `"in-memory"`.
 *  2. Build time (`pqM` absent): `"in-memory"` when the Mode 1 RAM
 *     estimate fits the budget AND `N × dim` is within the dim-aware
 *     build-cost ceiling.
 *  3. Otherwise: `"hybrid"` when the codes + codebook estimate fits
 *     the budget, else `"on-disk"`.
 *  4. A `"critical"` V8 heap demotes `"hybrid"` to `"on-disk"`.
 *
 * @param {BrainStats} stats Shape of the brain's index. `pqM` may be
 *   `0` (no-PQ file), a positive subspace count (PQ file), or absent
 *   (`undefined` / `null`) for a build-time decision.
 * @param {SystemMemoryState} sys Resource situation at decision time.
 * @returns {ModeSelection} The chosen mode, the reason code, the RAM
 *   estimate for that mode, and the per-brain memory share used.
 */
export function pickMode(stats, sys) {
    // `Math.max(1, x)` is NaN when `x` is NaN or undefined, which would
    // poison the budget and silently force "on-disk". Treat a missing
    // brain count as a single brain instead.
    const flooredBrains = Math.max(1, sys.activeBrains);
    const brainCount = Number.isNaN(flooredBrains) ? 1 : flooredBrains;
    const perBrainAvailable = sys.availableBytes / brainCount;
    const budget = HEADROOM_FACTOR * perBrainAvailable;
    // Case A: file is already Mode 1 (pqM === 0 in header). Only
    // "in-memory" is algorithmically valid — codes don't exist.
    if (stats.pqM === 0) {
        return {
            mode: 'in-memory',
            reason: 'no-pq-file',
            estimatedBytes: estimateMode1Bytes(stats),
            perBrainAvailable,
        };
    }
    // Case B: BUILD-time decision (pqM absent). `== null` accepts both
    // `undefined` and `null`, matching the `??` fallback used for the
    // codes estimate below. Mode 1 wins only if BOTH the RAM cost fits
    // the budget AND the build cost (N × dim) is within the dim-aware
    // ceiling; the second guard catches the "vectors fit in RAM but the
    // build would take 47 minutes" case.
    let mode1BuildTooLarge = false;
    if (stats.pqM == null) {
        const mode1Cost = estimateMode1Bytes(stats);
        if (mode1Cost <= budget) {
            const buildCost = stats.nodeCount * stats.dim;
            const buildCostCeiling = mode1BuildCostCeiling(stats.dim);
            if (buildCost <= buildCostCeiling) {
                return {
                    mode: 'in-memory',
                    reason: 'fits-in-memory',
                    estimatedBytes: mode1Cost,
                    perBrainAvailable,
                };
            }
            // Mode 1 was RAM-feasible but build-cost-prohibitive. Remember
            // that so a resulting "hybrid" carries the distinct
            // `mode1-build-too-large` reason and production logs can tell
            // this fall-through apart from the RAM-driven one.
            mode1BuildTooLarge = buildCost > buildCostCeiling;
        }
    }
    // Case C: PQ-enabled file (pqM > 0) OR build-time fall-through.
    // Choose between Hybrid (codes pinned) and OnDisk (codes can page)
    // based on whether the codes section fits the budget. The check is
    // applied on every path, so "hybrid" is never reported with a codes
    // estimate that exceeds the budget.
    const m = stats.pqM ?? DEFAULT_PQ_M_FOR_ESTIMATE;
    const codesCost = estimateCodesBytes(stats, m);
    let mode;
    let reason;
    if (codesCost > budget) {
        mode = 'on-disk';
        reason = 'codes-paged';
    }
    else if (sys.v8Pressure === 'critical') {
        // Defensive demote: if V8 heap is critical, don't pile JS-heap
        // pressure on top of OS-RAM pressure. Drop Hybrid → OnDisk so the
        // OS page cache stays in charge of codes residency.
        mode = 'on-disk';
        reason = 'v8-pressure-demote';
    }
    else {
        mode = 'hybrid';
        reason = mode1BuildTooLarge ? 'mode1-build-too-large' : 'codes-pinned';
    }
    return {
        mode,
        reason,
        estimatedBytes: codesCost,
        perBrainAvailable,
    };
}
/**
 * RAM commitment of a Mode 1 (no-PQ) index. In Mode 1 the on-disk
 * file's codebook and codes sections are zero bytes, so only the
 * graph and vectors sections cost RAM if they are pre-paged. The
 * estimate counts `dim + maxDegree` four-byte entries per node.
 *
 * @param {BrainStats} stats Reads `nodeCount`, `dim`, and `maxDegree`.
 * @returns {number} Estimated bytes.
 */
function estimateMode1Bytes(stats) {
    const { nodeCount, dim, maxDegree } = stats;
    const entriesPerNode = dim + maxDegree;
    return nodeCount * entriesPerNode * 4;
}
/**
 * RAM commitment of the codes section plus the codebook for Modes
 * 2/3. The codes term (`nodeCount × m` bytes) dominates at scale; the
 * codebook term does not grow with the node count
 * (m × ksub × dsub × 4 = ksub × dim × 4, ~1 MB at the default
 * ksub=256).
 *
 * @param {BrainStats} stats Reads `nodeCount` and `dim`.
 * @param {number} m PQ subspace count, i.e. code bytes per node.
 * @returns {number} Estimated bytes.
 */
function estimateCodesBytes(stats, m) {
    const { nodeCount, dim } = stats;
    return nodeCount * m + DEFAULT_KSUB * dim * 4;
}
/**
 * Shortcut that reads the selector inputs from a
 * {@link ResourceManager} (the singleton when none is supplied) and
 * returns the resulting choice. `NativeDiskAnnWrapper` uses it in
 * production at build and open time; tests usually call
 * {@link pickMode} instead so the system state can be pinned without
 * touching the singleton.
 *
 * @param {BrainStats} stats Shape of the brain's index.
 * @param {ResourceManager} [rm] Source of the OS memory snapshot and
 *   the resource profile.
 * @returns {ModeSelection} Whatever {@link pickMode} decides.
 */
export function selectModeFromResourceManager(stats, rm = ResourceManager.getInstance()) {
    const osSnapshot = rm.getOsMemorySnapshot();
    const resourceProfile = rm.getResourceProfile();
    const { availableBytes } = osSnapshot;
    const { activeInstances: activeBrains, v8Heap } = resourceProfile;
    const v8Pressure = v8Heap.pressure;
    return pickMode(stats, { availableBytes, activeBrains, v8Pressure });
}
/**
 * Mirrors what `MappedIndex::open_with_mode(path, Auto)` resolves to
 * for a given header (Piece H semantics). Having the answer on the
 * JS side lets the wrapper skip a reopen whenever the selector agrees
 * with Auto, so the file is not opened twice on the cold path.
 *
 * @param {{ pqM: number }} header Header fields; only `pqM` is read.
 * @returns {AdaptiveDiskAnnMode} `'in-memory'` when `pqM` is exactly
 *   `0`, otherwise `'hybrid'`.
 */
export function autoModeForHeader(header) {
    if (header.pqM === 0) {
        return 'in-memory';
    }
    return 'hybrid';
}
276
+ //# sourceMappingURL=AdaptiveDiskAnnModeSelector.js.map