@soulcraft/cortex 2.7.2 → 2.7.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +0 -0
- package/dist/hnsw/AdaptiveDiskAnnModeSelector.d.ts +168 -0
- package/dist/hnsw/AdaptiveDiskAnnModeSelector.js +276 -0
- package/dist/hnsw/NativeDiskAnnWrapper.d.ts +284 -0
- package/dist/hnsw/NativeDiskAnnWrapper.js +738 -0
- package/dist/hnsw/NativeHNSWWrapper.js +15 -2
- package/dist/legacyLayoutGuard.d.ts +61 -0
- package/dist/legacyLayoutGuard.js +187 -0
- package/dist/resource/OsMemoryProbe.d.ts +175 -0
- package/dist/resource/OsMemoryProbe.js +206 -0
- package/dist/utils/nativeBinaryEntityIdMapper.d.ts +199 -0
- package/dist/utils/nativeBinaryEntityIdMapper.js +358 -0
- package/native/brainy-native.node +0 -0
- package/package.json +1 -1
package/dist/cli.js
CHANGED
|
File without changes
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @module hnsw/AdaptiveDiskAnnModeSelector
|
|
3
|
+
* @description **Adaptive DiskANN** mode selector — Piece I of the
|
|
4
|
+
* cor 3.0 plan. Closes the loop on Track 2: the previous pieces
|
|
5
|
+
* built the *capability* (Piece G — Mode 1 no-PQ; Piece H — open-time
|
|
6
|
+
* madvise profile; Piece J — `/proc/meminfo` observation), and this
|
|
7
|
+
* module is the *decision* that wires those signals together into a
|
|
8
|
+
* single mode choice at build / open time.
|
|
9
|
+
*
|
|
10
|
+
* ## What this picks
|
|
11
|
+
*
|
|
12
|
+
* - **`"in-memory"`** — Mode 1. Build path: skip PQ entirely; vectors
|
|
13
|
+
* stay RAM-resident; the walk is exact (sub-ms latency). Open
|
|
14
|
+
* path: only valid when the on-disk header already records
|
|
15
|
+
* `pq_m=0` — otherwise a PQ-enabled file would force the
|
|
16
|
+
* `search_pq` path regardless of the open-time hint, so "in-memory"
|
|
17
|
+
* open mode on a PQ file would only waste RAM on vectors the walk
|
|
18
|
+
* doesn't read.
|
|
19
|
+
* - **`"hybrid"`** — Mode 2. PQ codes pinned in RAM via `MADV_WILLNEED`;
|
|
20
|
+
* vectors paged on demand for rerank. The default for medium-scale
|
|
21
|
+
* brains.
|
|
22
|
+
* - **`"on-disk"`** — Mode 3. PQ codes can page out under memory
|
|
23
|
+
* pressure. The choice for memory-constrained or multi-tenant
|
|
24
|
+
* deployments where many brainy+cor instances share a box.
|
|
25
|
+
*
|
|
26
|
+
* ## Algorithm
|
|
27
|
+
*
|
|
28
|
+
* Per-brain RAM budget = `0.5 × (availableBytes / max(1, activeBrains))`.
|
|
29
|
+
* The 0.5 headroom factor leaves the other half for OS slop, query
|
|
30
|
+
* intermediates, and the brain's other subsystems (metadata LSM,
|
|
31
|
+
* embeddings, etc.).
|
|
32
|
+
*
|
|
33
|
+
* 1. If the input names a Mode 1 file (`pqM === 0` from the header),
|
|
34
|
+
* return `"in-memory"` — no other open mode is valid for a no-PQ
|
|
35
|
+
* file.
|
|
36
|
+
* 2. At BUILD time (`pqM` undefined), try Mode 1 first: if the
|
|
37
|
+
* vectors + graph cost fits the RAM budget AND the build cost
|
|
38
|
+
* (`N × dim`) is below the dim-aware ceiling from
|
|
39
|
+
* {@link mode1BuildCostCeiling}, build no-PQ. Above the ceiling,
|
|
40
|
+
* Mode 1's `O(N · dim)` build becomes the limiting factor —
|
|
41
|
+
* Mode 2's PQ-coded distance compute is 5-10× faster at the same
|
|
42
|
+
* N because it replaces a `dim`-wide dot product with a
|
|
43
|
+
* `m=16`-wide table lookup. Falling through to PQ at large N is
|
|
44
|
+
* the right call even when vectors fit in RAM.
|
|
45
|
+
* 3. For PQ-enabled files (or build-time-too-big cases), compare the
|
|
46
|
+
* codes-section + codebook RAM cost against the budget:
|
|
47
|
+
* fits → `"hybrid"`; doesn't fit → `"on-disk"`.
|
|
48
|
+
* 4. If V8 heap pressure is `"critical"`, demote `"hybrid"` →
|
|
49
|
+
* `"on-disk"` — we don't want to pile JS-heap pressure on top of
|
|
50
|
+
* OS-RAM pressure when GC thrash will cost more than the SSD
|
|
51
|
+
* page-in.
|
|
52
|
+
*
|
|
53
|
+
* ## Decision granularity
|
|
54
|
+
*
|
|
55
|
+
* Brain-open-time decision; not re-evaluated mid-flight. Mode
|
|
56
|
+
* switches mid-flight would require a rebuild (Mode 1 → Mode 2 means
|
|
57
|
+
* training a PQ codebook; Mode 2 → Mode 1 means evicting codes +
|
|
58
|
+
* pinning vectors), so the selector picks once and the brain runs in
|
|
59
|
+
* the chosen profile until restart. A future piece may explore
|
|
60
|
+
* triggered rebuilds when sustained pressure crosses a threshold;
|
|
61
|
+
* for now the choice is sticky.
|
|
62
|
+
*/
|
|
63
|
+
import { ResourceManager } from '../resource/ResourceManager.js';
|
|
64
|
+
/**
|
|
65
|
+
* The three operating modes Adaptive DiskANN selects between.
|
|
66
|
+
*
|
|
67
|
+
* The strings match the vocabulary the napi surface accepts on
|
|
68
|
+
* `DiskAnnConfig.mode` (Piece G) and `NativeDiskAnn.openExisting`
|
|
69
|
+
* (Piece H), so the selector's output flows through to the Rust
|
|
70
|
+
* layer without translation.
|
|
71
|
+
*/
|
|
72
|
+
export type AdaptiveDiskAnnMode = 'in-memory' | 'hybrid' | 'on-disk';
|
|
73
|
+
/**
|
|
74
|
+
* Per-brain statistics the selector consumes. At OPEN time these
|
|
75
|
+
* come from the file header; at BUILD time the caller supplies the
|
|
76
|
+
* expected shape (omit `pqM` to signal "build-time decision").
|
|
77
|
+
*/
|
|
78
|
+
export interface BrainStats {
|
|
79
|
+
/** Number of vectors (N). For a new build, the expected total. Multiplies every per-node cost the selector estimates. */
|
|
80
|
+
nodeCount: number;
|
|
81
|
+
/** Vector dimension (components per vector). Also selects the Mode 1 build-cost ceiling. */
|
|
82
|
+
dim: number;
|
|
83
|
+
/** Vamana max degree (R). Counted per node in the Mode 1 RAM estimate. */
|
|
84
|
+
maxDegree: number;
|
|
85
|
+
/**
|
|
86
|
+
* PQ subspaces. `0` is the Mode 1 marker — file is already
|
|
87
|
+
* no-PQ; only `"in-memory"` is a valid open mode. Omit (or pass
|
|
88
|
+
* `undefined`) at build time so the selector can choose between
|
|
89
|
+
* Mode 1 (no PQ) and Modes 2/3 (PQ-enabled). A positive value marks an existing PQ-enabled file.
|
|
90
|
+
*/
|
|
91
|
+
pqM?: number;
|
|
92
|
+
}
|
|
93
|
+
/**
|
|
94
|
+
* Resource situation at decision time. The
|
|
95
|
+
* {@link ResourceManager} is the canonical source; tests construct
|
|
96
|
+
* directly.
|
|
97
|
+
*/
|
|
98
|
+
export interface SystemMemoryState {
|
|
99
|
+
/**
|
|
100
|
+
* Kernel-reported `MemAvailable` (Linux) or `os.freemem()`
|
|
101
|
+
* fallback, in bytes. The selector's primary RAM signal.
|
|
102
|
+
*/
|
|
103
|
+
availableBytes: number;
|
|
104
|
+
/**
|
|
105
|
+
* Number of brainy instances already tracked by the
|
|
106
|
+
* ResourceManager. Divides `availableBytes` into per-brain
|
|
107
|
+
* shares; the selector treats values below 1 as 1.
|
|
108
|
+
*/
|
|
109
|
+
activeBrains: number;
|
|
110
|
+
/**
|
|
111
|
+
* V8 heap pressure classification (same three-level shape as the
|
|
112
|
+
* system-RSS pressure). `"critical"` demotes Hybrid → OnDisk.
|
|
113
|
+
* Optional — omit if the caller doesn't observe V8 heap; only `"critical"` changes the outcome.
|
|
114
|
+
*/
|
|
115
|
+
v8Pressure?: 'normal' | 'elevated' | 'critical';
|
|
116
|
+
}
|
|
117
|
+
/**
|
|
118
|
+
* Reason codes for the chosen mode. Surface for production
|
|
119
|
+
* diagnostics — a single string is enough to reconstruct the
|
|
120
|
+
* selector's reasoning from a log line.
|
|
121
|
+
*/
|
|
122
|
+
export type ModeSelectionReason = 'no-pq-file' | 'fits-in-memory' | 'mode1-build-too-large' | 'codes-pinned' | 'codes-paged' | 'v8-pressure-demote';
|
|
123
|
+
/**
|
|
124
|
+
* Selector output. The reasoning fields make the decision
|
|
125
|
+
* diagnosable: a production log line can capture `mode`, `reason`,
|
|
126
|
+
* and `perBrainAvailable` in ~50 bytes and the operator can
|
|
127
|
+
* reconstruct exactly why the brain landed in its current profile.
|
|
128
|
+
*/
|
|
129
|
+
export interface ModeSelection {
|
|
130
|
+
/** Operating mode the selector chose. */
mode: AdaptiveDiskAnnMode;
|
|
131
|
+
/** Reason code identifying which branch of the selector produced `mode`. */
reason: ModeSelectionReason;
|
|
132
|
+
/** RAM cost the selector estimated, in bytes: the Mode 1 (vectors + graph) estimate for `"in-memory"`, otherwise the codes + codebook estimate. */
|
|
133
|
+
estimatedBytes: number;
|
|
134
|
+
/** Per-brain share of available memory (`availableBytes / max(1, activeBrains)`), before the headroom factor is applied. */
|
|
135
|
+
perBrainAvailable: number;
|
|
136
|
+
}
|
|
137
|
+
/**
|
|
138
|
+
* Returns the build-cost ceiling above which Mode 1 falls through to
|
|
139
|
+
* Mode 2 at this dim. See {@link MODE1_BUILD_COST_CEILING_SMALL_DIM}.
|
|
140
|
+
*
|
|
141
|
+
* Exposed for tests that need to assert the boundary calibration
|
|
142
|
+
* directly; exported from this module but not re-exported beyond it.
|
|
143
|
+
*/
|
|
144
|
+
export declare function mode1BuildCostCeiling(dim: number): number;
|
|
145
|
+
/**
|
|
146
|
+
* Pure selector. The wrapper code calls this with concrete inputs;
|
|
147
|
+
* tests construct {@link BrainStats} + {@link SystemMemoryState}
|
|
148
|
+
* directly to assert each branch.
|
|
149
|
+
*/
|
|
150
|
+
export declare function pickMode(stats: BrainStats, sys: SystemMemoryState): ModeSelection;
|
|
151
|
+
/**
|
|
152
|
+
* Convenience wrapper that pulls inputs from a {@link ResourceManager}
|
|
153
|
+
* (default: the singleton) and returns the selector's choice.
|
|
154
|
+
* Production code in `NativeDiskAnnWrapper` calls this at build and
|
|
155
|
+
* open time; tests prefer {@link pickMode} directly so they can pin
|
|
156
|
+
* the system state without touching the singleton.
|
|
157
|
+
*/
|
|
158
|
+
export declare function selectModeFromResourceManager(stats: BrainStats, rm?: ResourceManager): ModeSelection;
|
|
159
|
+
/**
|
|
160
|
+
* The mode `MappedIndex::open_with_mode(path, Auto)` would resolve to
|
|
161
|
+
* for a given header (Piece H semantics). Mirrored here so the
|
|
162
|
+
* wrapper can skip a reopen when the selector agrees with Auto —
|
|
163
|
+
* avoids opening the file twice on the cold path.
|
|
164
|
+
*/
|
|
165
|
+
export declare function autoModeForHeader(header: {
|
|
166
|
+
pqM: number;
|
|
167
|
+
}): AdaptiveDiskAnnMode;
|
|
168
|
+
//# sourceMappingURL=AdaptiveDiskAnnModeSelector.d.ts.map
|
|
@@ -0,0 +1,276 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @module hnsw/AdaptiveDiskAnnModeSelector
|
|
3
|
+
* @description **Adaptive DiskANN** mode selector — Piece I of the
|
|
4
|
+
* cor 3.0 plan. Closes the loop on Track 2: the previous pieces
|
|
5
|
+
* built the *capability* (Piece G — Mode 1 no-PQ; Piece H — open-time
|
|
6
|
+
* madvise profile; Piece J — `/proc/meminfo` observation), and this
|
|
7
|
+
* module is the *decision* that wires those signals together into a
|
|
8
|
+
* single mode choice at build / open time.
|
|
9
|
+
*
|
|
10
|
+
* ## What this picks
|
|
11
|
+
*
|
|
12
|
+
* - **`"in-memory"`** — Mode 1. Build path: skip PQ entirely; vectors
|
|
13
|
+
* stay RAM-resident; the walk is exact (sub-ms latency). Open
|
|
14
|
+
* path: only valid when the on-disk header already records
|
|
15
|
+
* `pq_m=0` — otherwise a PQ-enabled file would force the
|
|
16
|
+
* `search_pq` path regardless of the open-time hint, so "in-memory"
|
|
17
|
+
* open mode on a PQ file would only waste RAM on vectors the walk
|
|
18
|
+
* doesn't read.
|
|
19
|
+
* - **`"hybrid"`** — Mode 2. PQ codes pinned in RAM via `MADV_WILLNEED`;
|
|
20
|
+
* vectors paged on demand for rerank. The default for medium-scale
|
|
21
|
+
* brains.
|
|
22
|
+
* - **`"on-disk"`** — Mode 3. PQ codes can page out under memory
|
|
23
|
+
* pressure. The choice for memory-constrained or multi-tenant
|
|
24
|
+
* deployments where many brainy+cor instances share a box.
|
|
25
|
+
*
|
|
26
|
+
* ## Algorithm
|
|
27
|
+
*
|
|
28
|
+
* Per-brain RAM budget = `0.5 × (availableBytes / max(1, activeBrains))`.
|
|
29
|
+
* The 0.5 headroom factor leaves the other half for OS slop, query
|
|
30
|
+
* intermediates, and the brain's other subsystems (metadata LSM,
|
|
31
|
+
* embeddings, etc.).
|
|
32
|
+
*
|
|
33
|
+
* 1. If the input names a Mode 1 file (`pqM === 0` from the header),
|
|
34
|
+
* return `"in-memory"` — no other open mode is valid for a no-PQ
|
|
35
|
+
* file.
|
|
36
|
+
* 2. At BUILD time (`pqM` undefined), try Mode 1 first: if the
|
|
37
|
+
* vectors + graph cost fits the RAM budget AND the build cost
|
|
38
|
+
* (`N × dim`) is below the dim-aware ceiling from
|
|
39
|
+
* {@link mode1BuildCostCeiling}, build no-PQ. Above the ceiling,
|
|
40
|
+
* Mode 1's `O(N · dim)` build becomes the limiting factor —
|
|
41
|
+
* Mode 2's PQ-coded distance compute is 5-10× faster at the same
|
|
42
|
+
* N because it replaces a `dim`-wide dot product with a
|
|
43
|
+
* `m=16`-wide table lookup. Falling through to PQ at large N is
|
|
44
|
+
* the right call even when vectors fit in RAM.
|
|
45
|
+
* 3. For PQ-enabled files (or build-time-too-big cases), compare the
|
|
46
|
+
* codes-section + codebook RAM cost against the budget:
|
|
47
|
+
* fits → `"hybrid"`; doesn't fit → `"on-disk"`.
|
|
48
|
+
* 4. If V8 heap pressure is `"critical"`, demote `"hybrid"` →
|
|
49
|
+
* `"on-disk"` — we don't want to pile JS-heap pressure on top of
|
|
50
|
+
* OS-RAM pressure when GC thrash will cost more than the SSD
|
|
51
|
+
* page-in.
|
|
52
|
+
*
|
|
53
|
+
* ## Decision granularity
|
|
54
|
+
*
|
|
55
|
+
* Brain-open-time decision; not re-evaluated mid-flight. Mode
|
|
56
|
+
* switches mid-flight would require a rebuild (Mode 1 → Mode 2 means
|
|
57
|
+
* training a PQ codebook; Mode 2 → Mode 1 means evicting codes +
|
|
58
|
+
* pinning vectors), so the selector picks once and the brain runs in
|
|
59
|
+
* the chosen profile until restart. A future piece may explore
|
|
60
|
+
* triggered rebuilds when sustained pressure crosses a threshold;
|
|
61
|
+
* for now the choice is sticky.
|
|
62
|
+
*/
|
|
63
|
+
import { ResourceManager, } from '../resource/ResourceManager.js';
|
|
64
|
+
/**
|
|
65
|
+
* Fraction of per-brain available memory the chosen mode's working
|
|
66
|
+
* set must fit within. Half the available; the other half is for OS
|
|
67
|
+
* slop, query intermediates, and the brain's other subsystems
|
|
68
|
+
* (metadata LSM, embeddings, page-cache headroom).
|
|
69
|
+
*/
|
|
70
|
+
const HEADROOM_FACTOR = 0.5;
|
|
71
|
+
/**
|
|
72
|
+
* Default PQ subspace count for BUILD-time cost estimation. Matches
|
|
73
|
+
* the {@link NativeDiskAnnWrapper} default. Used only when the
|
|
74
|
+
* caller hasn't pinned `pqM` themselves — an existing file's header
|
|
75
|
+
* always provides the real value.
|
|
76
|
+
*/
|
|
77
|
+
const DEFAULT_PQ_M_FOR_ESTIMATE = 16;
|
|
78
|
+
/**
|
|
79
|
+
* Default codebook ksub (centroids per subspace) — 256 is the
|
|
80
|
+
* 8-bit-code published default the build path uses (Piece 13).
|
|
81
|
+
*/
|
|
82
|
+
const DEFAULT_KSUB = 256;
|
|
83
|
+
/**
|
|
84
|
+
* Maximum `N × dim` above which Mode 1's `O(N · dim)` build cost
|
|
85
|
+
* dominates the selector decision. Even when the vectors + graph fit
|
|
86
|
+
* in RAM, the build time becomes the limiting factor at this scale —
|
|
87
|
+
* Mode 2's PQ-coded distance compute is 5-10× faster (16-byte code
|
|
88
|
+
* lookup vs `dim`-wide dot product) so a brain that builds in ~50
|
|
89
|
+
* minutes under Mode 1 builds in ~5-10 minutes under Mode 2 at the
|
|
90
|
+
* cost of ~3× query latency (sub-ms → ~3 ms p50).
|
|
91
|
+
*
|
|
92
|
+
* The ceiling is **dim-aware** because per-cost-unit wall-clock varies
|
|
93
|
+
* significantly with dim — SIMD width, cache footprint, and vectorised
|
|
94
|
+
* distance compute all favour smaller dims by roughly an order of
|
|
95
|
+
* magnitude per cost-unit. A single dim-blind ceiling miscalibrates
|
|
96
|
+
* one regime or the other.
|
|
97
|
+
*
|
|
98
|
+
* Anchor measurements (bxl9000, 32-core Zen 4, AVX-512 dispatched
|
|
99
|
+
* kernel from `e6d3756`):
|
|
100
|
+
*
|
|
101
|
+
* | Workload | N×dim | Build (Mode 1) | μs/cost-unit |
|
|
102
|
+
* |---|---:|---:|---:|
|
|
103
|
+
* | SIFT1M (dim=128) | 128M | 83 s | 0.65 |
|
|
104
|
+
* | SIFT10M (dim=128) | 1.28B | 20 min | 0.93 |
|
|
105
|
+
* | 1M × 384 | 384M | 17 min | 2.65 |
|
|
106
|
+
* | 1M × 384 (pre-SIMD) | 384M | 47 min | 7.33 |
|
|
107
|
+
*
|
|
108
|
+
* Result files: `docs/verification/result-sift1m-auto.json`,
|
|
109
|
+
* `result-sift10m-in-memory.json`, `result-1m-avx512.json`,
|
|
110
|
+
* `result-1m-auto.json`.
|
|
111
|
+
*
|
|
112
|
+
* Target wall-clock budget: ≤ 24 min (the "acceptable one-time build"
|
|
113
|
+
* boundary). At dim ≤ 256 the per-cost-unit time stays under ~1 μs,
|
|
114
|
+
* so 2 B cost-units ≈ 33 min worst case, safe with super-linear
|
|
115
|
+
* scaling headroom. At dim > 256 the per-cost-unit time climbs into
|
|
116
|
+
* the 2-7 μs range, capping the budget near 400 M cost-units.
|
|
117
|
+
*
|
|
118
|
+
* This is a single internal constant set, not a user-facing knob.
|
|
119
|
+
* Power users who need a specific mode override the entire selector
|
|
120
|
+
* via `DiskAnnConfig.mode = "in-memory" | "hybrid" | "on-disk"`, the
|
|
121
|
+
* existing escape hatch.
|
|
122
|
+
*/
|
|
123
|
+
const MODE1_BUILD_COST_CEILING_SMALL_DIM = 2_000_000_000;
|
|
124
|
+
const MODE1_BUILD_COST_CEILING_LARGE_DIM = 400_000_000;
|
|
125
|
+
const MODE1_DIM_BOUNDARY = 256;
|
|
126
|
+
/**
 * Looks up the Mode 1 build-cost ceiling (in `N × dim` cost units) for a
 * given vector dimension.
 *
 * Dimensions at or below `MODE1_DIM_BOUNDARY` get the small-dim ceiling;
 * anything else gets the tighter large-dim ceiling. A `NaN` dim fails the
 * `<=` comparison and therefore also receives the large-dim ceiling.
 *
 * Exported so tests can assert the boundary calibration directly.
 *
 * @param {number} dim Vector dimension.
 * @returns {number} Maximum `nodeCount × dim` for which Mode 1 is still
 *   considered at build time.
 */
export function mode1BuildCostCeiling(dim) {
    if (dim <= MODE1_DIM_BOUNDARY) {
        return MODE1_BUILD_COST_CEILING_SMALL_DIM;
    }
    return MODE1_BUILD_COST_CEILING_LARGE_DIM;
}
|
|
138
|
+
/**
 * Pure mode selector: maps brain statistics plus a memory snapshot onto one
 * of `'in-memory'`, `'hybrid'` or `'on-disk'`, together with a reason code.
 *
 * The per-brain share is `availableBytes / max(1, activeBrains)` and the
 * working-set budget is `HEADROOM_FACTOR` times that share. The returned
 * `perBrainAvailable` is the share *before* the headroom factor.
 *
 * @param {{nodeCount: number, dim: number, maxDegree: number, pqM?: number}} stats
 *   Brain shape. `pqM === 0` marks an existing no-PQ file, `pqM === undefined`
 *   marks a build-time decision, any other value is treated as a PQ-enabled file.
 * @param {{availableBytes: number, activeBrains: number, v8Pressure?: string}} sys
 *   Memory situation at decision time.
 * @returns {{mode: string, reason: string, estimatedBytes: number, perBrainAvailable: number}}
 *   The chosen mode, why it was chosen, and the byte estimate that was
 *   compared against the budget.
 */
export function pickMode(stats, sys) {
    const { availableBytes, activeBrains, v8Pressure } = sys;
    const perBrainAvailable = availableBytes / Math.max(1, activeBrains);
    const budget = HEADROOM_FACTOR * perBrainAvailable;
    const heapIsCritical = v8Pressure === 'critical';
    const { pqM } = stats;

    // Every exit path reports the same four fields, in the same key order.
    const verdict = (mode, reason, estimatedBytes) => ({
        mode,
        reason,
        estimatedBytes,
        perBrainAvailable,
    });

    // Existing Mode 1 file: the header says there are no PQ codes, so
    // "in-memory" is the only mode that makes sense regardless of budget.
    if (pqM === 0) {
        return verdict('in-memory', 'no-pq-file', estimateMode1Bytes(stats));
    }

    // Build-time decision: nothing is on disk yet, so Mode 1 is a candidate.
    if (pqM === undefined) {
        const mode1Cost = estimateMode1Bytes(stats);
        if (mode1Cost <= budget) {
            const buildCost = stats.nodeCount * stats.dim;
            const ceiling = mode1BuildCostCeiling(stats.dim);

            // Fits in RAM and the build is cheap enough: build without PQ.
            if (buildCost <= ceiling) {
                return verdict('in-memory', 'fits-in-memory', mode1Cost);
            }

            // Fits in RAM but the build would be too slow. Route to the PQ
            // path under a distinct reason so logs can tell this apart from
            // the RAM-driven fall-through below. At the default m the codes
            // cost 16 bytes/node versus dim×4 bytes/node for vectors, so for
            // realistic shapes the codes estimate fits whenever the Mode 1
            // estimate did; this branch therefore never checks the budget
            // again. (Kept as an explicit `>` test so a NaN build cost still
            // falls through to the generic PQ path.)
            if (buildCost > ceiling) {
                const pqCost = estimateCodesBytes(stats, DEFAULT_PQ_M_FOR_ESTIMATE);
                return heapIsCritical
                    ? verdict('on-disk', 'v8-pressure-demote', pqCost)
                    : verdict('hybrid', 'mode1-build-too-large', pqCost);
            }
        }
    }

    // PQ-enabled file, or a build whose Mode 1 footprint does not fit:
    // pin the codes (hybrid) if they fit the budget, otherwise let them page.
    const codesCost = estimateCodesBytes(stats, pqM ?? DEFAULT_PQ_M_FOR_ESTIMATE);
    if (!(codesCost <= budget)) {
        return verdict('on-disk', 'codes-paged', codesCost);
    }

    // Codes would fit, but a critical V8 heap demotes hybrid to on-disk so
    // the OS page cache stays in charge of codes residency.
    if (heapIsCritical) {
        return verdict('on-disk', 'v8-pressure-demote', codesCost);
    }
    return verdict('hybrid', 'codes-pinned', codesCost);
}
|
|
232
|
+
/**
 * Estimates the RAM commitment of a Mode 1 (no-PQ) index.
 *
 * Each node is charged `dim + maxDegree` four-byte slots — presumably
 * 32-bit vector components plus 32-bit neighbour ids; confirm against the
 * on-disk layout. Codebook and codes sections contribute nothing here.
 *
 * @param {{nodeCount: number, dim: number, maxDegree: number}} stats Brain shape.
 * @returns {number} Estimated bytes for the vectors + graph sections.
 */
function estimateMode1Bytes(stats) {
    const { nodeCount, dim, maxDegree } = stats;
    const slotsPerNode = dim + maxDegree;
    return nodeCount * slotsPerNode * 4;
}
|
|
240
|
+
/**
 * Estimates the RAM commitment of the PQ codes section plus the codebook
 * for Modes 2/3.
 *
 * Codes cost `m` bytes per node and dominate at scale. The codebook is
 * independent of node count: `DEFAULT_KSUB × dim × 4` bytes, i.e. 1 KiB per
 * dimension at the default ksub of 256.
 *
 * @param {{nodeCount: number, dim: number}} stats Brain shape.
 * @param {number} m Number of PQ subspaces (bytes of code per node).
 * @returns {number} Estimated bytes for codes + codebook.
 */
function estimateCodesBytes(stats, m) {
    const { nodeCount, dim } = stats;
    return nodeCount * m + DEFAULT_KSUB * dim * 4;
}
|
|
251
|
+
/**
 * Convenience entry point: gathers the selector's system inputs from a
 * {@link ResourceManager} and delegates to {@link pickMode}.
 *
 * Reads `availableBytes` from the manager's OS memory snapshot, and
 * `activeInstances` plus `v8Heap.pressure` from its resource profile.
 * Tests that need to pin the system state should call {@link pickMode}
 * directly instead of going through the singleton.
 *
 * @param {{nodeCount: number, dim: number, maxDegree: number, pqM?: number}} stats
 *   Brain shape, forwarded unchanged to {@link pickMode}.
 * @param {ResourceManager} [rm] Manager to query; defaults to
 *   `ResourceManager.getInstance()`.
 * @returns {{mode: string, reason: string, estimatedBytes: number, perBrainAvailable: number}}
 *   The selection produced by {@link pickMode}.
 */
export function selectModeFromResourceManager(stats, rm = ResourceManager.getInstance()) {
    const { availableBytes } = rm.getOsMemorySnapshot();
    const { activeInstances, v8Heap } = rm.getResourceProfile();
    const systemState = {
        availableBytes,
        activeBrains: activeInstances,
        v8Pressure: v8Heap.pressure,
    };
    return pickMode(stats, systemState);
}
|
|
267
|
+
/**
 * Mirrors what `MappedIndex::open_with_mode(path, Auto)` resolves to for a
 * given header, so the wrapper can skip a second open when the selector
 * agrees with Auto.
 *
 * @param {{pqM: number}} header Parsed file header.
 * @returns {'in-memory' | 'hybrid'} `'in-memory'` when the header records
 *   `pqM === 0` (no-PQ file); `'hybrid'` for every other value.
 */
export function autoModeForHeader(header) {
    if (header.pqM === 0) {
        return 'in-memory';
    }
    return 'hybrid';
}
|
|
276
|
+
//# sourceMappingURL=AdaptiveDiskAnnModeSelector.js.map
|