@soulcraft/cortex 2.7.2 → 2.7.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +0 -0
- package/dist/hnsw/AdaptiveDiskAnnModeSelector.d.ts +168 -0
- package/dist/hnsw/AdaptiveDiskAnnModeSelector.js +276 -0
- package/dist/hnsw/NativeDiskAnnWrapper.d.ts +284 -0
- package/dist/hnsw/NativeDiskAnnWrapper.js +738 -0
- package/dist/hnsw/NativeHNSWWrapper.js +15 -2
- package/dist/legacyLayoutGuard.d.ts +61 -0
- package/dist/legacyLayoutGuard.js +187 -0
- package/dist/resource/OsMemoryProbe.d.ts +175 -0
- package/dist/resource/OsMemoryProbe.js +206 -0
- package/dist/utils/nativeBinaryEntityIdMapper.d.ts +199 -0
- package/dist/utils/nativeBinaryEntityIdMapper.js +358 -0
- package/native/brainy-native.node +0 -0
- package/package.json +1 -1
|
@@ -96,9 +96,22 @@ export class NativeHNSWWrapper {
|
|
|
96
96
|
const root = this.resolveRootDir();
|
|
97
97
|
if (!root)
|
|
98
98
|
return undefined;
|
|
99
|
-
const
|
|
100
|
-
|
|
99
|
+
const s = this.storage;
|
|
100
|
+
// Mirror brainy's OWN branch resolution EXACTLY (brainy.js `getCurrentBranch`:
|
|
101
|
+
// `this.storage.currentBranch || 'main'`):
|
|
102
|
+
// - Non-COW storage has no `currentBranch` property → snapshot lives at root.
|
|
103
|
+
// - COW storage (brainy >= 7.31, mandatory COW) resolves the branch as
|
|
104
|
+
// `currentBranch || 'main'` and loads `_hnsw.bin` from
|
|
105
|
+
// `branches/<branch>/entities/nouns/hnsw/`. `currentBranch` is UNSET by
|
|
106
|
+
// default (only populated after an explicit fork/checkout).
|
|
107
|
+
// The previous `if (!branch) return root` wrote the snapshot to the tenant
|
|
108
|
+
// ROOT on a default brain while brainy loaded from `branches/main/…` — the
|
|
109
|
+
// snapshot was never found and the index rebuilt per-entity on every cold
|
|
110
|
+
// start (Memory deployed-line regression, re-flagged 3×). Defaulting to
|
|
111
|
+
// 'main' for COW storage makes write + load agree.
|
|
112
|
+
if (!('currentBranch' in s))
|
|
101
113
|
return root;
|
|
114
|
+
const branch = s.currentBranch || 'main';
|
|
102
115
|
return require('node:path').join(root, 'branches', String(branch), 'entities', 'nouns', 'hnsw');
|
|
103
116
|
}
|
|
104
117
|
/** Absolute path of the `_hnsw.bin` snapshot for the active branch, or null. */
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @module legacyLayoutGuard
|
|
3
|
+
* @description Refuses to start when a cor 3.0 process is pointed
|
|
4
|
+
* at a directory that still holds cortex 2.x on-disk artifacts.
|
|
5
|
+
*
|
|
6
|
+
* The failure mode this prevents is the most dangerous in production:
|
|
7
|
+
* an operator deploys brainy 8.0 + cor 3.0 to a host whose data
|
|
8
|
+
* directory hasn't been migrated yet, the service starts,
|
|
9
|
+
* `brain.init()` fires, and either (a) crashes mid-init with a
|
|
10
|
+
* confusing low-level error from a shadow-page store, or (b) — worse —
|
|
11
|
+
* cortex's auto-engagement creates a new 3.0 file alongside the 2.x
|
|
12
|
+
* remnants, producing a half-migrated state that's harder to roll
|
|
13
|
+
* back than a clean stop.
|
|
14
|
+
*
|
|
15
|
+
* The guard runs at the top of `corPlugin.activate()`, before any
|
|
16
|
+
* provider registers. It scans the storage root for known cortex 2.x
|
|
17
|
+
* markers, and if any are found, throws a single error with the exact
|
|
18
|
+
* migration command the operator needs to run. **Throwing here is the
|
|
19
|
+
* point** — a service that won't start is recoverable; a service that
|
|
20
|
+
* starts on the wrong format isn't.
|
|
21
|
+
*
|
|
22
|
+
* The guard is a no-op when:
|
|
23
|
+
* - The storage adapter isn't filesystem-backed (cortex 2.x markers
|
|
24
|
+
* are inherently filesystem-rooted; cloud adapters can't have
|
|
25
|
+
* them).
|
|
26
|
+
* - The storage root is empty (fresh install — no migration needed).
|
|
27
|
+
* - Only cor 3.0 markers are present (the migration completed,
|
|
28
|
+
* or this brain was created fresh on 3.0).
|
|
29
|
+
*
|
|
30
|
+
* The guard fires when:
|
|
31
|
+
* - `_id_mapper/uuid_to_int.mkv` is a FILE (cortex 2.x). In cortex
|
|
32
|
+
* 3.0 this is a DIRECTORY containing the shadow-page head pointer
|
|
33
|
+
* + base + delta.
|
|
34
|
+
* - `_id_mapper/int_to_uuid.bin` is a FILE (cortex 2.x). Same
|
|
35
|
+
* story — 3.0 makes this a directory.
|
|
36
|
+
* - `_metadata/` contains the cortex 2.x JSON chunk envelope shape
|
|
37
|
+
* without the 3.0 LSM shard subdirectory.
|
|
38
|
+
* - HNSW provider artifacts exist alongside no DiskANN directory.
|
|
39
|
+
*
|
|
40
|
+
* Bypass: setting `COR_LEGACY_LAYOUT_GUARD=skip` (legacy `CORTEX_LEGACY_LAYOUT_GUARD` still honored) in the environment
|
|
41
|
+
* disables the guard. This exists for the migration script itself
|
|
42
|
+
* (which needs to be able to read the legacy layout to take its
|
|
43
|
+
* snapshot) and for emergency debugging. Production code should never
|
|
44
|
+
* set this.
|
|
45
|
+
*/
|
|
46
|
+
/**
|
|
47
|
+
* Storage shape we can inspect. Cortex plugin's
|
|
48
|
+
* `BrainyPluginContext.storage` exposes `getBinaryBlobPath` on
|
|
49
|
+
* filesystem-backed adapters; we use that to discover the root.
|
|
50
|
+
*/
|
|
51
|
+
interface StorageWithBlobPath {
|
|
52
|
+
getBinaryBlobPath?: (key: string) => string | null;
|
|
53
|
+
}
|
|
54
|
+
/**
|
|
55
|
+
* Public guard. Call at the top of `corPlugin.activate()` with
|
|
56
|
+
* the brainy storage adapter. Throws if cortex 2.x markers are
|
|
57
|
+
* present.
|
|
58
|
+
*/
|
|
59
|
+
export declare function assertNoLegacyLayout(storage: StorageWithBlobPath | null | undefined): void;
|
|
60
|
+
export {};
|
|
61
|
+
//# sourceMappingURL=legacyLayoutGuard.d.ts.map
|
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @module legacyLayoutGuard
|
|
3
|
+
* @description Refuses to start when a cor 3.0 process is pointed
|
|
4
|
+
* at a directory that still holds cortex 2.x on-disk artifacts.
|
|
5
|
+
*
|
|
6
|
+
* The failure mode this prevents is the most dangerous in production:
|
|
7
|
+
* an operator deploys brainy 8.0 + cor 3.0 to a host whose data
|
|
8
|
+
* directory hasn't been migrated yet, the service starts,
|
|
9
|
+
* `brain.init()` fires, and either (a) crashes mid-init with a
|
|
10
|
+
* confusing low-level error from a shadow-page store, or (b) — worse —
|
|
11
|
+
* cortex's auto-engagement creates a new 3.0 file alongside the 2.x
|
|
12
|
+
* remnants, producing a half-migrated state that's harder to roll
|
|
13
|
+
* back than a clean stop.
|
|
14
|
+
*
|
|
15
|
+
* The guard runs at the top of `corPlugin.activate()`, before any
|
|
16
|
+
* provider registers. It scans the storage root for known cortex 2.x
|
|
17
|
+
* markers, and if any are found, throws a single error with the exact
|
|
18
|
+
* migration command the operator needs to run. **Throwing here is the
|
|
19
|
+
* point** — a service that won't start is recoverable; a service that
|
|
20
|
+
* starts on the wrong format isn't.
|
|
21
|
+
*
|
|
22
|
+
* The guard is a no-op when:
|
|
23
|
+
* - The storage adapter isn't filesystem-backed (cortex 2.x markers
|
|
24
|
+
* are inherently filesystem-rooted; cloud adapters can't have
|
|
25
|
+
* them).
|
|
26
|
+
* - The storage root is empty (fresh install — no migration needed).
|
|
27
|
+
* - Only cor 3.0 markers are present (the migration completed,
|
|
28
|
+
* or this brain was created fresh on 3.0).
|
|
29
|
+
*
|
|
30
|
+
* The guard fires when:
|
|
31
|
+
* - `_id_mapper/uuid_to_int.mkv` is a FILE (cortex 2.x). In cortex
|
|
32
|
+
* 3.0 this is a DIRECTORY containing the shadow-page head pointer
|
|
33
|
+
* + base + delta.
|
|
34
|
+
* - `_id_mapper/int_to_uuid.bin` is a FILE (cortex 2.x). Same
|
|
35
|
+
* story — 3.0 makes this a directory.
|
|
36
|
+
* - `_metadata/` contains the cortex 2.x JSON chunk envelope shape
|
|
37
|
+
* without the 3.0 LSM shard subdirectory.
|
|
38
|
+
* - HNSW provider artifacts exist alongside no DiskANN directory.
|
|
39
|
+
*
|
|
40
|
+
* Bypass: setting `COR_LEGACY_LAYOUT_GUARD=skip` (legacy `CORTEX_LEGACY_LAYOUT_GUARD` still honored) in the environment
|
|
41
|
+
* disables the guard. This exists for the migration script itself
|
|
42
|
+
* (which needs to be able to read the legacy layout to take its
|
|
43
|
+
* snapshot) and for emergency debugging. Production code should never
|
|
44
|
+
* set this.
|
|
45
|
+
*/
|
|
46
|
+
import { existsSync, statSync, readdirSync } from 'node:fs';
|
|
47
|
+
import { join } from 'node:path';
|
|
48
|
+
const CORTEX_SUBDIRS = ['_id_mapper', '_metadata', '_diskann', '_graph_adjacency'];
|
|
49
|
+
/**
|
|
50
|
+
* Heuristic: cortex 2.x flat-file layout markers under `_id_mapper`.
|
|
51
|
+
* Cor 3.0 makes both of these directories instead.
|
|
52
|
+
*/
|
|
53
|
+
const LEGACY_ID_MAPPER_FILES = [
|
|
54
|
+
'_id_mapper/uuid_to_int.mkv',
|
|
55
|
+
'_id_mapper/int_to_uuid.bin',
|
|
56
|
+
];
|
|
57
|
+
/**
|
|
58
|
+
* Discover the storage root directory from the brainy
|
|
59
|
+
* `StorageAdapter`. Returns null for non-filesystem adapters (no
|
|
60
|
+
* legacy markers possible).
|
|
61
|
+
*/
|
|
62
|
+
function resolveStorageRoot(storage) {
|
|
63
|
+
if (!storage || typeof storage.getBinaryBlobPath !== 'function')
|
|
64
|
+
return null;
|
|
65
|
+
// Ask for a sentinel path; the returned path's parent is the
|
|
66
|
+
// storage root for our purposes.
|
|
67
|
+
const probe = storage.getBinaryBlobPath('__cortex_legacy_layout_guard_probe__');
|
|
68
|
+
if (!probe)
|
|
69
|
+
return null;
|
|
70
|
+
// The probe path doesn't exist; we want its containing directory.
|
|
71
|
+
// brainy's FileSystemStorage returns something like
|
|
72
|
+
// `${rootDirectory}/__cortex_legacy_layout_guard_probe__` or
|
|
73
|
+
// `${rootDirectory}/binaryBlobs/__cortex_legacy_layout_guard_probe__`.
|
|
74
|
+
// Walk up to the directory that contains `_id_mapper` or is the
|
|
75
|
+
// brain root.
|
|
76
|
+
let dir = probe;
|
|
77
|
+
// Strip the probe filename component.
|
|
78
|
+
dir = dir.replace(/\/[^/]+$/, '');
|
|
79
|
+
// If this is `binaryBlobs/`, go one more up.
|
|
80
|
+
if (dir.endsWith('/binaryBlobs')) {
|
|
81
|
+
dir = dir.slice(0, -'/binaryBlobs'.length);
|
|
82
|
+
}
|
|
83
|
+
if (!existsSync(dir))
|
|
84
|
+
return null;
|
|
85
|
+
return dir;
|
|
86
|
+
}
|
|
87
|
+
/**
 * Collect evidence that `root` still holds cortex 2.x on-disk artifacts.
 *
 * Three independent heuristics run in order:
 *   1. an `_id_mapper` entry from `LEGACY_ID_MAPPER_FILES` exists as a plain file;
 *   2. `_metadata/` holds `.json` / `.cidx` entries but neither a `memtable`
 *      entry nor any `lsm-*` entry;
 *   3. `root` directly contains a `*.hnsw` file and has no `_diskann` entry.
 *
 * NOTE(review): only files directly under `root` are inspected for `.hnsw`;
 * a `_hnsw/` subdirectory is not scanned — confirm that is intended.
 *
 * @param root Storage root directory to inspect.
 * @returns One `{ path, reason }` record per heuristic that matched (the
 *   id-mapper heuristic can contribute one record per file); empty when the
 *   layout looks clean.
 */
function findLegacyMarkers(root) {
    // (1) flat-file id mapper entries.
    const found = LEGACY_ID_MAPPER_FILES
        .map((rel) => ({ rel, abs: join(root, rel) }))
        .filter(({ abs }) => existsSync(abs) && statSync(abs).isFile())
        .map(({ rel, abs }) => ({
            path: abs,
            reason: `${rel} is a file (cortex 2.x format). ` +
                `Cor 3.0 expects this to be a directory (shadow-page LSM store).`,
        }));

    // (2) JSON chunk files in the metadata directory with no LSM shard layout.
    const metadataDir = join(root, '_metadata');
    if (existsSync(metadataDir) && statSync(metadataDir).isDirectory()) {
        const names = readdirSync(metadataDir);
        const isLsmEntry = (name) => name === 'memtable' || name.startsWith('lsm-');
        const isChunkFile = (name) => ['.json', '.cidx'].some((ext) => name.endsWith(ext));
        if (names.some(isChunkFile) && !names.some(isLsmEntry)) {
            found.push({
                path: metadataDir,
                reason: `${metadataDir} contains cortex 2.x JSON chunk files (.json/.cidx) but no 3.0 LSM shard layout. ` +
                    `Cor 3.0's metadata index is LSM-tree-backed.`,
            });
        }
    }

    // (3) a root-level `.hnsw` file with no `_diskann` entry next to it.
    //     Only the first matching file is reported.
    const firstHnsw = readdirSync(root, { withFileTypes: true })
        .find((entry) => entry.isFile() && entry.name.endsWith('.hnsw'));
    if (firstHnsw !== undefined && !existsSync(join(root, '_diskann'))) {
        found.push({
            path: join(root, firstHnsw.name),
            reason: `${root} contains cortex 2.x HNSW provider files (.hnsw) with no 3.0 DiskANN directory. ` +
                `Cor 3.0 uses Adaptive DiskANN as the default vector index.`,
        });
    }
    return found;
}
|
|
134
|
+
/**
 * Startup guard: throw when the storage root behind `storage` still carries
 * cortex 2.x markers.
 *
 * Returns without doing anything when the bypass environment variable is
 * `skip` (`COR_LEGACY_LAYOUT_GUARD` is read first; `CORTEX_LEGACY_LAYOUT_GUARD`
 * is consulted only when the former is unset), when no filesystem root can be
 * resolved or it does not exist, or when no legacy markers are found.
 *
 * @param storage The brainy storage adapter (may be null/undefined).
 * @throws {Error} A multi-line, operator-facing message listing every marker
 *   found together with migration and rollback instructions.
 */
export function assertNoLegacyLayout(storage) {
    // Dual-read during the cortex→cor rename: the new name wins when set.
    const bypass = process.env.COR_LEGACY_LAYOUT_GUARD ?? process.env.CORTEX_LEGACY_LAYOUT_GUARD;
    if (bypass === 'skip')
        return;

    const root = resolveStorageRoot(storage);
    // Nothing to check for non-filesystem adapters or a not-yet-created root.
    if (!root || !existsSync(root))
        return;

    const markers = findLegacyMarkers(root);
    if (markers.length === 0)
        return;

    // The message is meant to be actionable on its own, without the docs.
    const header = [
        '',
        '╔══════════════════════════════════════════════════════════════════════════╗',
        '║ Cor 3.0 cannot start: this storage directory holds cortex 2.x data. ║',
        '╚══════════════════════════════════════════════════════════════════════════╝',
        '',
        `Storage root: ${root}`,
        '',
        'Cortex 2.x markers found:',
    ];
    const findings = markers.map((m) => ` • ${m.reason}`);
    const guidance = [
        '',
        'Cor 3.0 broke the on-disk format intentionally to ship snapshot-safe',
        'shadow-page LSM stores + Adaptive DiskANN. Reading 2.x files in 3.0 is not',
        'supported by design.',
        '',
        '🚨 Do NOT delete this directory. Your data is intact in the 2.x format.',
        '',
        'To migrate (zero data loss, ~10 minutes for Venue-scale brains):',
        '',
        ' node node_modules/@soulcraft/cor/scripts/migrate-cortex-2x-to-3x.mjs \\',
        ` --brain-dir ${root} \\`,
        ` --backup-dir ${root}/../backups/pre-3.0`,
        '',
        'Full migration guide: docs/migration-3.0.md',
        'Venue-specific worked example in docs/migration-3.0.md § Worked example — Venue',
        '',
        'To rollback instead: downgrade @soulcraft/brainy + @soulcraft/cortex to',
        'your previous versions (e.g. brainy@7.31.6 cortex@2.7.0) — no data',
        'changes have happened yet because cor 3.0 refused to start.',
        '',
    ];
    throw new Error(header.concat(findings, guidance).join('\n'));
}
|
|
187
|
+
//# sourceMappingURL=legacyLayoutGuard.js.map
|
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @module resource/OsMemoryProbe
|
|
3
|
+
* @description OS-level memory observer for the cor 3.0 **Adaptive
|
|
4
|
+
* DiskANN** mode selector (Piece J of the cor 3.0 plan).
|
|
5
|
+
*
|
|
6
|
+
* The {@link ResourceManager} already observes the V8 heap and a
|
|
7
|
+
* coarse system-RSS pressure ratio (Piece 8). What it didn't have
|
|
8
|
+
* was a read on the kernel's `MemAvailable` figure — the metric the
|
|
9
|
+
* Linux kernel computes as "how much physical RAM can a new
|
|
10
|
+
* allocation claim without swapping," factoring in reclaimable page
|
|
11
|
+
* cache and slab. That's the right signal for the adaptive selector
|
|
12
|
+
* to pick a DiskANN open mode in multi-tenant scenarios where a
|
|
13
|
+
* shared box runs many brainy+cor instances: each instance's
|
|
14
|
+
* `os.freemem()` undercounts available memory (it reports MemFree,
|
|
15
|
+
* not MemAvailable), so a naïve `freemem`-based check would push
|
|
16
|
+
* the selector toward OnDisk hints even when there's plenty of
|
|
17
|
+
* reclaimable cache available.
|
|
18
|
+
*
|
|
19
|
+
* ## Surface
|
|
20
|
+
*
|
|
21
|
+
* - {@link parseMeminfo} — pure parser; fixture-friendly.
|
|
22
|
+
* - {@link OsMemoryProbe} — readers + caching wrapper. Dependency-
|
|
23
|
+
* injected so tests can simulate Linux on macOS and exercise the
|
|
24
|
+
* `MemAvailable` missing branch without touching the real
|
|
25
|
+
* `/proc/meminfo`.
|
|
26
|
+
*
|
|
27
|
+
* ## Why a cache
|
|
28
|
+
*
|
|
29
|
+
* `/proc/meminfo` reads cost ~10 μs of syscall time — not free, but
|
|
30
|
+
* also not worth re-reading on every single call. The 250 ms cache
|
|
31
|
+
* is comfortably below the 5 s pressure-tick cadence and the 30 s
|
|
32
|
+
* rebalance cadence the ResourceManager already runs, so any
|
|
33
|
+
* meaningful pressure event lands on a fresh read.
|
|
34
|
+
*
|
|
35
|
+
* ## Non-Linux fallback
|
|
36
|
+
*
|
|
37
|
+
* On macOS and Windows there is no `/proc/meminfo`. The probe falls
|
|
38
|
+
* back to `os.totalmem()` + `os.freemem()` and marks the snapshot
|
|
39
|
+
* `source: 'os-fallback'` so callers can degrade gracefully (the
|
|
40
|
+
* adaptive selector currently treats fallback snapshots as
|
|
41
|
+
* "available bytes is conservative, prefer Hybrid over OnDisk").
|
|
42
|
+
*/
|
|
43
|
+
/**
|
|
44
|
+
* Snapshot of OS-level memory state at observation time. All byte
|
|
45
|
+
* fields are in absolute bytes (the on-disk units in `/proc/meminfo`
|
|
46
|
+
* are KiB; the parser converts).
|
|
47
|
+
*/
|
|
48
|
+
export interface OsMemorySnapshot {
|
|
49
|
+
/**
|
|
50
|
+
* Total physical RAM on the machine — the `MemTotal` line in
|
|
51
|
+
* `/proc/meminfo`, or `os.totalmem()` on non-Linux fallback.
|
|
52
|
+
*/
|
|
53
|
+
totalBytes: number;
|
|
54
|
+
/**
|
|
55
|
+
* Memory the kernel reports as available for new allocations
|
|
56
|
+
* without swapping. On Linux ≥ 3.14, this is the `MemAvailable`
|
|
57
|
+
* line — accounts for reclaimable page cache + slab. On older
|
|
58
|
+
* kernels and on the os-fallback path, falls back to
|
|
59
|
+
* {@link freeBytes}.
|
|
60
|
+
*
|
|
61
|
+
* This is the load-bearing field for the Adaptive DiskANN mode
|
|
62
|
+
* selector: it answers "can this brain afford to keep PQ codes
|
|
63
|
+
* RAM-resident?".
|
|
64
|
+
*/
|
|
65
|
+
availableBytes: number;
|
|
66
|
+
/**
|
|
67
|
+
* Physically free memory — the `MemFree` line in `/proc/meminfo`,
|
|
68
|
+
* or `os.freemem()` on non-Linux fallback. Smaller than
|
|
69
|
+
* {@link availableBytes} on a healthy Linux system because it
|
|
70
|
+
* excludes reclaimable page cache.
|
|
71
|
+
*/
|
|
72
|
+
freeBytes: number;
|
|
73
|
+
/**
|
|
74
|
+
* `1 - (availableBytes / totalBytes)` clamped to `[0, 1]`. A high
|
|
75
|
+
* pressure score means little headroom for new allocations; the
|
|
76
|
+
* adaptive selector uses this as the primary mode-down signal.
|
|
77
|
+
*/
|
|
78
|
+
pressureScore: number;
|
|
79
|
+
/**
|
|
80
|
+
* `'proc-meminfo'` when the values came from a successful
|
|
81
|
+
* `/proc/meminfo` read; `'os-fallback'` when the probe degraded
|
|
82
|
+
* to `os.totalmem()`/`os.freemem()`. Surface for diagnostics —
|
|
83
|
+
* a fallback snapshot is a hint that the deployment is on macOS
|
|
84
|
+
* or Windows.
|
|
85
|
+
*/
|
|
86
|
+
source: 'proc-meminfo' | 'os-fallback';
|
|
87
|
+
}
|
|
88
|
+
/**
|
|
89
|
+
* Subset of `/proc/meminfo` lines the probe cares about. Returned by
|
|
90
|
+
* {@link parseMeminfo} so the snapshot composition stays unit-
|
|
91
|
+
* testable.
|
|
92
|
+
*/
|
|
93
|
+
export interface MeminfoFields {
|
|
94
|
+
/** Value of the `MemTotal` line, in KiB. */
totalKib?: number;
|
|
95
|
+
/** Value of the `MemAvailable` line, in KiB; absent when the kernel does not emit it. */
availableKib?: number;
|
|
96
|
+
/** Value of the `MemFree` line, in KiB; absent when the line is missing. */
freeKib?: number;
|
|
97
|
+
}
|
|
98
|
+
/**
|
|
99
|
+
* Parse a `/proc/meminfo` text blob and extract the three fields the
|
|
100
|
+
* probe needs. Pure function — no I/O, fully deterministic, the
|
|
101
|
+
* only thing fixture tests need to cover.
|
|
102
|
+
*
|
|
103
|
+
* Returns `null` only when `MemTotal` is missing, which would mean
|
|
104
|
+
* the input isn't `/proc/meminfo` at all (every Linux kernel since
|
|
105
|
+
* the file existed has emitted it). Missing `MemAvailable` or
|
|
106
|
+
* `MemFree` is normal on old or specialised kernels and is left for
|
|
107
|
+
* the snapshot composer to handle.
|
|
108
|
+
*/
|
|
109
|
+
export declare function parseMeminfo(text: string): MeminfoFields | null;
|
|
110
|
+
/**
|
|
111
|
+
* Build an {@link OsMemorySnapshot} from the parsed `/proc/meminfo`
|
|
112
|
+
* fields. Visible for tests; production callers use
|
|
113
|
+
* {@link OsMemoryProbe.snapshot}.
|
|
114
|
+
*/
|
|
115
|
+
export declare function snapshotFromMeminfo(fields: MeminfoFields): OsMemorySnapshot;
|
|
116
|
+
/**
|
|
117
|
+
* Build the os-fallback snapshot from Node's `os` module. Visible
|
|
118
|
+
* for tests; production callers use {@link OsMemoryProbe.snapshot}.
|
|
119
|
+
*/
|
|
120
|
+
export declare function snapshotFromOs(): OsMemorySnapshot;
|
|
121
|
+
/**
|
|
122
|
+
* Optional config for {@link OsMemoryProbe}. All fields are mainly
|
|
123
|
+
* for tests — production callers construct with defaults.
|
|
124
|
+
*/
|
|
125
|
+
export interface OsMemoryProbeOptions {
|
|
126
|
+
/**
|
|
127
|
+
* Cache TTL in milliseconds. The snapshot is reused for repeated
|
|
128
|
+
* calls within this window. Default 250 ms — comfortably below
|
|
129
|
+
* the ResourceManager's 5 s pressure-tick.
|
|
130
|
+
*/
|
|
131
|
+
cacheMs?: number;
|
|
132
|
+
/**
|
|
133
|
+
* Override the `/proc/meminfo` reader. Tests inject custom text
|
|
134
|
+
* to exercise the parser's edge cases without touching the real
|
|
135
|
+
* filesystem.
|
|
136
|
+
*/
|
|
137
|
+
procReader?: () => string | null;
|
|
138
|
+
/**
|
|
139
|
+
* Override the os-fallback snapshot generator. Tests use this to
|
|
140
|
+
* pin the fallback shape without depending on the host's actual
|
|
141
|
+
* `os.totalmem()` reading.
|
|
142
|
+
*/
|
|
143
|
+
osFallback?: () => OsMemorySnapshot;
|
|
144
|
+
/**
|
|
145
|
+
* Override the wall clock used for cache expiry. Tests pass a
|
|
146
|
+
* controllable timestamp source to assert cache behaviour
|
|
147
|
+
* deterministically.
|
|
148
|
+
*/
|
|
149
|
+
now?: () => number;
|
|
150
|
+
}
|
|
151
|
+
/**
|
|
152
|
+
* Cached `/proc/meminfo` observer. Wire one instance into the
|
|
153
|
+
* {@link ResourceManager}; the adaptive DiskANN selector (Piece I)
|
|
154
|
+
* will read `snapshot()` at brain-open time.
|
|
155
|
+
*/
|
|
156
|
+
export declare class OsMemoryProbe {
|
|
157
|
+
private readonly cacheMs;
|
|
158
|
+
private readonly procReader;
|
|
159
|
+
private readonly osFallback;
|
|
160
|
+
private readonly now;
|
|
161
|
+
private cached;
|
|
162
|
+
constructor(options?: OsMemoryProbeOptions);
|
|
163
|
+
/**
|
|
164
|
+
* Latest memory snapshot. Cached for {@link OsMemoryProbeOptions.cacheMs}
|
|
165
|
+
* milliseconds; calls inside the window reuse the cached value.
|
|
166
|
+
*/
|
|
167
|
+
snapshot(): OsMemorySnapshot;
|
|
168
|
+
/**
|
|
169
|
+
* Invalidate the cache so the next {@link snapshot} call performs
|
|
170
|
+
* a fresh read. Used by tests; production callers shouldn't need
|
|
171
|
+
* this because the cache TTL is short.
|
|
172
|
+
*/
|
|
173
|
+
invalidate(): void;
|
|
174
|
+
}
|
|
175
|
+
//# sourceMappingURL=OsMemoryProbe.d.ts.map
|
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @module resource/OsMemoryProbe
|
|
3
|
+
* @description OS-level memory observer for the cor 3.0 **Adaptive
|
|
4
|
+
* DiskANN** mode selector (Piece J of the cor 3.0 plan).
|
|
5
|
+
*
|
|
6
|
+
* The {@link ResourceManager} already observes the V8 heap and a
|
|
7
|
+
* coarse system-RSS pressure ratio (Piece 8). What it didn't have
|
|
8
|
+
* was a read on the kernel's `MemAvailable` figure — the metric the
|
|
9
|
+
* Linux kernel computes as "how much physical RAM can a new
|
|
10
|
+
* allocation claim without swapping," factoring in reclaimable page
|
|
11
|
+
* cache and slab. That's the right signal for the adaptive selector
|
|
12
|
+
* to pick a DiskANN open mode in multi-tenant scenarios where a
|
|
13
|
+
* shared box runs many brainy+cor instances: each instance's
|
|
14
|
+
* `os.freemem()` undercounts available memory (it reports MemFree,
|
|
15
|
+
* not MemAvailable), so a naïve `freemem`-based check would push
|
|
16
|
+
* the selector toward OnDisk hints even when there's plenty of
|
|
17
|
+
* reclaimable cache available.
|
|
18
|
+
*
|
|
19
|
+
* ## Surface
|
|
20
|
+
*
|
|
21
|
+
* - {@link parseMeminfo} — pure parser; fixture-friendly.
|
|
22
|
+
* - {@link OsMemoryProbe} — readers + caching wrapper. Dependency-
|
|
23
|
+
* injected so tests can simulate Linux on macOS and exercise the
|
|
24
|
+
* `MemAvailable` missing branch without touching the real
|
|
25
|
+
* `/proc/meminfo`.
|
|
26
|
+
*
|
|
27
|
+
* ## Why a cache
|
|
28
|
+
*
|
|
29
|
+
* `/proc/meminfo` reads cost ~10 μs of syscall time — not free, but
|
|
30
|
+
* also not worth re-reading on every single call. The 250 ms cache
|
|
31
|
+
* is comfortably below the 5 s pressure-tick cadence and the 30 s
|
|
32
|
+
* rebalance cadence the ResourceManager already runs, so any
|
|
33
|
+
* meaningful pressure event lands on a fresh read.
|
|
34
|
+
*
|
|
35
|
+
* ## Non-Linux fallback
|
|
36
|
+
*
|
|
37
|
+
* On macOS and Windows there is no `/proc/meminfo`. The probe falls
|
|
38
|
+
* back to `os.totalmem()` + `os.freemem()` and marks the snapshot
|
|
39
|
+
* `source: 'os-fallback'` so callers can degrade gracefully (the
|
|
40
|
+
* adaptive selector currently treats fallback snapshots as
|
|
41
|
+
* "available bytes is conservative, prefer Hybrid over OnDisk").
|
|
42
|
+
*/
|
|
43
|
+
import { readFileSync } from 'node:fs';
import os from 'node:os';
|
|
44
|
+
const KIB = 1024;
|
|
45
|
+
/**
 * Recognises one `Key: <digits> [kB]` line of `/proc/meminfo`.
 *
 * Capture 1 is the key (letters, parentheses and underscores, so names such
 * as `Active(anon)` match too); capture 2 is the unsigned decimal value. The
 * ` kB` unit suffix and trailing whitespace are optional.
 */
const FIELD_PATTERN = /^([A-Za-z()_]+):\s+(\d+)(?:\s+kB)?\s*$/;
/**
 * Extract `MemTotal`, `MemAvailable` and `MemFree` from `/proc/meminfo` text.
 *
 * Pure: no I/O and no dependence on anything but `text`. Lines that do not
 * match {@link FIELD_PATTERN}, and keys other than the three above, are
 * ignored. When a key appears more than once the last occurrence wins.
 *
 * @param text Raw `/proc/meminfo` contents.
 * @returns An object holding whichever of `totalKib`, `availableKib` and
 *   `freeKib` were present, or `null` when `MemTotal` is absent. A missing
 *   `MemAvailable` / `MemFree` is left for the caller to handle.
 */
export function parseMeminfo(text) {
    // A Map (not an object literal) so keys like `constructor` can't resolve
    // to inherited properties.
    const slotByKey = new Map([
        ['MemTotal', 'totalKib'],
        ['MemAvailable', 'availableKib'],
        ['MemFree', 'freeKib'],
    ]);
    const result = {};
    for (const raw of text.split('\n')) {
        const hit = FIELD_PATTERN.exec(raw.trimEnd());
        if (hit === null)
            continue;
        const [, label, digits] = hit;
        const kib = parseInt(digits, 10);
        if (!Number.isFinite(kib) || kib < 0)
            continue;
        const slot = slotByKey.get(label);
        if (slot !== undefined)
            result[slot] = kib;
    }
    return result.totalKib === undefined ? null : result;
}
|
|
89
|
+
/**
 * Clamp `x` into the closed interval [0, 1].
 *
 * Any non-finite input (NaN, +Infinity, -Infinity) maps to 0 — note that
 * +Infinity does NOT saturate to 1.
 *
 * @param x Value to clamp.
 * @returns `x` when it already lies in [0, 1]; otherwise the nearer bound, or
 *   0 for non-finite input.
 */
function clamp01(x) {
    if (!Number.isFinite(x) || x < 0)
        return 0;
    return x > 1 ? 1 : x;
}
|
|
98
|
+
/**
 * Turn parsed `/proc/meminfo` fields into an {@link OsMemorySnapshot}.
 * Exposed for tests; production code goes through
 * {@link OsMemoryProbe.snapshot}.
 *
 * KiB values are scaled to bytes. A missing `totalKib` or `freeKib` counts
 * as 0. When `availableKib` is undefined (kernels without `MemAvailable`)
 * the free figure stands in for it.
 *
 * @param fields Output of {@link parseMeminfo}.
 * @returns Snapshot tagged `source: 'proc-meminfo'`; `pressureScore` is
 *   `1 - available/total` clamped to [0, 1], or 0 when the total is 0.
 */
export function snapshotFromMeminfo(fields) {
    const { totalKib, availableKib, freeKib } = fields;
    const totalBytes = (totalKib ?? 0) * KIB;
    const freeBytes = (freeKib ?? 0) * KIB;
    // Fall back to MemFree when MemAvailable is not reported — a weaker
    // signal, but the only one left.
    let availableBytes = freeBytes;
    if (availableKib !== undefined) {
        availableBytes = availableKib * KIB;
    }
    let pressureScore = 0;
    if (totalBytes > 0) {
        pressureScore = clamp01(1 - availableBytes / totalBytes);
    }
    return { totalBytes, availableBytes, freeBytes, pressureScore, source: 'proc-meminfo' };
}
|
|
118
|
+
/**
 * Build a snapshot from `os.totalmem()` / `os.freemem()` for hosts where
 * `/proc/meminfo` is not usable. Exposed for tests; production code goes
 * through {@link OsMemoryProbe.snapshot}.
 *
 * `availableBytes` is reported equal to `freeBytes` because this path has no
 * separate "available" figure.
 *
 * @returns Snapshot tagged `source: 'os-fallback'`; `pressureScore` is
 *   `1 - free/total` clamped to [0, 1], or 0 when the total is 0.
 */
export function snapshotFromOs() {
    const total = os.totalmem();
    const free = os.freemem();
    return {
        totalBytes: total,
        availableBytes: free,
        freeBytes: free,
        pressureScore: total > 0 ? clamp01(1 - free / total) : 0,
        source: 'os-fallback',
    };
}
|
|
136
|
+
/**
|
|
137
|
+
* Default `/proc/meminfo` reader. Returns the file's text or `null`
|
|
138
|
+
* when the file doesn't exist / is unreadable (the expected case on
|
|
139
|
+
* macOS and Windows). Synchronous because the file is virtual and
|
|
140
|
+
* reads cost microseconds.
|
|
141
|
+
*/
|
|
142
|
+
function defaultProcReader() {
|
|
143
|
+
try {
|
|
144
|
+
const { readFileSync } = require('node:fs');
|
|
145
|
+
return readFileSync('/proc/meminfo', 'utf8');
|
|
146
|
+
}
|
|
147
|
+
catch {
|
|
148
|
+
return null;
|
|
149
|
+
}
|
|
150
|
+
}
|
|
151
|
+
/**
 * Memory observer that caches its most recent snapshot for a short TTL.
 *
 * Every collaborator (meminfo reader, fallback snapshot builder, clock) can
 * be injected through the constructor options; defaults are the real
 * `/proc/meminfo` reader, {@link snapshotFromOs} and `Date.now`.
 */
export class OsMemoryProbe {
    cacheMs;
    procReader;
    osFallback;
    now;
    /** `{ snapshot, expires }` for the last read, or `null` when nothing is cached. */
    cached = null;
    /**
     * @param options Optional overrides; any field left `null`/`undefined`
     *   takes its default (`cacheMs` defaults to 250).
     */
    constructor(options = {}) {
        this.cacheMs = options.cacheMs ?? 250;
        this.procReader = options.procReader ?? defaultProcReader;
        this.osFallback = options.osFallback ?? snapshotFromOs;
        this.now = options.now ?? Date.now;
    }
    /**
     * Current memory snapshot.
     *
     * Returns the cached snapshot while its expiry lies strictly after the
     * current clock reading. Otherwise it reads afresh: parsed meminfo when
     * the reader yields text that parses, the fallback builder when the
     * reader yields `null` or the text has no `MemTotal`. The fresh snapshot
     * is cached for `cacheMs` milliseconds.
     *
     * @returns The (possibly cached) snapshot.
     */
    snapshot() {
        const at = this.now();
        const hit = this.cached;
        if (hit && hit.expires > at) {
            return hit.snapshot;
        }
        const raw = this.procReader();
        // A null parse means the reader succeeded but the text was not
        // usable meminfo; degrade to the fallback rather than throwing.
        const parsed = raw === null ? null : parseMeminfo(raw);
        const fresh = parsed === null ? this.osFallback() : snapshotFromMeminfo(parsed);
        this.cached = { snapshot: fresh, expires: at + this.cacheMs };
        return fresh;
    }
    /**
     * Drop the cached snapshot so the next {@link snapshot} call reads afresh.
     */
    invalidate() {
        this.cached = null;
    }
}
|
|
206
|
+
//# sourceMappingURL=OsMemoryProbe.js.map
|