@soulcraft/brainy 3.35.0 → 3.36.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +62 -0
- package/README.md +32 -12
- package/dist/hnsw/hnswIndex.d.ts +104 -1
- package/dist/hnsw/hnswIndex.js +282 -25
- package/dist/hnsw/hnswIndexOptimized.d.ts +1 -2
- package/dist/hnsw/hnswIndexOptimized.js +3 -5
- package/dist/hnsw/partitionedHNSWIndex.js +1 -1
- package/dist/interfaces/IIndex.d.ts +14 -5
- package/dist/storage/adapters/gcsStorage.js +15 -5
- package/dist/utils/memoryDetection.d.ts +119 -0
- package/dist/utils/memoryDetection.js +321 -0
- package/dist/utils/unifiedCache.d.ts +75 -1
- package/dist/utils/unifiedCache.js +123 -4
- package/package.json +1 -1
package/dist/utils/memoryDetection.d.ts
@@ -0,0 +1,119 @@
+/**
+ * Memory Detection Utilities
+ * Detects available system memory across different environments:
+ * - Docker/Kubernetes (cgroups v1 and v2)
+ * - Bare metal servers
+ * - Cloud instances
+ * - Development environments
+ *
+ * Scales from 2GB to 128GB+ with intelligent allocation
+ */
+export interface MemoryInfo {
+    /** Total memory available to this process (bytes) */
+    available: number;
+    /** Source of memory information */
+    source: 'cgroup-v2' | 'cgroup-v1' | 'system' | 'fallback';
+    /** Whether running in a container */
+    isContainer: boolean;
+    /** System total memory (may differ from available in containers) */
+    systemTotal: number;
+    /** Currently free memory (best-effort estimate) */
+    free: number;
+    /** Detection warnings (if any) */
+    warnings: string[];
+}
+export interface CacheAllocationStrategy {
+    /** Recommended cache size (bytes) */
+    cacheSize: number;
+    /** Allocation ratio used (0-1) */
+    ratio: number;
+    /** Minimum guaranteed size (bytes) */
+    minSize: number;
+    /** Maximum allowed size (bytes) */
+    maxSize: number | null;
+    /** Environment type detected */
+    environment: 'production' | 'development' | 'container' | 'unknown';
+    /** Model memory reserved (bytes) - v3.36.0+ */
+    modelMemory: number;
+    /** Model precision (q8 or fp32) */
+    modelPrecision: 'q8' | 'fp32';
+    /** Available memory after model reservation (bytes) */
+    availableForCache: number;
+    /** Reasoning for allocation */
+    reasoning: string;
+}
+/**
+ * Detect available memory across all environments
+ */
+export declare function detectAvailableMemory(): MemoryInfo;
+/**
+ * Calculate optimal cache size based on available memory
+ * Scales intelligently from 2GB to 128GB+
+ *
+ * v3.36.0+: Accounts for embedding model memory (150MB Q8, 250MB FP32)
+ */
+export declare function calculateOptimalCacheSize(memoryInfo: MemoryInfo, options?: {
+    /** Manual override (bytes) - takes precedence */
+    manualSize?: number;
+    /** Minimum cache size (bytes) - default 256MB */
+    minSize?: number;
+    /** Maximum cache size (bytes) - default unlimited */
+    maxSize?: number;
+    /** Force development mode allocation (more conservative) */
+    developmentMode?: boolean;
+    /** Model precision for memory calculation - default 'q8' */
+    modelPrecision?: 'q8' | 'fp32';
+}): CacheAllocationStrategy;
+/**
+ * Get recommended cache configuration for current environment
+ */
+export declare function getRecommendedCacheConfig(options?: {
+    /** Manual cache size override (bytes) */
+    manualSize?: number;
+    /** Minimum cache size (bytes) */
+    minSize?: number;
+    /** Maximum cache size (bytes) */
+    maxSize?: number;
+    /** Force development mode */
+    developmentMode?: boolean;
+}): {
+    memoryInfo: MemoryInfo;
+    allocation: CacheAllocationStrategy;
+    warnings: string[];
+};
+/**
+ * Detect embedding model memory usage
+ *
+ * Returns estimated runtime memory for the embedding model:
+ * - Q8 (quantized, default): ~150MB runtime (22MB on disk)
+ * - FP32 (full precision): ~250MB runtime (86MB on disk)
+ *
+ * Breakdown for Q8:
+ * - Model weights: 22MB
+ * - ONNX Runtime: 15-30MB
+ * - Session workspace: 50-100MB (peak during inference)
+ * - Total: ~100-150MB (we use 150MB conservative)
+ */
+export declare function detectModelMemory(options?: {
+    /** Model precision (default: 'q8') */
+    precision?: 'q8' | 'fp32';
+}): {
+    bytes: number;
+    precision: 'q8' | 'fp32';
+    breakdown: {
+        modelWeights: number;
+        onnxRuntime: number;
+        sessionWorkspace: number;
+    };
+};
+/**
+ * Format bytes to human-readable string
+ */
+export declare function formatBytes(bytes: number): string;
+/**
+ * Monitor memory usage and warn if approaching limits
+ */
+export declare function checkMemoryPressure(cacheSize: number, memoryInfo: MemoryInfo): {
+    pressure: 'none' | 'moderate' | 'high' | 'critical';
+    warnings: string[];
+};
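For orientation, a minimal usage sketch of the new exports follows. The import path is assumed from the dist layout in the file list above (any root-level re-exports are not visible in this diff), and the 512MB floor is a hypothetical value, not a package default.

import {
  getRecommendedCacheConfig,
  formatBytes
} from '@soulcraft/brainy/dist/utils/memoryDetection.js'

// Auto-detect memory (cgroups v2/v1, then system) and pick an allocation.
const { memoryInfo, allocation, warnings } = getRecommendedCacheConfig({
  minSize: 512 * 1024 * 1024, // hypothetical 512MB floor
  developmentMode: process.env.NODE_ENV !== 'production'
})

console.log(`memory source: ${memoryInfo.source}, container: ${memoryInfo.isContainer}`)
console.log(`cache budget: ${formatBytes(allocation.cacheSize)} (${allocation.reasoning})`)
for (const w of warnings) console.warn(w)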
package/dist/utils/memoryDetection.js
@@ -0,0 +1,321 @@
+/**
+ * Memory Detection Utilities
+ * Detects available system memory across different environments:
+ * - Docker/Kubernetes (cgroups v1 and v2)
+ * - Bare metal servers
+ * - Cloud instances
+ * - Development environments
+ *
+ * Scales from 2GB to 128GB+ with intelligent allocation
+ */
+import * as os from 'os';
+import * as fs from 'fs';
+import { prodLog } from './logger.js';
+/**
+ * Detect available memory across all environments
+ */
+export function detectAvailableMemory() {
+    const warnings = [];
+    // Try cgroups v2 first (modern Docker/K8s)
+    const cgroupV2 = detectCgroupV2Memory();
+    if (cgroupV2 !== null) {
+        const systemTotal = os.totalmem();
+        const free = os.freemem();
+        return {
+            available: cgroupV2,
+            source: 'cgroup-v2',
+            isContainer: true,
+            systemTotal,
+            free,
+            warnings: cgroupV2 < systemTotal
+                ? [`Container limited to ${formatBytes(cgroupV2)} (host has ${formatBytes(systemTotal)})`]
+                : []
+        };
+    }
+    // Try cgroups v1 (older Docker/K8s)
+    const cgroupV1 = detectCgroupV1Memory();
+    if (cgroupV1 !== null) {
+        const systemTotal = os.totalmem();
+        const free = os.freemem();
+        return {
+            available: cgroupV1,
+            source: 'cgroup-v1',
+            isContainer: true,
+            systemTotal,
+            free,
+            warnings: cgroupV1 < systemTotal
+                ? [`Container limited to ${formatBytes(cgroupV1)} (host has ${formatBytes(systemTotal)})`]
+                : []
+        };
+    }
+    // Use system memory (bare metal, VM, or unlimited container)
+    const systemTotal = os.totalmem();
+    const free = os.freemem();
+    // Check if we might be in an unlimited container
+    if (process.env.KUBERNETES_SERVICE_HOST || process.env.DOCKER_CONTAINER) {
+        warnings.push('Container detected but no memory limit set - using host memory');
+    }
+    return {
+        available: systemTotal,
+        source: 'system',
+        isContainer: false,
+        systemTotal,
+        free,
+        warnings
+    };
+}
+/**
+ * Detect memory limit from cgroups v2 (modern containers)
+ * Path: /sys/fs/cgroup/memory.max
+ */
+function detectCgroupV2Memory() {
+    try {
+        const memoryMaxPath = '/sys/fs/cgroup/memory.max';
+        if (!fs.existsSync(memoryMaxPath)) {
+            return null;
+        }
+        const content = fs.readFileSync(memoryMaxPath, 'utf8').trim();
+        // 'max' means unlimited
+        if (content === 'max') {
+            return null;
+        }
+        const bytes = parseInt(content, 10);
+        // Sanity check: Must be reasonable number (between 64MB and 1TB)
+        if (bytes < 64 * 1024 * 1024 || bytes > 1024 * 1024 * 1024 * 1024) {
+            prodLog.warn(`Suspicious cgroup v2 memory limit: ${formatBytes(bytes)}`);
+            return null;
+        }
+        return bytes;
+    }
+    catch (error) {
+        // Not in a cgroup v2 environment
+        return null;
+    }
+}
+/**
+ * Detect memory limit from cgroups v1 (older containers)
+ * Path: /sys/fs/cgroup/memory/memory.limit_in_bytes
+ */
+function detectCgroupV1Memory() {
+    try {
+        const limitPath = '/sys/fs/cgroup/memory/memory.limit_in_bytes';
+        if (!fs.existsSync(limitPath)) {
+            return null;
+        }
+        const content = fs.readFileSync(limitPath, 'utf8').trim();
+        const bytes = parseInt(content, 10);
+        // cgroup v1 uses very large number (2^63-1) to indicate unlimited
+        // If limit is > 1TB, consider it unlimited
+        if (bytes > 1024 * 1024 * 1024 * 1024) {
+            return null;
+        }
+        // Sanity check: Must be reasonable number (between 64MB and 1TB)
+        if (bytes < 64 * 1024 * 1024) {
+            prodLog.warn(`Suspicious cgroup v1 memory limit: ${formatBytes(bytes)}`);
+            return null;
+        }
+        return bytes;
+    }
+    catch (error) {
+        // Not in a cgroup v1 environment
+        return null;
+    }
+}
+/**
+ * Calculate optimal cache size based on available memory
+ * Scales intelligently from 2GB to 128GB+
+ *
+ * v3.36.0+: Accounts for embedding model memory (150MB Q8, 250MB FP32)
+ */
+export function calculateOptimalCacheSize(memoryInfo, options = {}) {
+    const minSize = options.minSize || 256 * 1024 * 1024; // 256MB minimum
+    const maxSize = options.maxSize || null;
+    // Detect model memory usage (v3.36.0+)
+    const modelInfo = detectModelMemory({ precision: options.modelPrecision || 'q8' });
+    const modelMemory = modelInfo.bytes;
+    // Reserve model memory from available RAM BEFORE calculating cache
+    // This ensures we don't over-allocate and cause OOM
+    const availableForCache = Math.max(0, memoryInfo.available - modelMemory);
+    // Manual override takes precedence
+    if (options.manualSize !== undefined) {
+        const clamped = Math.max(minSize, options.manualSize);
+        return {
+            cacheSize: clamped,
+            ratio: clamped / availableForCache,
+            minSize,
+            maxSize,
+            environment: 'unknown',
+            modelMemory,
+            modelPrecision: modelInfo.precision,
+            availableForCache,
+            reasoning: 'Manual override specified'
+        };
+    }
+    // Determine environment and allocation ratio
+    let ratio;
+    let environment;
+    let reasoning;
+    if (options.developmentMode || process.env.NODE_ENV === 'development') {
+        // Development: More conservative (25%)
+        ratio = 0.25;
+        environment = 'development';
+        reasoning = `Development mode - conservative allocation (25% of ${formatBytes(availableForCache)} after ${formatBytes(modelMemory)} model)`;
+    }
+    else if (memoryInfo.isContainer) {
+        // Container: Moderate allocation (40%)
+        // Containers often have tight limits, leave room for heap growth
+        ratio = 0.40;
+        environment = 'container';
+        reasoning = `Container environment - moderate allocation (40% of ${formatBytes(availableForCache)} after ${formatBytes(modelMemory)} model)`;
+    }
+    else {
+        // Production bare metal/VM: Aggressive allocation (50%)
+        // More memory available, can be more aggressive
+        ratio = 0.50;
+        environment = 'production';
+        reasoning = `Production environment - aggressive allocation (50% of ${formatBytes(availableForCache)} after ${formatBytes(modelMemory)} model)`;
+    }
+    // Calculate base cache size from AVAILABLE memory (after model reservation)
+    let cacheSize = Math.floor(availableForCache * ratio);
+    // Apply minimum constraint
+    if (cacheSize < minSize) {
+        const originalSize = cacheSize;
+        cacheSize = minSize;
+        reasoning += ` (increased from ${formatBytes(originalSize)} to meet minimum)`;
+        // Warn if available memory is very low
+        if (availableForCache < minSize * 2) {
+            prodLog.warn(`⚠️ Low available memory for cache (${formatBytes(availableForCache)} after ${formatBytes(modelMemory)} model). ` +
+                `Cache size ${formatBytes(cacheSize)} may cause memory pressure.`);
+        }
+    }
+    // Apply maximum constraint
+    if (maxSize !== null && cacheSize > maxSize) {
+        const originalSize = cacheSize;
+        cacheSize = maxSize;
+        reasoning += ` (capped from ${formatBytes(originalSize)} to maximum)`;
+    }
+    // Intelligent scaling for large memory systems
+    // For systems with >64GB available for cache, use logarithmic scaling to avoid over-allocation
+    if (availableForCache > 64 * 1024 * 1024 * 1024) {
+        // Above 64GB, scale more conservatively
+        // Formula: base + log2(availableForCache/64GB) * 8GB
+        const base = 32 * 1024 * 1024 * 1024; // 32GB base
+        const scaleFactor = Math.log2(availableForCache / (64 * 1024 * 1024 * 1024));
+        const scaled = base + scaleFactor * 8 * 1024 * 1024 * 1024; // +8GB per doubling
+        if (scaled < cacheSize) {
+            const originalSize = cacheSize;
+            cacheSize = Math.floor(scaled);
+            reasoning += ` (scaled down from ${formatBytes(originalSize)} for large memory system)`;
+        }
+    }
+    return {
+        cacheSize,
+        ratio,
+        minSize,
+        maxSize,
+        environment,
+        modelMemory,
+        modelPrecision: modelInfo.precision,
+        availableForCache,
+        reasoning
+    };
+}
+/**
+ * Get recommended cache configuration for current environment
+ */
+export function getRecommendedCacheConfig(options = {}) {
+    const memoryInfo = detectAvailableMemory();
+    const allocation = calculateOptimalCacheSize(memoryInfo, options);
+    const warnings = [...memoryInfo.warnings];
+    // Add allocation warnings
+    if (allocation.cacheSize === allocation.minSize) {
+        warnings.push(`Cache size at minimum (${formatBytes(allocation.minSize)}). ` +
+            `Consider increasing available memory for better performance.`);
+    }
+    if (allocation.ratio > 0.6) {
+        warnings.push(`Cache using ${(allocation.ratio * 100).toFixed(0)}% of available memory. ` +
+            `Monitor for memory pressure.`);
+    }
+    return {
+        memoryInfo,
+        allocation,
+        warnings
+    };
+}
+/**
+ * Detect embedding model memory usage
+ *
+ * Returns estimated runtime memory for the embedding model:
+ * - Q8 (quantized, default): ~150MB runtime (22MB on disk)
+ * - FP32 (full precision): ~250MB runtime (86MB on disk)
+ *
+ * Breakdown for Q8:
+ * - Model weights: 22MB
+ * - ONNX Runtime: 15-30MB
+ * - Session workspace: 50-100MB (peak during inference)
+ * - Total: ~100-150MB (we use 150MB conservative)
+ */
+export function detectModelMemory(options = {}) {
+    const precision = options.precision || 'q8';
+    if (precision === 'q8') {
+        // Q8 quantized model (default)
+        return {
+            bytes: 150 * 1024 * 1024, // 150MB
+            precision: 'q8',
+            breakdown: {
+                modelWeights: 22 * 1024 * 1024, // 22MB
+                onnxRuntime: 30 * 1024 * 1024, // 30MB (conservative)
+                sessionWorkspace: 98 * 1024 * 1024 // 98MB (peak during inference)
+            }
+        };
+    }
+    else {
+        // FP32 full precision model
+        return {
+            bytes: 250 * 1024 * 1024, // 250MB
+            precision: 'fp32',
+            breakdown: {
+                modelWeights: 86 * 1024 * 1024, // 86MB
+                onnxRuntime: 30 * 1024 * 1024, // 30MB
+                sessionWorkspace: 134 * 1024 * 1024 // 134MB (peak during inference)
+            }
+        };
+    }
+}
+/**
+ * Format bytes to human-readable string
+ */
+export function formatBytes(bytes) {
+    if (bytes === 0)
+        return '0 B';
+    const k = 1024;
+    const sizes = ['B', 'KB', 'MB', 'GB', 'TB'];
+    const i = Math.floor(Math.log(bytes) / Math.log(k));
+    return `${(bytes / Math.pow(k, i)).toFixed(2)} ${sizes[i]}`;
+}
+/**
+ * Monitor memory usage and warn if approaching limits
+ */
+export function checkMemoryPressure(cacheSize, memoryInfo) {
+    const warnings = [];
+    const heapUsed = process.memoryUsage().heapUsed;
+    const totalUsed = heapUsed + cacheSize;
+    const utilization = totalUsed / memoryInfo.available;
+    if (utilization > 0.95) {
+        warnings.push(`🔴 CRITICAL: Memory utilization at ${(utilization * 100).toFixed(1)}%. ` +
+            `Reduce cache size or increase available memory.`);
+        return { pressure: 'critical', warnings };
+    }
+    if (utilization > 0.85) {
+        warnings.push(`🟠 HIGH: Memory utilization at ${(utilization * 100).toFixed(1)}%. ` +
+            `Consider increasing available memory.`);
+        return { pressure: 'high', warnings };
+    }
+    if (utilization > 0.70) {
+        warnings.push(`🟡 MODERATE: Memory utilization at ${(utilization * 100).toFixed(1)}%. ` +
+            `Monitor for memory pressure.`);
+        return { pressure: 'moderate', warnings };
+    }
+    return { pressure: 'none', warnings: [] };
+}
+//# sourceMappingURL=memoryDetection.js.map
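As a rough check of the large-memory branch above: for a hypothetical 128GB bare-metal host in production mode, the plain 50% ratio would yield roughly 64GB, but because availableForCache exceeds 64GB the logarithmic cap (32GB base plus about 8GB per doubling beyond 64GB) reduces the result to roughly 40GB. The sketch below uses a synthetic MemoryInfo and an assumed import path; none of the values are package defaults.

import {
  calculateOptimalCacheSize,
  formatBytes,
  type MemoryInfo
} from '@soulcraft/brainy/dist/utils/memoryDetection.js'

const GiB = 1024 * 1024 * 1024

// Synthetic values for illustration only.
const synthetic: MemoryInfo = {
  available: 128 * GiB,
  source: 'system',
  isContainer: false,
  systemTotal: 128 * GiB,
  free: 100 * GiB,
  warnings: []
}

const allocation = calculateOptimalCacheSize(synthetic)
// Expected: ~40GB rather than ~64GB, with the "scaled down ... for large
// memory system" note appended to the reasoning string.
console.log(formatBytes(allocation.cacheSize), '-', allocation.reasoning)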
package/dist/utils/unifiedCache.d.ts
@@ -1,6 +1,12 @@
 /**
  * UnifiedCache - Single cache for both HNSW and MetadataIndex
  * Prevents resource competition with cost-aware eviction
+ *
+ * Features (v3.36.0+):
+ * - Adaptive sizing: Automatically scales from 2GB to 128GB+ based on available memory
+ * - Container-aware: Detects Docker/K8s limits (cgroups v1/v2)
+ * - Environment detection: Production vs development allocation strategies
+ * - Memory pressure monitoring: Warns when approaching limits
  */
 export interface CacheItem {
     key: string;
@@ -12,11 +18,24 @@ export interface CacheItem {
     accessCount: number;
 }
 export interface UnifiedCacheConfig {
+    /** Maximum cache size in bytes (auto-detected if not specified) */
     maxSize?: number;
+    /** Minimum cache size in bytes (default 256MB) */
+    minSize?: number;
+    /** Force development mode allocation (25% instead of 40-50%) */
+    developmentMode?: boolean;
+    /** Enable request coalescing to prevent duplicate loads */
     enableRequestCoalescing?: boolean;
+    /** Enable fairness monitoring to prevent cache starvation */
     enableFairnessCheck?: boolean;
+    /** Fairness check interval in milliseconds */
     fairnessCheckInterval?: number;
+    /** Enable access pattern persistence for warm starts */
     persistPatterns?: boolean;
+    /** Enable memory pressure monitoring (default true) */
+    enableMemoryMonitoring?: boolean;
+    /** Memory pressure check interval in milliseconds (default 30s) */
+    memoryCheckInterval?: number;
 }
 export declare class UnifiedCache {
     private cache;
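A minimal construction sketch using the options declared above; the import path is assumed from the dist layout and the values are illustrative rather than recommendations.

import { UnifiedCache } from '@soulcraft/brainy/dist/utils/unifiedCache.js'

const cache = new UnifiedCache({
  // Omit maxSize to let the cache size itself from detected memory.
  minSize: 256 * 1024 * 1024,   // keep at least 256MB
  developmentMode: false,       // use the production/container ratios
  enableMemoryMonitoring: true, // periodic pressure checks
  memoryCheckInterval: 30_000   // every 30 seconds
})

console.log(cache.getStats().memory) // allocationRatio, environment, source, ...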
@@ -27,11 +46,21 @@ export declare class UnifiedCache {
     private currentSize;
     private readonly maxSize;
     private readonly config;
+    private readonly memoryInfo;
+    private readonly allocationStrategy;
+    private memoryPressureCheckTimer;
+    private lastMemoryWarning;
     constructor(config?: UnifiedCacheConfig);
     /**
      * Get item from cache with request coalescing
      */
     get(key: string, loadFn?: () => Promise<any>): Promise<any>;
+    /**
+     * Synchronous cache lookup (v3.36.0+)
+     * Returns cached data immediately or undefined if not cached
+     * Use for sync fast path optimization - zero async overhead
+     */
+    getSync(key: string): any | undefined;
     /**
      * Set item in cache with cost-aware eviction
      */
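A sketch of the sync fast path that getSync enables, paired with the existing coalescing get(); loadVectorFromStorage is a hypothetical loader, not part of this package, and the import path is assumed from the dist layout.

import { UnifiedCache } from '@soulcraft/brainy/dist/utils/unifiedCache.js'

declare function loadVectorFromStorage(key: string): Promise<any> // hypothetical

async function loadVector(cache: UnifiedCache, key: string): Promise<any> {
  const hit = cache.getSync(key)
  if (hit !== undefined) return hit // cached: no async overhead
  // Miss: fall back to the coalescing async path.
  return cache.get(key, () => loadVectorFromStorage(key))
}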
@@ -62,7 +91,16 @@ export declare class UnifiedCache {
      */
     clear(type?: 'hnsw' | 'metadata' | 'embedding' | 'other'): void;
     /**
-     *
+     * Start memory pressure monitoring
+     * Periodically checks if we're approaching memory limits
+     */
+    private startMemoryPressureMonitor;
+    /**
+     * Check current memory pressure and warn if needed
+     */
+    private checkMemoryPressure;
+    /**
+     * Get cache statistics with memory information
      */
     getStats(): {
         totalSize: number;
@@ -89,6 +127,42 @@ export declare class UnifiedCache {
         };
         totalAccessCount: number;
         hitRate: number;
+        memory: {
+            available: number;
+            source: "cgroup-v2" | "cgroup-v1" | "system" | "fallback";
+            isContainer: boolean;
+            systemTotal: number;
+            allocationRatio: number;
+            environment: "production" | "development" | "container" | "unknown";
+        };
+    };
+    /**
+     * Get detailed memory information
+     */
+    getMemoryInfo(): {
+        memoryInfo: {
+            available: number;
+            source: "cgroup-v2" | "cgroup-v1" | "system" | "fallback";
+            isContainer: boolean;
+            systemTotal: number;
+            free: number;
+            warnings: string[];
+        };
+        allocationStrategy: {
+            cacheSize: number;
+            ratio: number;
+            minSize: number;
+            maxSize: number | null;
+            environment: "production" | "development" | "container" | "unknown";
+            modelMemory: number;
+            modelPrecision: "q8" | "fp32";
+            availableForCache: number;
+            reasoning: string;
+        };
+        currentPressure: {
+            pressure: "none" | "moderate" | "high" | "critical";
+            warnings: string[];
+        };
     };
     /**
      * Save access patterns for cold start optimization