@soulcraft/brainy 6.2.6 → 6.2.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/brainy.d.ts +7 -0
- package/dist/brainy.js +28 -3
- package/dist/hnsw/hnswIndex.d.ts +27 -0
- package/dist/hnsw/hnswIndex.js +102 -12
- package/dist/hnsw/typeAwareHNSWIndex.d.ts +21 -1
- package/dist/hnsw/typeAwareHNSWIndex.js +43 -3
- package/dist/storage/adapters/azureBlobStorage.d.ts +2 -8
- package/dist/storage/adapters/azureBlobStorage.js +13 -51
- package/dist/storage/adapters/gcsStorage.d.ts +2 -8
- package/dist/storage/adapters/gcsStorage.js +13 -53
- package/dist/storage/adapters/r2Storage.d.ts +5 -8
- package/dist/storage/adapters/r2Storage.js +15 -43
- package/dist/storage/adapters/s3CompatibleStorage.d.ts +1 -8
- package/dist/storage/adapters/s3CompatibleStorage.js +7 -89
- package/dist/types/brainy.types.d.ts +1 -0
- package/package.json +1 -1
package/dist/brainy.d.ts
CHANGED
|
@@ -1795,6 +1795,10 @@ export declare class Brainy<T = any> implements BrainyInterface<T> {
|
|
|
1795
1795
|
* - 87% memory reduction through separate graphs per entity type
|
|
1796
1796
|
* - 10x faster type-specific queries
|
|
1797
1797
|
* - Automatic type routing
|
|
1798
|
+
*
|
|
1799
|
+
* v6.2.8: Smart defaults for HNSW persistence mode
|
|
1800
|
+
* - Cloud storage (GCS/S3/R2/Azure): 'deferred' for 30-50× faster adds
|
|
1801
|
+
* - Local storage (FileSystem/Memory/OPFS): 'immediate' (already fast)
|
|
1798
1802
|
*/
|
|
1799
1803
|
private setupIndex;
|
|
1800
1804
|
/**
|
|
@@ -1846,6 +1850,9 @@ export declare class Brainy<T = any> implements BrainyInterface<T> {
|
|
|
1846
1850
|
private rebuildIndexesIfNeeded;
|
|
1847
1851
|
/**
|
|
1848
1852
|
* Close and cleanup
|
|
1853
|
+
*
|
|
1854
|
+
* v6.2.8: Now flushes HNSW dirty nodes before closing
|
|
1855
|
+
* This ensures deferred persistence mode data is saved
|
|
1849
1856
|
*/
|
|
1850
1857
|
close(): Promise<void>;
|
|
1851
1858
|
/**
|
package/dist/brainy.js
CHANGED
|
@@ -3974,20 +3974,35 @@ export class Brainy {
|
|
|
3974
3974
|
* - 87% memory reduction through separate graphs per entity type
|
|
3975
3975
|
* - 10x faster type-specific queries
|
|
3976
3976
|
* - Automatic type routing
|
|
3977
|
+
*
|
|
3978
|
+
* v6.2.8: Smart defaults for HNSW persistence mode
|
|
3979
|
+
* - Cloud storage (GCS/S3/R2/Azure): 'deferred' for 30-50× faster adds
|
|
3980
|
+
* - Local storage (FileSystem/Memory/OPFS): 'immediate' (already fast)
|
|
3977
3981
|
*/
|
|
3978
3982
|
setupIndex() {
|
|
3979
3983
|
const indexConfig = {
|
|
3980
3984
|
...this.config.index,
|
|
3981
3985
|
distanceFunction: this.distance
|
|
3982
3986
|
};
|
|
3987
|
+
// v6.2.8: Determine persist mode (user config > smart default)
|
|
3988
|
+
let persistMode = this.config.hnswPersistMode || 'immediate';
|
|
3989
|
+
// Smart default: Use deferred mode for cloud storage adapters
|
|
3990
|
+
if (!this.config.hnswPersistMode) {
|
|
3991
|
+
const storageType = this.config.storage?.type || 'auto';
|
|
3992
|
+
const cloudStorageTypes = ['gcs', 's3', 'r2', 'azure'];
|
|
3993
|
+
if (cloudStorageTypes.includes(storageType)) {
|
|
3994
|
+
persistMode = 'deferred';
|
|
3995
|
+
}
|
|
3996
|
+
}
|
|
3983
3997
|
// Phase 2: Use TypeAwareHNSWIndex for billion-scale optimization
|
|
3984
3998
|
if (this.config.storage?.type !== 'memory') {
|
|
3985
3999
|
return new TypeAwareHNSWIndex(indexConfig, this.distance, {
|
|
3986
4000
|
storage: this.storage,
|
|
3987
|
-
useParallelization: true
|
|
4001
|
+
useParallelization: true,
|
|
4002
|
+
persistMode
|
|
3988
4003
|
});
|
|
3989
4004
|
}
|
|
3990
|
-
return new HNSWIndex(indexConfig);
|
|
4005
|
+
return new HNSWIndex(indexConfig, this.distance, { persistMode });
|
|
3991
4006
|
}
|
|
3992
4007
|
/**
|
|
3993
4008
|
* Setup augmentations
|
|
@@ -4071,7 +4086,9 @@ export class Brainy {
|
|
|
4071
4086
|
maxConcurrentOperations: config?.maxConcurrentOperations ?? 10,
|
|
4072
4087
|
// Memory management options (v5.11.0)
|
|
4073
4088
|
maxQueryLimit: config?.maxQueryLimit ?? undefined,
|
|
4074
|
-
reservedQueryMemory: config?.reservedQueryMemory ?? undefined
|
|
4089
|
+
reservedQueryMemory: config?.reservedQueryMemory ?? undefined,
|
|
4090
|
+
// HNSW persistence mode (v6.2.8) - undefined = smart default in setupIndex
|
|
4091
|
+
hnswPersistMode: config?.hnswPersistMode ?? undefined
|
|
4075
4092
|
};
|
|
4076
4093
|
}
|
|
4077
4094
|
/**
|
|
@@ -4241,8 +4258,16 @@ export class Brainy {
|
|
|
4241
4258
|
}
|
|
4242
4259
|
/**
|
|
4243
4260
|
* Close and cleanup
|
|
4261
|
+
*
|
|
4262
|
+
* v6.2.8: Now flushes HNSW dirty nodes before closing
|
|
4263
|
+
* This ensures deferred persistence mode data is saved
|
|
4244
4264
|
*/
|
|
4245
4265
|
async close() {
|
|
4266
|
+
// v6.2.8: Flush HNSW dirty nodes before closing
|
|
4267
|
+
// In deferred persistence mode, this persists all pending HNSW graph data
|
|
4268
|
+
if (this.index && typeof this.index.flush === 'function') {
|
|
4269
|
+
await this.index.flush();
|
|
4270
|
+
}
|
|
4246
4271
|
// Shutdown augmentations
|
|
4247
4272
|
const augs = this.augmentationRegistry.getAll();
|
|
4248
4273
|
for (const aug of augs) {
|
package/dist/hnsw/hnswIndex.d.ts
CHANGED
|
@@ -19,9 +19,13 @@ export declare class HNSWIndex {
|
|
|
19
19
|
private cowEnabled;
|
|
20
20
|
private cowModifiedNodes;
|
|
21
21
|
private cowParent;
|
|
22
|
+
private persistMode;
|
|
23
|
+
private dirtyNodes;
|
|
24
|
+
private dirtySystem;
|
|
22
25
|
constructor(config?: Partial<HNSWConfig>, distanceFunction?: DistanceFunction, options?: {
|
|
23
26
|
useParallelization?: boolean;
|
|
24
27
|
storage?: BaseStorage;
|
|
28
|
+
persistMode?: 'immediate' | 'deferred';
|
|
25
29
|
});
|
|
26
30
|
/**
|
|
27
31
|
* Set whether to use parallelization for performance-critical operations
|
|
@@ -31,6 +35,29 @@ export declare class HNSWIndex {
|
|
|
31
35
|
* Get whether parallelization is enabled
|
|
32
36
|
*/
|
|
33
37
|
getUseParallelization(): boolean;
|
|
38
|
+
/**
|
|
39
|
+
* v6.2.8: Flush dirty HNSW data to storage
|
|
40
|
+
*
|
|
41
|
+
* In deferred persistence mode, HNSW connections are tracked as dirty but not
|
|
42
|
+
* immediately persisted. Call flush() to persist all pending changes.
|
|
43
|
+
*
|
|
44
|
+
* This is automatically called by:
|
|
45
|
+
* - brain.close()
|
|
46
|
+
* - brain.flush()
|
|
47
|
+
* - Process shutdown (SIGTERM/SIGINT)
|
|
48
|
+
*
|
|
49
|
+
* @returns Number of nodes flushed
|
|
50
|
+
*/
|
|
51
|
+
flush(): Promise<number>;
|
|
52
|
+
/**
|
|
53
|
+
* Get the number of dirty (unpersisted) nodes
|
|
54
|
+
* Useful for monitoring and debugging
|
|
55
|
+
*/
|
|
56
|
+
getDirtyNodeCount(): number;
|
|
57
|
+
/**
|
|
58
|
+
* Get the current persist mode
|
|
59
|
+
*/
|
|
60
|
+
getPersistMode(): 'immediate' | 'deferred';
|
|
34
61
|
/**
|
|
35
62
|
* Enable COW (Copy-on-Write) mode - Instant fork via shallow copy
|
|
36
63
|
*
|
package/dist/hnsw/hnswIndex.js
CHANGED
|
@@ -28,6 +28,12 @@ export class HNSWIndex {
|
|
|
28
28
|
this.cowEnabled = false;
|
|
29
29
|
this.cowModifiedNodes = new Set();
|
|
30
30
|
this.cowParent = null;
|
|
31
|
+
// v6.2.8: Deferred HNSW persistence for cloud storage performance
|
|
32
|
+
// In deferred mode, HNSW connections are only persisted on flush/close
|
|
33
|
+
// This reduces GCS operations from 70 to 2-3 per add() (30-50× faster)
|
|
34
|
+
this.persistMode = 'immediate';
|
|
35
|
+
this.dirtyNodes = new Set(); // Nodes with unpersisted HNSW data
|
|
36
|
+
this.dirtySystem = false; // Whether system data (entryPoint, maxLevel) needs persist
|
|
31
37
|
this.config = { ...DEFAULT_CONFIG, ...config };
|
|
32
38
|
this.distanceFunction = distanceFunction;
|
|
33
39
|
this.useParallelization =
|
|
@@ -35,6 +41,7 @@ export class HNSWIndex {
|
|
|
35
41
|
? options.useParallelization
|
|
36
42
|
: true;
|
|
37
43
|
this.storage = options.storage || null;
|
|
44
|
+
this.persistMode = options.persistMode || 'immediate';
|
|
38
45
|
// Use SAME UnifiedCache as Graph and Metadata for fair memory competition
|
|
39
46
|
this.unifiedCache = getGlobalCache();
|
|
40
47
|
}
|
|
@@ -50,6 +57,82 @@ export class HNSWIndex {
|
|
|
50
57
|
getUseParallelization() {
|
|
51
58
|
return this.useParallelization;
|
|
52
59
|
}
|
|
60
|
+
/**
|
|
61
|
+
* v6.2.8: Flush dirty HNSW data to storage
|
|
62
|
+
*
|
|
63
|
+
* In deferred persistence mode, HNSW connections are tracked as dirty but not
|
|
64
|
+
* immediately persisted. Call flush() to persist all pending changes.
|
|
65
|
+
*
|
|
66
|
+
* This is automatically called by:
|
|
67
|
+
* - brain.close()
|
|
68
|
+
* - brain.flush()
|
|
69
|
+
* - Process shutdown (SIGTERM/SIGINT)
|
|
70
|
+
*
|
|
71
|
+
* @returns Number of nodes flushed
|
|
72
|
+
*/
|
|
73
|
+
async flush() {
|
|
74
|
+
if (!this.storage) {
|
|
75
|
+
return 0;
|
|
76
|
+
}
|
|
77
|
+
if (this.dirtyNodes.size === 0 && !this.dirtySystem) {
|
|
78
|
+
return 0;
|
|
79
|
+
}
|
|
80
|
+
const startTime = Date.now();
|
|
81
|
+
const nodeCount = this.dirtyNodes.size;
|
|
82
|
+
// Batch persist all dirty nodes concurrently
|
|
83
|
+
if (this.dirtyNodes.size > 0) {
|
|
84
|
+
const batchSize = 50; // Reasonable batch size for cloud storage
|
|
85
|
+
const nodeIds = Array.from(this.dirtyNodes);
|
|
86
|
+
for (let i = 0; i < nodeIds.length; i += batchSize) {
|
|
87
|
+
const batch = nodeIds.slice(i, i + batchSize);
|
|
88
|
+
const promises = batch.map(nodeId => {
|
|
89
|
+
const noun = this.nouns.get(nodeId);
|
|
90
|
+
if (!noun)
|
|
91
|
+
return Promise.resolve(); // Node was deleted
|
|
92
|
+
const connectionsObj = {};
|
|
93
|
+
for (const [level, nounIds] of noun.connections.entries()) {
|
|
94
|
+
connectionsObj[level.toString()] = Array.from(nounIds);
|
|
95
|
+
}
|
|
96
|
+
return this.storage.saveHNSWData(nodeId, {
|
|
97
|
+
level: noun.level,
|
|
98
|
+
connections: connectionsObj
|
|
99
|
+
}).catch(error => {
|
|
100
|
+
console.error(`[HNSW flush] Failed to persist node ${nodeId}:`, error);
|
|
101
|
+
});
|
|
102
|
+
});
|
|
103
|
+
await Promise.allSettled(promises);
|
|
104
|
+
}
|
|
105
|
+
this.dirtyNodes.clear();
|
|
106
|
+
}
|
|
107
|
+
// Persist system data if dirty
|
|
108
|
+
if (this.dirtySystem) {
|
|
109
|
+
await this.storage.saveHNSWSystem({
|
|
110
|
+
entryPointId: this.entryPointId,
|
|
111
|
+
maxLevel: this.maxLevel
|
|
112
|
+
}).catch(error => {
|
|
113
|
+
console.error('[HNSW flush] Failed to persist system data:', error);
|
|
114
|
+
});
|
|
115
|
+
this.dirtySystem = false;
|
|
116
|
+
}
|
|
117
|
+
const duration = Date.now() - startTime;
|
|
118
|
+
if (nodeCount > 0) {
|
|
119
|
+
prodLog.info(`[HNSW] Flushed ${nodeCount} dirty nodes in ${duration}ms`);
|
|
120
|
+
}
|
|
121
|
+
return nodeCount;
|
|
122
|
+
}
|
|
123
|
+
/**
|
|
124
|
+
* Get the number of dirty (unpersisted) nodes
|
|
125
|
+
* Useful for monitoring and debugging
|
|
126
|
+
*/
|
|
127
|
+
getDirtyNodeCount() {
|
|
128
|
+
return this.dirtyNodes.size;
|
|
129
|
+
}
|
|
130
|
+
/**
|
|
131
|
+
* Get the current persist mode
|
|
132
|
+
*/
|
|
133
|
+
getPersistMode() {
|
|
134
|
+
return this.persistMode;
|
|
135
|
+
}
|
|
53
136
|
/**
|
|
54
137
|
* Enable COW (Copy-on-Write) mode - Instant fork via shallow copy
|
|
55
138
|
*
|
|
@@ -284,14 +367,11 @@ export class HNSWIndex {
|
|
|
284
367
|
}
|
|
285
368
|
// Persist updated neighbor HNSW data (v3.35.0+)
|
|
286
369
|
//
|
|
287
|
-
//
|
|
288
|
-
//
|
|
289
|
-
//
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
// - Mutex serialization (Memory/OPFS/FileSystem)
|
|
293
|
-
// Trade-off: More retry activity under high contention (expected and handled)
|
|
294
|
-
if (this.storage) {
|
|
370
|
+
// v6.2.8: Deferred persistence mode for cloud storage performance
|
|
371
|
+
// In deferred mode, we track dirty nodes instead of persisting immediately
|
|
372
|
+
// This reduces GCS operations from 70 to 2-3 per add() (30-50× faster)
|
|
373
|
+
if (this.storage && this.persistMode === 'immediate') {
|
|
374
|
+
// IMMEDIATE MODE: Original behavior - persist each neighbor update
|
|
295
375
|
const neighborConnectionsObj = {};
|
|
296
376
|
for (const [lvl, nounIds] of neighbor.connections.entries()) {
|
|
297
377
|
neighborConnectionsObj[lvl.toString()] = Array.from(nounIds);
|
|
@@ -304,9 +384,13 @@ export class HNSWIndex {
|
|
|
304
384
|
})
|
|
305
385
|
});
|
|
306
386
|
}
|
|
387
|
+
else if (this.persistMode === 'deferred') {
|
|
388
|
+
// DEFERRED MODE: Track dirty nodes for later batch persistence
|
|
389
|
+
this.dirtyNodes.add(neighborId);
|
|
390
|
+
}
|
|
307
391
|
}
|
|
308
|
-
// Execute all neighbor updates concurrently (
|
|
309
|
-
if (neighborUpdates.length > 0) {
|
|
392
|
+
// Execute all neighbor updates concurrently (only in immediate mode)
|
|
393
|
+
if (neighborUpdates.length > 0 && this.persistMode === 'immediate') {
|
|
310
394
|
const batchSize = this.config.maxConcurrentNeighborWrites || neighborUpdates.length;
|
|
311
395
|
const allFailures = [];
|
|
312
396
|
// Process in chunks if batch size specified
|
|
@@ -360,8 +444,9 @@ export class HNSWIndex {
|
|
|
360
444
|
this.highLevelNodes.get(nounLevel).add(id);
|
|
361
445
|
}
|
|
362
446
|
// Persist HNSW graph data to storage (v3.35.0+)
|
|
363
|
-
|
|
364
|
-
|
|
447
|
+
// v6.2.8: Respect persistMode setting
|
|
448
|
+
if (this.storage && this.persistMode === 'immediate') {
|
|
449
|
+
// IMMEDIATE MODE: Original behavior - persist new entity and system data
|
|
365
450
|
const connectionsObj = {};
|
|
366
451
|
for (const [level, nounIds] of noun.connections.entries()) {
|
|
367
452
|
connectionsObj[level.toString()] = Array.from(nounIds);
|
|
@@ -380,6 +465,11 @@ export class HNSWIndex {
|
|
|
380
465
|
console.error('Failed to persist HNSW system data:', error);
|
|
381
466
|
});
|
|
382
467
|
}
|
|
468
|
+
else if (this.persistMode === 'deferred') {
|
|
469
|
+
// DEFERRED MODE: Track dirty nodes for later batch persistence
|
|
470
|
+
this.dirtyNodes.add(id);
|
|
471
|
+
this.dirtySystem = true;
|
|
472
|
+
}
|
|
383
473
|
return id;
|
|
384
474
|
}
|
|
385
475
|
/**
|
|
@@ -43,16 +43,18 @@ export declare class TypeAwareHNSWIndex {
|
|
|
43
43
|
private distanceFunction;
|
|
44
44
|
private storage;
|
|
45
45
|
private useParallelization;
|
|
46
|
+
private persistMode;
|
|
46
47
|
/**
|
|
47
48
|
* Create a new TypeAwareHNSWIndex
|
|
48
49
|
*
|
|
49
50
|
* @param config HNSW configuration (M, efConstruction, efSearch, ml)
|
|
50
51
|
* @param distanceFunction Distance function (default: euclidean)
|
|
51
|
-
* @param options Additional options (storage, parallelization)
|
|
52
|
+
* @param options Additional options (storage, parallelization, persistMode)
|
|
52
53
|
*/
|
|
53
54
|
constructor(config?: Partial<HNSWConfig>, distanceFunction?: DistanceFunction, options?: {
|
|
54
55
|
useParallelization?: boolean;
|
|
55
56
|
storage?: BaseStorage;
|
|
57
|
+
persistMode?: 'immediate' | 'deferred';
|
|
56
58
|
});
|
|
57
59
|
/**
|
|
58
60
|
* Enable COW (Copy-on-Write) mode - Instant fork via shallow copy
|
|
@@ -63,6 +65,24 @@ export declare class TypeAwareHNSWIndex {
|
|
|
63
65
|
* @param parent - Parent TypeAwareHNSWIndex to copy from
|
|
64
66
|
*/
|
|
65
67
|
enableCOW(parent: TypeAwareHNSWIndex): void;
|
|
68
|
+
/**
|
|
69
|
+
* v6.2.8: Flush dirty HNSW data to storage for all type-specific indexes
|
|
70
|
+
*
|
|
71
|
+
* In deferred persistence mode, HNSW connections are tracked as dirty but not
|
|
72
|
+
* immediately persisted. Call flush() to persist all pending changes across
|
|
73
|
+
* all type-specific indexes.
|
|
74
|
+
*
|
|
75
|
+
* @returns Total number of nodes flushed across all indexes
|
|
76
|
+
*/
|
|
77
|
+
flush(): Promise<number>;
|
|
78
|
+
/**
|
|
79
|
+
* Get the total number of dirty (unpersisted) nodes across all type-specific indexes
|
|
80
|
+
*/
|
|
81
|
+
getDirtyNodeCount(): number;
|
|
82
|
+
/**
|
|
83
|
+
* Get the current persist mode
|
|
84
|
+
*/
|
|
85
|
+
getPersistMode(): 'immediate' | 'deferred';
|
|
66
86
|
/**
|
|
67
87
|
* Get or create HNSW index for a specific type (lazy initialization)
|
|
68
88
|
*
|
|
@@ -35,7 +35,7 @@ export class TypeAwareHNSWIndex {
|
|
|
35
35
|
*
|
|
36
36
|
* @param config HNSW configuration (M, efConstruction, efSearch, ml)
|
|
37
37
|
* @param distanceFunction Distance function (default: euclidean)
|
|
38
|
-
* @param options Additional options (storage, parallelization)
|
|
38
|
+
* @param options Additional options (storage, parallelization, persistMode)
|
|
39
39
|
*/
|
|
40
40
|
constructor(config = {}, distanceFunction = euclideanDistance, options = {}) {
|
|
41
41
|
// One HNSW index per noun type (lazy initialization)
|
|
@@ -47,6 +47,7 @@ export class TypeAwareHNSWIndex {
|
|
|
47
47
|
options.useParallelization !== undefined
|
|
48
48
|
? options.useParallelization
|
|
49
49
|
: true;
|
|
50
|
+
this.persistMode = options.persistMode || 'immediate';
|
|
50
51
|
prodLog.info('TypeAwareHNSWIndex initialized (Phase 2: Type-Aware HNSW)');
|
|
51
52
|
}
|
|
52
53
|
/**
|
|
@@ -64,13 +65,51 @@ export class TypeAwareHNSWIndex {
|
|
|
64
65
|
for (const [type, parentIndex] of parent.indexes.entries()) {
|
|
65
66
|
const childIndex = new HNSWIndex(this.config, this.distanceFunction, {
|
|
66
67
|
useParallelization: this.useParallelization,
|
|
67
|
-
storage: this.storage || undefined
|
|
68
|
+
storage: this.storage || undefined,
|
|
69
|
+
persistMode: this.persistMode
|
|
68
70
|
});
|
|
69
71
|
childIndex.enableCOW(parentIndex);
|
|
70
72
|
this.indexes.set(type, childIndex);
|
|
71
73
|
}
|
|
72
74
|
prodLog.info(`TypeAwareHNSWIndex COW enabled: ${parent.indexes.size} type-specific indexes shallow copied`);
|
|
73
75
|
}
|
|
76
|
+
/**
|
|
77
|
+
* v6.2.8: Flush dirty HNSW data to storage for all type-specific indexes
|
|
78
|
+
*
|
|
79
|
+
* In deferred persistence mode, HNSW connections are tracked as dirty but not
|
|
80
|
+
* immediately persisted. Call flush() to persist all pending changes across
|
|
81
|
+
* all type-specific indexes.
|
|
82
|
+
*
|
|
83
|
+
* @returns Total number of nodes flushed across all indexes
|
|
84
|
+
*/
|
|
85
|
+
async flush() {
|
|
86
|
+
if (this.indexes.size === 0) {
|
|
87
|
+
return 0;
|
|
88
|
+
}
|
|
89
|
+
const flushPromises = Array.from(this.indexes.values()).map(index => index.flush());
|
|
90
|
+
const results = await Promise.all(flushPromises);
|
|
91
|
+
const totalFlushed = results.reduce((sum, count) => sum + count, 0);
|
|
92
|
+
if (totalFlushed > 0) {
|
|
93
|
+
prodLog.info(`[TypeAwareHNSW] Flushed ${totalFlushed} dirty nodes across ${this.indexes.size} type indexes`);
|
|
94
|
+
}
|
|
95
|
+
return totalFlushed;
|
|
96
|
+
}
|
|
97
|
+
/**
|
|
98
|
+
* Get the total number of dirty (unpersisted) nodes across all type-specific indexes
|
|
99
|
+
*/
|
|
100
|
+
getDirtyNodeCount() {
|
|
101
|
+
let total = 0;
|
|
102
|
+
for (const index of this.indexes.values()) {
|
|
103
|
+
total += index.getDirtyNodeCount();
|
|
104
|
+
}
|
|
105
|
+
return total;
|
|
106
|
+
}
|
|
107
|
+
/**
|
|
108
|
+
* Get the current persist mode
|
|
109
|
+
*/
|
|
110
|
+
getPersistMode() {
|
|
111
|
+
return this.persistMode;
|
|
112
|
+
}
|
|
74
113
|
/**
|
|
75
114
|
* Get or create HNSW index for a specific type (lazy initialization)
|
|
76
115
|
*
|
|
@@ -90,7 +129,8 @@ export class TypeAwareHNSWIndex {
|
|
|
90
129
|
prodLog.info(`Creating HNSW index for type: ${type}`);
|
|
91
130
|
const index = new HNSWIndex(this.config, this.distanceFunction, {
|
|
92
131
|
useParallelization: this.useParallelization,
|
|
93
|
-
storage: this.storage || undefined
|
|
132
|
+
storage: this.storage || undefined,
|
|
133
|
+
persistMode: this.persistMode
|
|
94
134
|
});
|
|
95
135
|
this.indexes.set(type, index);
|
|
96
136
|
}
|
|
@@ -52,10 +52,6 @@ export declare class AzureBlobStorage extends BaseStorage {
|
|
|
52
52
|
private nounWriteBuffer;
|
|
53
53
|
private verbWriteBuffer;
|
|
54
54
|
private requestCoalescer;
|
|
55
|
-
private highVolumeMode;
|
|
56
|
-
private lastVolumeCheck;
|
|
57
|
-
private volumeCheckInterval;
|
|
58
|
-
private forceHighVolumeMode;
|
|
59
55
|
private nounCacheManager;
|
|
60
56
|
private verbCacheManager;
|
|
61
57
|
private logger;
|
|
@@ -155,10 +151,6 @@ export declare class AzureBlobStorage extends BaseStorage {
|
|
|
155
151
|
* @param requestId Request ID from applyBackpressure()
|
|
156
152
|
*/
|
|
157
153
|
private releaseBackpressure;
|
|
158
|
-
/**
|
|
159
|
-
* Check if high-volume mode should be enabled
|
|
160
|
-
*/
|
|
161
|
-
private checkVolumeMode;
|
|
162
154
|
/**
|
|
163
155
|
* Flush noun buffer to Azure
|
|
164
156
|
*/
|
|
@@ -169,6 +161,7 @@ export declare class AzureBlobStorage extends BaseStorage {
|
|
|
169
161
|
private flushVerbBuffer;
|
|
170
162
|
/**
|
|
171
163
|
* Save a node to storage
|
|
164
|
+
* v6.2.7: Always uses write buffer for consistent performance
|
|
172
165
|
*/
|
|
173
166
|
protected saveNode(node: HNSWNode): Promise<void>;
|
|
174
167
|
/**
|
|
@@ -231,6 +224,7 @@ export declare class AzureBlobStorage extends BaseStorage {
|
|
|
231
224
|
private streamToBuffer;
|
|
232
225
|
/**
|
|
233
226
|
* Save an edge to storage
|
|
227
|
+
* v6.2.7: Always uses write buffer for consistent performance
|
|
234
228
|
*/
|
|
235
229
|
protected saveEdge(edge: Edge): Promise<void>;
|
|
236
230
|
/**
|
|
@@ -59,11 +59,6 @@ export class AzureBlobStorage extends BaseStorage {
|
|
|
59
59
|
this.verbWriteBuffer = null;
|
|
60
60
|
// Request coalescer for deduplication
|
|
61
61
|
this.requestCoalescer = null;
|
|
62
|
-
// High-volume mode detection
|
|
63
|
-
this.highVolumeMode = false;
|
|
64
|
-
this.lastVolumeCheck = 0;
|
|
65
|
-
this.volumeCheckInterval = 1000; // Check every second
|
|
66
|
-
this.forceHighVolumeMode = false; // Environment variable override
|
|
67
62
|
// Module logger
|
|
68
63
|
this.logger = createModuleLogger('AzureBlobStorage');
|
|
69
64
|
// v5.4.0: HNSW mutex locks to prevent read-modify-write races
|
|
@@ -83,12 +78,7 @@ export class AzureBlobStorage extends BaseStorage {
|
|
|
83
78
|
// Initialize cache managers
|
|
84
79
|
this.nounCacheManager = new CacheManager(options.cacheConfig);
|
|
85
80
|
this.verbCacheManager = new CacheManager(options.cacheConfig);
|
|
86
|
-
//
|
|
87
|
-
if (typeof process !== 'undefined' && process.env?.BRAINY_FORCE_HIGH_VOLUME === 'true') {
|
|
88
|
-
this.forceHighVolumeMode = true;
|
|
89
|
-
this.highVolumeMode = true;
|
|
90
|
-
prodLog.info('🚀 High-volume mode FORCED via BRAINY_FORCE_HIGH_VOLUME environment variable');
|
|
91
|
-
}
|
|
81
|
+
// v6.2.7: Write buffering always enabled - no env var check needed
|
|
92
82
|
}
|
|
93
83
|
/**
|
|
94
84
|
* Get Azure Blob-optimized batch configuration with native batch API support
|
|
@@ -326,29 +316,7 @@ export class AzureBlobStorage extends BaseStorage {
|
|
|
326
316
|
this.backpressure.releasePermission(requestId, success);
|
|
327
317
|
}
|
|
328
318
|
}
|
|
329
|
-
|
|
330
|
-
* Check if high-volume mode should be enabled
|
|
331
|
-
*/
|
|
332
|
-
checkVolumeMode() {
|
|
333
|
-
if (this.forceHighVolumeMode) {
|
|
334
|
-
return; // Already forced on
|
|
335
|
-
}
|
|
336
|
-
const now = Date.now();
|
|
337
|
-
if (now - this.lastVolumeCheck < this.volumeCheckInterval) {
|
|
338
|
-
return;
|
|
339
|
-
}
|
|
340
|
-
this.lastVolumeCheck = now;
|
|
341
|
-
// Enable high-volume mode if we have many pending operations
|
|
342
|
-
const shouldEnable = this.pendingOperations > 20;
|
|
343
|
-
if (shouldEnable && !this.highVolumeMode) {
|
|
344
|
-
this.highVolumeMode = true;
|
|
345
|
-
prodLog.info('🚀 High-volume mode ENABLED (pending operations:', this.pendingOperations, ')');
|
|
346
|
-
}
|
|
347
|
-
else if (!shouldEnable && this.highVolumeMode && !this.forceHighVolumeMode) {
|
|
348
|
-
this.highVolumeMode = false;
|
|
349
|
-
prodLog.info('🐌 High-volume mode DISABLED (pending operations:', this.pendingOperations, ')');
|
|
350
|
-
}
|
|
351
|
-
}
|
|
319
|
+
// v6.2.7: Removed checkVolumeMode() - write buffering always enabled for cloud storage
|
|
352
320
|
/**
|
|
353
321
|
* Flush noun buffer to Azure
|
|
354
322
|
*/
|
|
@@ -380,26 +348,21 @@ export class AzureBlobStorage extends BaseStorage {
|
|
|
380
348
|
// v5.4.0: Removed saveNoun_internal - now inherit from BaseStorage's type-first implementation
|
|
381
349
|
/**
|
|
382
350
|
* Save a node to storage
|
|
351
|
+
* v6.2.7: Always uses write buffer for consistent performance
|
|
383
352
|
*/
|
|
384
353
|
async saveNode(node) {
|
|
385
354
|
await this.ensureInitialized();
|
|
386
|
-
//
|
|
387
|
-
this.
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
this.logger.trace(`📝 BUFFERING: Adding noun ${node.id} to write buffer (high-volume mode active)`);
|
|
391
|
-
// v6.2.6: CRITICAL FIX - Populate cache BEFORE buffering for read-after-write consistency
|
|
392
|
-
// Without this, add() returns but relate() can't find the entity (cloud storage production bug)
|
|
355
|
+
// v6.2.7: Always use write buffer - cloud storage benefits from batching
|
|
356
|
+
if (this.nounWriteBuffer) {
|
|
357
|
+
this.logger.trace(`📝 BUFFERING: Adding noun ${node.id} to write buffer`);
|
|
358
|
+
// v6.2.6: Populate cache BEFORE buffering for read-after-write consistency
|
|
393
359
|
if (node.vector && Array.isArray(node.vector) && node.vector.length > 0) {
|
|
394
360
|
this.nounCacheManager.set(node.id, node);
|
|
395
361
|
}
|
|
396
362
|
await this.nounWriteBuffer.add(node.id, node);
|
|
397
363
|
return;
|
|
398
364
|
}
|
|
399
|
-
|
|
400
|
-
this.logger.trace(`📝 DIRECT WRITE: Saving noun ${node.id} directly (high-volume mode inactive)`);
|
|
401
|
-
}
|
|
402
|
-
// Direct write in normal mode
|
|
365
|
+
// Fallback to direct write if buffer not initialized (shouldn't happen after init)
|
|
403
366
|
await this.saveNodeDirect(node);
|
|
404
367
|
}
|
|
405
368
|
/**
|
|
@@ -776,20 +739,19 @@ export class AzureBlobStorage extends BaseStorage {
|
|
|
776
739
|
// v5.4.0: Removed saveVerb_internal - now inherit from BaseStorage's type-first implementation
|
|
777
740
|
/**
|
|
778
741
|
* Save an edge to storage
|
|
742
|
+
* v6.2.7: Always uses write buffer for consistent performance
|
|
779
743
|
*/
|
|
780
744
|
async saveEdge(edge) {
|
|
781
745
|
await this.ensureInitialized();
|
|
782
|
-
//
|
|
783
|
-
this.
|
|
784
|
-
// Use write buffer in high-volume mode
|
|
785
|
-
if (this.highVolumeMode && this.verbWriteBuffer) {
|
|
746
|
+
// v6.2.7: Always use write buffer - cloud storage benefits from batching
|
|
747
|
+
if (this.verbWriteBuffer) {
|
|
786
748
|
this.logger.trace(`📝 BUFFERING: Adding verb ${edge.id} to write buffer`);
|
|
787
|
-
// v6.2.6:
|
|
749
|
+
// v6.2.6: Populate cache BEFORE buffering for read-after-write consistency
|
|
788
750
|
this.verbCacheManager.set(edge.id, edge);
|
|
789
751
|
await this.verbWriteBuffer.add(edge.id, edge);
|
|
790
752
|
return;
|
|
791
753
|
}
|
|
792
|
-
//
|
|
754
|
+
// Fallback to direct write if buffer not initialized (shouldn't happen after init)
|
|
793
755
|
await this.saveEdgeDirect(edge);
|
|
794
756
|
}
|
|
795
757
|
/**
|
|
@@ -54,10 +54,6 @@ export declare class GcsStorage extends BaseStorage {
|
|
|
54
54
|
private nounWriteBuffer;
|
|
55
55
|
private verbWriteBuffer;
|
|
56
56
|
private requestCoalescer;
|
|
57
|
-
private highVolumeMode;
|
|
58
|
-
private lastVolumeCheck;
|
|
59
|
-
private volumeCheckInterval;
|
|
60
|
-
private forceHighVolumeMode;
|
|
61
57
|
private nounCacheManager;
|
|
62
58
|
private verbCacheManager;
|
|
63
59
|
private logger;
|
|
@@ -133,10 +129,6 @@ export declare class GcsStorage extends BaseStorage {
|
|
|
133
129
|
* @param requestId Request ID from applyBackpressure()
|
|
134
130
|
*/
|
|
135
131
|
private releaseBackpressure;
|
|
136
|
-
/**
|
|
137
|
-
* Check if high-volume mode should be enabled
|
|
138
|
-
*/
|
|
139
|
-
private checkVolumeMode;
|
|
140
132
|
/**
|
|
141
133
|
* Flush noun buffer to GCS
|
|
142
134
|
*/
|
|
@@ -147,6 +139,7 @@ export declare class GcsStorage extends BaseStorage {
|
|
|
147
139
|
private flushVerbBuffer;
|
|
148
140
|
/**
|
|
149
141
|
* Save a node to storage
|
|
142
|
+
* v6.2.7: Always uses write buffer for consistent performance
|
|
150
143
|
*/
|
|
151
144
|
protected saveNode(node: HNSWNode): Promise<void>;
|
|
152
145
|
/**
|
|
@@ -212,6 +205,7 @@ export declare class GcsStorage extends BaseStorage {
|
|
|
212
205
|
protected listObjectsUnderPath(prefix: string): Promise<string[]>;
|
|
213
206
|
/**
|
|
214
207
|
* Save an edge to storage
|
|
208
|
+
* v6.2.7: Always uses write buffer for consistent performance
|
|
215
209
|
*/
|
|
216
210
|
protected saveEdge(edge: Edge): Promise<void>;
|
|
217
211
|
/**
|
|
@@ -63,11 +63,6 @@ export class GcsStorage extends BaseStorage {
|
|
|
63
63
|
this.verbWriteBuffer = null;
|
|
64
64
|
// Request coalescer for deduplication
|
|
65
65
|
this.requestCoalescer = null;
|
|
66
|
-
// High-volume mode detection - MUCH more aggressive
|
|
67
|
-
this.highVolumeMode = false;
|
|
68
|
-
this.lastVolumeCheck = 0;
|
|
69
|
-
this.volumeCheckInterval = 1000; // Check every second, not 5
|
|
70
|
-
this.forceHighVolumeMode = false; // Environment variable override
|
|
71
66
|
// Module logger
|
|
72
67
|
this.logger = createModuleLogger('GcsStorage');
|
|
73
68
|
// v5.4.0: HNSW mutex locks to prevent read-modify-write races
|
|
@@ -92,12 +87,7 @@ export class GcsStorage extends BaseStorage {
|
|
|
92
87
|
// Initialize cache managers
|
|
93
88
|
this.nounCacheManager = new CacheManager(options.cacheConfig);
|
|
94
89
|
this.verbCacheManager = new CacheManager(options.cacheConfig);
|
|
95
|
-
//
|
|
96
|
-
if (typeof process !== 'undefined' && process.env?.BRAINY_FORCE_HIGH_VOLUME === 'true') {
|
|
97
|
-
this.forceHighVolumeMode = true;
|
|
98
|
-
this.highVolumeMode = true;
|
|
99
|
-
prodLog.info('🚀 High-volume mode FORCED via BRAINY_FORCE_HIGH_VOLUME environment variable');
|
|
100
|
-
}
|
|
90
|
+
// v6.2.7: Write buffering always enabled - no env var check needed
|
|
101
91
|
}
|
|
102
92
|
/**
|
|
103
93
|
* Initialize the storage adapter
|
|
@@ -251,29 +241,7 @@ export class GcsStorage extends BaseStorage {
|
|
|
251
241
|
this.backpressure.releasePermission(requestId, success);
|
|
252
242
|
}
|
|
253
243
|
}
|
|
254
|
-
|
|
255
|
-
* Check if high-volume mode should be enabled
|
|
256
|
-
*/
|
|
257
|
-
checkVolumeMode() {
|
|
258
|
-
if (this.forceHighVolumeMode) {
|
|
259
|
-
return; // Already forced on
|
|
260
|
-
}
|
|
261
|
-
const now = Date.now();
|
|
262
|
-
if (now - this.lastVolumeCheck < this.volumeCheckInterval) {
|
|
263
|
-
return;
|
|
264
|
-
}
|
|
265
|
-
this.lastVolumeCheck = now;
|
|
266
|
-
// Enable high-volume mode if we have many pending operations
|
|
267
|
-
const shouldEnable = this.pendingOperations > 20;
|
|
268
|
-
if (shouldEnable && !this.highVolumeMode) {
|
|
269
|
-
this.highVolumeMode = true;
|
|
270
|
-
prodLog.info('🚀 High-volume mode ENABLED (pending operations:', this.pendingOperations, ')');
|
|
271
|
-
}
|
|
272
|
-
else if (!shouldEnable && this.highVolumeMode && !this.forceHighVolumeMode) {
|
|
273
|
-
this.highVolumeMode = false;
|
|
274
|
-
prodLog.info('🐌 High-volume mode DISABLED (pending operations:', this.pendingOperations, ')');
|
|
275
|
-
}
|
|
276
|
-
}
|
|
244
|
+
// v6.2.7: Removed checkVolumeMode() - write buffering always enabled for cloud storage
|
|
277
245
|
/**
|
|
278
246
|
* Flush noun buffer to GCS
|
|
279
247
|
*/
|
|
@@ -305,27 +273,21 @@ export class GcsStorage extends BaseStorage {
|
|
|
305
273
|
// v5.4.0: Removed saveNoun_internal - now inherit from BaseStorage's type-first implementation
|
|
306
274
|
/**
|
|
307
275
|
* Save a node to storage
|
|
276
|
+
* v6.2.7: Always uses write buffer for consistent performance
|
|
308
277
|
*/
|
|
309
278
|
async saveNode(node) {
|
|
310
279
|
await this.ensureInitialized();
|
|
311
|
-
//
|
|
312
|
-
this.checkVolumeMode();
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
this.logger.trace(`📝 BUFFERING: Adding noun ${node.id} to write buffer (high-volume mode active)`);
|
|
316
|
-
// v6.2.6: CRITICAL FIX - Populate cache BEFORE buffering for read-after-write consistency
|
|
317
|
-
// Without this, add() returns but relate() can't find the entity (GCS production bug)
|
|
318
|
-
// The buffer flushes asynchronously, but cache ensures immediate reads succeed
|
|
280
|
+
// v6.2.7: Always use write buffer - cloud storage benefits from batching
|
|
281
|
+
if (this.nounWriteBuffer) {
|
|
282
|
+
this.logger.trace(`📝 BUFFERING: Adding noun ${node.id} to write buffer`);
|
|
283
|
+
// v6.2.6: Populate cache BEFORE buffering for read-after-write consistency
|
|
319
284
|
if (node.vector && Array.isArray(node.vector) && node.vector.length > 0) {
|
|
320
285
|
this.nounCacheManager.set(node.id, node);
|
|
321
286
|
}
|
|
322
287
|
await this.nounWriteBuffer.add(node.id, node);
|
|
323
288
|
return;
|
|
324
289
|
}
|
|
325
|
-
|
|
326
|
-
this.logger.trace(`📝 DIRECT WRITE: Saving noun ${node.id} directly (high-volume mode inactive)`);
|
|
327
|
-
}
|
|
328
|
-
// Direct write in normal mode
|
|
290
|
+
// Fallback to direct write if buffer not initialized (shouldn't happen after init)
|
|
329
291
|
await this.saveNodeDirect(node);
|
|
330
292
|
}
|
|
331
293
|
/**
|
|
@@ -646,21 +608,19 @@ export class GcsStorage extends BaseStorage {
|
|
|
646
608
|
// v5.4.0: Removed saveVerb_internal - now inherit from BaseStorage's type-first implementation
|
|
647
609
|
/**
|
|
648
610
|
* Save an edge to storage
|
|
611
|
+
* v6.2.7: Always uses write buffer for consistent performance
|
|
649
612
|
*/
|
|
650
613
|
async saveEdge(edge) {
|
|
651
614
|
await this.ensureInitialized();
|
|
652
|
-
//
|
|
653
|
-
this.checkVolumeMode();
|
|
654
|
-
// Use write buffer in high-volume mode
|
|
655
|
-
if (this.highVolumeMode && this.verbWriteBuffer) {
|
|
615
|
+
// v6.2.7: Always use write buffer - cloud storage benefits from batching
|
|
616
|
+
if (this.verbWriteBuffer) {
|
|
656
617
|
this.logger.trace(`📝 BUFFERING: Adding verb ${edge.id} to write buffer`);
|
|
657
|
-
// v6.2.6: CRITICAL FIX - Populate cache BEFORE buffering for read-after-write consistency
|
|
658
|
-
// Without this, relate() might not find the verb immediately after creation
|
|
618
|
+
// v6.2.6: Populate cache BEFORE buffering for read-after-write consistency
|
|
659
619
|
this.verbCacheManager.set(edge.id, edge);
|
|
660
620
|
await this.verbWriteBuffer.add(edge.id, edge);
|
|
661
621
|
return;
|
|
662
622
|
}
|
|
663
|
-
//
|
|
623
|
+
// Fallback to direct write if buffer not initialized (shouldn't happen after init)
|
|
664
624
|
await this.saveEdgeDirect(edge);
|
|
665
625
|
}
|
|
666
626
|
/**
|
|
@@ -58,10 +58,6 @@ export declare class R2Storage extends BaseStorage {
|
|
|
58
58
|
private nounWriteBuffer;
|
|
59
59
|
private verbWriteBuffer;
|
|
60
60
|
private requestCoalescer;
|
|
61
|
-
private highVolumeMode;
|
|
62
|
-
private lastVolumeCheck;
|
|
63
|
-
private volumeCheckInterval;
|
|
64
|
-
private forceHighVolumeMode;
|
|
65
61
|
private nounCacheManager;
|
|
66
62
|
private verbCacheManager;
|
|
67
63
|
private logger;
|
|
@@ -139,10 +135,6 @@ export declare class R2Storage extends BaseStorage {
|
|
|
139
135
|
* Release backpressure after completing an operation
|
|
140
136
|
*/
|
|
141
137
|
private releaseBackpressure;
|
|
142
|
-
/**
|
|
143
|
-
* Check if high-volume mode should be enabled
|
|
144
|
-
*/
|
|
145
|
-
private checkVolumeMode;
|
|
146
138
|
/**
|
|
147
139
|
* Flush noun buffer to R2
|
|
148
140
|
*/
|
|
@@ -153,6 +145,7 @@ export declare class R2Storage extends BaseStorage {
|
|
|
153
145
|
private flushVerbBuffer;
|
|
154
146
|
/**
|
|
155
147
|
* Save a node to storage
|
|
148
|
+
* v6.2.7: Always uses write buffer for consistent performance
|
|
156
149
|
*/
|
|
157
150
|
protected saveNode(node: HNSWNode): Promise<void>;
|
|
158
151
|
/**
|
|
@@ -179,6 +172,10 @@ export declare class R2Storage extends BaseStorage {
|
|
|
179
172
|
* List all objects under a specific prefix in R2
|
|
180
173
|
*/
|
|
181
174
|
protected listObjectsUnderPath(prefix: string): Promise<string[]>;
|
|
175
|
+
/**
|
|
176
|
+
* Save an edge to storage
|
|
177
|
+
* v6.2.7: Always uses write buffer for consistent performance
|
|
178
|
+
*/
|
|
182
179
|
protected saveEdge(edge: Edge): Promise<void>;
|
|
183
180
|
private saveEdgeDirect;
|
|
184
181
|
protected getEdge(id: string): Promise<Edge | null>;
|
|
@@ -65,11 +65,6 @@ export class R2Storage extends BaseStorage {
|
|
|
65
65
|
this.verbWriteBuffer = null;
|
|
66
66
|
// Request coalescer for deduplication
|
|
67
67
|
this.requestCoalescer = null;
|
|
68
|
-
// High-volume mode detection (R2-specific thresholds)
|
|
69
|
-
this.highVolumeMode = false;
|
|
70
|
-
this.lastVolumeCheck = 0;
|
|
71
|
-
this.volumeCheckInterval = 800; // Check more frequently on R2
|
|
72
|
-
this.forceHighVolumeMode = false;
|
|
73
68
|
// Module logger
|
|
74
69
|
this.logger = createModuleLogger('R2Storage');
|
|
75
70
|
// v5.4.0: HNSW mutex locks to prevent read-modify-write races
|
|
@@ -94,12 +89,7 @@ export class R2Storage extends BaseStorage {
|
|
|
94
89
|
warmCacheTTL: options.cacheConfig?.warmCacheTTL || 3600000 // 1 hour
|
|
95
90
|
});
|
|
96
91
|
this.verbCacheManager = new CacheManager(options.cacheConfig);
|
|
97
|
-
//
|
|
98
|
-
if (typeof process !== 'undefined' && process.env?.BRAINY_FORCE_HIGH_VOLUME === 'true') {
|
|
99
|
-
this.forceHighVolumeMode = true;
|
|
100
|
-
this.highVolumeMode = true;
|
|
101
|
-
prodLog.info('🚀 R2: High-volume mode FORCED via environment variable');
|
|
102
|
-
}
|
|
92
|
+
// v6.2.7: Write buffering always enabled - no env var check needed
|
|
103
93
|
}
|
|
104
94
|
/**
|
|
105
95
|
* Get R2-optimized batch configuration with native batch API support
|
|
@@ -295,29 +285,7 @@ export class R2Storage extends BaseStorage {
|
|
|
295
285
|
this.backpressure.releasePermission(requestId, success);
|
|
296
286
|
}
|
|
297
287
|
}
|
|
298
|
-
/**
|
|
299
|
-
* Check if high-volume mode should be enabled
|
|
300
|
-
*/
|
|
301
|
-
checkVolumeMode() {
|
|
302
|
-
if (this.forceHighVolumeMode) {
|
|
303
|
-
return;
|
|
304
|
-
}
|
|
305
|
-
const now = Date.now();
|
|
306
|
-
if (now - this.lastVolumeCheck < this.volumeCheckInterval) {
|
|
307
|
-
return;
|
|
308
|
-
}
|
|
309
|
-
this.lastVolumeCheck = now;
|
|
310
|
-
// R2 threshold: enable at 15 pending operations (lower than S3/GCS)
|
|
311
|
-
const shouldEnable = this.pendingOperations > 15;
|
|
312
|
-
if (shouldEnable && !this.highVolumeMode) {
|
|
313
|
-
this.highVolumeMode = true;
|
|
314
|
-
prodLog.info('🚀 R2: High-volume mode ENABLED (pending:', this.pendingOperations, ')');
|
|
315
|
-
}
|
|
316
|
-
else if (!shouldEnable && this.highVolumeMode && !this.forceHighVolumeMode) {
|
|
317
|
-
this.highVolumeMode = false;
|
|
318
|
-
prodLog.info('🐌 R2: High-volume mode DISABLED (pending:', this.pendingOperations, ')');
|
|
319
|
-
}
|
|
320
|
-
}
|
|
288
|
+
// v6.2.7: Removed checkVolumeMode() - write buffering always enabled for cloud storage
|
|
321
289
|
/**
|
|
322
290
|
* Flush noun buffer to R2
|
|
323
291
|
*/
|
|
@@ -348,22 +316,21 @@ export class R2Storage extends BaseStorage {
|
|
|
348
316
|
}
|
|
349
317
|
/**
|
|
350
318
|
* Save a node to storage
|
|
319
|
+
* v6.2.7: Always uses write buffer for consistent performance
|
|
351
320
|
*/
|
|
352
321
|
async saveNode(node) {
|
|
353
322
|
await this.ensureInitialized();
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
if (this.highVolumeMode && this.nounWriteBuffer) {
|
|
323
|
+
// v6.2.7: Always use write buffer - cloud storage benefits from batching
|
|
324
|
+
if (this.nounWriteBuffer) {
|
|
357
325
|
this.logger.trace(`📝 BUFFERING: Adding noun ${node.id} to write buffer`);
|
|
358
|
-
// v6.2.6: CRITICAL FIX - Populate cache BEFORE buffering for read-after-write consistency
|
|
359
|
-
// Without this, add() returns but relate() can't find the entity (cloud storage production bug)
|
|
326
|
+
// v6.2.6: Populate cache BEFORE buffering for read-after-write consistency
|
|
360
327
|
if (node.vector && Array.isArray(node.vector) && node.vector.length > 0) {
|
|
361
328
|
this.nounCacheManager.set(node.id, node);
|
|
362
329
|
}
|
|
363
330
|
await this.nounWriteBuffer.add(node.id, node);
|
|
364
331
|
return;
|
|
365
332
|
}
|
|
366
|
-
//
|
|
333
|
+
// Fallback to direct write if buffer not initialized (shouldn't happen after init)
|
|
367
334
|
await this.saveNodeDirect(node);
|
|
368
335
|
}
|
|
369
336
|
/**
|
|
@@ -572,15 +539,20 @@ export class R2Storage extends BaseStorage {
|
|
|
572
539
|
}
|
|
573
540
|
}
|
|
574
541
|
// Verb storage methods (similar to noun methods - implementing key methods for space)
|
|
542
|
+
/**
|
|
543
|
+
* Save an edge to storage
|
|
544
|
+
* v6.2.7: Always uses write buffer for consistent performance
|
|
545
|
+
*/
|
|
575
546
|
async saveEdge(edge) {
|
|
576
547
|
await this.ensureInitialized();
|
|
577
|
-
|
|
578
|
-
if (this.highVolumeMode && this.verbWriteBuffer) {
|
|
579
|
-
// v6.2.6: CRITICAL FIX - Populate cache BEFORE buffering for read-after-write consistency
|
|
548
|
+
// v6.2.7: Always use write buffer - cloud storage benefits from batching
|
|
549
|
+
if (this.verbWriteBuffer) {
|
|
550
|
+
// v6.2.6: Populate cache BEFORE buffering for read-after-write consistency
|
|
580
551
|
this.verbCacheManager.set(edge.id, edge);
|
|
581
552
|
await this.verbWriteBuffer.add(edge.id, edge);
|
|
582
553
|
return;
|
|
583
554
|
}
|
|
555
|
+
// Fallback to direct write if buffer not initialized (shouldn't happen after init)
|
|
584
556
|
await this.saveEdgeDirect(edge);
|
|
585
557
|
}
|
|
586
558
|
async saveEdgeDirect(edge) {
|
|
@@ -73,10 +73,6 @@ export declare class S3CompatibleStorage extends BaseStorage {
|
|
|
73
73
|
private cacheSync?;
|
|
74
74
|
private readWriteSeparation?;
|
|
75
75
|
private requestCoalescer;
|
|
76
|
-
private highVolumeMode;
|
|
77
|
-
private lastVolumeCheck;
|
|
78
|
-
private volumeCheckInterval;
|
|
79
|
-
private forceHighVolumeMode;
|
|
80
76
|
private operationExecutors;
|
|
81
77
|
private nounCacheManager;
|
|
82
78
|
private verbCacheManager;
|
|
@@ -193,10 +189,6 @@ export declare class S3CompatibleStorage extends BaseStorage {
|
|
|
193
189
|
* Initialize request coalescer
|
|
194
190
|
*/
|
|
195
191
|
private initializeCoalescer;
|
|
196
|
-
/**
|
|
197
|
-
* Check if we should enable high-volume mode
|
|
198
|
-
*/
|
|
199
|
-
private checkVolumeMode;
|
|
200
192
|
/**
|
|
201
193
|
* Bulk write nouns to S3
|
|
202
194
|
*/
|
|
@@ -239,6 +231,7 @@ export declare class S3CompatibleStorage extends BaseStorage {
|
|
|
239
231
|
private getBatchSize;
|
|
240
232
|
/**
|
|
241
233
|
* Save a node to storage
|
|
234
|
+
* v6.2.7: Always uses write buffer for consistent performance
|
|
242
235
|
*/
|
|
243
236
|
protected saveNode(node: HNSWNode): Promise<void>;
|
|
244
237
|
/**
|
|
@@ -78,11 +78,6 @@ export class S3CompatibleStorage extends BaseStorage {
|
|
|
78
78
|
// ShardManager is no longer used - sharding is deterministic
|
|
79
79
|
// Request coalescer for deduplication
|
|
80
80
|
this.requestCoalescer = null;
|
|
81
|
-
// High-volume mode detection - MUCH more aggressive
|
|
82
|
-
this.highVolumeMode = false;
|
|
83
|
-
this.lastVolumeCheck = 0;
|
|
84
|
-
this.volumeCheckInterval = 1000; // Check every second, not 5
|
|
85
|
-
this.forceHighVolumeMode = false; // Environment variable override
|
|
86
81
|
// Module logger
|
|
87
82
|
this.logger = createModuleLogger('S3Storage');
|
|
88
83
|
// v5.4.0: HNSW mutex locks to prevent read-modify-write races
|
|
@@ -526,79 +521,7 @@ export class S3CompatibleStorage extends BaseStorage {
|
|
|
526
521
|
await this.processCoalescedBatch(batch);
|
|
527
522
|
});
|
|
528
523
|
}
|
|
529
|
-
/**
|
|
530
|
-
* Check if we should enable high-volume mode
|
|
531
|
-
*/
|
|
532
|
-
checkVolumeMode() {
|
|
533
|
-
const now = Date.now();
|
|
534
|
-
if (now - this.lastVolumeCheck < this.volumeCheckInterval) {
|
|
535
|
-
return;
|
|
536
|
-
}
|
|
537
|
-
this.lastVolumeCheck = now;
|
|
538
|
-
// Check environment variable override
|
|
539
|
-
const envThreshold = process.env.BRAINY_BUFFER_THRESHOLD;
|
|
540
|
-
const threshold = envThreshold ? parseInt(envThreshold) : 0; // Default to 0 for immediate activation!
|
|
541
|
-
// Force enable from environment
|
|
542
|
-
if (process.env.BRAINY_FORCE_BUFFERING === 'true') {
|
|
543
|
-
this.forceHighVolumeMode = true;
|
|
544
|
-
}
|
|
545
|
-
// Get metrics
|
|
546
|
-
const backpressureStatus = this.backpressure.getStatus();
|
|
547
|
-
const socketMetrics = this.socketManager.getMetrics();
|
|
548
|
-
// Reasonable high-volume detection - only activate under real load
|
|
549
|
-
const isTestEnvironment = process.env.NODE_ENV === 'test';
|
|
550
|
-
const explicitlyDisabled = process.env.BRAINY_FORCE_BUFFERING === 'false';
|
|
551
|
-
// Use reasonable thresholds instead of emergency aggressive ones
|
|
552
|
-
const reasonableThreshold = Math.max(threshold, 10); // At least 10 pending operations
|
|
553
|
-
const highSocketUtilization = 0.8; // 80% socket utilization
|
|
554
|
-
const highRequestRate = 50; // 50 requests per second
|
|
555
|
-
const significantErrors = 5; // 5 consecutive errors
|
|
556
|
-
const shouldEnableHighVolume = !isTestEnvironment && // Disable in test environment
|
|
557
|
-
!explicitlyDisabled && // Allow explicit disabling
|
|
558
|
-
(this.forceHighVolumeMode || // Environment override
|
|
559
|
-
backpressureStatus.queueLength >= reasonableThreshold || // High queue backlog
|
|
560
|
-
socketMetrics.pendingRequests >= reasonableThreshold || // Many pending requests
|
|
561
|
-
this.pendingOperations >= reasonableThreshold || // Many pending ops
|
|
562
|
-
socketMetrics.socketUtilization >= highSocketUtilization || // High socket pressure
|
|
563
|
-
(socketMetrics.requestsPerSecond >= highRequestRate) || // High request rate
|
|
564
|
-
(this.consecutiveErrors >= significantErrors)); // Significant error pattern
|
|
565
|
-
if (shouldEnableHighVolume && !this.highVolumeMode) {
|
|
566
|
-
this.highVolumeMode = true;
|
|
567
|
-
this.logger.warn(`🚨 HIGH-VOLUME MODE ACTIVATED 🚨`);
|
|
568
|
-
this.logger.warn(` Queue Length: ${backpressureStatus.queueLength}`);
|
|
569
|
-
this.logger.warn(` Pending Requests: ${socketMetrics.pendingRequests}`);
|
|
570
|
-
this.logger.warn(` Pending Operations: ${this.pendingOperations}`);
|
|
571
|
-
this.logger.warn(` Socket Utilization: ${(socketMetrics.socketUtilization * 100).toFixed(1)}%`);
|
|
572
|
-
this.logger.warn(` Requests/sec: ${socketMetrics.requestsPerSecond}`);
|
|
573
|
-
this.logger.warn(` Consecutive Errors: ${this.consecutiveErrors}`);
|
|
574
|
-
this.logger.warn(` Threshold: ${threshold}`);
|
|
575
|
-
// Adjust buffer parameters for high volume
|
|
576
|
-
const queueLength = Math.max(backpressureStatus.queueLength, socketMetrics.pendingRequests, 100);
|
|
577
|
-
if (this.nounWriteBuffer) {
|
|
578
|
-
this.nounWriteBuffer.adjustForLoad(queueLength);
|
|
579
|
-
const stats = this.nounWriteBuffer.getStats();
|
|
580
|
-
this.logger.warn(` Noun Buffer: ${stats.bufferSize} items, ${stats.totalWrites} total writes`);
|
|
581
|
-
}
|
|
582
|
-
if (this.verbWriteBuffer) {
|
|
583
|
-
this.verbWriteBuffer.adjustForLoad(queueLength);
|
|
584
|
-
const stats = this.verbWriteBuffer.getStats();
|
|
585
|
-
this.logger.warn(` Verb Buffer: ${stats.bufferSize} items, ${stats.totalWrites} total writes`);
|
|
586
|
-
}
|
|
587
|
-
if (this.requestCoalescer) {
|
|
588
|
-
this.requestCoalescer.adjustParameters(queueLength);
|
|
589
|
-
const sizes = this.requestCoalescer.getQueueSizes();
|
|
590
|
-
this.logger.warn(` Coalescer: ${sizes.total} queued operations`);
|
|
591
|
-
}
|
|
592
|
-
}
|
|
593
|
-
else if (!shouldEnableHighVolume && this.highVolumeMode && !this.forceHighVolumeMode) {
|
|
594
|
-
this.highVolumeMode = false;
|
|
595
|
-
this.logger.info('✅ High-volume mode deactivated - load normalized');
|
|
596
|
-
}
|
|
597
|
-
// Log current status every 10 checks when in high-volume mode
|
|
598
|
-
if (this.highVolumeMode && (now % 10000) < this.volumeCheckInterval) {
|
|
599
|
-
this.logger.info(`📊 High-volume mode status: Queue=${backpressureStatus.queueLength}, Pending=${socketMetrics.pendingRequests}, Sockets=${(socketMetrics.socketUtilization * 100).toFixed(1)}%`);
|
|
600
|
-
}
|
|
601
|
-
}
|
|
524
|
+
// v6.2.7: Removed checkVolumeMode() - write buffering always enabled for cloud storage
|
|
602
525
|
/**
|
|
603
526
|
* Bulk write nouns to S3
|
|
604
527
|
*/
|
|
@@ -802,26 +725,21 @@ export class S3CompatibleStorage extends BaseStorage {
|
|
|
802
725
|
// v5.4.0: Removed 10 *_internal method overrides (lines 984-2069) - now inherit from BaseStorage's type-first implementation
|
|
803
726
|
/**
|
|
804
727
|
* Save a node to storage
|
|
728
|
+
* v6.2.7: Always uses write buffer for consistent performance
|
|
805
729
|
*/
|
|
806
730
|
async saveNode(node) {
|
|
807
731
|
await this.ensureInitialized();
|
|
808
|
-
//
|
|
809
|
-
this.checkVolumeMode();
|
|
810
|
-
|
|
811
|
-
|
|
812
|
-
this.logger.trace(`📝 BUFFERING: Adding noun ${node.id} to write buffer (high-volume mode active)`);
|
|
813
|
-
// v6.2.6: CRITICAL FIX - Populate cache BEFORE buffering for read-after-write consistency
|
|
814
|
-
// Without this, add() returns but relate() can't find the entity (cloud storage production bug)
|
|
815
|
-
// The buffer flushes asynchronously, but cache ensures immediate reads succeed
|
|
732
|
+
// v6.2.7: Always use write buffer - cloud storage benefits from batching
|
|
733
|
+
if (this.nounWriteBuffer) {
|
|
734
|
+
this.logger.trace(`📝 BUFFERING: Adding noun ${node.id} to write buffer`);
|
|
735
|
+
// v6.2.6: Populate cache BEFORE buffering for read-after-write consistency
|
|
816
736
|
if (node.vector && Array.isArray(node.vector) && node.vector.length > 0) {
|
|
817
737
|
this.nounCacheManager.set(node.id, node);
|
|
818
738
|
}
|
|
819
739
|
await this.nounWriteBuffer.add(node.id, node);
|
|
820
740
|
return;
|
|
821
741
|
}
|
|
822
|
-
|
|
823
|
-
this.logger.trace(`📝 DIRECT WRITE: Saving noun ${node.id} directly (high-volume mode inactive)`);
|
|
824
|
-
}
|
|
742
|
+
// Fallback to direct write if buffer not initialized (shouldn't happen after init)
|
|
825
743
|
// Apply backpressure before starting operation
|
|
826
744
|
const requestId = await this.applyBackpressure();
|
|
827
745
|
try {
|
|
@@ -552,6 +552,7 @@ export interface BrainyConfig {
|
|
|
552
552
|
disableAutoOptimize?: boolean;
|
|
553
553
|
batchWrites?: boolean;
|
|
554
554
|
maxConcurrentOperations?: number;
|
|
555
|
+
hnswPersistMode?: 'immediate' | 'deferred';
|
|
555
556
|
maxQueryLimit?: number;
|
|
556
557
|
reservedQueryMemory?: number;
|
|
557
558
|
verbose?: boolean;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@soulcraft/brainy",
|
|
3
|
-
"version": "6.2.6",
|
|
3
|
+
"version": "6.2.8",
|
|
4
4
|
"description": "Universal Knowledge Protocol™ - World's first Triple Intelligence database unifying vector, graph, and document search in one API. Stage 3 CANONICAL: 42 nouns × 127 verbs covering 96-97% of all human knowledge.",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"module": "dist/index.js",
|