@soulcraft/brainy 6.2.7 → 6.2.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/brainy.d.ts +7 -0
- package/dist/brainy.js +28 -3
- package/dist/hnsw/hnswIndex.d.ts +27 -0
- package/dist/hnsw/hnswIndex.js +102 -12
- package/dist/hnsw/typeAwareHNSWIndex.d.ts +21 -1
- package/dist/hnsw/typeAwareHNSWIndex.js +43 -3
- package/dist/storage/baseStorage.js +67 -6
- package/dist/types/brainy.types.d.ts +1 -0
- package/dist/utils/unifiedCache.d.ts +7 -0
- package/dist/utils/unifiedCache.js +17 -0
- package/dist/vfs/PathResolver.d.ts +2 -0
- package/dist/vfs/PathResolver.js +19 -7
- package/package.json +1 -1
package/dist/brainy.d.ts
CHANGED
|
@@ -1795,6 +1795,10 @@ export declare class Brainy<T = any> implements BrainyInterface<T> {
|
|
|
1795
1795
|
* - 87% memory reduction through separate graphs per entity type
|
|
1796
1796
|
* - 10x faster type-specific queries
|
|
1797
1797
|
* - Automatic type routing
|
|
1798
|
+
*
|
|
1799
|
+
* v6.2.8: Smart defaults for HNSW persistence mode
|
|
1800
|
+
* - Cloud storage (GCS/S3/R2/Azure): 'deferred' for 30-50× faster adds
|
|
1801
|
+
* - Local storage (FileSystem/Memory/OPFS): 'immediate' (already fast)
|
|
1798
1802
|
*/
|
|
1799
1803
|
private setupIndex;
|
|
1800
1804
|
/**
|
|
@@ -1846,6 +1850,9 @@ export declare class Brainy<T = any> implements BrainyInterface<T> {
|
|
|
1846
1850
|
private rebuildIndexesIfNeeded;
|
|
1847
1851
|
/**
|
|
1848
1852
|
* Close and cleanup
|
|
1853
|
+
*
|
|
1854
|
+
* v6.2.8: Now flushes HNSW dirty nodes before closing
|
|
1855
|
+
* This ensures deferred persistence mode data is saved
|
|
1849
1856
|
*/
|
|
1850
1857
|
close(): Promise<void>;
|
|
1851
1858
|
/**
|
package/dist/brainy.js
CHANGED
|
@@ -3974,20 +3974,35 @@ export class Brainy {
|
|
|
3974
3974
|
* - 87% memory reduction through separate graphs per entity type
|
|
3975
3975
|
* - 10x faster type-specific queries
|
|
3976
3976
|
* - Automatic type routing
|
|
3977
|
+
*
|
|
3978
|
+
* v6.2.8: Smart defaults for HNSW persistence mode
|
|
3979
|
+
* - Cloud storage (GCS/S3/R2/Azure): 'deferred' for 30-50× faster adds
|
|
3980
|
+
* - Local storage (FileSystem/Memory/OPFS): 'immediate' (already fast)
|
|
3977
3981
|
*/
|
|
3978
3982
|
setupIndex() {
|
|
3979
3983
|
const indexConfig = {
|
|
3980
3984
|
...this.config.index,
|
|
3981
3985
|
distanceFunction: this.distance
|
|
3982
3986
|
};
|
|
3987
|
+
// v6.2.8: Determine persist mode (user config > smart default)
|
|
3988
|
+
let persistMode = this.config.hnswPersistMode || 'immediate';
|
|
3989
|
+
// Smart default: Use deferred mode for cloud storage adapters
|
|
3990
|
+
if (!this.config.hnswPersistMode) {
|
|
3991
|
+
const storageType = this.config.storage?.type || 'auto';
|
|
3992
|
+
const cloudStorageTypes = ['gcs', 's3', 'r2', 'azure'];
|
|
3993
|
+
if (cloudStorageTypes.includes(storageType)) {
|
|
3994
|
+
persistMode = 'deferred';
|
|
3995
|
+
}
|
|
3996
|
+
}
|
|
3983
3997
|
// Phase 2: Use TypeAwareHNSWIndex for billion-scale optimization
|
|
3984
3998
|
if (this.config.storage?.type !== 'memory') {
|
|
3985
3999
|
return new TypeAwareHNSWIndex(indexConfig, this.distance, {
|
|
3986
4000
|
storage: this.storage,
|
|
3987
|
-
useParallelization: true
|
|
4001
|
+
useParallelization: true,
|
|
4002
|
+
persistMode
|
|
3988
4003
|
});
|
|
3989
4004
|
}
|
|
3990
|
-
return new HNSWIndex(indexConfig);
|
|
4005
|
+
return new HNSWIndex(indexConfig, this.distance, { persistMode });
|
|
3991
4006
|
}
|
|
3992
4007
|
/**
|
|
3993
4008
|
* Setup augmentations
|
|
@@ -4071,7 +4086,9 @@ export class Brainy {
|
|
|
4071
4086
|
maxConcurrentOperations: config?.maxConcurrentOperations ?? 10,
|
|
4072
4087
|
// Memory management options (v5.11.0)
|
|
4073
4088
|
maxQueryLimit: config?.maxQueryLimit ?? undefined,
|
|
4074
|
-
reservedQueryMemory: config?.reservedQueryMemory ?? undefined
|
|
4089
|
+
reservedQueryMemory: config?.reservedQueryMemory ?? undefined,
|
|
4090
|
+
// HNSW persistence mode (v6.2.8) - undefined = smart default in setupIndex
|
|
4091
|
+
hnswPersistMode: config?.hnswPersistMode ?? undefined
|
|
4075
4092
|
};
|
|
4076
4093
|
}
|
|
4077
4094
|
/**
|
|
@@ -4241,8 +4258,16 @@ export class Brainy {
|
|
|
4241
4258
|
}
|
|
4242
4259
|
/**
|
|
4243
4260
|
* Close and cleanup
|
|
4261
|
+
*
|
|
4262
|
+
* v6.2.8: Now flushes HNSW dirty nodes before closing
|
|
4263
|
+
* This ensures deferred persistence mode data is saved
|
|
4244
4264
|
*/
|
|
4245
4265
|
async close() {
|
|
4266
|
+
// v6.2.8: Flush HNSW dirty nodes before closing
|
|
4267
|
+
// In deferred persistence mode, this persists all pending HNSW graph data
|
|
4268
|
+
if (this.index && typeof this.index.flush === 'function') {
|
|
4269
|
+
await this.index.flush();
|
|
4270
|
+
}
|
|
4246
4271
|
// Shutdown augmentations
|
|
4247
4272
|
const augs = this.augmentationRegistry.getAll();
|
|
4248
4273
|
for (const aug of augs) {
|
package/dist/hnsw/hnswIndex.d.ts
CHANGED
|
@@ -19,9 +19,13 @@ export declare class HNSWIndex {
|
|
|
19
19
|
private cowEnabled;
|
|
20
20
|
private cowModifiedNodes;
|
|
21
21
|
private cowParent;
|
|
22
|
+
private persistMode;
|
|
23
|
+
private dirtyNodes;
|
|
24
|
+
private dirtySystem;
|
|
22
25
|
constructor(config?: Partial<HNSWConfig>, distanceFunction?: DistanceFunction, options?: {
|
|
23
26
|
useParallelization?: boolean;
|
|
24
27
|
storage?: BaseStorage;
|
|
28
|
+
persistMode?: 'immediate' | 'deferred';
|
|
25
29
|
});
|
|
26
30
|
/**
|
|
27
31
|
* Set whether to use parallelization for performance-critical operations
|
|
@@ -31,6 +35,29 @@ export declare class HNSWIndex {
|
|
|
31
35
|
* Get whether parallelization is enabled
|
|
32
36
|
*/
|
|
33
37
|
getUseParallelization(): boolean;
|
|
38
|
+
/**
|
|
39
|
+
* v6.2.8: Flush dirty HNSW data to storage
|
|
40
|
+
*
|
|
41
|
+
* In deferred persistence mode, HNSW connections are tracked as dirty but not
|
|
42
|
+
* immediately persisted. Call flush() to persist all pending changes.
|
|
43
|
+
*
|
|
44
|
+
* This is automatically called by:
|
|
45
|
+
* - brain.close()
|
|
46
|
+
* - brain.flush()
|
|
47
|
+
* - Process shutdown (SIGTERM/SIGINT)
|
|
48
|
+
*
|
|
49
|
+
* @returns Number of nodes flushed
|
|
50
|
+
*/
|
|
51
|
+
flush(): Promise<number>;
|
|
52
|
+
/**
|
|
53
|
+
* Get the number of dirty (unpersisted) nodes
|
|
54
|
+
* Useful for monitoring and debugging
|
|
55
|
+
*/
|
|
56
|
+
getDirtyNodeCount(): number;
|
|
57
|
+
/**
|
|
58
|
+
* Get the current persist mode
|
|
59
|
+
*/
|
|
60
|
+
getPersistMode(): 'immediate' | 'deferred';
|
|
34
61
|
/**
|
|
35
62
|
* Enable COW (Copy-on-Write) mode - Instant fork via shallow copy
|
|
36
63
|
*
|
package/dist/hnsw/hnswIndex.js
CHANGED
|
@@ -28,6 +28,12 @@ export class HNSWIndex {
|
|
|
28
28
|
this.cowEnabled = false;
|
|
29
29
|
this.cowModifiedNodes = new Set();
|
|
30
30
|
this.cowParent = null;
|
|
31
|
+
// v6.2.8: Deferred HNSW persistence for cloud storage performance
|
|
32
|
+
// In deferred mode, HNSW connections are only persisted on flush/close
|
|
33
|
+
// This reduces GCS operations from 70 to 2-3 per add() (30-50× faster)
|
|
34
|
+
this.persistMode = 'immediate';
|
|
35
|
+
this.dirtyNodes = new Set(); // Nodes with unpersisted HNSW data
|
|
36
|
+
this.dirtySystem = false; // Whether system data (entryPoint, maxLevel) needs persist
|
|
31
37
|
this.config = { ...DEFAULT_CONFIG, ...config };
|
|
32
38
|
this.distanceFunction = distanceFunction;
|
|
33
39
|
this.useParallelization =
|
|
@@ -35,6 +41,7 @@ export class HNSWIndex {
|
|
|
35
41
|
? options.useParallelization
|
|
36
42
|
: true;
|
|
37
43
|
this.storage = options.storage || null;
|
|
44
|
+
this.persistMode = options.persistMode || 'immediate';
|
|
38
45
|
// Use SAME UnifiedCache as Graph and Metadata for fair memory competition
|
|
39
46
|
this.unifiedCache = getGlobalCache();
|
|
40
47
|
}
|
|
@@ -50,6 +57,82 @@ export class HNSWIndex {
|
|
|
50
57
|
getUseParallelization() {
|
|
51
58
|
return this.useParallelization;
|
|
52
59
|
}
|
|
60
|
+
/**
|
|
61
|
+
* v6.2.8: Flush dirty HNSW data to storage
|
|
62
|
+
*
|
|
63
|
+
* In deferred persistence mode, HNSW connections are tracked as dirty but not
|
|
64
|
+
* immediately persisted. Call flush() to persist all pending changes.
|
|
65
|
+
*
|
|
66
|
+
* This is automatically called by:
|
|
67
|
+
* - brain.close()
|
|
68
|
+
* - brain.flush()
|
|
69
|
+
* - Process shutdown (SIGTERM/SIGINT)
|
|
70
|
+
*
|
|
71
|
+
* @returns Number of nodes flushed
|
|
72
|
+
*/
|
|
73
|
+
async flush() {
|
|
74
|
+
if (!this.storage) {
|
|
75
|
+
return 0;
|
|
76
|
+
}
|
|
77
|
+
if (this.dirtyNodes.size === 0 && !this.dirtySystem) {
|
|
78
|
+
return 0;
|
|
79
|
+
}
|
|
80
|
+
const startTime = Date.now();
|
|
81
|
+
const nodeCount = this.dirtyNodes.size;
|
|
82
|
+
// Batch persist all dirty nodes concurrently
|
|
83
|
+
if (this.dirtyNodes.size > 0) {
|
|
84
|
+
const batchSize = 50; // Reasonable batch size for cloud storage
|
|
85
|
+
const nodeIds = Array.from(this.dirtyNodes);
|
|
86
|
+
for (let i = 0; i < nodeIds.length; i += batchSize) {
|
|
87
|
+
const batch = nodeIds.slice(i, i + batchSize);
|
|
88
|
+
const promises = batch.map(nodeId => {
|
|
89
|
+
const noun = this.nouns.get(nodeId);
|
|
90
|
+
if (!noun)
|
|
91
|
+
return Promise.resolve(); // Node was deleted
|
|
92
|
+
const connectionsObj = {};
|
|
93
|
+
for (const [level, nounIds] of noun.connections.entries()) {
|
|
94
|
+
connectionsObj[level.toString()] = Array.from(nounIds);
|
|
95
|
+
}
|
|
96
|
+
return this.storage.saveHNSWData(nodeId, {
|
|
97
|
+
level: noun.level,
|
|
98
|
+
connections: connectionsObj
|
|
99
|
+
}).catch(error => {
|
|
100
|
+
console.error(`[HNSW flush] Failed to persist node ${nodeId}:`, error);
|
|
101
|
+
});
|
|
102
|
+
});
|
|
103
|
+
await Promise.allSettled(promises);
|
|
104
|
+
}
|
|
105
|
+
this.dirtyNodes.clear();
|
|
106
|
+
}
|
|
107
|
+
// Persist system data if dirty
|
|
108
|
+
if (this.dirtySystem) {
|
|
109
|
+
await this.storage.saveHNSWSystem({
|
|
110
|
+
entryPointId: this.entryPointId,
|
|
111
|
+
maxLevel: this.maxLevel
|
|
112
|
+
}).catch(error => {
|
|
113
|
+
console.error('[HNSW flush] Failed to persist system data:', error);
|
|
114
|
+
});
|
|
115
|
+
this.dirtySystem = false;
|
|
116
|
+
}
|
|
117
|
+
const duration = Date.now() - startTime;
|
|
118
|
+
if (nodeCount > 0) {
|
|
119
|
+
prodLog.info(`[HNSW] Flushed ${nodeCount} dirty nodes in ${duration}ms`);
|
|
120
|
+
}
|
|
121
|
+
return nodeCount;
|
|
122
|
+
}
|
|
123
|
+
/**
|
|
124
|
+
* Get the number of dirty (unpersisted) nodes
|
|
125
|
+
* Useful for monitoring and debugging
|
|
126
|
+
*/
|
|
127
|
+
getDirtyNodeCount() {
|
|
128
|
+
return this.dirtyNodes.size;
|
|
129
|
+
}
|
|
130
|
+
/**
|
|
131
|
+
* Get the current persist mode
|
|
132
|
+
*/
|
|
133
|
+
getPersistMode() {
|
|
134
|
+
return this.persistMode;
|
|
135
|
+
}
|
|
53
136
|
/**
|
|
54
137
|
* Enable COW (Copy-on-Write) mode - Instant fork via shallow copy
|
|
55
138
|
*
|
|
@@ -284,14 +367,11 @@ export class HNSWIndex {
|
|
|
284
367
|
}
|
|
285
368
|
// Persist updated neighbor HNSW data (v3.35.0+)
|
|
286
369
|
//
|
|
287
|
-
//
|
|
288
|
-
//
|
|
289
|
-
//
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
// - Mutex serialization (Memory/OPFS/FileSystem)
|
|
293
|
-
// Trade-off: More retry activity under high contention (expected and handled)
|
|
294
|
-
if (this.storage) {
|
|
370
|
+
// v6.2.8: Deferred persistence mode for cloud storage performance
|
|
371
|
+
// In deferred mode, we track dirty nodes instead of persisting immediately
|
|
372
|
+
// This reduces GCS operations from 70 to 2-3 per add() (30-50× faster)
|
|
373
|
+
if (this.storage && this.persistMode === 'immediate') {
|
|
374
|
+
// IMMEDIATE MODE: Original behavior - persist each neighbor update
|
|
295
375
|
const neighborConnectionsObj = {};
|
|
296
376
|
for (const [lvl, nounIds] of neighbor.connections.entries()) {
|
|
297
377
|
neighborConnectionsObj[lvl.toString()] = Array.from(nounIds);
|
|
@@ -304,9 +384,13 @@ export class HNSWIndex {
|
|
|
304
384
|
})
|
|
305
385
|
});
|
|
306
386
|
}
|
|
387
|
+
else if (this.persistMode === 'deferred') {
|
|
388
|
+
// DEFERRED MODE: Track dirty nodes for later batch persistence
|
|
389
|
+
this.dirtyNodes.add(neighborId);
|
|
390
|
+
}
|
|
307
391
|
}
|
|
308
|
-
// Execute all neighbor updates concurrently (
|
|
309
|
-
if (neighborUpdates.length > 0) {
|
|
392
|
+
// Execute all neighbor updates concurrently (only in immediate mode)
|
|
393
|
+
if (neighborUpdates.length > 0 && this.persistMode === 'immediate') {
|
|
310
394
|
const batchSize = this.config.maxConcurrentNeighborWrites || neighborUpdates.length;
|
|
311
395
|
const allFailures = [];
|
|
312
396
|
// Process in chunks if batch size specified
|
|
@@ -360,8 +444,9 @@ export class HNSWIndex {
|
|
|
360
444
|
this.highLevelNodes.get(nounLevel).add(id);
|
|
361
445
|
}
|
|
362
446
|
// Persist HNSW graph data to storage (v3.35.0+)
|
|
363
|
-
|
|
364
|
-
|
|
447
|
+
// v6.2.8: Respect persistMode setting
|
|
448
|
+
if (this.storage && this.persistMode === 'immediate') {
|
|
449
|
+
// IMMEDIATE MODE: Original behavior - persist new entity and system data
|
|
365
450
|
const connectionsObj = {};
|
|
366
451
|
for (const [level, nounIds] of noun.connections.entries()) {
|
|
367
452
|
connectionsObj[level.toString()] = Array.from(nounIds);
|
|
@@ -380,6 +465,11 @@ export class HNSWIndex {
|
|
|
380
465
|
console.error('Failed to persist HNSW system data:', error);
|
|
381
466
|
});
|
|
382
467
|
}
|
|
468
|
+
else if (this.persistMode === 'deferred') {
|
|
469
|
+
// DEFERRED MODE: Track dirty nodes for later batch persistence
|
|
470
|
+
this.dirtyNodes.add(id);
|
|
471
|
+
this.dirtySystem = true;
|
|
472
|
+
}
|
|
383
473
|
return id;
|
|
384
474
|
}
|
|
385
475
|
/**
|
|
@@ -43,16 +43,18 @@ export declare class TypeAwareHNSWIndex {
|
|
|
43
43
|
private distanceFunction;
|
|
44
44
|
private storage;
|
|
45
45
|
private useParallelization;
|
|
46
|
+
private persistMode;
|
|
46
47
|
/**
|
|
47
48
|
* Create a new TypeAwareHNSWIndex
|
|
48
49
|
*
|
|
49
50
|
* @param config HNSW configuration (M, efConstruction, efSearch, ml)
|
|
50
51
|
* @param distanceFunction Distance function (default: euclidean)
|
|
51
|
-
* @param options Additional options (storage, parallelization)
|
|
52
|
+
* @param options Additional options (storage, parallelization, persistMode)
|
|
52
53
|
*/
|
|
53
54
|
constructor(config?: Partial<HNSWConfig>, distanceFunction?: DistanceFunction, options?: {
|
|
54
55
|
useParallelization?: boolean;
|
|
55
56
|
storage?: BaseStorage;
|
|
57
|
+
persistMode?: 'immediate' | 'deferred';
|
|
56
58
|
});
|
|
57
59
|
/**
|
|
58
60
|
* Enable COW (Copy-on-Write) mode - Instant fork via shallow copy
|
|
@@ -63,6 +65,24 @@ export declare class TypeAwareHNSWIndex {
|
|
|
63
65
|
* @param parent - Parent TypeAwareHNSWIndex to copy from
|
|
64
66
|
*/
|
|
65
67
|
enableCOW(parent: TypeAwareHNSWIndex): void;
|
|
68
|
+
/**
|
|
69
|
+
* v6.2.8: Flush dirty HNSW data to storage for all type-specific indexes
|
|
70
|
+
*
|
|
71
|
+
* In deferred persistence mode, HNSW connections are tracked as dirty but not
|
|
72
|
+
* immediately persisted. Call flush() to persist all pending changes across
|
|
73
|
+
* all type-specific indexes.
|
|
74
|
+
*
|
|
75
|
+
* @returns Total number of nodes flushed across all indexes
|
|
76
|
+
*/
|
|
77
|
+
flush(): Promise<number>;
|
|
78
|
+
/**
|
|
79
|
+
* Get the total number of dirty (unpersisted) nodes across all type-specific indexes
|
|
80
|
+
*/
|
|
81
|
+
getDirtyNodeCount(): number;
|
|
82
|
+
/**
|
|
83
|
+
* Get the current persist mode
|
|
84
|
+
*/
|
|
85
|
+
getPersistMode(): 'immediate' | 'deferred';
|
|
66
86
|
/**
|
|
67
87
|
* Get or create HNSW index for a specific type (lazy initialization)
|
|
68
88
|
*
|
|
@@ -35,7 +35,7 @@ export class TypeAwareHNSWIndex {
|
|
|
35
35
|
*
|
|
36
36
|
* @param config HNSW configuration (M, efConstruction, efSearch, ml)
|
|
37
37
|
* @param distanceFunction Distance function (default: euclidean)
|
|
38
|
-
* @param options Additional options (storage, parallelization)
|
|
38
|
+
* @param options Additional options (storage, parallelization, persistMode)
|
|
39
39
|
*/
|
|
40
40
|
constructor(config = {}, distanceFunction = euclideanDistance, options = {}) {
|
|
41
41
|
// One HNSW index per noun type (lazy initialization)
|
|
@@ -47,6 +47,7 @@ export class TypeAwareHNSWIndex {
|
|
|
47
47
|
options.useParallelization !== undefined
|
|
48
48
|
? options.useParallelization
|
|
49
49
|
: true;
|
|
50
|
+
this.persistMode = options.persistMode || 'immediate';
|
|
50
51
|
prodLog.info('TypeAwareHNSWIndex initialized (Phase 2: Type-Aware HNSW)');
|
|
51
52
|
}
|
|
52
53
|
/**
|
|
@@ -64,13 +65,51 @@ export class TypeAwareHNSWIndex {
|
|
|
64
65
|
for (const [type, parentIndex] of parent.indexes.entries()) {
|
|
65
66
|
const childIndex = new HNSWIndex(this.config, this.distanceFunction, {
|
|
66
67
|
useParallelization: this.useParallelization,
|
|
67
|
-
storage: this.storage || undefined
|
|
68
|
+
storage: this.storage || undefined,
|
|
69
|
+
persistMode: this.persistMode
|
|
68
70
|
});
|
|
69
71
|
childIndex.enableCOW(parentIndex);
|
|
70
72
|
this.indexes.set(type, childIndex);
|
|
71
73
|
}
|
|
72
74
|
prodLog.info(`TypeAwareHNSWIndex COW enabled: ${parent.indexes.size} type-specific indexes shallow copied`);
|
|
73
75
|
}
|
|
76
|
+
/**
|
|
77
|
+
* v6.2.8: Flush dirty HNSW data to storage for all type-specific indexes
|
|
78
|
+
*
|
|
79
|
+
* In deferred persistence mode, HNSW connections are tracked as dirty but not
|
|
80
|
+
* immediately persisted. Call flush() to persist all pending changes across
|
|
81
|
+
* all type-specific indexes.
|
|
82
|
+
*
|
|
83
|
+
* @returns Total number of nodes flushed across all indexes
|
|
84
|
+
*/
|
|
85
|
+
async flush() {
|
|
86
|
+
if (this.indexes.size === 0) {
|
|
87
|
+
return 0;
|
|
88
|
+
}
|
|
89
|
+
const flushPromises = Array.from(this.indexes.values()).map(index => index.flush());
|
|
90
|
+
const results = await Promise.all(flushPromises);
|
|
91
|
+
const totalFlushed = results.reduce((sum, count) => sum + count, 0);
|
|
92
|
+
if (totalFlushed > 0) {
|
|
93
|
+
prodLog.info(`[TypeAwareHNSW] Flushed ${totalFlushed} dirty nodes across ${this.indexes.size} type indexes`);
|
|
94
|
+
}
|
|
95
|
+
return totalFlushed;
|
|
96
|
+
}
|
|
97
|
+
/**
|
|
98
|
+
* Get the total number of dirty (unpersisted) nodes across all type-specific indexes
|
|
99
|
+
*/
|
|
100
|
+
getDirtyNodeCount() {
|
|
101
|
+
let total = 0;
|
|
102
|
+
for (const index of this.indexes.values()) {
|
|
103
|
+
total += index.getDirtyNodeCount();
|
|
104
|
+
}
|
|
105
|
+
return total;
|
|
106
|
+
}
|
|
107
|
+
/**
|
|
108
|
+
* Get the current persist mode
|
|
109
|
+
*/
|
|
110
|
+
getPersistMode() {
|
|
111
|
+
return this.persistMode;
|
|
112
|
+
}
|
|
74
113
|
/**
|
|
75
114
|
* Get or create HNSW index for a specific type (lazy initialization)
|
|
76
115
|
*
|
|
@@ -90,7 +129,8 @@ export class TypeAwareHNSWIndex {
|
|
|
90
129
|
prodLog.info(`Creating HNSW index for type: ${type}`);
|
|
91
130
|
const index = new HNSWIndex(this.config, this.distanceFunction, {
|
|
92
131
|
useParallelization: this.useParallelization,
|
|
93
|
-
storage: this.storage || undefined
|
|
132
|
+
storage: this.storage || undefined,
|
|
133
|
+
persistMode: this.persistMode
|
|
94
134
|
});
|
|
95
135
|
this.indexes.set(type, index);
|
|
96
136
|
}
|
|
@@ -1276,6 +1276,42 @@ export class BaseStorage extends BaseStorageAdapter {
|
|
|
1276
1276
|
nextCursor
|
|
1277
1277
|
};
|
|
1278
1278
|
}
|
|
1279
|
+
// v6.2.9: Fast path for SINGLE sourceId + verbType combo (common VFS pattern)
|
|
1280
|
+
// This avoids the slow type-iteration fallback for VFS operations
|
|
1281
|
+
// NOTE: Only use fast path for single sourceId to avoid incomplete results
|
|
1282
|
+
const isSingleSourceId = options.filter.sourceId &&
|
|
1283
|
+
!Array.isArray(options.filter.sourceId);
|
|
1284
|
+
if (isSingleSourceId &&
|
|
1285
|
+
options.filter.verbType &&
|
|
1286
|
+
!options.filter.targetId &&
|
|
1287
|
+
!options.filter.service &&
|
|
1288
|
+
!options.filter.metadata) {
|
|
1289
|
+
const sourceId = options.filter.sourceId;
|
|
1290
|
+
const verbTypes = Array.isArray(options.filter.verbType)
|
|
1291
|
+
? options.filter.verbType
|
|
1292
|
+
: [options.filter.verbType];
|
|
1293
|
+
prodLog.debug(`[BaseStorage] getVerbs: Using fast path for sourceId=${sourceId}, verbTypes=${verbTypes.join(',')}`);
|
|
1294
|
+
// Get verbs by source (uses GraphAdjacencyIndex if available)
|
|
1295
|
+
const verbsBySource = await this.getVerbsBySource_internal(sourceId);
|
|
1296
|
+
// Filter by verbType in memory (fast - usually small number of verbs per source)
|
|
1297
|
+
const filtered = verbsBySource.filter(v => verbTypes.includes(v.verb));
|
|
1298
|
+
// Apply pagination
|
|
1299
|
+
const paginatedVerbs = filtered.slice(offset, offset + limit);
|
|
1300
|
+
const hasMore = offset + limit < filtered.length;
|
|
1301
|
+
// Set next cursor if there are more items
|
|
1302
|
+
let nextCursor = undefined;
|
|
1303
|
+
if (hasMore && paginatedVerbs.length > 0) {
|
|
1304
|
+
const lastItem = paginatedVerbs[paginatedVerbs.length - 1];
|
|
1305
|
+
nextCursor = lastItem.id;
|
|
1306
|
+
}
|
|
1307
|
+
prodLog.debug(`[BaseStorage] getVerbs: Fast path returned ${filtered.length} verbs (${paginatedVerbs.length} after pagination)`);
|
|
1308
|
+
return {
|
|
1309
|
+
items: paginatedVerbs,
|
|
1310
|
+
totalCount: filtered.length,
|
|
1311
|
+
hasMore,
|
|
1312
|
+
nextCursor
|
|
1313
|
+
};
|
|
1314
|
+
}
|
|
1279
1315
|
}
|
|
1280
1316
|
// For more complex filtering or no filtering, use a paginated approach
|
|
1281
1317
|
// that avoids loading all verbs into memory at once
|
|
@@ -1336,14 +1372,32 @@ export class BaseStorage extends BaseStorageAdapter {
|
|
|
1336
1372
|
// Only use type-skipping optimization if counts are non-zero (reliable)
|
|
1337
1373
|
const totalVerbCountFromArray = this.verbCountsByType.reduce((sum, c) => sum + c, 0);
|
|
1338
1374
|
const useOptimization = totalVerbCountFromArray > 0;
|
|
1375
|
+
// v6.2.9 BUG FIX: Pre-compute requested verb types to avoid skipping them
|
|
1376
|
+
// When a specific verbType filter is provided, we MUST check that type
|
|
1377
|
+
// even if verbCountsByType shows 0 (counts can be stale after restart)
|
|
1378
|
+
const requestedVerbTypes = options?.filter?.verbType;
|
|
1379
|
+
const requestedVerbTypesSet = requestedVerbTypes
|
|
1380
|
+
? new Set(Array.isArray(requestedVerbTypes) ? requestedVerbTypes : [requestedVerbTypes])
|
|
1381
|
+
: null;
|
|
1339
1382
|
// Iterate through all 127 verb types (Stage 3 CANONICAL) with early termination
|
|
1340
1383
|
// OPTIMIZATION: Skip types with zero count (only if counts are reliable)
|
|
1341
1384
|
for (let i = 0; i < VERB_TYPE_COUNT && collectedVerbs.length < targetCount; i++) {
|
|
1342
|
-
|
|
1343
|
-
|
|
1385
|
+
const type = TypeUtils.getVerbFromIndex(i);
|
|
1386
|
+
// v6.2.9 FIX: Never skip a type that's explicitly requested in the filter
|
|
1387
|
+
// This fixes VFS bug where Contains relationships were skipped after restart
|
|
1388
|
+
// when verbCountsByType[Contains] was 0 due to stale statistics
|
|
1389
|
+
const isRequestedType = requestedVerbTypesSet?.has(type) ?? false;
|
|
1390
|
+
const countIsZero = this.verbCountsByType[i] === 0;
|
|
1391
|
+
// Skip empty types for performance (but only if optimization is enabled AND not requested)
|
|
1392
|
+
if (useOptimization && countIsZero && !isRequestedType) {
|
|
1344
1393
|
continue;
|
|
1345
1394
|
}
|
|
1346
|
-
|
|
1395
|
+
// v6.2.9: Log when we DON'T skip a requested type that would have been skipped
|
|
1396
|
+
// This helps diagnose stale statistics issues in production
|
|
1397
|
+
if (useOptimization && countIsZero && isRequestedType) {
|
|
1398
|
+
prodLog.debug(`[BaseStorage] getVerbs: NOT skipping type=${type} despite count=0 (type was explicitly requested). ` +
|
|
1399
|
+
`Statistics may be stale - consider running rebuildTypeCounts().`);
|
|
1400
|
+
}
|
|
1347
1401
|
try {
|
|
1348
1402
|
const verbsOfType = await this.getVerbsByType_internal(type);
|
|
1349
1403
|
// Apply filtering inline (memory efficient)
|
|
@@ -2169,8 +2223,11 @@ export class BaseStorage extends BaseStorageAdapter {
|
|
|
2169
2223
|
this.nounCountsByType[typeIndex]++;
|
|
2170
2224
|
// COW-aware write (v5.0.1): Use COW helper for branch isolation
|
|
2171
2225
|
await this.writeObjectToBranch(path, noun);
|
|
2172
|
-
// Periodically save statistics
|
|
2173
|
-
|
|
2226
|
+
// Periodically save statistics
|
|
2227
|
+
// v6.2.9: Also save on first noun of each type to ensure low-count types are tracked
|
|
2228
|
+
const shouldSave = this.nounCountsByType[typeIndex] === 1 || // First noun of type
|
|
2229
|
+
this.nounCountsByType[typeIndex] % 100 === 0; // Every 100th
|
|
2230
|
+
if (shouldSave) {
|
|
2174
2231
|
await this.saveTypeStatistics();
|
|
2175
2232
|
}
|
|
2176
2233
|
}
|
|
@@ -2277,7 +2334,11 @@ export class BaseStorage extends BaseStorageAdapter {
|
|
|
2277
2334
|
prodLog.warn(`[BaseStorage] graphIndex is null, cannot update index for verb ${verb.id}`);
|
|
2278
2335
|
}
|
|
2279
2336
|
// Periodically save statistics
|
|
2280
|
-
|
|
2337
|
+
// v6.2.9: Also save on first verb of each type to ensure low-count types are tracked
|
|
2338
|
+
// This prevents stale statistics after restart for types with < 100 verbs (common for VFS)
|
|
2339
|
+
const shouldSave = this.verbCountsByType[typeIndex] === 1 || // First verb of type
|
|
2340
|
+
this.verbCountsByType[typeIndex] % 100 === 0; // Every 100th
|
|
2341
|
+
if (shouldSave) {
|
|
2281
2342
|
await this.saveTypeStatistics();
|
|
2282
2343
|
}
|
|
2283
2344
|
}
|
|
@@ -552,6 +552,7 @@ export interface BrainyConfig {
|
|
|
552
552
|
disableAutoOptimize?: boolean;
|
|
553
553
|
batchWrites?: boolean;
|
|
554
554
|
maxConcurrentOperations?: number;
|
|
555
|
+
hnswPersistMode?: 'immediate' | 'deferred';
|
|
555
556
|
maxQueryLimit?: number;
|
|
556
557
|
reservedQueryMemory?: number;
|
|
557
558
|
verbose?: boolean;
|
|
@@ -92,6 +92,13 @@ export declare class UnifiedCache {
|
|
|
92
92
|
* Delete specific item from cache
|
|
93
93
|
*/
|
|
94
94
|
delete(key: string): boolean;
|
|
95
|
+
/**
|
|
96
|
+
* Delete all items with keys starting with the given prefix
|
|
97
|
+
* v6.2.9: Added for VFS cache invalidation (fixes stale parent ID bug)
|
|
98
|
+
* @param prefix - The key prefix to match
|
|
99
|
+
* @returns Number of items deleted
|
|
100
|
+
*/
|
|
101
|
+
deleteByPrefix(prefix: string): number;
|
|
95
102
|
/**
|
|
96
103
|
* Clear cache or specific type
|
|
97
104
|
*/
|
|
@@ -304,6 +304,23 @@ export class UnifiedCache {
|
|
|
304
304
|
}
|
|
305
305
|
return false;
|
|
306
306
|
}
|
|
307
|
+
/**
|
|
308
|
+
* Delete all items with keys starting with the given prefix
|
|
309
|
+
* v6.2.9: Added for VFS cache invalidation (fixes stale parent ID bug)
|
|
310
|
+
* @param prefix - The key prefix to match
|
|
311
|
+
* @returns Number of items deleted
|
|
312
|
+
*/
|
|
313
|
+
deleteByPrefix(prefix) {
|
|
314
|
+
let deleted = 0;
|
|
315
|
+
for (const [key, item] of this.cache) {
|
|
316
|
+
if (key.startsWith(prefix)) {
|
|
317
|
+
this.currentSize -= item.size;
|
|
318
|
+
this.cache.delete(key);
|
|
319
|
+
deleted++;
|
|
320
|
+
}
|
|
321
|
+
}
|
|
322
|
+
return deleted;
|
|
323
|
+
}
|
|
307
324
|
/**
|
|
308
325
|
* Clear cache or specific type
|
|
309
326
|
*/
|
|
@@ -64,6 +64,8 @@ export declare class PathResolver {
|
|
|
64
64
|
createPath(path: string, entityId: string): Promise<void>;
|
|
65
65
|
/**
|
|
66
66
|
* Invalidate cache entries for a path and its children
|
|
67
|
+
* v6.2.9 FIX: Also invalidates UnifiedCache to prevent stale entity IDs
|
|
68
|
+
* This fixes the "Source entity not found" bug after delete+recreate operations
|
|
67
69
|
*/
|
|
68
70
|
invalidatePath(path: string, recursive?: boolean): void;
|
|
69
71
|
/**
|
package/dist/vfs/PathResolver.js
CHANGED
|
@@ -254,26 +254,38 @@ export class PathResolver {
|
|
|
254
254
|
}
|
|
255
255
|
/**
|
|
256
256
|
* Invalidate cache entries for a path and its children
|
|
257
|
+
* v6.2.9 FIX: Also invalidates UnifiedCache to prevent stale entity IDs
|
|
258
|
+
* This fixes the "Source entity not found" bug after delete+recreate operations
|
|
257
259
|
*/
|
|
258
260
|
invalidatePath(path, recursive = false) {
|
|
259
261
|
const normalizedPath = this.normalizePath(path);
|
|
260
|
-
//
|
|
262
|
+
// v6.2.9 FIX: Clear parent cache BEFORE deleting from pathCache
|
|
263
|
+
// (we need the entityId from the cache entry)
|
|
264
|
+
const cached = this.pathCache.get(normalizedPath);
|
|
265
|
+
if (cached) {
|
|
266
|
+
this.parentCache.delete(cached.entityId);
|
|
267
|
+
}
|
|
268
|
+
// Remove from local caches
|
|
261
269
|
this.pathCache.delete(normalizedPath);
|
|
262
270
|
this.hotPaths.delete(normalizedPath);
|
|
271
|
+
// v6.2.9 CRITICAL FIX: Also invalidate UnifiedCache (global LRU cache)
|
|
272
|
+
// This was missing before, causing stale entity IDs to be returned after delete
|
|
273
|
+
const cacheKey = `vfs:path:${normalizedPath}`;
|
|
274
|
+
getGlobalCache().delete(cacheKey);
|
|
263
275
|
if (recursive) {
|
|
264
276
|
// Remove all paths that start with this path
|
|
265
277
|
const prefix = normalizedPath.endsWith('/') ? normalizedPath : normalizedPath + '/';
|
|
266
|
-
for (const [cachedPath] of this.pathCache) {
|
|
278
|
+
for (const [cachedPath, entry] of this.pathCache) {
|
|
267
279
|
if (cachedPath.startsWith(prefix)) {
|
|
268
280
|
this.pathCache.delete(cachedPath);
|
|
269
281
|
this.hotPaths.delete(cachedPath);
|
|
282
|
+
// v6.2.9: Also clear parent cache for this entry
|
|
283
|
+
this.parentCache.delete(entry.entityId);
|
|
270
284
|
}
|
|
271
285
|
}
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
if (cached) {
|
|
276
|
-
this.parentCache.delete(cached.entityId);
|
|
286
|
+
// v6.2.9 CRITICAL FIX: Also invalidate UnifiedCache entries with this prefix
|
|
287
|
+
const globalCachePrefix = `vfs:path:${prefix}`;
|
|
288
|
+
getGlobalCache().deleteByPrefix(globalCachePrefix);
|
|
277
289
|
}
|
|
278
290
|
}
|
|
279
291
|
/**
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@soulcraft/brainy",
|
|
3
|
-
"version": "6.2.
|
|
3
|
+
"version": "6.2.9",
|
|
4
4
|
"description": "Universal Knowledge Protocol™ - World's first Triple Intelligence database unifying vector, graph, and document search in one API. Stage 3 CANONICAL: 42 nouns × 127 verbs covering 96-97% of all human knowledge.",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"module": "dist/index.js",
|