@soulcraft/brainy 6.2.7 → 6.2.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/brainy.d.ts CHANGED
@@ -1795,6 +1795,10 @@ export declare class Brainy<T = any> implements BrainyInterface<T> {
1795
1795
  * - 87% memory reduction through separate graphs per entity type
1796
1796
  * - 10x faster type-specific queries
1797
1797
  * - Automatic type routing
1798
+ *
1799
+ * v6.2.8: Smart defaults for HNSW persistence mode
1800
+ * - Cloud storage (GCS/S3/R2/Azure): 'deferred' for 30-50× faster adds
1801
+ * - Local storage (FileSystem/Memory/OPFS): 'immediate' (already fast)
1798
1802
  */
1799
1803
  private setupIndex;
1800
1804
  /**
@@ -1846,6 +1850,9 @@ export declare class Brainy<T = any> implements BrainyInterface<T> {
1846
1850
  private rebuildIndexesIfNeeded;
1847
1851
  /**
1848
1852
  * Close and cleanup
1853
+ *
1854
+ * v6.2.8: Now flushes HNSW dirty nodes before closing
1855
+ * This ensures deferred persistence mode data is saved
1849
1856
  */
1850
1857
  close(): Promise<void>;
1851
1858
  /**
package/dist/brainy.js CHANGED
@@ -3974,20 +3974,35 @@ export class Brainy {
3974
3974
  * - 87% memory reduction through separate graphs per entity type
3975
3975
  * - 10x faster type-specific queries
3976
3976
  * - Automatic type routing
3977
+ *
3978
+ * v6.2.8: Smart defaults for HNSW persistence mode
3979
+ * - Cloud storage (GCS/S3/R2/Azure): 'deferred' for 30-50× faster adds
3980
+ * - Local storage (FileSystem/Memory/OPFS): 'immediate' (already fast)
3977
3981
  */
3978
3982
  setupIndex() {
3979
3983
  const indexConfig = {
3980
3984
  ...this.config.index,
3981
3985
  distanceFunction: this.distance
3982
3986
  };
3987
+ // v6.2.8: Determine persist mode (user config > smart default)
3988
+ let persistMode = this.config.hnswPersistMode || 'immediate';
3989
+ // Smart default: Use deferred mode for cloud storage adapters
3990
+ if (!this.config.hnswPersistMode) {
3991
+ const storageType = this.config.storage?.type || 'auto';
3992
+ const cloudStorageTypes = ['gcs', 's3', 'r2', 'azure'];
3993
+ if (cloudStorageTypes.includes(storageType)) {
3994
+ persistMode = 'deferred';
3995
+ }
3996
+ }
3983
3997
  // Phase 2: Use TypeAwareHNSWIndex for billion-scale optimization
3984
3998
  if (this.config.storage?.type !== 'memory') {
3985
3999
  return new TypeAwareHNSWIndex(indexConfig, this.distance, {
3986
4000
  storage: this.storage,
3987
- useParallelization: true
4001
+ useParallelization: true,
4002
+ persistMode
3988
4003
  });
3989
4004
  }
3990
- return new HNSWIndex(indexConfig);
4005
+ return new HNSWIndex(indexConfig, this.distance, { persistMode });
3991
4006
  }
3992
4007
  /**
3993
4008
  * Setup augmentations
@@ -4071,7 +4086,9 @@ export class Brainy {
4071
4086
  maxConcurrentOperations: config?.maxConcurrentOperations ?? 10,
4072
4087
  // Memory management options (v5.11.0)
4073
4088
  maxQueryLimit: config?.maxQueryLimit ?? undefined,
4074
- reservedQueryMemory: config?.reservedQueryMemory ?? undefined
4089
+ reservedQueryMemory: config?.reservedQueryMemory ?? undefined,
4090
+ // HNSW persistence mode (v6.2.8) - undefined = smart default in setupIndex
4091
+ hnswPersistMode: config?.hnswPersistMode ?? undefined
4075
4092
  };
4076
4093
  }
4077
4094
  /**
@@ -4241,8 +4258,16 @@ export class Brainy {
4241
4258
  }
4242
4259
  /**
4243
4260
  * Close and cleanup
4261
+ *
4262
+ * v6.2.8: Now flushes HNSW dirty nodes before closing
4263
+ * This ensures deferred persistence mode data is saved
4244
4264
  */
4245
4265
  async close() {
4266
+ // v6.2.8: Flush HNSW dirty nodes before closing
4267
+ // In deferred persistence mode, this persists all pending HNSW graph data
4268
+ if (this.index && typeof this.index.flush === 'function') {
4269
+ await this.index.flush();
4270
+ }
4246
4271
  // Shutdown augmentations
4247
4272
  const augs = this.augmentationRegistry.getAll();
4248
4273
  for (const aug of augs) {
@@ -19,9 +19,13 @@ export declare class HNSWIndex {
19
19
  private cowEnabled;
20
20
  private cowModifiedNodes;
21
21
  private cowParent;
22
+ private persistMode;
23
+ private dirtyNodes;
24
+ private dirtySystem;
22
25
  constructor(config?: Partial<HNSWConfig>, distanceFunction?: DistanceFunction, options?: {
23
26
  useParallelization?: boolean;
24
27
  storage?: BaseStorage;
28
+ persistMode?: 'immediate' | 'deferred';
25
29
  });
26
30
  /**
27
31
  * Set whether to use parallelization for performance-critical operations
@@ -31,6 +35,29 @@ export declare class HNSWIndex {
31
35
  * Get whether parallelization is enabled
32
36
  */
33
37
  getUseParallelization(): boolean;
38
+ /**
39
+ * v6.2.8: Flush dirty HNSW data to storage
40
+ *
41
+ * In deferred persistence mode, HNSW connections are tracked as dirty but not
42
+ * immediately persisted. Call flush() to persist all pending changes.
43
+ *
44
+ * This is automatically called by:
45
+ * - brain.close()
46
+ * - brain.flush()
47
+ * - Process shutdown (SIGTERM/SIGINT)
48
+ *
49
+ * @returns Number of nodes flushed
50
+ */
51
+ flush(): Promise<number>;
52
+ /**
53
+ * Get the number of dirty (unpersisted) nodes
54
+ * Useful for monitoring and debugging
55
+ */
56
+ getDirtyNodeCount(): number;
57
+ /**
58
+ * Get the current persist mode
59
+ */
60
+ getPersistMode(): 'immediate' | 'deferred';
34
61
  /**
35
62
  * Enable COW (Copy-on-Write) mode - Instant fork via shallow copy
36
63
  *
@@ -28,6 +28,12 @@ export class HNSWIndex {
28
28
  this.cowEnabled = false;
29
29
  this.cowModifiedNodes = new Set();
30
30
  this.cowParent = null;
31
+ // v6.2.8: Deferred HNSW persistence for cloud storage performance
32
+ // In deferred mode, HNSW connections are only persisted on flush/close
33
+ // This reduces GCS operations from 70 to 2-3 per add() (30-50× faster)
34
+ this.persistMode = 'immediate';
35
+ this.dirtyNodes = new Set(); // Nodes with unpersisted HNSW data
36
+ this.dirtySystem = false; // Whether system data (entryPoint, maxLevel) needs persist
31
37
  this.config = { ...DEFAULT_CONFIG, ...config };
32
38
  this.distanceFunction = distanceFunction;
33
39
  this.useParallelization =
@@ -35,6 +41,7 @@ export class HNSWIndex {
35
41
  ? options.useParallelization
36
42
  : true;
37
43
  this.storage = options.storage || null;
44
+ this.persistMode = options.persistMode || 'immediate';
38
45
  // Use SAME UnifiedCache as Graph and Metadata for fair memory competition
39
46
  this.unifiedCache = getGlobalCache();
40
47
  }
@@ -50,6 +57,82 @@ export class HNSWIndex {
50
57
  getUseParallelization() {
51
58
  return this.useParallelization;
52
59
  }
60
+ /**
61
+ * v6.2.8: Flush dirty HNSW data to storage
62
+ *
63
+ * In deferred persistence mode, HNSW connections are tracked as dirty but not
64
+ * immediately persisted. Call flush() to persist all pending changes.
65
+ *
66
+ * This is automatically called by:
67
+ * - brain.close()
68
+ * - brain.flush()
69
+ * - Process shutdown (SIGTERM/SIGINT)
70
+ *
71
+ * @returns Number of nodes flushed
72
+ */
73
+ async flush() {
74
+ if (!this.storage) {
75
+ return 0;
76
+ }
77
+ if (this.dirtyNodes.size === 0 && !this.dirtySystem) {
78
+ return 0;
79
+ }
80
+ const startTime = Date.now();
81
+ const nodeCount = this.dirtyNodes.size;
82
+ // Batch persist all dirty nodes concurrently
83
+ if (this.dirtyNodes.size > 0) {
84
+ const batchSize = 50; // Reasonable batch size for cloud storage
85
+ const nodeIds = Array.from(this.dirtyNodes);
86
+ for (let i = 0; i < nodeIds.length; i += batchSize) {
87
+ const batch = nodeIds.slice(i, i + batchSize);
88
+ const promises = batch.map(nodeId => {
89
+ const noun = this.nouns.get(nodeId);
90
+ if (!noun)
91
+ return Promise.resolve(); // Node was deleted
92
+ const connectionsObj = {};
93
+ for (const [level, nounIds] of noun.connections.entries()) {
94
+ connectionsObj[level.toString()] = Array.from(nounIds);
95
+ }
96
+ return this.storage.saveHNSWData(nodeId, {
97
+ level: noun.level,
98
+ connections: connectionsObj
99
+ }).catch(error => {
100
+ console.error(`[HNSW flush] Failed to persist node ${nodeId}:`, error);
101
+ });
102
+ });
103
+ await Promise.allSettled(promises);
104
+ }
105
+ this.dirtyNodes.clear();
106
+ }
107
+ // Persist system data if dirty
108
+ if (this.dirtySystem) {
109
+ await this.storage.saveHNSWSystem({
110
+ entryPointId: this.entryPointId,
111
+ maxLevel: this.maxLevel
112
+ }).catch(error => {
113
+ console.error('[HNSW flush] Failed to persist system data:', error);
114
+ });
115
+ this.dirtySystem = false;
116
+ }
117
+ const duration = Date.now() - startTime;
118
+ if (nodeCount > 0) {
119
+ prodLog.info(`[HNSW] Flushed ${nodeCount} dirty nodes in ${duration}ms`);
120
+ }
121
+ return nodeCount;
122
+ }
123
+ /**
124
+ * Get the number of dirty (unpersisted) nodes
125
+ * Useful for monitoring and debugging
126
+ */
127
+ getDirtyNodeCount() {
128
+ return this.dirtyNodes.size;
129
+ }
130
+ /**
131
+ * Get the current persist mode
132
+ */
133
+ getPersistMode() {
134
+ return this.persistMode;
135
+ }
53
136
  /**
54
137
  * Enable COW (Copy-on-Write) mode - Instant fork via shallow copy
55
138
  *
@@ -284,14 +367,11 @@ export class HNSWIndex {
284
367
  }
285
368
  // Persist updated neighbor HNSW data (v3.35.0+)
286
369
  //
287
- // PERFORMANCE OPTIMIZATION (v4.10.0): Concurrent neighbor updates
288
- // Previously (v4.9.2): Serial await - 100% safe but 48-64× slower
289
- // Now: Promise.allSettled() - 48-64× faster bulk imports
290
- // Safety: All storage adapters handle concurrent writes via:
291
- // - Optimistic locking with retry (GCS/S3/Azure/R2)
292
- // - Mutex serialization (Memory/OPFS/FileSystem)
293
- // Trade-off: More retry activity under high contention (expected and handled)
294
- if (this.storage) {
370
+ // v6.2.8: Deferred persistence mode for cloud storage performance
371
+ // In deferred mode, we track dirty nodes instead of persisting immediately
372
+ // This reduces GCS operations from 70 to 2-3 per add() (30-50× faster)
373
+ if (this.storage && this.persistMode === 'immediate') {
374
+ // IMMEDIATE MODE: Original behavior - persist each neighbor update
295
375
  const neighborConnectionsObj = {};
296
376
  for (const [lvl, nounIds] of neighbor.connections.entries()) {
297
377
  neighborConnectionsObj[lvl.toString()] = Array.from(nounIds);
@@ -304,9 +384,13 @@ export class HNSWIndex {
304
384
  })
305
385
  });
306
386
  }
387
+ else if (this.persistMode === 'deferred') {
388
+ // DEFERRED MODE: Track dirty nodes for later batch persistence
389
+ this.dirtyNodes.add(neighborId);
390
+ }
307
391
  }
308
- // Execute all neighbor updates concurrently (with optional batch size limiting)
309
- if (neighborUpdates.length > 0) {
392
+ // Execute all neighbor updates concurrently (only in immediate mode)
393
+ if (neighborUpdates.length > 0 && this.persistMode === 'immediate') {
310
394
  const batchSize = this.config.maxConcurrentNeighborWrites || neighborUpdates.length;
311
395
  const allFailures = [];
312
396
  // Process in chunks if batch size specified
@@ -360,8 +444,9 @@ export class HNSWIndex {
360
444
  this.highLevelNodes.get(nounLevel).add(id);
361
445
  }
362
446
  // Persist HNSW graph data to storage (v3.35.0+)
363
- if (this.storage) {
364
- // Convert connections Map to serializable format
447
+ // v6.2.8: Respect persistMode setting
448
+ if (this.storage && this.persistMode === 'immediate') {
449
+ // IMMEDIATE MODE: Original behavior - persist new entity and system data
365
450
  const connectionsObj = {};
366
451
  for (const [level, nounIds] of noun.connections.entries()) {
367
452
  connectionsObj[level.toString()] = Array.from(nounIds);
@@ -380,6 +465,11 @@ export class HNSWIndex {
380
465
  console.error('Failed to persist HNSW system data:', error);
381
466
  });
382
467
  }
468
+ else if (this.persistMode === 'deferred') {
469
+ // DEFERRED MODE: Track dirty nodes for later batch persistence
470
+ this.dirtyNodes.add(id);
471
+ this.dirtySystem = true;
472
+ }
383
473
  return id;
384
474
  }
385
475
  /**
@@ -43,16 +43,18 @@ export declare class TypeAwareHNSWIndex {
43
43
  private distanceFunction;
44
44
  private storage;
45
45
  private useParallelization;
46
+ private persistMode;
46
47
  /**
47
48
  * Create a new TypeAwareHNSWIndex
48
49
  *
49
50
  * @param config HNSW configuration (M, efConstruction, efSearch, ml)
50
51
  * @param distanceFunction Distance function (default: euclidean)
51
- * @param options Additional options (storage, parallelization)
52
+ * @param options Additional options (storage, parallelization, persistMode)
52
53
  */
53
54
  constructor(config?: Partial<HNSWConfig>, distanceFunction?: DistanceFunction, options?: {
54
55
  useParallelization?: boolean;
55
56
  storage?: BaseStorage;
57
+ persistMode?: 'immediate' | 'deferred';
56
58
  });
57
59
  /**
58
60
  * Enable COW (Copy-on-Write) mode - Instant fork via shallow copy
@@ -63,6 +65,24 @@ export declare class TypeAwareHNSWIndex {
63
65
  * @param parent - Parent TypeAwareHNSWIndex to copy from
64
66
  */
65
67
  enableCOW(parent: TypeAwareHNSWIndex): void;
68
+ /**
69
+ * v6.2.8: Flush dirty HNSW data to storage for all type-specific indexes
70
+ *
71
+ * In deferred persistence mode, HNSW connections are tracked as dirty but not
72
+ * immediately persisted. Call flush() to persist all pending changes across
73
+ * all type-specific indexes.
74
+ *
75
+ * @returns Total number of nodes flushed across all indexes
76
+ */
77
+ flush(): Promise<number>;
78
+ /**
79
+ * Get the total number of dirty (unpersisted) nodes across all type-specific indexes
80
+ */
81
+ getDirtyNodeCount(): number;
82
+ /**
83
+ * Get the current persist mode
84
+ */
85
+ getPersistMode(): 'immediate' | 'deferred';
66
86
  /**
67
87
  * Get or create HNSW index for a specific type (lazy initialization)
68
88
  *
@@ -35,7 +35,7 @@ export class TypeAwareHNSWIndex {
35
35
  *
36
36
  * @param config HNSW configuration (M, efConstruction, efSearch, ml)
37
37
  * @param distanceFunction Distance function (default: euclidean)
38
- * @param options Additional options (storage, parallelization)
38
+ * @param options Additional options (storage, parallelization, persistMode)
39
39
  */
40
40
  constructor(config = {}, distanceFunction = euclideanDistance, options = {}) {
41
41
  // One HNSW index per noun type (lazy initialization)
@@ -47,6 +47,7 @@ export class TypeAwareHNSWIndex {
47
47
  options.useParallelization !== undefined
48
48
  ? options.useParallelization
49
49
  : true;
50
+ this.persistMode = options.persistMode || 'immediate';
50
51
  prodLog.info('TypeAwareHNSWIndex initialized (Phase 2: Type-Aware HNSW)');
51
52
  }
52
53
  /**
@@ -64,13 +65,51 @@ export class TypeAwareHNSWIndex {
64
65
  for (const [type, parentIndex] of parent.indexes.entries()) {
65
66
  const childIndex = new HNSWIndex(this.config, this.distanceFunction, {
66
67
  useParallelization: this.useParallelization,
67
- storage: this.storage || undefined
68
+ storage: this.storage || undefined,
69
+ persistMode: this.persistMode
68
70
  });
69
71
  childIndex.enableCOW(parentIndex);
70
72
  this.indexes.set(type, childIndex);
71
73
  }
72
74
  prodLog.info(`TypeAwareHNSWIndex COW enabled: ${parent.indexes.size} type-specific indexes shallow copied`);
73
75
  }
76
+ /**
77
+ * v6.2.8: Flush dirty HNSW data to storage for all type-specific indexes
78
+ *
79
+ * In deferred persistence mode, HNSW connections are tracked as dirty but not
80
+ * immediately persisted. Call flush() to persist all pending changes across
81
+ * all type-specific indexes.
82
+ *
83
+ * @returns Total number of nodes flushed across all indexes
84
+ */
85
+ async flush() {
86
+ if (this.indexes.size === 0) {
87
+ return 0;
88
+ }
89
+ const flushPromises = Array.from(this.indexes.values()).map(index => index.flush());
90
+ const results = await Promise.all(flushPromises);
91
+ const totalFlushed = results.reduce((sum, count) => sum + count, 0);
92
+ if (totalFlushed > 0) {
93
+ prodLog.info(`[TypeAwareHNSW] Flushed ${totalFlushed} dirty nodes across ${this.indexes.size} type indexes`);
94
+ }
95
+ return totalFlushed;
96
+ }
97
+ /**
98
+ * Get the total number of dirty (unpersisted) nodes across all type-specific indexes
99
+ */
100
+ getDirtyNodeCount() {
101
+ let total = 0;
102
+ for (const index of this.indexes.values()) {
103
+ total += index.getDirtyNodeCount();
104
+ }
105
+ return total;
106
+ }
107
+ /**
108
+ * Get the current persist mode
109
+ */
110
+ getPersistMode() {
111
+ return this.persistMode;
112
+ }
74
113
  /**
75
114
  * Get or create HNSW index for a specific type (lazy initialization)
76
115
  *
@@ -90,7 +129,8 @@ export class TypeAwareHNSWIndex {
90
129
  prodLog.info(`Creating HNSW index for type: ${type}`);
91
130
  const index = new HNSWIndex(this.config, this.distanceFunction, {
92
131
  useParallelization: this.useParallelization,
93
- storage: this.storage || undefined
132
+ storage: this.storage || undefined,
133
+ persistMode: this.persistMode
94
134
  });
95
135
  this.indexes.set(type, index);
96
136
  }
@@ -552,6 +552,7 @@ export interface BrainyConfig {
552
552
  disableAutoOptimize?: boolean;
553
553
  batchWrites?: boolean;
554
554
  maxConcurrentOperations?: number;
555
+ hnswPersistMode?: 'immediate' | 'deferred';
555
556
  maxQueryLimit?: number;
556
557
  reservedQueryMemory?: number;
557
558
  verbose?: boolean;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@soulcraft/brainy",
3
- "version": "6.2.7",
3
+ "version": "6.2.8",
4
4
  "description": "Universal Knowledge Protocol™ - World's first Triple Intelligence database unifying vector, graph, and document search in one API. Stage 3 CANONICAL: 42 nouns × 127 verbs covering 96-97% of all human knowledge.",
5
5
  "main": "dist/index.js",
6
6
  "module": "dist/index.js",