@soulcraft/brainy 3.34.0 → 3.35.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,186 @@
1
+ /**
2
+ * Unified Index Interface (v3.35.0+)
3
+ *
4
+ * Standardizes index lifecycle across all index types in Brainy.
5
+ * All indexes (HNSW Vector, Graph Adjacency, Metadata Field) implement this interface
6
+ * for consistent rebuild, clear, and stats operations.
7
+ *
8
+ * This enables:
9
+ * - Parallel index rebuilds during initialization
10
+ * - Consistent index management across the system
11
+ * - Easy addition of new index types
12
+ * - Unified monitoring and health checks
13
+ */
/**
 * Snapshot of an index's state, as returned by getStats().
 */
export interface IndexStats {
    /** Count of items currently held in the index. */
    totalItems: number;
    /** Estimated memory footprint in bytes; may be omitted. */
    memoryUsage?: number;
    /** Timestamp of the most recent rebuild; may be omitted. */
    lastRebuilt?: number;
    /**
     * Free-form metrics that depend on the index type; may be omitted.
     * - HNSW: { maxLevel, entryPointId, levels, avgDegree }
     * - Graph: { totalRelationships, verbTypes }
     * - Metadata: { totalFields, totalEntries }
     */
    specifics?: {
        [key: string]: any;
    };
}
/**
 * Signature of the progress hook used by rebuild operations.
 * It receives the number loaded so far and the total count.
 */
export type RebuildProgressCallback = {
    (loaded: number, total: number): void;
};
/**
 * Settings accepted when rebuilding an index. Every field is optional.
 */
export interface RebuildOptions {
    /**
     * Lazy mode: load only the structure and fetch data on demand.
     * Trades lower memory use for latency on first access.
     * (HNSW: vectors loaded on-demand, Graph: relationships cached, Metadata: lazy field indexing)
     */
    lazy?: boolean;
    /**
     * Page size used while paginating through storage during a rebuild.
     * Default: 1000 (tune based on available memory).
     */
    batchSize?: number;
    /**
     * Hook for observing rebuild progress.
     * Called periodically with (loaded, total) counts.
     */
    onProgress?: RebuildProgressCallback;
    /**
     * Rebuild even when the index already appears populated.
     * Useful for repairing corrupted indexes.
     */
    force?: boolean;
}
/**
 * Unified Index Interface
 *
 * The common lifecycle contract implemented by every index in Brainy, so that
 * indexes can be rebuilt in parallel, monitored uniformly, and managed through
 * the same set of operations.
 */
export interface IIndex {
    /**
     * Rebuild the index from persisted storage.
     *
     * Invoked during Brainy initialization when:
     * - the container restarted and in-memory indexes are empty
     * - storage holds persisted data but the indexes need rebuilding
     * - a forced rebuild was requested
     *
     * Implementations must:
     * - clear existing in-memory state
     * - load data from storage using pagination
     * - restore the index structure efficiently (O(N) preferred over O(N log N))
     * - handle millions of entities via batching
     * - support lazy loading for memory-constrained environments
     * - provide progress reporting for large datasets
     * - recover gracefully from partial failures
     *
     * @param options Rebuild options (lazy mode, batch size, progress callback, force)
     * @returns Promise that resolves once the rebuild has completed
     * @throws Error if the rebuild fails critically (partial failures should be logged as warnings)
     */
    rebuild(options?: RebuildOptions): Promise<void>;
    /**
     * Drop all in-memory index data.
     *
     * Invoked when:
     * - the user explicitly calls brain.clear()
     * - the system needs to reset without rebuilding
     * - tests need a clean state
     *
     * Implementations must:
     * - clear all in-memory data structures
     * - reset counters and statistics
     * - NOT delete persisted storage data
     * - be idempotent (safe to call multiple times)
     *
     * Note: this is a memory-only operation. To delete persisted data,
     * use storage.clear() instead.
     */
    clear(): void;
    /**
     * Report current statistics for the index:
     * - total items indexed
     * - memory usage (if available)
     * - last rebuild timestamp
     * - index-specific metrics
     *
     * Intended for health monitoring, deciding whether a rebuild is needed,
     * performance analysis, and debugging.
     *
     * @returns Promise that resolves to the index statistics
     */
    getStats(): Promise<IndexStats>;
    /**
     * Report how many items the index currently holds.
     *
     * Expected to be a fast O(1) operation; used for quick health checks
     * and for choosing a rebuild strategy.
     *
     * @returns Number of items in the index
     */
    size(): number;
}
/**
 * Extended index interface with cache support (optional)
 *
 * Indexes can optionally implement cache integration for:
 * - Hot/warm/cold tier management
 * - Memory-efficient lazy loading
 * - Adaptive caching based on access patterns
 */
export interface ICachedIndex extends IIndex {
    /**
     * Set cache for resource management
     *
     * Enables the index to use UnifiedCache for:
     * - Lazy loading of vectors/data
     * - Hot/warm/cold tier management
     * - Memory pressure handling
     *
     * The parameter is typed `unknown` instead of `any` so the interface does
     * not leak an unchecked type to callers. Implementations may still declare
     * the concrete cache type they accept, because method parameters are
     * compared bivariantly.
     *
     * @param cache UnifiedCache instance
     */
    setCache?(cache: unknown): void;
}
/**
 * Optional persistence-aware extension of IIndex.
 *
 * An index may implement explicit persistence to support:
 * - manually triggered saves
 * - batch write optimization
 * - checkpoint creation
 */
export interface IPersistentIndex extends IIndex {
    /**
     * Write the current index state to storage on demand.
     *
     * Most indexes auto-persist during operations (e.g., HNSW persists on addItem).
     * This method exists for explicit persistence, such as:
     * - checkpointing before risky operations
     * - a forced flush before shutdown
     * - manual backup creation
     *
     * @returns Promise that resolves once persistence has completed
     */
    persist?(): Promise<void>;
}
@@ -0,0 +1,15 @@
1
+ /**
2
+ * Unified Index Interface (v3.35.0+)
3
+ *
4
+ * Standardizes index lifecycle across all index types in Brainy.
5
+ * All indexes (HNSW Vector, Graph Adjacency, Metadata Field) implement this interface
6
+ * for consistent rebuild, clear, and stats operations.
7
+ *
8
+ * This enables:
9
+ * - Parallel index rebuilds during initialization
10
+ * - Consistent index management across the system
11
+ * - Easy addition of new index types
12
+ * - Unified monitoring and health checks
13
+ */
14
+ export {};
15
+ //# sourceMappingURL=IIndex.js.map
@@ -23,6 +23,23 @@ export declare abstract class BaseStorageAdapter implements StorageAdapter {
23
23
  abstract getNounMetadata(id: string): Promise<any | null>;
24
24
  abstract saveVerbMetadata(id: string, metadata: any): Promise<void>;
25
25
  abstract getVerbMetadata(id: string): Promise<any | null>;
/**
 * Get the vector for a noun.
 * @param id Noun identifier
 * @returns Promise of the vector as a number array, or null
 */
abstract getNounVector(id: string): Promise<number[] | null>;
/**
 * Save HNSW graph data (level and connections) for a noun.
 * @param nounId Noun identifier
 * @param hnswData Level and connections to store
 */
abstract saveHNSWData(nounId: string, hnswData: { level: number; connections: Record<string, string[]> }): Promise<void>;
/**
 * Get HNSW graph data for a noun.
 * @param nounId Noun identifier
 * @returns Promise of the stored level and connections, or null
 */
abstract getHNSWData(nounId: string): Promise<{ level: number; connections: Record<string, string[]> } | null>;
/**
 * Save HNSW system data (entry point id and max level).
 * @param systemData Entry point id (may be null) and max level
 */
abstract saveHNSWSystem(systemData: { entryPointId: string | null; maxLevel: number }): Promise<void>;
/**
 * Get HNSW system data.
 * @returns Promise of the stored entry point id and max level, or null
 */
abstract getHNSWSystem(): Promise<{ entryPointId: string | null; maxLevel: number } | null>;
26
43
  abstract clear(): Promise<void>;
27
44
  abstract getStorageStatus(): Promise<{
28
45
  type: string;
@@ -361,5 +361,37 @@ export declare class FileSystemStorage extends BaseStorage {
361
361
  * Check if a file exists (handles both sharded and non-sharded)
362
362
  */
363
363
  private fileExists;
/**
 * Get the vector for a noun.
 * @param id Noun identifier
 * @returns Promise of the vector as a number array, or null
 */
getNounVector(id: string): Promise<number[] | null>;
/**
 * Save HNSW graph data for a noun.
 * @param nounId Noun identifier
 * @param hnswData Level and connections to store
 */
saveHNSWData(nounId: string, hnswData: { level: number; connections: Record<string, string[]> }): Promise<void>;
/**
 * Get HNSW graph data for a noun.
 * @param nounId Noun identifier
 * @returns Promise of the stored level and connections, or null
 */
getHNSWData(nounId: string): Promise<{ level: number; connections: Record<string, string[]> } | null>;
/**
 * Save HNSW system data (entry point, max level).
 * @param systemData Entry point id (may be null) and max level
 */
saveHNSWSystem(systemData: { entryPointId: string | null; maxLevel: number }): Promise<void>;
/**
 * Get HNSW system data.
 * @returns Promise of the stored entry point id and max level, or null
 */
getHNSWSystem(): Promise<{ entryPointId: string | null; maxLevel: number } | null>;
364
396
  }
365
397
  export {};
@@ -2108,5 +2108,71 @@ export class FileSystemStorage extends BaseStorage {
2108
2108
  return false;
2109
2109
  }
2110
2110
  }
2111
+ // =============================================
2112
+ // HNSW Index Persistence (v3.35.0+)
2113
+ // =============================================
2114
+ /**
2115
+ * Get vector for a noun
2116
+ */
2117
+ async getNounVector(id) {
2118
+ await this.ensureInitialized();
2119
+ const noun = await this.getNode(id);
2120
+ return noun ? noun.vector : null;
2121
+ }
2122
+ /**
2123
+ * Save HNSW graph data for a noun
2124
+ */
2125
+ async saveHNSWData(nounId, hnswData) {
2126
+ await this.ensureInitialized();
2127
+ // Use sharded path for HNSW data
2128
+ const shard = nounId.substring(0, 2).toLowerCase();
2129
+ const hnswDir = path.join(this.rootDir, 'entities', 'nouns', 'hnsw', shard);
2130
+ await this.ensureDirectoryExists(hnswDir);
2131
+ const filePath = path.join(hnswDir, `${nounId}.json`);
2132
+ await fs.promises.writeFile(filePath, JSON.stringify(hnswData, null, 2));
2133
+ }
2134
+ /**
2135
+ * Get HNSW graph data for a noun
2136
+ */
2137
+ async getHNSWData(nounId) {
2138
+ await this.ensureInitialized();
2139
+ const shard = nounId.substring(0, 2).toLowerCase();
2140
+ const filePath = path.join(this.rootDir, 'entities', 'nouns', 'hnsw', shard, `${nounId}.json`);
2141
+ try {
2142
+ const data = await fs.promises.readFile(filePath, 'utf-8');
2143
+ return JSON.parse(data);
2144
+ }
2145
+ catch (error) {
2146
+ if (error.code !== 'ENOENT') {
2147
+ console.error(`Error reading HNSW data for ${nounId}:`, error);
2148
+ }
2149
+ return null;
2150
+ }
2151
+ }
2152
+ /**
2153
+ * Save HNSW system data (entry point, max level)
2154
+ */
2155
+ async saveHNSWSystem(systemData) {
2156
+ await this.ensureInitialized();
2157
+ const filePath = path.join(this.systemDir, 'hnsw-system.json');
2158
+ await fs.promises.writeFile(filePath, JSON.stringify(systemData, null, 2));
2159
+ }
2160
+ /**
2161
+ * Get HNSW system data
2162
+ */
2163
+ async getHNSWSystem() {
2164
+ await this.ensureInitialized();
2165
+ const filePath = path.join(this.systemDir, 'hnsw-system.json');
2166
+ try {
2167
+ const data = await fs.promises.readFile(filePath, 'utf-8');
2168
+ return JSON.parse(data);
2169
+ }
2170
+ catch (error) {
2171
+ if (error.code !== 'ENOENT') {
2172
+ console.error('Error reading HNSW system data:', error);
2173
+ }
2174
+ return null;
2175
+ }
2176
+ }
2111
2177
  }
2112
2178
  //# sourceMappingURL=fileSystemStorage.js.map
@@ -339,5 +339,41 @@ export declare class GcsStorage extends BaseStorage {
339
339
  * Persist counts to storage
340
340
  */
341
341
  protected persistCounts(): Promise<void>;
/**
 * Get a noun's vector for HNSW rebuild.
 * @param id Noun identifier
 * @returns Promise of the vector as a number array, or null
 */
getNounVector(id: string): Promise<number[] | null>;
/**
 * Save HNSW graph data for a noun.
 * Storage path: entities/nouns/hnsw/{shard}/{id}.json
 * @param nounId Noun identifier
 * @param hnswData Level and connections to store
 */
saveHNSWData(nounId: string, hnswData: { level: number; connections: Record<string, string[]> }): Promise<void>;
/**
 * Get HNSW graph data for a noun.
 * Storage path: entities/nouns/hnsw/{shard}/{id}.json
 * @param nounId Noun identifier
 * @returns Promise of the stored level and connections, or null
 */
getHNSWData(nounId: string): Promise<{ level: number; connections: Record<string, string[]> } | null>;
/**
 * Save HNSW system data (entry point, max level).
 * Storage path: system/hnsw-system.json
 * @param systemData Entry point id (may be null) and max level
 */
saveHNSWSystem(systemData: { entryPointId: string | null; maxLevel: number }): Promise<void>;
/**
 * Get HNSW system data (entry point, max level).
 * Storage path: system/hnsw-system.json
 * @returns Promise of the stored entry point id and max level, or null
 */
getHNSWSystem(): Promise<{ entryPointId: string | null; maxLevel: number } | null>;
342
378
  }
343
379
  export {};
@@ -1195,5 +1195,95 @@ export class GcsStorage extends BaseStorage {
1195
1195
  this.logger.error('Error persisting counts:', error);
1196
1196
  }
1197
1197
  }
1198
+ // HNSW Index Persistence (v3.35.0+)
1199
+ /**
1200
+ * Get a noun's vector for HNSW rebuild
1201
+ */
1202
+ async getNounVector(id) {
1203
+ await this.ensureInitialized();
1204
+ const noun = await this.getNode(id);
1205
+ return noun ? noun.vector : null;
1206
+ }
1207
+ /**
1208
+ * Save HNSW graph data for a noun
1209
+ * Storage path: entities/nouns/hnsw/{shard}/{id}.json
1210
+ */
1211
+ async saveHNSWData(nounId, hnswData) {
1212
+ await this.ensureInitialized();
1213
+ try {
1214
+ // Use sharded path for HNSW data
1215
+ const shard = getShardIdFromUuid(nounId);
1216
+ const key = `entities/nouns/hnsw/${shard}/${nounId}.json`;
1217
+ const file = this.bucket.file(key);
1218
+ await file.save(JSON.stringify(hnswData, null, 2), {
1219
+ contentType: 'application/json',
1220
+ resumable: false
1221
+ });
1222
+ }
1223
+ catch (error) {
1224
+ this.logger.error(`Failed to save HNSW data for ${nounId}:`, error);
1225
+ throw new Error(`Failed to save HNSW data for ${nounId}: ${error}`);
1226
+ }
1227
+ }
1228
+ /**
1229
+ * Get HNSW graph data for a noun
1230
+ * Storage path: entities/nouns/hnsw/{shard}/{id}.json
1231
+ */
1232
+ async getHNSWData(nounId) {
1233
+ await this.ensureInitialized();
1234
+ try {
1235
+ const shard = getShardIdFromUuid(nounId);
1236
+ const key = `entities/nouns/hnsw/${shard}/${nounId}.json`;
1237
+ const file = this.bucket.file(key);
1238
+ const [contents] = await file.download();
1239
+ return JSON.parse(contents.toString());
1240
+ }
1241
+ catch (error) {
1242
+ if (error.code === 404) {
1243
+ return null;
1244
+ }
1245
+ this.logger.error(`Failed to get HNSW data for ${nounId}:`, error);
1246
+ throw new Error(`Failed to get HNSW data for ${nounId}: ${error}`);
1247
+ }
1248
+ }
1249
+ /**
1250
+ * Save HNSW system data (entry point, max level)
1251
+ * Storage path: system/hnsw-system.json
1252
+ */
1253
+ async saveHNSWSystem(systemData) {
1254
+ await this.ensureInitialized();
1255
+ try {
1256
+ const key = `${this.systemPrefix}hnsw-system.json`;
1257
+ const file = this.bucket.file(key);
1258
+ await file.save(JSON.stringify(systemData, null, 2), {
1259
+ contentType: 'application/json',
1260
+ resumable: false
1261
+ });
1262
+ }
1263
+ catch (error) {
1264
+ this.logger.error('Failed to save HNSW system data:', error);
1265
+ throw new Error(`Failed to save HNSW system data: ${error}`);
1266
+ }
1267
+ }
1268
+ /**
1269
+ * Get HNSW system data (entry point, max level)
1270
+ * Storage path: system/hnsw-system.json
1271
+ */
1272
+ async getHNSWSystem() {
1273
+ await this.ensureInitialized();
1274
+ try {
1275
+ const key = `${this.systemPrefix}hnsw-system.json`;
1276
+ const file = this.bucket.file(key);
1277
+ const [contents] = await file.download();
1278
+ return JSON.parse(contents.toString());
1279
+ }
1280
+ catch (error) {
1281
+ if (error.code === 404) {
1282
+ return null;
1283
+ }
1284
+ this.logger.error('Failed to get HNSW system data:', error);
1285
+ throw new Error(`Failed to get HNSW system data: ${error}`);
1286
+ }
1287
+ }
1198
1288
  }
1199
1289
  //# sourceMappingURL=gcsStorage.js.map
@@ -174,4 +174,36 @@ export declare class MemoryStorage extends BaseStorage {
174
174
  * Persist counts to storage - no-op for memory storage
175
175
  */
176
176
  protected persistCounts(): Promise<void>;
/**
 * Get the vector for a noun.
 * @param id Noun identifier
 * @returns Promise of the vector as a number array, or null
 */
getNounVector(id: string): Promise<number[] | null>;
/**
 * Save HNSW graph data for a noun.
 * @param nounId Noun identifier
 * @param hnswData Level and connections to store
 */
saveHNSWData(nounId: string, hnswData: { level: number; connections: Record<string, string[]> }): Promise<void>;
/**
 * Get HNSW graph data for a noun.
 * @param nounId Noun identifier
 * @returns Promise of the stored level and connections, or null
 */
getHNSWData(nounId: string): Promise<{ level: number; connections: Record<string, string[]> } | null>;
/**
 * Save HNSW system data (entry point, max level).
 * @param systemData Entry point id (may be null) and max level
 */
saveHNSWSystem(systemData: { entryPointId: string | null; maxLevel: number }): Promise<void>;
/**
 * Get HNSW system data.
 * @returns Promise of the stored entry point id and max level, or null
 */
getHNSWSystem(): Promise<{ entryPointId: string | null; maxLevel: number } | null>;
177
209
  }
@@ -595,5 +595,48 @@ export class MemoryStorage extends BaseStorage {
595
595
  // No persistence needed for in-memory storage
596
596
  // Counts are always accurate from the live data structures
597
597
  }
598
+ // =============================================
599
+ // HNSW Index Persistence (v3.35.0+)
600
+ // =============================================
601
+ /**
602
+ * Get vector for a noun
603
+ */
604
+ async getNounVector(id) {
605
+ const noun = this.nouns.get(id);
606
+ return noun ? [...noun.vector] : null;
607
+ }
608
+ /**
609
+ * Save HNSW graph data for a noun
610
+ */
611
+ async saveHNSWData(nounId, hnswData) {
612
+ // For memory storage, HNSW data is already in the noun object
613
+ // This method is a no-op since saveNoun already stores the full graph
614
+ // But we store it separately for consistency with other adapters
615
+ const path = `hnsw/${nounId}.json`;
616
+ await this.writeObjectToPath(path, hnswData);
617
+ }
618
+ /**
619
+ * Get HNSW graph data for a noun
620
+ */
621
+ async getHNSWData(nounId) {
622
+ const path = `hnsw/${nounId}.json`;
623
+ const data = await this.readObjectFromPath(path);
624
+ return data || null;
625
+ }
626
+ /**
627
+ * Save HNSW system data (entry point, max level)
628
+ */
629
+ async saveHNSWSystem(systemData) {
630
+ const path = 'system/hnsw-system.json';
631
+ await this.writeObjectToPath(path, systemData);
632
+ }
633
+ /**
634
+ * Get HNSW system data
635
+ */
636
+ async getHNSWSystem() {
637
+ const path = 'system/hnsw-system.json';
638
+ const data = await this.readObjectFromPath(path);
639
+ return data || null;
640
+ }
598
641
  }
599
642
  //# sourceMappingURL=memoryStorage.js.map
@@ -262,5 +262,41 @@ export declare class OPFSStorage extends BaseStorage {
262
262
  * Persist counts to OPFS storage
263
263
  */
264
264
  protected persistCounts(): Promise<void>;
/**
 * Get a noun's vector for HNSW rebuild.
 * @param id Noun identifier
 * @returns Promise of the vector as a number array, or null
 */
getNounVector(id: string): Promise<number[] | null>;
/**
 * Save HNSW graph data for a noun.
 * Storage path: nouns/hnsw/{shard}/{id}.json
 * @param nounId Noun identifier
 * @param hnswData Level and connections to store
 */
saveHNSWData(nounId: string, hnswData: { level: number; connections: Record<string, string[]> }): Promise<void>;
/**
 * Get HNSW graph data for a noun.
 * Storage path: nouns/hnsw/{shard}/{id}.json
 * @param nounId Noun identifier
 * @returns Promise of the stored level and connections, or null
 */
getHNSWData(nounId: string): Promise<{ level: number; connections: Record<string, string[]> } | null>;
/**
 * Save HNSW system data (entry point, max level).
 * Storage path: index/hnsw-system.json
 * @param systemData Entry point id (may be null) and max level
 */
saveHNSWSystem(systemData: { entryPointId: string | null; maxLevel: number }): Promise<void>;
/**
 * Get HNSW system data (entry point, max level).
 * Storage path: index/hnsw-system.json
 * @returns Promise of the stored entry point id and max level, or null
 */
getHNSWSystem(): Promise<{ entryPointId: string | null; maxLevel: number } | null>;
265
301
  }
266
302
  export {};