@soulcraft/brainy 3.34.0 → 3.36.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +63 -0
- package/README.md +32 -12
- package/dist/brainy.d.ts +15 -0
- package/dist/brainy.js +63 -34
- package/dist/hnsw/hnswIndex.d.ts +128 -1
- package/dist/hnsw/hnswIndex.js +411 -17
- package/dist/hnsw/hnswIndexOptimized.d.ts +3 -15
- package/dist/hnsw/hnswIndexOptimized.js +11 -42
- package/dist/hnsw/partitionedHNSWIndex.js +1 -1
- package/dist/interfaces/IIndex.d.ts +195 -0
- package/dist/interfaces/IIndex.js +15 -0
- package/dist/storage/adapters/baseStorageAdapter.d.ts +17 -0
- package/dist/storage/adapters/fileSystemStorage.d.ts +32 -0
- package/dist/storage/adapters/fileSystemStorage.js +66 -0
- package/dist/storage/adapters/gcsStorage.d.ts +36 -0
- package/dist/storage/adapters/gcsStorage.js +90 -0
- package/dist/storage/adapters/memoryStorage.d.ts +32 -0
- package/dist/storage/adapters/memoryStorage.js +43 -0
- package/dist/storage/adapters/opfsStorage.d.ts +36 -0
- package/dist/storage/adapters/opfsStorage.js +101 -0
- package/dist/storage/adapters/s3CompatibleStorage.d.ts +36 -0
- package/dist/storage/adapters/s3CompatibleStorage.js +112 -0
- package/dist/utils/memoryDetection.d.ts +119 -0
- package/dist/utils/memoryDetection.js +321 -0
- package/dist/utils/unifiedCache.d.ts +75 -1
- package/dist/utils/unifiedCache.js +123 -4
- package/package.json +1 -1
|
@@ -4,7 +4,6 @@
|
|
|
4
4
|
* Uses product quantization for dimensionality reduction and disk-based storage when needed
|
|
5
5
|
*/
|
|
6
6
|
import { HNSWIndex } from './hnswIndex.js';
|
|
7
|
-
import { getGlobalCache } from '../utils/unifiedCache.js';
|
|
8
7
|
// Default configuration for the optimized HNSW index
|
|
9
8
|
const DEFAULT_OPTIMIZED_CONFIG = {
|
|
10
9
|
M: 16,
|
|
@@ -211,10 +210,9 @@ class ProductQuantizer {
|
|
|
211
210
|
*/
|
|
212
211
|
export class HNSWIndexOptimized extends HNSWIndex {
|
|
213
212
|
constructor(config = {}, distanceFunction, storage = null) {
|
|
214
|
-
// Initialize base HNSW index with standard config
|
|
215
|
-
super(config, distanceFunction);
|
|
213
|
+
// Initialize base HNSW index with standard config and storage
|
|
214
|
+
super(config, distanceFunction, { storage: storage || undefined });
|
|
216
215
|
this.productQuantizer = null;
|
|
217
|
-
this.storage = null;
|
|
218
216
|
this.useDiskBasedIndex = false;
|
|
219
217
|
this.useProductQuantization = false;
|
|
220
218
|
this.quantizedVectors = new Map();
|
|
@@ -224,8 +222,6 @@ export class HNSWIndexOptimized extends HNSWIndex {
|
|
|
224
222
|
this.memoryUpdateLock = Promise.resolve();
|
|
225
223
|
// Set optimized config
|
|
226
224
|
this.optimizedConfig = { ...DEFAULT_OPTIMIZED_CONFIG, ...config };
|
|
227
|
-
// Set storage adapter
|
|
228
|
-
this.storage = storage;
|
|
229
225
|
// Initialize product quantizer if enabled
|
|
230
226
|
if (this.optimizedConfig.productQuantization?.enabled) {
|
|
231
227
|
this.useProductQuantization = true;
|
|
@@ -233,8 +229,7 @@ export class HNSWIndexOptimized extends HNSWIndex {
|
|
|
233
229
|
}
|
|
234
230
|
// Set disk-based index flag
|
|
235
231
|
this.useDiskBasedIndex = this.optimizedConfig.useDiskBasedIndex || false;
|
|
236
|
-
//
|
|
237
|
-
this.unifiedCache = getGlobalCache();
|
|
232
|
+
// Note: UnifiedCache is inherited from base HNSWIndex class
|
|
238
233
|
}
|
|
239
234
|
/**
|
|
240
235
|
* Thread-safe method to update memory usage
|
|
@@ -302,18 +297,9 @@ export class HNSWIndexOptimized extends HNSWIndex {
|
|
|
302
297
|
return await super.addItem({ id, vector: reconstructedVector });
|
|
303
298
|
}
|
|
304
299
|
// If disk-based index is active and storage is available, store the vector
|
|
305
|
-
if (this.useDiskBasedIndex
|
|
306
|
-
//
|
|
307
|
-
|
|
308
|
-
id,
|
|
309
|
-
vector,
|
|
310
|
-
connections: new Map(),
|
|
311
|
-
level: 0
|
|
312
|
-
};
|
|
313
|
-
// Store the noun
|
|
314
|
-
this.storage.saveNoun(noun).catch((error) => {
|
|
315
|
-
console.error(`Failed to save noun ${id} to storage:`, error);
|
|
316
|
-
});
|
|
300
|
+
if (this.useDiskBasedIndex) {
|
|
301
|
+
// Storage is handled by the base class now via HNSW persistence
|
|
302
|
+
// No additional storage needed here
|
|
317
303
|
}
|
|
318
304
|
// Add the vector to the in-memory index
|
|
319
305
|
return await super.addItem(item);
|
|
@@ -344,17 +330,13 @@ export class HNSWIndexOptimized extends HNSWIndex {
|
|
|
344
330
|
/**
|
|
345
331
|
* Remove an item from the index
|
|
346
332
|
*/
|
|
347
|
-
removeItem(id) {
|
|
333
|
+
async removeItem(id) {
|
|
348
334
|
// If product quantization is active, remove the quantized vector
|
|
349
335
|
if (this.useProductQuantization) {
|
|
350
336
|
this.quantizedVectors.delete(id);
|
|
351
337
|
}
|
|
352
|
-
// If disk-based index is active
|
|
353
|
-
|
|
354
|
-
this.storage.deleteNoun(id).catch((error) => {
|
|
355
|
-
console.error(`Failed to delete noun ${id} from storage:`, error);
|
|
356
|
-
});
|
|
357
|
-
}
|
|
338
|
+
// If disk-based index is active, removal is handled by base class
|
|
339
|
+
// No additional removal needed here
|
|
358
340
|
// Update memory usage estimate (async operation, but don't block removal)
|
|
359
341
|
this.getMemoryUsageAsync().then((currentMemoryUsage) => {
|
|
360
342
|
if (currentMemoryUsage.vectorCount > 0) {
|
|
@@ -365,7 +347,7 @@ export class HNSWIndexOptimized extends HNSWIndex {
|
|
|
365
347
|
console.error('Failed to update memory usage after removal:', error);
|
|
366
348
|
});
|
|
367
349
|
// Remove the item from the in-memory index
|
|
368
|
-
return super.removeItem(id);
|
|
350
|
+
return await super.removeItem(id);
|
|
369
351
|
}
|
|
370
352
|
/**
|
|
371
353
|
* Clear the index
|
|
@@ -428,20 +410,7 @@ export class HNSWIndexOptimized extends HNSWIndex {
|
|
|
428
410
|
getMemoryUsage() {
|
|
429
411
|
return this.memoryUsage;
|
|
430
412
|
}
|
|
431
|
-
|
|
432
|
-
* Set the storage adapter
|
|
433
|
-
* @param storage Storage adapter
|
|
434
|
-
*/
|
|
435
|
-
setStorage(storage) {
|
|
436
|
-
this.storage = storage;
|
|
437
|
-
}
|
|
438
|
-
/**
|
|
439
|
-
* Get the storage adapter
|
|
440
|
-
* @returns Storage adapter or null if not set
|
|
441
|
-
*/
|
|
442
|
-
getStorage() {
|
|
443
|
-
return this.storage;
|
|
444
|
-
}
|
|
413
|
+
// Storage methods removed - now handled by base class
|
|
445
414
|
/**
|
|
446
415
|
* Set whether to use disk-based index
|
|
447
416
|
* @param useDiskBasedIndex Whether to use disk-based index
|
|
@@ -274,7 +274,7 @@ export class PartitionedHNSWIndex {
|
|
|
274
274
|
async removeItem(id) {
|
|
275
275
|
// Find which partition contains this item
|
|
276
276
|
for (const [partitionId, partition] of this.partitions.entries()) {
|
|
277
|
-
if (partition.removeItem(id)) {
|
|
277
|
+
if (await partition.removeItem(id)) {
|
|
278
278
|
// Update metadata
|
|
279
279
|
const metadata = this.partitionMetadata.get(partitionId);
|
|
280
280
|
metadata.nodeCount = partition.size();
|
|
@@ -0,0 +1,195 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Unified Index Interface (v3.35.0+)
|
|
3
|
+
*
|
|
4
|
+
* Standardizes index lifecycle across all index types in Brainy.
|
|
5
|
+
* All indexes (HNSW Vector, Graph Adjacency, Metadata Field) implement this interface
|
|
6
|
+
* for consistent rebuild, clear, and stats operations.
|
|
7
|
+
*
|
|
8
|
+
* This enables:
|
|
9
|
+
* - Parallel index rebuilds during initialization
|
|
10
|
+
* - Consistent index management across the system
|
|
11
|
+
* - Easy addition of new index types
|
|
12
|
+
* - Unified monitoring and health checks
|
|
13
|
+
*/
|
|
14
|
+
/**
|
|
15
|
+
* Index statistics returned by getStats()
|
|
16
|
+
*/
|
|
17
|
+
export interface IndexStats {
|
|
18
|
+
/**
|
|
19
|
+
* Total number of items in the index
|
|
20
|
+
*/
|
|
21
|
+
totalItems: number;
|
|
22
|
+
/**
|
|
23
|
+
* Estimated memory usage in bytes (optional)
|
|
24
|
+
*/
|
|
25
|
+
memoryUsage?: number;
|
|
26
|
+
/**
|
|
27
|
+
* Timestamp of last rebuild (optional)
|
|
28
|
+
*/
|
|
29
|
+
lastRebuilt?: number;
|
|
30
|
+
/**
|
|
31
|
+
* Index-specific statistics (optional)
|
|
32
|
+
* - HNSW: { maxLevel, entryPointId, levels, avgDegree }
|
|
33
|
+
* - Graph: { totalRelationships, verbTypes }
|
|
34
|
+
* - Metadata: { totalFields, totalEntries }
|
|
35
|
+
*/
|
|
36
|
+
specifics?: Record<string, any>;
|
|
37
|
+
}
|
|
38
|
+
/**
|
|
39
|
+
* Progress callback for rebuild operations
|
|
40
|
+
* Reports current progress and total count
|
|
41
|
+
*/
|
|
42
|
+
export type RebuildProgressCallback = (loaded: number, total: number) => void;
|
|
43
|
+
/**
|
|
44
|
+
* Rebuild options for index rebuilding
|
|
45
|
+
*/
|
|
46
|
+
export interface RebuildOptions {
|
|
47
|
+
/**
|
|
48
|
+
* @deprecated Lazy mode is now auto-detected based on available memory.
|
|
49
|
+
* System automatically chooses between:
|
|
50
|
+
* - Preloading: Small datasets that fit comfortably in cache (< 80% threshold)
|
|
51
|
+
* - On-demand: Large datasets loaded adaptively via UnifiedCache
|
|
52
|
+
*
|
|
53
|
+
* This option is kept for backwards compatibility but is ignored.
|
|
54
|
+
* The system always uses adaptive caching (v3.36.0+).
|
|
55
|
+
*/
|
|
56
|
+
lazy?: boolean;
|
|
57
|
+
/**
|
|
58
|
+
* Batch size for pagination during rebuild
|
|
59
|
+
* Default: 1000 (tune based on available memory)
|
|
60
|
+
*/
|
|
61
|
+
batchSize?: number;
|
|
62
|
+
/**
|
|
63
|
+
* Progress callback for monitoring rebuild progress
|
|
64
|
+
* Called periodically with (loaded, total) counts
|
|
65
|
+
*/
|
|
66
|
+
onProgress?: RebuildProgressCallback;
|
|
67
|
+
/**
|
|
68
|
+
* Force rebuild even if index appears populated
|
|
69
|
+
* Useful for repairing corrupted indexes
|
|
70
|
+
*/
|
|
71
|
+
force?: boolean;
|
|
72
|
+
}
|
|
73
|
+
/**
|
|
74
|
+
* Unified Index Interface
|
|
75
|
+
*
|
|
76
|
+
* All indexes in Brainy implement this interface for consistent lifecycle management.
|
|
77
|
+
* This enables parallel rebuilds, unified monitoring, and standardized operations.
|
|
78
|
+
*/
|
|
79
|
+
export interface IIndex {
|
|
80
|
+
/**
|
|
81
|
+
* Rebuild index from persisted storage
|
|
82
|
+
*
|
|
83
|
+
* Called during Brainy initialization when:
|
|
84
|
+
* - Container restarts and in-memory indexes are empty
|
|
85
|
+
* - Storage has persisted data but indexes need rebuilding
|
|
86
|
+
* - Force rebuild is requested
|
|
87
|
+
*
|
|
88
|
+
* Implementation must:
|
|
89
|
+
* - Clear existing in-memory state
|
|
90
|
+
* - Load data from storage using pagination
|
|
91
|
+
* - Restore index structure efficiently (O(N) preferred over O(N log N))
|
|
92
|
+
* - Handle millions of entities via batching
|
|
93
|
+
* - Auto-detect caching strategy based on dataset size vs available memory
|
|
94
|
+
* - Provide progress reporting for large datasets
|
|
95
|
+
* - Recover gracefully from partial failures
|
|
96
|
+
*
|
|
97
|
+
* Adaptive Caching (v3.36.0+):
|
|
98
|
+
* System automatically chooses optimal strategy:
|
|
99
|
+
* - Small datasets: Preload all data at init for zero-latency access
|
|
100
|
+
* - Large datasets: Load on-demand via UnifiedCache for memory efficiency
|
|
101
|
+
*
|
|
102
|
+
* @param options Rebuild options (batch size, progress callback, force)
|
|
103
|
+
* @returns Promise that resolves when rebuild is complete
|
|
104
|
+
* @throws Error if rebuild fails critically (should log warnings for partial failures)
|
|
105
|
+
*/
|
|
106
|
+
rebuild(options?: RebuildOptions): Promise<void>;
|
|
107
|
+
/**
|
|
108
|
+
* Clear all in-memory index data
|
|
109
|
+
*
|
|
110
|
+
* Called when:
|
|
111
|
+
* - User explicitly calls brain.clear()
|
|
112
|
+
* - System needs to reset without rebuilding
|
|
113
|
+
* - Tests need clean state
|
|
114
|
+
*
|
|
115
|
+
* Implementation must:
|
|
116
|
+
* - Clear all in-memory data structures
|
|
117
|
+
* - Reset counters and statistics
|
|
118
|
+
* - NOT delete persisted storage data
|
|
119
|
+
* - Be idempotent (safe to call multiple times)
|
|
120
|
+
*
|
|
121
|
+
* Note: This is a memory-only operation. To delete persisted data,
|
|
122
|
+
* use storage.clear() instead.
|
|
123
|
+
*/
|
|
124
|
+
clear(): void;
|
|
125
|
+
/**
|
|
126
|
+
* Get current index statistics
|
|
127
|
+
*
|
|
128
|
+
* Returns real-time statistics about the index state:
|
|
129
|
+
* - Total items indexed
|
|
130
|
+
* - Memory usage (if available)
|
|
131
|
+
* - Last rebuild timestamp
|
|
132
|
+
* - Index-specific metrics
|
|
133
|
+
*
|
|
134
|
+
* Used for:
|
|
135
|
+
* - Health monitoring
|
|
136
|
+
* - Determining if rebuild is needed
|
|
137
|
+
* - Performance analysis
|
|
138
|
+
* - Debugging
|
|
139
|
+
*
|
|
140
|
+
* @returns Promise that resolves to index statistics
|
|
141
|
+
*/
|
|
142
|
+
getStats(): Promise<IndexStats>;
|
|
143
|
+
/**
|
|
144
|
+
* Get the current size of the index
|
|
145
|
+
*
|
|
146
|
+
* Fast O(1) operation returning the number of items in the index.
|
|
147
|
+
* Used for quick health checks and deciding rebuild strategy.
|
|
148
|
+
*
|
|
149
|
+
* @returns Number of items in the index
|
|
150
|
+
*/
|
|
151
|
+
size(): number;
|
|
152
|
+
}
|
|
153
|
+
/**
|
|
154
|
+
* Extended index interface with cache support (optional)
|
|
155
|
+
*
|
|
156
|
+
* Indexes can optionally implement cache integration for:
|
|
157
|
+
* - Hot/warm/cold tier management
|
|
158
|
+
* - Memory-efficient lazy loading
|
|
159
|
+
* - Adaptive caching based on access patterns
|
|
160
|
+
*/
|
|
161
|
+
export interface ICachedIndex extends IIndex {
|
|
162
|
+
/**
|
|
163
|
+
* Set cache for resource management
|
|
164
|
+
*
|
|
165
|
+
* Enables the index to use UnifiedCache for:
|
|
166
|
+
* - Lazy loading of vectors/data
|
|
167
|
+
* - Hot/warm/cold tier management
|
|
168
|
+
* - Memory pressure handling
|
|
169
|
+
*
|
|
170
|
+
* @param cache UnifiedCache instance
|
|
171
|
+
*/
|
|
172
|
+
setCache?(cache: any): void;
|
|
173
|
+
}
|
|
174
|
+
/**
|
|
175
|
+
* Extended index interface with persistence support (optional)
|
|
176
|
+
*
|
|
177
|
+
* Indexes can optionally implement explicit persistence:
|
|
178
|
+
* - Manual triggering of data saves
|
|
179
|
+
* - Batch write optimization
|
|
180
|
+
* - Checkpoint creation
|
|
181
|
+
*/
|
|
182
|
+
export interface IPersistentIndex extends IIndex {
|
|
183
|
+
/**
|
|
184
|
+
* Manually persist current index state to storage
|
|
185
|
+
*
|
|
186
|
+
* Most indexes auto-persist during operations (e.g., HNSW persists on addItem).
|
|
187
|
+
* This method allows explicit persistence for:
|
|
188
|
+
* - Checkpointing before risky operations
|
|
189
|
+
* - Forced flush before shutdown
|
|
190
|
+
* - Manual backup creation
|
|
191
|
+
*
|
|
192
|
+
* @returns Promise that resolves when persistence is complete
|
|
193
|
+
*/
|
|
194
|
+
persist?(): Promise<void>;
|
|
195
|
+
}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Unified Index Interface (v3.35.0+)
|
|
3
|
+
*
|
|
4
|
+
* Standardizes index lifecycle across all index types in Brainy.
|
|
5
|
+
* All indexes (HNSW Vector, Graph Adjacency, Metadata Field) implement this interface
|
|
6
|
+
* for consistent rebuild, clear, and stats operations.
|
|
7
|
+
*
|
|
8
|
+
* This enables:
|
|
9
|
+
* - Parallel index rebuilds during initialization
|
|
10
|
+
* - Consistent index management across the system
|
|
11
|
+
* - Easy addition of new index types
|
|
12
|
+
* - Unified monitoring and health checks
|
|
13
|
+
*/
|
|
14
|
+
export {};
|
|
15
|
+
//# sourceMappingURL=IIndex.js.map
|
|
@@ -23,6 +23,23 @@ export declare abstract class BaseStorageAdapter implements StorageAdapter {
|
|
|
23
23
|
abstract getNounMetadata(id: string): Promise<any | null>;
|
|
24
24
|
abstract saveVerbMetadata(id: string, metadata: any): Promise<void>;
|
|
25
25
|
abstract getVerbMetadata(id: string): Promise<any | null>;
|
|
26
|
+
abstract getNounVector(id: string): Promise<number[] | null>;
|
|
27
|
+
abstract saveHNSWData(nounId: string, hnswData: {
|
|
28
|
+
level: number;
|
|
29
|
+
connections: Record<string, string[]>;
|
|
30
|
+
}): Promise<void>;
|
|
31
|
+
abstract getHNSWData(nounId: string): Promise<{
|
|
32
|
+
level: number;
|
|
33
|
+
connections: Record<string, string[]>;
|
|
34
|
+
} | null>;
|
|
35
|
+
abstract saveHNSWSystem(systemData: {
|
|
36
|
+
entryPointId: string | null;
|
|
37
|
+
maxLevel: number;
|
|
38
|
+
}): Promise<void>;
|
|
39
|
+
abstract getHNSWSystem(): Promise<{
|
|
40
|
+
entryPointId: string | null;
|
|
41
|
+
maxLevel: number;
|
|
42
|
+
} | null>;
|
|
26
43
|
abstract clear(): Promise<void>;
|
|
27
44
|
abstract getStorageStatus(): Promise<{
|
|
28
45
|
type: string;
|
|
@@ -361,5 +361,37 @@ export declare class FileSystemStorage extends BaseStorage {
|
|
|
361
361
|
* Check if a file exists (handles both sharded and non-sharded)
|
|
362
362
|
*/
|
|
363
363
|
private fileExists;
|
|
364
|
+
/**
|
|
365
|
+
* Get vector for a noun
|
|
366
|
+
*/
|
|
367
|
+
getNounVector(id: string): Promise<number[] | null>;
|
|
368
|
+
/**
|
|
369
|
+
* Save HNSW graph data for a noun
|
|
370
|
+
*/
|
|
371
|
+
saveHNSWData(nounId: string, hnswData: {
|
|
372
|
+
level: number;
|
|
373
|
+
connections: Record<string, string[]>;
|
|
374
|
+
}): Promise<void>;
|
|
375
|
+
/**
|
|
376
|
+
* Get HNSW graph data for a noun
|
|
377
|
+
*/
|
|
378
|
+
getHNSWData(nounId: string): Promise<{
|
|
379
|
+
level: number;
|
|
380
|
+
connections: Record<string, string[]>;
|
|
381
|
+
} | null>;
|
|
382
|
+
/**
|
|
383
|
+
* Save HNSW system data (entry point, max level)
|
|
384
|
+
*/
|
|
385
|
+
saveHNSWSystem(systemData: {
|
|
386
|
+
entryPointId: string | null;
|
|
387
|
+
maxLevel: number;
|
|
388
|
+
}): Promise<void>;
|
|
389
|
+
/**
|
|
390
|
+
* Get HNSW system data
|
|
391
|
+
*/
|
|
392
|
+
getHNSWSystem(): Promise<{
|
|
393
|
+
entryPointId: string | null;
|
|
394
|
+
maxLevel: number;
|
|
395
|
+
} | null>;
|
|
364
396
|
}
|
|
365
397
|
export {};
|
|
@@ -2108,5 +2108,71 @@ export class FileSystemStorage extends BaseStorage {
|
|
|
2108
2108
|
return false;
|
|
2109
2109
|
}
|
|
2110
2110
|
}
|
|
2111
|
+
// =============================================
|
|
2112
|
+
// HNSW Index Persistence (v3.35.0+)
|
|
2113
|
+
// =============================================
|
|
2114
|
+
/**
|
|
2115
|
+
* Get vector for a noun
|
|
2116
|
+
*/
|
|
2117
|
+
async getNounVector(id) {
|
|
2118
|
+
await this.ensureInitialized();
|
|
2119
|
+
const noun = await this.getNode(id);
|
|
2120
|
+
return noun ? noun.vector : null;
|
|
2121
|
+
}
|
|
2122
|
+
/**
|
|
2123
|
+
* Save HNSW graph data for a noun
|
|
2124
|
+
*/
|
|
2125
|
+
async saveHNSWData(nounId, hnswData) {
|
|
2126
|
+
await this.ensureInitialized();
|
|
2127
|
+
// Use sharded path for HNSW data
|
|
2128
|
+
const shard = nounId.substring(0, 2).toLowerCase();
|
|
2129
|
+
const hnswDir = path.join(this.rootDir, 'entities', 'nouns', 'hnsw', shard);
|
|
2130
|
+
await this.ensureDirectoryExists(hnswDir);
|
|
2131
|
+
const filePath = path.join(hnswDir, `${nounId}.json`);
|
|
2132
|
+
await fs.promises.writeFile(filePath, JSON.stringify(hnswData, null, 2));
|
|
2133
|
+
}
|
|
2134
|
+
/**
|
|
2135
|
+
* Get HNSW graph data for a noun
|
|
2136
|
+
*/
|
|
2137
|
+
async getHNSWData(nounId) {
|
|
2138
|
+
await this.ensureInitialized();
|
|
2139
|
+
const shard = nounId.substring(0, 2).toLowerCase();
|
|
2140
|
+
const filePath = path.join(this.rootDir, 'entities', 'nouns', 'hnsw', shard, `${nounId}.json`);
|
|
2141
|
+
try {
|
|
2142
|
+
const data = await fs.promises.readFile(filePath, 'utf-8');
|
|
2143
|
+
return JSON.parse(data);
|
|
2144
|
+
}
|
|
2145
|
+
catch (error) {
|
|
2146
|
+
if (error.code !== 'ENOENT') {
|
|
2147
|
+
console.error(`Error reading HNSW data for ${nounId}:`, error);
|
|
2148
|
+
}
|
|
2149
|
+
return null;
|
|
2150
|
+
}
|
|
2151
|
+
}
|
|
2152
|
+
/**
|
|
2153
|
+
* Save HNSW system data (entry point, max level)
|
|
2154
|
+
*/
|
|
2155
|
+
async saveHNSWSystem(systemData) {
|
|
2156
|
+
await this.ensureInitialized();
|
|
2157
|
+
const filePath = path.join(this.systemDir, 'hnsw-system.json');
|
|
2158
|
+
await fs.promises.writeFile(filePath, JSON.stringify(systemData, null, 2));
|
|
2159
|
+
}
|
|
2160
|
+
/**
|
|
2161
|
+
* Get HNSW system data
|
|
2162
|
+
*/
|
|
2163
|
+
async getHNSWSystem() {
|
|
2164
|
+
await this.ensureInitialized();
|
|
2165
|
+
const filePath = path.join(this.systemDir, 'hnsw-system.json');
|
|
2166
|
+
try {
|
|
2167
|
+
const data = await fs.promises.readFile(filePath, 'utf-8');
|
|
2168
|
+
return JSON.parse(data);
|
|
2169
|
+
}
|
|
2170
|
+
catch (error) {
|
|
2171
|
+
if (error.code !== 'ENOENT') {
|
|
2172
|
+
console.error('Error reading HNSW system data:', error);
|
|
2173
|
+
}
|
|
2174
|
+
return null;
|
|
2175
|
+
}
|
|
2176
|
+
}
|
|
2111
2177
|
}
|
|
2112
2178
|
//# sourceMappingURL=fileSystemStorage.js.map
|
|
@@ -339,5 +339,41 @@ export declare class GcsStorage extends BaseStorage {
|
|
|
339
339
|
* Persist counts to storage
|
|
340
340
|
*/
|
|
341
341
|
protected persistCounts(): Promise<void>;
|
|
342
|
+
/**
|
|
343
|
+
* Get a noun's vector for HNSW rebuild
|
|
344
|
+
*/
|
|
345
|
+
getNounVector(id: string): Promise<number[] | null>;
|
|
346
|
+
/**
|
|
347
|
+
* Save HNSW graph data for a noun
|
|
348
|
+
* Storage path: entities/nouns/hnsw/{shard}/{id}.json
|
|
349
|
+
*/
|
|
350
|
+
saveHNSWData(nounId: string, hnswData: {
|
|
351
|
+
level: number;
|
|
352
|
+
connections: Record<string, string[]>;
|
|
353
|
+
}): Promise<void>;
|
|
354
|
+
/**
|
|
355
|
+
* Get HNSW graph data for a noun
|
|
356
|
+
* Storage path: entities/nouns/hnsw/{shard}/{id}.json
|
|
357
|
+
*/
|
|
358
|
+
getHNSWData(nounId: string): Promise<{
|
|
359
|
+
level: number;
|
|
360
|
+
connections: Record<string, string[]>;
|
|
361
|
+
} | null>;
|
|
362
|
+
/**
|
|
363
|
+
* Save HNSW system data (entry point, max level)
|
|
364
|
+
* Storage path: system/hnsw-system.json
|
|
365
|
+
*/
|
|
366
|
+
saveHNSWSystem(systemData: {
|
|
367
|
+
entryPointId: string | null;
|
|
368
|
+
maxLevel: number;
|
|
369
|
+
}): Promise<void>;
|
|
370
|
+
/**
|
|
371
|
+
* Get HNSW system data (entry point, max level)
|
|
372
|
+
* Storage path: system/hnsw-system.json
|
|
373
|
+
*/
|
|
374
|
+
getHNSWSystem(): Promise<{
|
|
375
|
+
entryPointId: string | null;
|
|
376
|
+
maxLevel: number;
|
|
377
|
+
} | null>;
|
|
342
378
|
}
|
|
343
379
|
export {};
|
|
@@ -1195,5 +1195,95 @@ export class GcsStorage extends BaseStorage {
|
|
|
1195
1195
|
this.logger.error('Error persisting counts:', error);
|
|
1196
1196
|
}
|
|
1197
1197
|
}
|
|
1198
|
+
// HNSW Index Persistence (v3.35.0+)
|
|
1199
|
+
/**
|
|
1200
|
+
* Get a noun's vector for HNSW rebuild
|
|
1201
|
+
*/
|
|
1202
|
+
async getNounVector(id) {
|
|
1203
|
+
await this.ensureInitialized();
|
|
1204
|
+
const noun = await this.getNode(id);
|
|
1205
|
+
return noun ? noun.vector : null;
|
|
1206
|
+
}
|
|
1207
|
+
/**
|
|
1208
|
+
* Save HNSW graph data for a noun
|
|
1209
|
+
* Storage path: entities/nouns/hnsw/{shard}/{id}.json
|
|
1210
|
+
*/
|
|
1211
|
+
async saveHNSWData(nounId, hnswData) {
|
|
1212
|
+
await this.ensureInitialized();
|
|
1213
|
+
try {
|
|
1214
|
+
// Use sharded path for HNSW data
|
|
1215
|
+
const shard = getShardIdFromUuid(nounId);
|
|
1216
|
+
const key = `entities/nouns/hnsw/${shard}/${nounId}.json`;
|
|
1217
|
+
const file = this.bucket.file(key);
|
|
1218
|
+
await file.save(JSON.stringify(hnswData, null, 2), {
|
|
1219
|
+
contentType: 'application/json',
|
|
1220
|
+
resumable: false
|
|
1221
|
+
});
|
|
1222
|
+
}
|
|
1223
|
+
catch (error) {
|
|
1224
|
+
this.logger.error(`Failed to save HNSW data for ${nounId}:`, error);
|
|
1225
|
+
throw new Error(`Failed to save HNSW data for ${nounId}: ${error}`);
|
|
1226
|
+
}
|
|
1227
|
+
}
|
|
1228
|
+
/**
|
|
1229
|
+
* Get HNSW graph data for a noun
|
|
1230
|
+
* Storage path: entities/nouns/hnsw/{shard}/{id}.json
|
|
1231
|
+
*/
|
|
1232
|
+
async getHNSWData(nounId) {
|
|
1233
|
+
await this.ensureInitialized();
|
|
1234
|
+
try {
|
|
1235
|
+
const shard = getShardIdFromUuid(nounId);
|
|
1236
|
+
const key = `entities/nouns/hnsw/${shard}/${nounId}.json`;
|
|
1237
|
+
const file = this.bucket.file(key);
|
|
1238
|
+
const [contents] = await file.download();
|
|
1239
|
+
return JSON.parse(contents.toString());
|
|
1240
|
+
}
|
|
1241
|
+
catch (error) {
|
|
1242
|
+
if (error.code === 404) {
|
|
1243
|
+
return null;
|
|
1244
|
+
}
|
|
1245
|
+
this.logger.error(`Failed to get HNSW data for ${nounId}:`, error);
|
|
1246
|
+
throw new Error(`Failed to get HNSW data for ${nounId}: ${error}`);
|
|
1247
|
+
}
|
|
1248
|
+
}
|
|
1249
|
+
/**
|
|
1250
|
+
* Save HNSW system data (entry point, max level)
|
|
1251
|
+
* Storage path: system/hnsw-system.json
|
|
1252
|
+
*/
|
|
1253
|
+
async saveHNSWSystem(systemData) {
|
|
1254
|
+
await this.ensureInitialized();
|
|
1255
|
+
try {
|
|
1256
|
+
const key = `${this.systemPrefix}hnsw-system.json`;
|
|
1257
|
+
const file = this.bucket.file(key);
|
|
1258
|
+
await file.save(JSON.stringify(systemData, null, 2), {
|
|
1259
|
+
contentType: 'application/json',
|
|
1260
|
+
resumable: false
|
|
1261
|
+
});
|
|
1262
|
+
}
|
|
1263
|
+
catch (error) {
|
|
1264
|
+
this.logger.error('Failed to save HNSW system data:', error);
|
|
1265
|
+
throw new Error(`Failed to save HNSW system data: ${error}`);
|
|
1266
|
+
}
|
|
1267
|
+
}
|
|
1268
|
+
/**
|
|
1269
|
+
* Get HNSW system data (entry point, max level)
|
|
1270
|
+
* Storage path: system/hnsw-system.json
|
|
1271
|
+
*/
|
|
1272
|
+
async getHNSWSystem() {
|
|
1273
|
+
await this.ensureInitialized();
|
|
1274
|
+
try {
|
|
1275
|
+
const key = `${this.systemPrefix}hnsw-system.json`;
|
|
1276
|
+
const file = this.bucket.file(key);
|
|
1277
|
+
const [contents] = await file.download();
|
|
1278
|
+
return JSON.parse(contents.toString());
|
|
1279
|
+
}
|
|
1280
|
+
catch (error) {
|
|
1281
|
+
if (error.code === 404) {
|
|
1282
|
+
return null;
|
|
1283
|
+
}
|
|
1284
|
+
this.logger.error('Failed to get HNSW system data:', error);
|
|
1285
|
+
throw new Error(`Failed to get HNSW system data: ${error}`);
|
|
1286
|
+
}
|
|
1287
|
+
}
|
|
1198
1288
|
}
|
|
1199
1289
|
//# sourceMappingURL=gcsStorage.js.map
|
|
@@ -174,4 +174,36 @@ export declare class MemoryStorage extends BaseStorage {
|
|
|
174
174
|
* Persist counts to storage - no-op for memory storage
|
|
175
175
|
*/
|
|
176
176
|
protected persistCounts(): Promise<void>;
|
|
177
|
+
/**
|
|
178
|
+
* Get vector for a noun
|
|
179
|
+
*/
|
|
180
|
+
getNounVector(id: string): Promise<number[] | null>;
|
|
181
|
+
/**
|
|
182
|
+
* Save HNSW graph data for a noun
|
|
183
|
+
*/
|
|
184
|
+
saveHNSWData(nounId: string, hnswData: {
|
|
185
|
+
level: number;
|
|
186
|
+
connections: Record<string, string[]>;
|
|
187
|
+
}): Promise<void>;
|
|
188
|
+
/**
|
|
189
|
+
* Get HNSW graph data for a noun
|
|
190
|
+
*/
|
|
191
|
+
getHNSWData(nounId: string): Promise<{
|
|
192
|
+
level: number;
|
|
193
|
+
connections: Record<string, string[]>;
|
|
194
|
+
} | null>;
|
|
195
|
+
/**
|
|
196
|
+
* Save HNSW system data (entry point, max level)
|
|
197
|
+
*/
|
|
198
|
+
saveHNSWSystem(systemData: {
|
|
199
|
+
entryPointId: string | null;
|
|
200
|
+
maxLevel: number;
|
|
201
|
+
}): Promise<void>;
|
|
202
|
+
/**
|
|
203
|
+
* Get HNSW system data
|
|
204
|
+
*/
|
|
205
|
+
getHNSWSystem(): Promise<{
|
|
206
|
+
entryPointId: string | null;
|
|
207
|
+
maxLevel: number;
|
|
208
|
+
} | null>;
|
|
177
209
|
}
|
|
@@ -595,5 +595,48 @@ export class MemoryStorage extends BaseStorage {
|
|
|
595
595
|
// No persistence needed for in-memory storage
|
|
596
596
|
// Counts are always accurate from the live data structures
|
|
597
597
|
}
|
|
598
|
+
// =============================================
|
|
599
|
+
// HNSW Index Persistence (v3.35.0+)
|
|
600
|
+
// =============================================
|
|
601
|
+
/**
|
|
602
|
+
* Get vector for a noun
|
|
603
|
+
*/
|
|
604
|
+
async getNounVector(id) {
|
|
605
|
+
const noun = this.nouns.get(id);
|
|
606
|
+
return noun ? [...noun.vector] : null;
|
|
607
|
+
}
|
|
608
|
+
/**
|
|
609
|
+
* Save HNSW graph data for a noun
|
|
610
|
+
*/
|
|
611
|
+
async saveHNSWData(nounId, hnswData) {
|
|
612
|
+
// For memory storage, HNSW data is already in the noun object
|
|
613
|
+
// This method is a no-op since saveNoun already stores the full graph
|
|
614
|
+
// But we store it separately for consistency with other adapters
|
|
615
|
+
const path = `hnsw/${nounId}.json`;
|
|
616
|
+
await this.writeObjectToPath(path, hnswData);
|
|
617
|
+
}
|
|
618
|
+
/**
|
|
619
|
+
* Get HNSW graph data for a noun
|
|
620
|
+
*/
|
|
621
|
+
async getHNSWData(nounId) {
|
|
622
|
+
const path = `hnsw/${nounId}.json`;
|
|
623
|
+
const data = await this.readObjectFromPath(path);
|
|
624
|
+
return data || null;
|
|
625
|
+
}
|
|
626
|
+
/**
|
|
627
|
+
* Save HNSW system data (entry point, max level)
|
|
628
|
+
*/
|
|
629
|
+
async saveHNSWSystem(systemData) {
|
|
630
|
+
const path = 'system/hnsw-system.json';
|
|
631
|
+
await this.writeObjectToPath(path, systemData);
|
|
632
|
+
}
|
|
633
|
+
/**
|
|
634
|
+
* Get HNSW system data
|
|
635
|
+
*/
|
|
636
|
+
async getHNSWSystem() {
|
|
637
|
+
const path = 'system/hnsw-system.json';
|
|
638
|
+
const data = await this.readObjectFromPath(path);
|
|
639
|
+
return data || null;
|
|
640
|
+
}
|
|
598
641
|
}
|
|
599
642
|
//# sourceMappingURL=memoryStorage.js.map
|