@cmdoss/memwal-sdk 0.6.2 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (247) hide show
  1. package/ARCHITECTURE.md +547 -547
  2. package/BENCHMARKS.md +238 -238
  3. package/README.md +310 -181
  4. package/dist/ai-sdk/tools.d.ts +2 -2
  5. package/dist/ai-sdk/tools.js +2 -2
  6. package/dist/client/ClientMemoryManager.js +2 -2
  7. package/dist/client/ClientMemoryManager.js.map +1 -1
  8. package/dist/client/PersonalDataWallet.d.ts.map +1 -1
  9. package/dist/client/SimplePDWClient.d.ts +29 -1
  10. package/dist/client/SimplePDWClient.d.ts.map +1 -1
  11. package/dist/client/SimplePDWClient.js +45 -13
  12. package/dist/client/SimplePDWClient.js.map +1 -1
  13. package/dist/client/namespaces/EmbeddingsNamespace.d.ts +1 -1
  14. package/dist/client/namespaces/EmbeddingsNamespace.js +1 -1
  15. package/dist/client/namespaces/MemoryNamespace.d.ts +31 -0
  16. package/dist/client/namespaces/MemoryNamespace.d.ts.map +1 -1
  17. package/dist/client/namespaces/MemoryNamespace.js +272 -39
  18. package/dist/client/namespaces/MemoryNamespace.js.map +1 -1
  19. package/dist/client/namespaces/consolidated/AINamespace.d.ts +2 -2
  20. package/dist/client/namespaces/consolidated/AINamespace.js +2 -2
  21. package/dist/client/namespaces/consolidated/BlockchainNamespace.d.ts +12 -2
  22. package/dist/client/namespaces/consolidated/BlockchainNamespace.d.ts.map +1 -1
  23. package/dist/client/namespaces/consolidated/BlockchainNamespace.js +62 -4
  24. package/dist/client/namespaces/consolidated/BlockchainNamespace.js.map +1 -1
  25. package/dist/client/namespaces/consolidated/StorageNamespace.d.ts +67 -2
  26. package/dist/client/namespaces/consolidated/StorageNamespace.d.ts.map +1 -1
  27. package/dist/client/namespaces/consolidated/StorageNamespace.js +549 -16
  28. package/dist/client/namespaces/consolidated/StorageNamespace.js.map +1 -1
  29. package/dist/config/ConfigurationHelper.js +61 -61
  30. package/dist/config/defaults.js +2 -2
  31. package/dist/config/defaults.js.map +1 -1
  32. package/dist/graph/GraphService.js +21 -21
  33. package/dist/graph/GraphService.js.map +1 -1
  34. package/dist/index.d.ts +3 -1
  35. package/dist/index.d.ts.map +1 -1
  36. package/dist/index.js +3 -1
  37. package/dist/index.js.map +1 -1
  38. package/dist/infrastructure/seal/EncryptionService.d.ts +9 -5
  39. package/dist/infrastructure/seal/EncryptionService.d.ts.map +1 -1
  40. package/dist/infrastructure/seal/EncryptionService.js +37 -15
  41. package/dist/infrastructure/seal/EncryptionService.js.map +1 -1
  42. package/dist/infrastructure/seal/SealService.d.ts +13 -5
  43. package/dist/infrastructure/seal/SealService.d.ts.map +1 -1
  44. package/dist/infrastructure/seal/SealService.js +36 -34
  45. package/dist/infrastructure/seal/SealService.js.map +1 -1
  46. package/dist/langchain/createPDWRAG.js +30 -30
  47. package/dist/retrieval/MemoryDecryptionPipeline.d.ts.map +1 -1
  48. package/dist/retrieval/MemoryDecryptionPipeline.js +2 -1
  49. package/dist/retrieval/MemoryDecryptionPipeline.js.map +1 -1
  50. package/dist/retrieval/MemoryRetrievalService.d.ts +31 -0
  51. package/dist/retrieval/MemoryRetrievalService.d.ts.map +1 -1
  52. package/dist/retrieval/MemoryRetrievalService.js +44 -4
  53. package/dist/retrieval/MemoryRetrievalService.js.map +1 -1
  54. package/dist/services/CapabilityService.d.ts.map +1 -1
  55. package/dist/services/CapabilityService.js +30 -14
  56. package/dist/services/CapabilityService.js.map +1 -1
  57. package/dist/services/CrossContextPermissionService.d.ts.map +1 -1
  58. package/dist/services/CrossContextPermissionService.js +9 -7
  59. package/dist/services/CrossContextPermissionService.js.map +1 -1
  60. package/dist/services/EmbeddingService.d.ts +28 -1
  61. package/dist/services/EmbeddingService.d.ts.map +1 -1
  62. package/dist/services/EmbeddingService.js +54 -0
  63. package/dist/services/EmbeddingService.js.map +1 -1
  64. package/dist/services/EncryptionService.d.ts.map +1 -1
  65. package/dist/services/EncryptionService.js +6 -5
  66. package/dist/services/EncryptionService.js.map +1 -1
  67. package/dist/services/GeminiAIService.js +309 -309
  68. package/dist/services/IndexManager.d.ts +5 -1
  69. package/dist/services/IndexManager.d.ts.map +1 -1
  70. package/dist/services/IndexManager.js +17 -40
  71. package/dist/services/IndexManager.js.map +1 -1
  72. package/dist/services/QueryService.js +1 -1
  73. package/dist/services/QueryService.js.map +1 -1
  74. package/dist/services/StorageService.d.ts +11 -0
  75. package/dist/services/StorageService.d.ts.map +1 -1
  76. package/dist/services/StorageService.js +73 -10
  77. package/dist/services/StorageService.js.map +1 -1
  78. package/dist/services/TransactionService.d.ts +20 -0
  79. package/dist/services/TransactionService.d.ts.map +1 -1
  80. package/dist/services/TransactionService.js +43 -0
  81. package/dist/services/TransactionService.js.map +1 -1
  82. package/dist/services/ViewService.js +2 -2
  83. package/dist/services/ViewService.js.map +1 -1
  84. package/dist/services/storage/QuiltBatchManager.d.ts +101 -1
  85. package/dist/services/storage/QuiltBatchManager.d.ts.map +1 -1
  86. package/dist/services/storage/QuiltBatchManager.js +410 -20
  87. package/dist/services/storage/QuiltBatchManager.js.map +1 -1
  88. package/dist/services/storage/index.d.ts +1 -1
  89. package/dist/services/storage/index.d.ts.map +1 -1
  90. package/dist/services/storage/index.js.map +1 -1
  91. package/dist/utils/LRUCache.d.ts +106 -0
  92. package/dist/utils/LRUCache.d.ts.map +1 -0
  93. package/dist/utils/LRUCache.js +281 -0
  94. package/dist/utils/LRUCache.js.map +1 -0
  95. package/dist/utils/index.d.ts +1 -0
  96. package/dist/utils/index.d.ts.map +1 -1
  97. package/dist/utils/index.js +2 -0
  98. package/dist/utils/index.js.map +1 -1
  99. package/dist/utils/memoryIndexOnChain.d.ts +212 -0
  100. package/dist/utils/memoryIndexOnChain.d.ts.map +1 -0
  101. package/dist/utils/memoryIndexOnChain.js +312 -0
  102. package/dist/utils/memoryIndexOnChain.js.map +1 -0
  103. package/dist/utils/rebuildIndexNode.d.ts +29 -0
  104. package/dist/utils/rebuildIndexNode.d.ts.map +1 -1
  105. package/dist/utils/rebuildIndexNode.js +366 -98
  106. package/dist/utils/rebuildIndexNode.js.map +1 -1
  107. package/dist/vector/HnswWasmService.d.ts +20 -5
  108. package/dist/vector/HnswWasmService.d.ts.map +1 -1
  109. package/dist/vector/HnswWasmService.js +73 -40
  110. package/dist/vector/HnswWasmService.js.map +1 -1
  111. package/dist/vector/IHnswService.d.ts +10 -1
  112. package/dist/vector/IHnswService.d.ts.map +1 -1
  113. package/dist/vector/IHnswService.js.map +1 -1
  114. package/dist/vector/NodeHnswService.d.ts +16 -0
  115. package/dist/vector/NodeHnswService.d.ts.map +1 -1
  116. package/dist/vector/NodeHnswService.js +84 -5
  117. package/dist/vector/NodeHnswService.js.map +1 -1
  118. package/dist/vector/createHnswService.d.ts +1 -1
  119. package/dist/vector/createHnswService.js +1 -1
  120. package/dist/vector/index.d.ts +1 -1
  121. package/dist/vector/index.js +1 -1
  122. package/package.json +157 -157
  123. package/src/access/PermissionService.ts +635 -635
  124. package/src/aggregation/AggregationService.ts +389 -389
  125. package/src/ai-sdk/PDWVectorStore.ts +715 -715
  126. package/src/ai-sdk/index.ts +65 -65
  127. package/src/ai-sdk/tools.ts +460 -460
  128. package/src/ai-sdk/types.ts +404 -404
  129. package/src/batch/BatchManager.ts +597 -597
  130. package/src/batch/BatchingService.ts +429 -429
  131. package/src/batch/MemoryProcessingCache.ts +492 -492
  132. package/src/batch/index.ts +30 -30
  133. package/src/browser.ts +200 -200
  134. package/src/client/ClientMemoryManager.ts +987 -987
  135. package/src/client/PersonalDataWallet.ts +345 -345
  136. package/src/client/SimplePDWClient.ts +1289 -1222
  137. package/src/client/factory.ts +154 -154
  138. package/src/client/namespaces/AnalyticsNamespace.ts +377 -377
  139. package/src/client/namespaces/BatchNamespace.ts +356 -356
  140. package/src/client/namespaces/CacheNamespace.ts +123 -123
  141. package/src/client/namespaces/CapabilityNamespace.ts +217 -217
  142. package/src/client/namespaces/ClassifyNamespace.ts +169 -169
  143. package/src/client/namespaces/ContextNamespace.ts +297 -297
  144. package/src/client/namespaces/EmbeddingsNamespace.ts +99 -99
  145. package/src/client/namespaces/EncryptionNamespace.ts +221 -221
  146. package/src/client/namespaces/GraphNamespace.ts +468 -468
  147. package/src/client/namespaces/IndexNamespace.ts +361 -361
  148. package/src/client/namespaces/MemoryNamespace.ts +1422 -1135
  149. package/src/client/namespaces/PermissionsNamespace.ts +254 -254
  150. package/src/client/namespaces/PipelineNamespace.ts +220 -220
  151. package/src/client/namespaces/SearchNamespace.ts +1049 -1049
  152. package/src/client/namespaces/StorageNamespace.ts +458 -458
  153. package/src/client/namespaces/TxNamespace.ts +260 -260
  154. package/src/client/namespaces/WalletNamespace.ts +243 -243
  155. package/src/client/namespaces/consolidated/AINamespace.ts +449 -449
  156. package/src/client/namespaces/consolidated/BlockchainNamespace.ts +607 -546
  157. package/src/client/namespaces/consolidated/SecurityNamespace.ts +648 -648
  158. package/src/client/namespaces/consolidated/StorageNamespace.ts +1141 -497
  159. package/src/client/namespaces/consolidated/index.ts +39 -39
  160. package/src/client/signers/KeypairSigner.ts +108 -108
  161. package/src/client/signers/UnifiedSigner.ts +110 -110
  162. package/src/client/signers/WalletAdapterSigner.ts +159 -159
  163. package/src/client/signers/index.ts +26 -26
  164. package/src/config/ConfigurationHelper.ts +412 -412
  165. package/src/config/defaults.ts +51 -51
  166. package/src/config/index.ts +8 -8
  167. package/src/config/validation.ts +70 -70
  168. package/src/core/index.ts +14 -14
  169. package/src/core/interfaces/IService.ts +307 -307
  170. package/src/core/interfaces/index.ts +8 -8
  171. package/src/core/types/capability.ts +297 -297
  172. package/src/core/types/index.ts +870 -870
  173. package/src/core/types/wallet.ts +270 -270
  174. package/src/core/types.ts +9 -9
  175. package/src/core/wallet.ts +222 -222
  176. package/src/embedding/index.ts +19 -19
  177. package/src/embedding/types.ts +357 -357
  178. package/src/errors/index.ts +602 -602
  179. package/src/errors/recovery.ts +461 -461
  180. package/src/errors/validation.ts +567 -567
  181. package/src/generated/pdw/capability.ts +319 -319
  182. package/src/graph/GraphService.ts +887 -887
  183. package/src/graph/KnowledgeGraphManager.ts +728 -728
  184. package/src/graph/index.ts +25 -25
  185. package/src/index.ts +498 -474
  186. package/src/infrastructure/index.ts +22 -22
  187. package/src/infrastructure/seal/EncryptionService.ts +628 -603
  188. package/src/infrastructure/seal/SealService.ts +613 -615
  189. package/src/infrastructure/seal/index.ts +9 -9
  190. package/src/infrastructure/sui/BlockchainManager.ts +627 -627
  191. package/src/infrastructure/sui/SuiService.ts +888 -888
  192. package/src/infrastructure/sui/index.ts +9 -9
  193. package/src/infrastructure/walrus/StorageManager.ts +604 -604
  194. package/src/infrastructure/walrus/WalrusStorageService.ts +612 -612
  195. package/src/infrastructure/walrus/index.ts +9 -9
  196. package/src/langchain/PDWEmbeddings.ts +145 -145
  197. package/src/langchain/PDWVectorStore.ts +456 -456
  198. package/src/langchain/createPDWRAG.ts +303 -303
  199. package/src/langchain/index.ts +47 -47
  200. package/src/permissions/ConsentRepository.browser.ts +249 -249
  201. package/src/permissions/ConsentRepository.ts +364 -364
  202. package/src/pipeline/MemoryPipeline.ts +862 -862
  203. package/src/pipeline/PipelineManager.ts +683 -683
  204. package/src/pipeline/index.ts +26 -26
  205. package/src/retrieval/AdvancedSearchService.ts +629 -629
  206. package/src/retrieval/MemoryAnalyticsService.ts +711 -711
  207. package/src/retrieval/MemoryDecryptionPipeline.ts +825 -824
  208. package/src/retrieval/MemoryRetrievalService.ts +904 -830
  209. package/src/retrieval/index.ts +42 -42
  210. package/src/services/BatchService.ts +352 -352
  211. package/src/services/CapabilityService.ts +464 -448
  212. package/src/services/ClassifierService.ts +465 -465
  213. package/src/services/CrossContextPermissionService.ts +486 -484
  214. package/src/services/EmbeddingService.ts +771 -706
  215. package/src/services/EncryptionService.ts +712 -711
  216. package/src/services/GeminiAIService.ts +753 -753
  217. package/src/services/IndexManager.ts +977 -1004
  218. package/src/services/MemoryIndexService.ts +1003 -1003
  219. package/src/services/MemoryService.ts +369 -369
  220. package/src/services/QueryService.ts +890 -890
  221. package/src/services/StorageService.ts +1182 -1111
  222. package/src/services/TransactionService.ts +838 -790
  223. package/src/services/VectorService.ts +462 -462
  224. package/src/services/ViewService.ts +484 -484
  225. package/src/services/index.ts +25 -25
  226. package/src/services/storage/BlobAttributesManager.ts +333 -333
  227. package/src/services/storage/KnowledgeGraphManager.ts +425 -425
  228. package/src/services/storage/MemorySearchManager.ts +387 -387
  229. package/src/services/storage/QuiltBatchManager.ts +1130 -660
  230. package/src/services/storage/WalrusMetadataManager.ts +268 -268
  231. package/src/services/storage/WalrusStorageManager.ts +287 -287
  232. package/src/services/storage/index.ts +57 -52
  233. package/src/types/index.ts +13 -13
  234. package/src/utils/LRUCache.ts +378 -0
  235. package/src/utils/index.ts +76 -68
  236. package/src/utils/memoryIndexOnChain.ts +507 -0
  237. package/src/utils/rebuildIndex.ts +290 -290
  238. package/src/utils/rebuildIndexNode.ts +771 -424
  239. package/src/vector/BrowserHnswIndexService.ts +758 -758
  240. package/src/vector/HnswWasmService.ts +731 -679
  241. package/src/vector/IHnswService.ts +233 -224
  242. package/src/vector/NodeHnswService.ts +833 -735
  243. package/src/vector/VectorManager.ts +478 -478
  244. package/src/vector/createHnswService.ts +135 -135
  245. package/src/vector/index.ts +56 -56
  246. package/src/wallet/ContextWalletService.ts +656 -656
  247. package/src/wallet/MainWalletService.ts +317 -317
@@ -1,679 +1,731 @@
1
- /**
2
- * HnswWasmService - Browser-Compatible HNSW Vector Indexing
3
- *
4
- * Provides browser-compatible Hierarchical Navigable Small World (HNSW) vector indexing
5
- * using hnswlib-wasm with IndexedDB persistence. Replaces Node.js-only hnswlib-node.
6
- *
7
- * Key Features:
8
- * - ✅ Runs in browsers (WebAssembly)
9
- * - ✅ IndexedDB persistence (no filesystem needed)
10
- * - ✅ Intelligent batching and caching
11
- * - ✅ Walrus storage integration
12
- * - ✅ Near-native performance via WASM
13
- * - ✅ Safe for Node.js/SSR (uses dynamic import)
14
- */
15
-
16
- // Dynamic import for hnswlib-wasm to avoid bundling issues in Node.js
17
- // Types defined locally to avoid static import issues
18
- type HierarchicalNSW = any;
19
- type HnswlibModule = any;
20
-
21
- /**
22
- * Helper to dynamically load hnswlib-wasm (browser only)
23
- */
24
- async function loadHnswlibDynamic(): Promise<HnswlibModule> {
25
- const module = await import('hnswlib-wasm/dist/hnswlib.js');
26
- return module.loadHnswlib();
27
- }
28
- import { StorageService, type MemoryMetadata } from '../services/StorageService';
29
- import {
30
- HNSWIndexConfig,
31
- HNSWSearchResult,
32
- HNSWSearchOptions,
33
- BatchConfig,
34
- BatchJob,
35
- BatchStats,
36
- VectorError
37
- } from '../embedding/types';
38
-
39
- interface IndexCacheEntry {
40
- index: HierarchicalNSW;
41
- lastModified: Date;
42
- pendingVectors: Map<number, number[]>; // vectorId -> vector
43
- isDirty: boolean;
44
- version: number;
45
- metadata: Map<number, any>; // vectorId -> metadata
46
- dimensions: number;
47
- }
48
-
49
- interface IndexMetadata {
50
- dimension: number;
51
- maxElements: number;
52
- efConstruction: number;
53
- m: number;
54
- spaceType: string;
55
- version: number;
56
- createdAt: Date;
57
- lastUpdated: Date;
58
- }
59
-
60
- /**
61
- * Browser-compatible HNSW vector indexing service using WebAssembly
62
- * Drop-in replacement for HnswIndexService with identical API
63
- */
64
- export class HnswWasmService {
65
- private hnswlib: HnswlibModule | null = null;
66
- private readonly indexCache = new Map<string, IndexCacheEntry>();
67
- private readonly batchJobs = new Map<string, BatchJob>();
68
- private readonly config: Required<BatchConfig>;
69
- private readonly indexConfig: Required<HNSWIndexConfig>;
70
- private batchProcessor?: ReturnType<typeof setInterval>;
71
- private cacheCleanup?: ReturnType<typeof setInterval>;
72
- private initPromise: Promise<void> | null = null;
73
-
74
- constructor(
75
- private storageService: StorageService,
76
- indexConfig: Partial<HNSWIndexConfig> = {},
77
- batchConfig: Partial<BatchConfig> = {}
78
- ) {
79
- // Default HNSW configuration (matching HnswIndexService)
80
- this.indexConfig = {
81
- dimension: indexConfig.dimension || 3072,
82
- maxElements: indexConfig.maxElements || 10000,
83
- efConstruction: indexConfig.efConstruction || 200,
84
- m: indexConfig.m || 16,
85
- randomSeed: indexConfig.randomSeed || 42,
86
- spaceType: indexConfig.spaceType || 'cosine'
87
- };
88
-
89
- // Default batch configuration
90
- this.config = {
91
- maxBatchSize: batchConfig.maxBatchSize || 50,
92
- batchDelayMs: batchConfig.batchDelayMs || 5000,
93
- maxCacheSize: batchConfig.maxCacheSize || 100,
94
- cacheTtlMs: batchConfig.cacheTtlMs || 30 * 60 * 1000 // 30 minutes
95
- };
96
-
97
- // Initialize WASM library asynchronously
98
- this.initPromise = this.initialize();
99
- }
100
-
101
- /**
102
- * Initialize hnswlib-wasm (must be called before use)
103
- */
104
- private async initialize(): Promise<void> {
105
- try {
106
- console.log('🔧 Loading hnswlib-wasm...');
107
- this.hnswlib = await loadHnswlibDynamic();
108
- console.log('✅ hnswlib-wasm loaded successfully');
109
-
110
- // Start background processors
111
- this.startBatchProcessor();
112
- this.startCacheCleanup();
113
- } catch (error) {
114
- console.error('❌ Failed to load hnswlib-wasm:', error);
115
- throw error;
116
- }
117
- }
118
-
119
- /**
120
- * Ensure WASM library is loaded
121
- */
122
- private async ensureInitialized(): Promise<void> {
123
- if (this.initPromise) {
124
- await this.initPromise;
125
- }
126
- if (!this.hnswlib) {
127
- throw new Error('hnswlib-wasm not initialized');
128
- }
129
- }
130
-
131
- /**
132
- * Create a new HNSW index
133
- */
134
- async createIndex(
135
- userAddress: string,
136
- options: Partial<HNSWIndexConfig> = {}
137
- ): Promise<{ index: HierarchicalNSW; serialized: Uint8Array }> {
138
- await this.ensureInitialized();
139
-
140
- try {
141
- const config = { ...this.indexConfig, ...options };
142
-
143
- console.log(`🔨 Creating new HNSW index for user ${userAddress}`);
144
- console.log(` Dimensions: ${config.dimension}, M: ${config.m}, efConstruction: ${config.efConstruction}`);
145
-
146
- // Create a new index using WASM (constructor takes: spaceName, numDimensions, autoSaveFilename)
147
- const index = new this.hnswlib!.HierarchicalNSW(config.spaceType, config.dimension, '');
148
- index.initIndex(config.maxElements, config.m, config.efConstruction, config.randomSeed);
149
-
150
- // Create cache entry
151
- this.indexCache.set(userAddress, {
152
- index,
153
- lastModified: new Date(),
154
- pendingVectors: new Map(),
155
- isDirty: false,
156
- version: 1,
157
- metadata: new Map(),
158
- dimensions: config.dimension
159
- });
160
-
161
- // Serialize the empty index
162
- const indexName = `index_${userAddress}_${Date.now()}`;
163
- await index.writeIndex(indexName);
164
-
165
- // Sync to IndexedDB (persist the index) - syncFS requires a callback
166
- await this.hnswlib!.EmscriptenFileSystemManager.syncFS(false, () => {});
167
-
168
- // Read serialized data from filesystem for returning
169
- const serialized = new Uint8Array(0); // Placeholder - actual data is in IndexedDB
170
-
171
- console.log(`✅ Index created successfully for ${userAddress}`);
172
-
173
- return { index, serialized };
174
- } catch (error) {
175
- throw this.createVectorError('INDEX_ERROR', `Failed to create index: ${error}`, error);
176
- }
177
- }
178
-
179
- /**
180
- * Add vector to index with batching (main entry point)
181
- */
182
- addVectorToIndexBatched(
183
- userAddress: string,
184
- vectorId: number,
185
- vector: number[],
186
- metadata?: any
187
- ): void {
188
- try {
189
- // Validate input
190
- this.validateVector(vector);
191
-
192
- // Get or create cache entry
193
- let cacheEntry = this.indexCache.get(userAddress);
194
- if (!cacheEntry) {
195
- console.warn(`No cached index found for user ${userAddress}, will create on first flush`);
196
- // Create placeholder entry - actual index created on first flush
197
- cacheEntry = {
198
- index: null as any, // Will be created on flush
199
- lastModified: new Date(),
200
- pendingVectors: new Map(),
201
- isDirty: true,
202
- version: 1,
203
- metadata: new Map(),
204
- dimensions: vector.length
205
- };
206
- this.indexCache.set(userAddress, cacheEntry);
207
- }
208
-
209
- // Validate vector dimensions
210
- if (cacheEntry.dimensions && vector.length !== cacheEntry.dimensions) {
211
- throw new Error(`Vector dimension mismatch: expected ${cacheEntry.dimensions}, got ${vector.length}`);
212
- }
213
-
214
- // Add to pending queue
215
- cacheEntry.pendingVectors.set(vectorId, vector);
216
- if (metadata) {
217
- cacheEntry.metadata.set(vectorId, metadata);
218
- }
219
- cacheEntry.isDirty = true;
220
- cacheEntry.lastModified = new Date();
221
-
222
- // Schedule or update batch job
223
- this.scheduleBatchJob(userAddress, vectorId, vector);
224
-
225
- console.debug(`📊 Vector ${vectorId} queued for batch processing. Pending: ${cacheEntry.pendingVectors.size}`);
226
-
227
- // Process immediately if batch size limit reached
228
- if (cacheEntry.pendingVectors.size >= this.config.maxBatchSize) {
229
- console.log(`⚡ Batch size limit reached (${this.config.maxBatchSize}), processing immediately`);
230
- setTimeout(() => this.flushPendingVectors(userAddress), 0);
231
- }
232
- } catch (error) {
233
- throw this.createVectorError('INDEX_ERROR', `Failed to queue vector: ${error}`, error);
234
- }
235
- }
236
-
237
- /**
238
- * Search vectors in the index (including pending vectors)
239
- */
240
- async searchVectors(
241
- userAddress: string,
242
- queryVector: number[],
243
- options: HNSWSearchOptions = {}
244
- ): Promise<HNSWSearchResult> {
245
- await this.ensureInitialized();
246
-
247
- try {
248
- this.validateVector(queryVector);
249
-
250
- const cacheEntry = this.indexCache.get(userAddress);
251
- if (!cacheEntry?.index) {
252
- throw new Error(`No index found for user ${userAddress}`);
253
- }
254
-
255
- const { k = 10, efSearch = 50, filter } = options;
256
-
257
- // Set search parameters
258
- cacheEntry.index.setEfSearch(efSearch);
259
-
260
- let searchIndex = cacheEntry.index;
261
-
262
- // If there are pending vectors, flush them first
263
- if (cacheEntry.pendingVectors.size > 0) {
264
- console.log(`⏳ Flushing ${cacheEntry.pendingVectors.size} pending vectors before search`);
265
- await this.flushPendingVectors(userAddress);
266
- // Get updated index
267
- const updatedEntry = this.indexCache.get(userAddress);
268
- if (updatedEntry?.index) {
269
- searchIndex = updatedEntry.index;
270
- }
271
- }
272
-
273
- // Perform search (convert to Float32Array if needed)
274
- const queryFloat32 = queryVector instanceof Float32Array
275
- ? queryVector
276
- : new Float32Array(queryVector);
277
- const result = searchIndex.searchKnn(
278
- queryFloat32,
279
- k,
280
- filter && typeof filter === 'function' ? (filter as (label: number) => boolean) : undefined
281
- );
282
-
283
- // Apply metadata filter if provided (additional filtering)
284
- let filteredIds = result.neighbors;
285
- let filteredDistances = result.distances;
286
-
287
- if (filter && typeof filter === 'function') {
288
- const filtered = this.applyMetadataFilter(
289
- result.neighbors,
290
- result.distances,
291
- cacheEntry.metadata,
292
- filter as (metadata: any) => boolean
293
- );
294
- filteredIds = filtered.ids;
295
- filteredDistances = filtered.distances;
296
- }
297
-
298
- // Convert distances to similarities (for cosine distance)
299
- const similarities = this.indexConfig.spaceType === 'cosine'
300
- ? filteredDistances.map((dist: number) => 1 - dist)
301
- : filteredDistances.map((dist: number) => 1 / (1 + dist));
302
-
303
- console.log(`🔍 Search completed: ${filteredIds.length} results found`);
304
-
305
- return {
306
- ids: filteredIds,
307
- distances: filteredDistances,
308
- similarities
309
- };
310
- } catch (error) {
311
- throw this.createVectorError('SEARCH_ERROR', `Search failed: ${error}`, error);
312
- }
313
- }
314
-
315
- /**
316
- * Load index from Walrus storage
317
- */
318
- async loadIndex(blobId: string, userAddress: string): Promise<HierarchicalNSW> {
319
- await this.ensureInitialized();
320
-
321
- try {
322
- console.log(`📥 Loading HNSW index from Walrus: ${blobId}`);
323
-
324
- // Download index from Walrus
325
- const retrieveResult = await this.storageService.retrieve(blobId);
326
- const indexBuffer = retrieveResult.content;
327
-
328
- // Save to Emscripten virtual filesystem
329
- const indexName = `index_${userAddress}_${Date.now()}`;
330
- (this.hnswlib!.EmscriptenFileSystemManager as any).writeFile(indexName, indexBuffer);
331
-
332
- // Sync from IndexedDB (load the data into memory)
333
- await this.hnswlib!.EmscriptenFileSystemManager.syncFS(true, () => {});
334
-
335
- // Create and load index (constructor: spaceName, numDimensions, autoSaveFilename)
336
- const index = new this.hnswlib!.HierarchicalNSW(this.indexConfig.spaceType, this.indexConfig.dimension, '');
337
- await index.readIndex(indexName, this.indexConfig.maxElements);
338
-
339
- // Cache the loaded index
340
- this.indexCache.set(userAddress, {
341
- index,
342
- lastModified: new Date(),
343
- pendingVectors: new Map(),
344
- isDirty: false,
345
- version: 1,
346
- metadata: new Map(),
347
- dimensions: this.indexConfig.dimension
348
- });
349
-
350
- console.log(`✅ Index loaded successfully for ${userAddress}`);
351
- return index;
352
- } catch (error) {
353
- throw this.createVectorError('STORAGE_ERROR', `Failed to load index: ${error}`, error);
354
- }
355
- }
356
-
357
- /**
358
- * Save index to Walrus storage
359
- */
360
- async saveIndex(userAddress: string): Promise<string> {
361
- await this.ensureInitialized();
362
-
363
- try {
364
- const cacheEntry = this.indexCache.get(userAddress);
365
- if (!cacheEntry?.index) {
366
- throw new Error(`No index found for user ${userAddress}`);
367
- }
368
-
369
- console.log(`💾 Saving index to Walrus for ${userAddress}`);
370
-
371
- return await this.saveIndexToWalrus(cacheEntry.index, userAddress);
372
- } catch (error) {
373
- throw this.createVectorError('STORAGE_ERROR', `Failed to save index: ${error}`, error);
374
- }
375
- }
376
-
377
- /**
378
- * Force flush all pending vectors for a user
379
- */
380
- async forceFlush(userAddress: string): Promise<void> {
381
- await this.flushPendingVectors(userAddress);
382
- }
383
-
384
- /**
385
- * Get cache statistics
386
- */
387
- getCacheStats(): BatchStats {
388
- const cacheEntries: any[] = [];
389
- let totalPendingVectors = 0;
390
-
391
- for (const [userAddress, entry] of this.indexCache.entries()) {
392
- const pendingCount = entry.pendingVectors.size;
393
- totalPendingVectors += pendingCount;
394
-
395
- cacheEntries.push({
396
- userAddress,
397
- pendingVectors: pendingCount,
398
- lastModified: entry.lastModified,
399
- isDirty: entry.isDirty,
400
- indexDimensions: entry.dimensions
401
- });
402
- }
403
-
404
- return {
405
- totalUsers: this.indexCache.size,
406
- totalPendingVectors,
407
- activeBatchJobs: this.batchJobs.size,
408
- cacheHitRate: 0, // TODO: Implement hit rate tracking
409
- averageBatchSize: totalPendingVectors / Math.max(1, this.indexCache.size),
410
- averageProcessingTime: 0 // TODO: Implement timing tracking
411
- };
412
- }
413
-
414
- /**
415
- * Remove a vector from the index
416
- */
417
- removeVector(userAddress: string, vectorId: number): void {
418
- try {
419
- const cacheEntry = this.indexCache.get(userAddress);
420
- if (!cacheEntry?.index) {
421
- throw new Error(`No index found for user ${userAddress}`);
422
- }
423
-
424
- // Remove from pending vectors if exists
425
- cacheEntry.pendingVectors.delete(vectorId);
426
- cacheEntry.metadata.delete(vectorId);
427
-
428
- // Note: hnswlib-wasm doesn't support deletion, mark for rebuild
429
- cacheEntry.isDirty = true;
430
- cacheEntry.lastModified = new Date();
431
-
432
- console.log(`🗑️ Vector ${vectorId} removed from index`);
433
- } catch (error) {
434
- throw this.createVectorError('INDEX_ERROR', `Failed to remove vector: ${error}`, error);
435
- }
436
- }
437
-
438
- /**
439
- * Clear user index and cache
440
- */
441
- clearUserIndex(userAddress: string): void {
442
- this.indexCache.delete(userAddress);
443
- this.batchJobs.delete(userAddress);
444
- console.log(`🧹 Cleared index cache for user ${userAddress}`);
445
- }
446
-
447
- /**
448
- * Cleanup resources
449
- */
450
- destroy(): void {
451
- if (this.batchProcessor) {
452
- clearInterval(this.batchProcessor);
453
- }
454
- if (this.cacheCleanup) {
455
- clearInterval(this.cacheCleanup);
456
- }
457
- this.indexCache.clear();
458
- this.batchJobs.clear();
459
- console.log('🛑 HnswWasmService destroyed');
460
- }
461
-
462
- // ==================== PRIVATE METHODS ====================
463
-
464
- private async createCacheEntry(dimensions: number): Promise<IndexCacheEntry> {
465
- await this.ensureInitialized();
466
-
467
- const index = new this.hnswlib!.HierarchicalNSW(this.indexConfig.spaceType, dimensions, '');
468
- index.initIndex(this.indexConfig.maxElements, this.indexConfig.m, this.indexConfig.efConstruction, this.indexConfig.randomSeed);
469
-
470
- return {
471
- index,
472
- lastModified: new Date(),
473
- pendingVectors: new Map(),
474
- isDirty: false,
475
- version: 1,
476
- metadata: new Map(),
477
- dimensions
478
- };
479
- }
480
-
481
- private scheduleBatchJob(userAddress: string, vectorId: number, vector: number[]): void {
482
- let batchJob = this.batchJobs.get(userAddress);
483
- if (!batchJob) {
484
- batchJob = {
485
- userAddress,
486
- vectors: new Map(),
487
- scheduledAt: new Date()
488
- };
489
- this.batchJobs.set(userAddress, batchJob);
490
- }
491
-
492
- batchJob.vectors.set(vectorId, vector);
493
- }
494
-
495
- private startBatchProcessor(): void {
496
- this.batchProcessor = setInterval(async () => {
497
- await this.processBatchJobs();
498
- }, this.config.batchDelayMs);
499
- }
500
-
501
- private startCacheCleanup(): void {
502
- this.cacheCleanup = setInterval(() => {
503
- this.cleanupCache();
504
- }, 5 * 60 * 1000); // Every 5 minutes
505
- }
506
-
507
/**
 * One pass of the batch processor.
 *
 * A job is due when it has waited at least `config.batchDelayMs`, or when the
 * user's cache entry holds `config.maxBatchSize` or more pending vectors.
 * Due jobs are flushed one after another; a failing flush is logged and does
 * not prevent the remaining users from being processed.
 */
private async processBatchJobs(): Promise<void> {
  const passStartedAt = Date.now();
  const dueUsers: string[] = [];

  // Select first, flush afterwards, so the job map is not mutated while it is iterated.
  for (const [userAddress, job] of this.batchJobs.entries()) {
    const waitedMs = passStartedAt - job.scheduledAt.getTime();
    const cached = this.indexCache.get(userAddress);

    const delayElapsed = waitedMs >= this.config.batchDelayMs;
    const batchIsFull = !!cached && cached.pendingVectors.size >= this.config.maxBatchSize;

    if (delayElapsed || batchIsFull) {
      dueUsers.push(userAddress);
    }
  }

  for (const userAddress of dueUsers) {
    try {
      await this.flushPendingVectors(userAddress);
    } catch (flushError) {
      console.error(`❌ Error processing batch job for user ${userAddress}:`, flushError);
    }
  }
}
529
-
530
/**
 * Insert the user's pending vectors into the HNSW index and persist the index.
 *
 * Changes from the previous implementation:
 * - Vectors are inserted under their caller-supplied ids when the loaded
 *   binding exposes `addPoints`; before, the collected labels were never used
 *   and `addItems` was called without them. `addItems` remains the fallback.
 * - Only the vectors that were part of this flush are removed afterwards.
 *   Vectors queued while the upload was awaited stay pending (and keep their
 *   batch job) instead of being wiped by a blanket `clear()`.
 *
 * On failure nothing is removed from the pending queue, so a later flush
 * retries the same vectors.
 *
 * @param userAddress - Owner of the cache entry to flush.
 * @throws Re-throws any error from index creation, insertion or upload after logging it.
 */
private async flushPendingVectors(userAddress: string): Promise<void> {
  await this.ensureInitialized();

  const cacheEntry = this.indexCache.get(userAddress);
  if (!cacheEntry || cacheEntry.pendingVectors.size === 0) {
    return;
  }

  console.log(`⚡ Flushing ${cacheEntry.pendingVectors.size} pending vectors for user ${userAddress}`);

  try {
    // Create index if it doesn't exist (placeholder entries carry no index yet)
    if (!cacheEntry.index) {
      const newEntry = await this.createCacheEntry(cacheEntry.dimensions);
      cacheEntry.index = newEntry.index;
    }

    // Snapshot the queue: anything added after this point belongs to the next flush.
    const batch = Array.from(cacheEntry.pendingVectors.entries());
    const labels = batch.map(([vectorId]) => vectorId);
    // Convert to Float32Array[] as required by hnswlib-wasm
    const float32Vectors = batch.map(([, vector]) =>
      vector instanceof Float32Array ? vector : new Float32Array(vector)
    );

    if (float32Vectors.length > 0) {
      if (typeof cacheEntry.index.addPoints === 'function') {
        cacheEntry.index.addPoints(float32Vectors, labels, true);
      } else {
        // Fallback keeps the previous behaviour (labels chosen by the library).
        cacheEntry.index.addItems(float32Vectors, true);
      }
    }

    // Save to Walrus
    await this.saveIndexToWalrus(cacheEntry.index, userAddress);

    // Drop only what was flushed; a vector re-queued under the same id during
    // the upload is a different array instance and therefore stays pending.
    const job = this.batchJobs.get(userAddress);
    for (const [vectorId, vector] of batch) {
      if (cacheEntry.pendingVectors.get(vectorId) === vector) {
        cacheEntry.pendingVectors.delete(vectorId);
      }
      if (job && job.vectors.get(vectorId) === vector) {
        job.vectors.delete(vectorId);
      }
    }

    const nothingLeft = cacheEntry.pendingVectors.size === 0;
    cacheEntry.isDirty = !nothingLeft;
    cacheEntry.lastModified = new Date();
    cacheEntry.version++;

    // Remove batch job only when the queue is really empty
    if (nothingLeft) {
      this.batchJobs.delete(userAddress);
    }

    console.log(`✅ Successfully flushed vectors for user ${userAddress} (version ${cacheEntry.version})`);
  } catch (error) {
    console.error(`❌ Error flushing vectors for user ${userAddress}:`, error);
    throw error;
  }
}
583
-
584
/**
 * Serialise an index through the Emscripten filesystem and upload the bytes
 * with the storage service.
 *
 * @param index - The HNSW index to persist.
 * @param userAddress - Used in the temporary file name and stored in the upload metadata.
 * @returns The blob id reported by the storage service.
 * @throws Re-throws any serialisation or upload error after logging it.
 */
private async saveIndexToWalrus(index: HierarchicalNSW, userAddress: string): Promise<string> {
  await this.ensureInitialized();

  try {
    // Write the index to a uniquely named file, then sync the filesystem.
    const fileName = `index_${userAddress}_${Date.now()}`;
    await index.writeIndex(fileName);

    const fsManager = this.hnswlib!.EmscriptenFileSystemManager;
    await fsManager.syncFS(false, () => {});

    // readFile is reached through an `any` cast, as in the rest of this service.
    const indexBytes = (fsManager as any).readFile(fileName) as Uint8Array;

    const uploadMetadata: MemoryMetadata = {
      contentType: 'application/hnsw-index-wasm',
      contentSize: indexBytes.byteLength,
      contentHash: '', // TODO: Calculate hash
      category: 'vector-index',
      topic: 'hnsw-wasm',
      importance: 8,
      embeddingDimension: this.indexConfig.dimension,
      createdTimestamp: Date.now(),
      customMetadata: {
        'user-address': userAddress,
        'version': '1.0',
        'wasm': 'true'
      }
    };

    const { blobId } = await this.storageService.upload(indexBytes, uploadMetadata);

    console.log(`💾 Index saved to Walrus: ${blobId}`);
    return blobId;
  } catch (failure) {
    console.error('❌ Failed to save index to Walrus:', failure);
    throw failure;
  }
}
624
-
625
/**
 * Keep the search hits whose stored metadata satisfies `filter`.
 *
 * Hits with no (or falsy) metadata are always kept; the predicate is only
 * consulted when metadata exists for the id.
 *
 * @param ids - Result ids, parallel to `distances`.
 * @param distances - Result distances, parallel to `ids`.
 * @param metadata - Metadata lookup keyed by vector id.
 * @param filter - Predicate applied to a hit's metadata.
 * @returns The surviving ids and their distances, in the original order.
 */
private applyMetadataFilter(
  ids: number[],
  distances: number[],
  metadata: Map<number, any>,
  filter: (metadata: any) => boolean
): { ids: number[]; distances: number[] } {
  const kept: { ids: number[]; distances: number[] } = { ids: [], distances: [] };

  for (const [position, vectorId] of ids.entries()) {
    const stored = metadata.get(vectorId);
    if (stored && !filter(stored)) {
      continue;
    }
    kept.ids.push(vectorId);
    kept.distances.push(distances[position]);
  }

  return kept;
}
646
-
647
/**
 * Drop every cache entry whose `lastModified` is older than `config.cacheTtlMs`.
 */
private cleanupCache(): void {
  const sweepTime = Date.now();

  for (const [userAddress, entry] of this.indexCache.entries()) {
    const idleMs = sweepTime - entry.lastModified.getTime();
    if (idleMs <= this.config.cacheTtlMs) {
      continue;
    }
    console.debug(`🧹 Removing stale cache entry for user ${userAddress}`);
    this.indexCache.delete(userAddress);
  }
}
656
-
657
/**
 * Check that a vector is usable with this service's index configuration.
 *
 * @param vector - Candidate vector.
 * @throws Error if it is not a non-empty array, contains a non-finite or
 *         non-number component, or its length differs from `indexConfig.dimension`.
 */
private validateVector(vector: number[]): void {
  const isNonEmptyArray = Array.isArray(vector) && vector.length !== 0;
  if (!isNonEmptyArray) {
    throw new Error('Vector must be a non-empty array');
  }

  const allFinite = vector.every(component => typeof component === 'number' && isFinite(component));
  if (!allFinite) {
    throw new Error('Vector must contain only finite numbers');
  }

  const expectedDimension = this.indexConfig.dimension;
  if (vector.length !== expectedDimension) {
    throw new Error(`Vector dimension mismatch: expected ${this.indexConfig.dimension}, got ${vector.length}`);
  }
}
670
-
671
/**
 * Create an Error decorated with the VectorError `code` and `details` fields.
 *
 * @param code - Error category.
 * @param message - Human-readable message.
 * @param details - Optional payload (typically the underlying error).
 * @returns The decorated error; the caller is responsible for throwing it.
 */
private createVectorError(code: VectorError['code'], message: string, details?: any): VectorError {
  const vectorError = new Error(message) as VectorError;
  return Object.assign(vectorError, { code, details });
}
677
- }
678
-
679
- export default HnswWasmService;
1
+ /**
2
+ * HnswWasmService - Browser-Compatible HNSW Vector Indexing
3
+ *
4
+ * Provides browser-compatible Hierarchical Navigable Small World (HNSW) vector indexing
5
+ * using hnswlib-wasm with IndexedDB persistence. Replaces Node.js-only hnswlib-node.
6
+ *
7
+ * Key Features:
8
+ * - ✅ Runs in browsers (WebAssembly)
9
+ * - ✅ IndexedDB persistence (no filesystem needed)
10
+ * - ✅ Intelligent batching and caching
11
+ * - ✅ Walrus storage integration
12
+ * - ✅ Near-native performance via WASM
13
+ * - ✅ Safe for Node.js/SSR (uses dynamic import)
14
+ * - ✅ LRU cache with memory limits to prevent OOM
15
+ */
16
+
17
// hnswlib-wasm is pulled in with a dynamic import to avoid bundling issues in
// Node.js, so its types are declared locally instead of being imported statically.
type HierarchicalNSW = any;
type HnswlibModule = any;

/**
 * Load the hnswlib-wasm bundle on demand (browser only) and return the
 * library object produced by its `loadHnswlib()` entry point.
 */
async function loadHnswlibDynamic(): Promise<HnswlibModule> {
  const wasmBundle = await import('hnswlib-wasm/dist/hnswlib.js');
  const library: HnswlibModule = await wasmBundle.loadHnswlib();
  return library;
}
29
+ import { StorageService, type MemoryMetadata } from '../services/StorageService';
30
+ import {
31
+ HNSWIndexConfig,
32
+ HNSWSearchResult,
33
+ HNSWSearchOptions,
34
+ BatchConfig,
35
+ BatchJob,
36
+ BatchStats,
37
+ VectorError
38
+ } from '../embedding/types';
39
+ import { LRUCache, estimateIndexCacheSize } from '../utils/LRUCache';
40
+
41
/**
 * Per-user state kept in the index cache.
 */
interface IndexCacheEntry {
  /** The HNSW index; placeholder entries created while queuing hold null until the first flush. */
  index: HierarchicalNSW;
  /** Dimensionality the entry was created with. */
  dimensions: number;
  /** Incremented after every successful flush. */
  version: number;
  /** True while the entry has changes that were not persisted yet. */
  isDirty: boolean;
  /** Time of the last queue, flush or removal touching this entry. */
  lastModified: Date;
  /** vectorId -> vector, queued but not yet inserted into the index. */
  pendingVectors: Map<number, number[]>;
  /** vectorId -> caller-supplied metadata. */
  metadata: Map<number, any>;
  /** Cached vectors for serialization - only store if needed (vectorId -> vector). */
  vectors: Map<number, number[]>;
}
52
+
53
/**
 * Descriptive record for an HNSW index: its construction parameters plus
 * version and timestamps.
 */
interface IndexMetadata {
  /** Vector dimensionality. */
  dimension: number;
  /** Name of the distance space. */
  spaceType: string;
  /** Capacity of the index. */
  maxElements: number;
  /** HNSW `M` parameter. */
  m: number;
  /** HNSW `efConstruction` parameter. */
  efConstruction: number;
  /** Version counter of the index. */
  version: number;
  /** Creation time. */
  createdAt: Date;
  /** Time of the most recent update. */
  lastUpdated: Date;
}
63
+
64
// Defaults for the in-memory index cache (each can be overridden through the
// constructor's memoryConfig argument, except the cleanup interval).
/** Maximum number of user indexes kept in memory. */
const DEFAULT_MAX_CACHED_INDEXES = 5;
/** Idle indexes expire after 10 minutes. */
const DEFAULT_INDEX_TTL_MS = 600_000;
/** Memory ceiling for the index cache, in megabytes. */
const DEFAULT_MAX_MEMORY_MB = 512;
/** The cache checks for expired entries once per minute. */
const DEFAULT_CLEANUP_INTERVAL_MS = 60_000;
69
+
70
/**
 * Browser-compatible HNSW vector indexing service using WebAssembly.
 * Drop-in replacement for HnswIndexService with identical API.
 *
 * Memory Management:
 * - LRU cache limits number of indexes in memory (default: 5)
 * - TTL-based expiration for idle indexes (default: 10 minutes)
 * - Optional memory limit (default: 512MB)
 * - Automatic cleanup of expired/evicted indexes
 *
 * Batching:
 * - Vectors are queued per user and flushed by an interval timer, when the
 *   queue reaches `maxBatchSize`, before a search, or on `forceFlush`.
 * - Every flush also uploads the serialised index through the StorageService.
 */
export class HnswWasmService {
  private hnswlib: HnswlibModule | null = null;
  private readonly indexCache: LRUCache<IndexCacheEntry>;
  private readonly batchJobs = new Map<string, BatchJob>();
  private readonly config: Required<BatchConfig>;
  private readonly indexConfig: Required<HNSWIndexConfig>;
  private batchProcessor?: ReturnType<typeof setInterval>;
  private initPromise: Promise<void> | null = null;
  /** Set by destroy() so that a late-finishing initialize() does not start the batch timer. */
  private destroyed = false;

  // Memory management settings
  private readonly maxCachedIndexes: number;
  private readonly indexTtlMs: number;
  private readonly maxMemoryBytes: number;

  /**
   * @param storageService - Used to upload and retrieve serialised indexes.
   * @param indexConfig - HNSW parameters; missing values fall back to the defaults below.
   * @param batchConfig - Batching parameters; missing values fall back to the defaults below.
   * @param memoryConfig - Limits for the in-memory index cache.
   */
  constructor(
    private storageService: StorageService,
    indexConfig: Partial<HNSWIndexConfig> = {},
    batchConfig: Partial<BatchConfig> = {},
    memoryConfig?: {
      maxCachedIndexes?: number;
      indexTtlMs?: number;
      maxMemoryMB?: number;
    }
  ) {
    // Default HNSW configuration (matching HnswIndexService).
    // `||` is kept where 0 is not a usable value; the seed uses `??` because
    // 0 is a legitimate seed and must not be replaced by the default.
    this.indexConfig = {
      dimension: indexConfig.dimension || 3072,
      maxElements: indexConfig.maxElements || 10000,
      efConstruction: indexConfig.efConstruction || 200,
      m: indexConfig.m || 16,
      randomSeed: indexConfig.randomSeed ?? 42,
      spaceType: indexConfig.spaceType || 'cosine'
    };

    // Default batch configuration
    this.config = {
      maxBatchSize: batchConfig.maxBatchSize || 50,
      batchDelayMs: batchConfig.batchDelayMs || 5000,
      maxCacheSize: batchConfig.maxCacheSize || 100,
      cacheTtlMs: batchConfig.cacheTtlMs || 30 * 60 * 1000 // 30 minutes
    };

    // Memory management configuration
    this.maxCachedIndexes = memoryConfig?.maxCachedIndexes ?? DEFAULT_MAX_CACHED_INDEXES;
    this.indexTtlMs = memoryConfig?.indexTtlMs ?? DEFAULT_INDEX_TTL_MS;
    this.maxMemoryBytes = (memoryConfig?.maxMemoryMB ?? DEFAULT_MAX_MEMORY_MB) * 1024 * 1024;

    // Initialize LRU cache with memory limits
    this.indexCache = new LRUCache<IndexCacheEntry>({
      maxSize: this.maxCachedIndexes,
      ttlMs: this.indexTtlMs,
      cleanupIntervalMs: DEFAULT_CLEANUP_INTERVAL_MS,
      maxMemoryBytes: this.maxMemoryBytes,
      sizeEstimator: (entry) => estimateIndexCacheSize({
        vectors: entry.vectors || new Map(),
        metadata: entry.metadata,
        pendingVectors: entry.pendingVectors,
      }),
      onEvict: (userAddress, entry, reason) => {
        console.log(`🧹 [HnswWasmService] Evicting index for ${userAddress} (reason: ${reason})`);
        // Dispose WASM resources (best effort: a failing free() must not break eviction)
        if (entry.index) {
          try {
            if (typeof entry.index.free === 'function') {
              entry.index.free();
            }
          } catch (e) {
            // Ignore cleanup errors
          }
        }
        // Remove associated batch job
        // NOTE(review): vectors still pending in the evicted entry are dropped with it.
        this.batchJobs.delete(userAddress);
      },
    });

    console.log(`✅ HnswWasmService initialized with memory limits:`);
    console.log(`   Max indexes: ${this.maxCachedIndexes}, TTL: ${this.indexTtlMs / 1000}s, Max memory: ${this.maxMemoryBytes / 1024 / 1024}MB`);

    // Initialize WASM library asynchronously
    this.initPromise = this.initialize();
    // A failed load (e.g. under Node.js/SSR) is already logged by initialize()
    // and is re-thrown to every caller through ensureInitialized(). Attaching a
    // handler here only prevents an unhandled-rejection report when no method
    // is ever awaited.
    this.initPromise.catch(() => undefined);
  }

  /**
   * Load hnswlib-wasm and, unless the service was destroyed meanwhile, start
   * the batch processor.
   *
   * @throws Re-throws the load error after logging it.
   */
  private async initialize(): Promise<void> {
    try {
      console.log('🔧 Loading hnswlib-wasm...');
      this.hnswlib = await loadHnswlibDynamic();
      console.log('✅ hnswlib-wasm loaded successfully');

      // Start batch processor (cache cleanup is handled by LRUCache)
      if (!this.destroyed) {
        this.startBatchProcessor();
      }
    } catch (error) {
      console.error('❌ Failed to load hnswlib-wasm:', error);
      throw error;
    }
  }

  /**
   * Wait for the WASM library to finish loading.
   *
   * @throws The load error, or 'hnswlib-wasm not initialized' if no library is available.
   */
  private async ensureInitialized(): Promise<void> {
    if (this.initPromise) {
      await this.initPromise;
    }
    if (!this.hnswlib) {
      throw new Error('hnswlib-wasm not initialized');
    }
  }

  /**
   * Create a new, empty HNSW index for a user and cache it.
   *
   * @param userAddress - Cache key for the new index.
   * @param options - Overrides for the service-level index configuration.
   * @returns The index and a `serialized` field. `serialized` is currently an
   *          empty placeholder; the written index lives in the synced filesystem.
   * @throws VectorError with code 'INDEX_ERROR'.
   */
  async createIndex(
    userAddress: string,
    options: Partial<HNSWIndexConfig> = {}
  ): Promise<{ index: HierarchicalNSW; serialized: Uint8Array }> {
    await this.ensureInitialized();

    try {
      const config = { ...this.indexConfig, ...options };

      console.log(`🔨 Creating new HNSW index for user ${userAddress}`);
      console.log(`   Dimensions: ${config.dimension}, M: ${config.m}, efConstruction: ${config.efConstruction}`);
      console.log(`   Cache stats: ${this.indexCache.size}/${this.maxCachedIndexes} indexes, ${(this.indexCache.memoryBytes / 1024 / 1024).toFixed(1)}MB`);

      // Create a new index using WASM (constructor takes: spaceName, numDimensions, autoSaveFilename)
      const index = new this.hnswlib!.HierarchicalNSW(config.spaceType, config.dimension, '');
      index.initIndex(config.maxElements, config.m, config.efConstruction, config.randomSeed);

      // Create cache entry (LRU cache handles eviction automatically)
      this.indexCache.set(userAddress, {
        index,
        lastModified: new Date(),
        pendingVectors: new Map(),
        isDirty: false,
        version: 1,
        metadata: new Map(),
        dimensions: config.dimension,
        vectors: new Map(),
      });

      // Serialize the empty index
      const indexName = `index_${userAddress}_${Date.now()}`;
      await index.writeIndex(indexName);

      // Sync to IndexedDB (persist the index) - syncFS requires a callback
      await this.hnswlib!.EmscriptenFileSystemManager.syncFS(false, () => {});

      // Placeholder - actual data is in IndexedDB
      const serialized = new Uint8Array(0);

      console.log(`✅ Index created successfully for ${userAddress}`);

      return { index, serialized };
    } catch (error) {
      throw this.createVectorError('INDEX_ERROR', `Failed to create index: ${error}`, error);
    }
  }

  /**
   * Add vector to index with batching (main entry point).
   *
   * The vector is only queued here. It reaches the index on the next flush,
   * which is triggered immediately (asynchronously) once the queue holds
   * `maxBatchSize` vectors.
   *
   * @param userAddress - Owner of the index.
   * @param vectorId - Id to store the vector under.
   * @param vector - Must pass validateVector and match the entry's dimensions.
   * @param metadata - Optional metadata stored for the id (ignored when falsy).
   * @throws VectorError with code 'INDEX_ERROR'.
   */
  addVectorToIndexBatched(
    userAddress: string,
    vectorId: number,
    vector: number[],
    metadata?: any
  ): void {
    try {
      // Validate input
      this.validateVector(vector);

      // Get or create cache entry (LRU cache will evict old entries if needed)
      let cacheEntry = this.indexCache.get(userAddress);
      if (!cacheEntry) {
        console.warn(`No cached index found for user ${userAddress}, will create on first flush`);
        // Create placeholder entry - actual index created on first flush
        cacheEntry = {
          index: null as any, // Will be created on flush
          lastModified: new Date(),
          pendingVectors: new Map(),
          isDirty: true,
          version: 1,
          metadata: new Map(),
          dimensions: vector.length,
          vectors: new Map(),
        };
        this.indexCache.set(userAddress, cacheEntry);
      }

      // Validate vector dimensions
      if (cacheEntry.dimensions && vector.length !== cacheEntry.dimensions) {
        throw new Error(`Vector dimension mismatch: expected ${cacheEntry.dimensions}, got ${vector.length}`);
      }

      // Add to pending queue
      cacheEntry.pendingVectors.set(vectorId, vector);
      if (metadata) {
        cacheEntry.metadata.set(vectorId, metadata);
      }
      // Also cache the vector for serialization
      cacheEntry.vectors.set(vectorId, vector);
      cacheEntry.isDirty = true;
      cacheEntry.lastModified = new Date();

      // Schedule or update batch job
      this.scheduleBatchJob(userAddress, vectorId, vector);

      console.debug(`📊 Vector ${vectorId} queued for batch processing. Pending: ${cacheEntry.pendingVectors.size}`);

      // Process immediately if batch size limit reached
      if (cacheEntry.pendingVectors.size >= this.config.maxBatchSize) {
        console.log(`⚡ Batch size limit reached (${this.config.maxBatchSize}), processing immediately`);
        setTimeout(() => {
          // Fire-and-forget: the rejection must be consumed here, nobody awaits this flush.
          // The vectors stay queued on failure and are retried by the batch processor.
          this.flushPendingVectors(userAddress).catch((flushError) => {
            console.error(`❌ Error processing batch job for user ${userAddress}:`, flushError);
          });
        }, 0);
      }
    } catch (error) {
      throw this.createVectorError('INDEX_ERROR', `Failed to queue vector: ${error}`, error);
    }
  }

  /**
   * Search vectors in the index (including pending vectors).
   *
   * Pending vectors are flushed before the index is looked up, so a user whose
   * index is only created on the first flush can be searched as well.
   *
   * @param userAddress - Owner of the index.
   * @param queryVector - Must pass validateVector.
   * @param options - `k` (default 10), `efSearch` (default 50) and an optional `filter`.
   * @returns Ids, distances and derived similarities of the hits.
   * @throws VectorError with code 'SEARCH_ERROR'.
   */
  async searchVectors(
    userAddress: string,
    queryVector: number[],
    options: HNSWSearchOptions = {}
  ): Promise<HNSWSearchResult> {
    await this.ensureInitialized();

    try {
      this.validateVector(queryVector);

      let cacheEntry = this.indexCache.get(userAddress);

      // If there are pending vectors, flush them first
      if (cacheEntry && cacheEntry.pendingVectors.size > 0) {
        console.log(`⏳ Flushing ${cacheEntry.pendingVectors.size} pending vectors before search`);
        await this.flushPendingVectors(userAddress);
        // Re-read: the flush may have created the index (or the entry may be gone)
        cacheEntry = this.indexCache.get(userAddress);
      }

      if (!cacheEntry?.index) {
        throw new Error(`No index found for user ${userAddress}`);
      }

      const { k = 10, efSearch = 50, filter } = options;
      const searchIndex = cacheEntry.index;

      // Set search parameters
      searchIndex.setEfSearch(efSearch);

      // Perform search (convert to Float32Array if needed)
      const queryFloat32 = queryVector instanceof Float32Array
        ? queryVector
        : new Float32Array(queryVector);
      // NOTE(review): `filter` is handed to searchKnn (called with a label) and
      // afterwards to applyMetadataFilter (called with metadata). Confirm which
      // contract HNSWSearchOptions.filter is meant to have.
      const result = searchIndex.searchKnn(
        queryFloat32,
        k,
        filter && typeof filter === 'function' ? (filter as (label: number) => boolean) : undefined
      );

      // Apply metadata filter if provided (additional filtering)
      let filteredIds = result.neighbors;
      let filteredDistances = result.distances;

      if (filter && typeof filter === 'function') {
        const filtered = this.applyMetadataFilter(
          result.neighbors,
          result.distances,
          cacheEntry.metadata,
          filter as (metadata: any) => boolean
        );
        filteredIds = filtered.ids;
        filteredDistances = filtered.distances;
      }

      // Convert distances to similarities (for cosine distance)
      const similarities = this.indexConfig.spaceType === 'cosine'
        ? filteredDistances.map((dist: number) => 1 - dist)
        : filteredDistances.map((dist: number) => 1 / (1 + dist));

      console.log(`🔍 Search completed: ${filteredIds.length} results found`);

      return {
        ids: filteredIds,
        distances: filteredDistances,
        similarities
      };
    } catch (error) {
      throw this.createVectorError('SEARCH_ERROR', `Search failed: ${error}`, error);
    }
  }

  /**
   * Load index from Walrus storage and cache it for the user.
   *
   * @param blobId - Blob to retrieve through the StorageService.
   * @param userAddress - Cache key for the loaded index.
   * @returns The loaded index.
   * @throws VectorError with code 'STORAGE_ERROR'.
   */
  async loadIndex(blobId: string, userAddress: string): Promise<HierarchicalNSW> {
    await this.ensureInitialized();

    try {
      console.log(`📥 Loading HNSW index from Walrus: ${blobId}`);

      // Download index from Walrus
      const retrieveResult = await this.storageService.retrieve(blobId);
      const indexBuffer = retrieveResult.content;

      // Save to Emscripten virtual filesystem
      const indexName = `index_${userAddress}_${Date.now()}`;
      (this.hnswlib!.EmscriptenFileSystemManager as any).writeFile(indexName, indexBuffer);

      // Sync from IndexedDB (load the data into memory)
      await this.hnswlib!.EmscriptenFileSystemManager.syncFS(true, () => {});

      // Create and load index (constructor: spaceName, numDimensions, autoSaveFilename)
      const index = new this.hnswlib!.HierarchicalNSW(this.indexConfig.spaceType, this.indexConfig.dimension, '');
      await index.readIndex(indexName, this.indexConfig.maxElements);

      // Cache the loaded index
      this.indexCache.set(userAddress, {
        index,
        lastModified: new Date(),
        pendingVectors: new Map(),
        isDirty: false,
        version: 1,
        metadata: new Map(),
        dimensions: this.indexConfig.dimension,
        vectors: new Map(),
      });

      console.log(`✅ Index loaded successfully for ${userAddress}`);
      return index;
    } catch (error) {
      throw this.createVectorError('STORAGE_ERROR', `Failed to load index: ${error}`, error);
    }
  }

  /**
   * Save the user's cached index to Walrus storage.
   *
   * @returns The blob id of the uploaded index.
   * @throws VectorError with code 'STORAGE_ERROR' (also when no index is cached).
   */
  async saveIndex(userAddress: string): Promise<string> {
    await this.ensureInitialized();

    try {
      const cacheEntry = this.indexCache.get(userAddress);
      if (!cacheEntry?.index) {
        throw new Error(`No index found for user ${userAddress}`);
      }

      console.log(`💾 Saving index to Walrus for ${userAddress}`);

      return await this.saveIndexToWalrus(cacheEntry.index, userAddress);
    } catch (error) {
      throw this.createVectorError('STORAGE_ERROR', `Failed to save index: ${error}`, error);
    }
  }

  /**
   * Force flush all pending vectors for a user.
   */
  async forceFlush(userAddress: string): Promise<void> {
    await this.flushPendingVectors(userAddress);
  }

  /**
   * Get cache statistics.
   *
   * `cacheHitRate` and `averageProcessingTime` are not tracked and always 0.
   */
  getCacheStats(): BatchStats & {
    memoryUsageMB: number;
    maxMemoryMB: number;
    maxCachedIndexes: number;
  } {
    let totalPendingVectors = 0;

    for (const [, entry] of this.indexCache.entries()) {
      totalPendingVectors += entry.pendingVectors.size;
    }

    const lruStats = this.indexCache.getStats();

    return {
      totalUsers: this.indexCache.size,
      totalPendingVectors,
      activeBatchJobs: this.batchJobs.size,
      cacheHitRate: 0, // TODO: Implement hit rate tracking
      averageBatchSize: totalPendingVectors / Math.max(1, this.indexCache.size),
      averageProcessingTime: 0, // TODO: Implement timing tracking
      memoryUsageMB: lruStats.memoryBytes / 1024 / 1024,
      maxMemoryMB: this.maxMemoryBytes / 1024 / 1024,
      maxCachedIndexes: this.maxCachedIndexes,
    };
  }

  /**
   * Remove a vector from the user's entry.
   *
   * The id is removed from the pending queue, the metadata map, the vector
   * cache and the batch job. If the loaded index object exposes `markDelete`,
   * the id is additionally marked deleted in the graph (best effort: the id
   * may never have been flushed into the graph).
   *
   * @throws VectorError with code 'INDEX_ERROR' (also when no index is cached).
   */
  removeVector(userAddress: string, vectorId: number): void {
    try {
      const cacheEntry = this.indexCache.get(userAddress);
      if (!cacheEntry?.index) {
        throw new Error(`No index found for user ${userAddress}`);
      }

      // Remove from every per-entry collection, not just the pending queue
      cacheEntry.pendingVectors.delete(vectorId);
      cacheEntry.metadata.delete(vectorId);
      cacheEntry.vectors?.delete(vectorId);

      // Keep the batch job in step with the pending queue
      const job = this.batchJobs.get(userAddress);
      if (job) {
        job.vectors.delete(vectorId);
        if (cacheEntry.pendingVectors.size === 0) {
          this.batchJobs.delete(userAddress);
        }
      }

      if (typeof cacheEntry.index.markDelete === 'function') {
        try {
          cacheEntry.index.markDelete(vectorId);
        } catch (e) {
          // The id was not in the graph (e.g. it was still pending) - nothing to mark
        }
      }

      cacheEntry.isDirty = true;
      cacheEntry.lastModified = new Date();

      console.log(`🗑️ Vector ${vectorId} removed from index`);
    } catch (error) {
      throw this.createVectorError('INDEX_ERROR', `Failed to remove vector: ${error}`, error);
    }
  }

  /**
   * Clear user index and cache.
   */
  clearUserIndex(userAddress: string): void {
    this.indexCache.delete(userAddress);
    this.batchJobs.delete(userAddress);
    console.log(`🧹 Cleared index cache for user ${userAddress}`);
  }

  /**
   * Cleanup resources: stops the batch timer, destroys the cache and drops all
   * batch jobs. Safe to call before the WASM library finished loading.
   */
  destroy(): void {
    this.destroyed = true;
    if (this.batchProcessor) {
      clearInterval(this.batchProcessor);
      this.batchProcessor = undefined;
    }
    // LRU cache cleanup is handled internally, but we should destroy it
    this.indexCache.destroy();
    this.batchJobs.clear();
    console.log('🛑 HnswWasmService destroyed');
  }

  // ==================== PRIVATE METHODS ====================

  /**
   * Build a fresh cache entry with a newly initialised index of the given
   * dimensionality, using the service-level index configuration.
   */
  private async createCacheEntry(dimensions: number): Promise<IndexCacheEntry> {
    await this.ensureInitialized();

    const index = new this.hnswlib!.HierarchicalNSW(this.indexConfig.spaceType, dimensions, '');
    index.initIndex(this.indexConfig.maxElements, this.indexConfig.m, this.indexConfig.efConstruction, this.indexConfig.randomSeed);

    return {
      index,
      lastModified: new Date(),
      pendingVectors: new Map(),
      isDirty: false,
      version: 1,
      metadata: new Map(),
      dimensions,
      vectors: new Map(),
    };
  }

  /**
   * Record a vector in the user's batch job, creating the job on first use.
   */
  private scheduleBatchJob(userAddress: string, vectorId: number, vector: number[]): void {
    let batchJob = this.batchJobs.get(userAddress);
    if (!batchJob) {
      batchJob = {
        userAddress,
        vectors: new Map(),
        scheduledAt: new Date()
      };
      this.batchJobs.set(userAddress, batchJob);
    }

    batchJob.vectors.set(vectorId, vector);
  }

  /**
   * Start the interval timer that runs processBatchJobs every `batchDelayMs`.
   * Does nothing if the timer is already running.
   */
  private startBatchProcessor(): void {
    if (this.batchProcessor) {
      return;
    }
    this.batchProcessor = setInterval(() => {
      // The timer ignores the returned promise, so rejections are consumed here.
      this.processBatchJobs().catch((error) => {
        console.error('❌ Batch processor run failed:', error);
      });
    }, this.config.batchDelayMs);
  }

  // Note: Cache cleanup is now handled by LRUCache internally

  /**
   * One pass of the batch processor: flush every job that has waited at least
   * `batchDelayMs` or whose entry holds `maxBatchSize` or more pending vectors.
   * A failing flush is logged and does not stop the remaining users.
   */
  private async processBatchJobs(): Promise<void> {
    const now = Date.now();
    const jobsToProcess: string[] = [];

    for (const [userAddress, job] of this.batchJobs.entries()) {
      const timeSinceScheduled = now - job.scheduledAt.getTime();
      const cacheEntry = this.indexCache.get(userAddress);

      if (timeSinceScheduled >= this.config.batchDelayMs ||
          (cacheEntry && cacheEntry.pendingVectors.size >= this.config.maxBatchSize)) {
        jobsToProcess.push(userAddress);
      }
    }

    for (const userAddress of jobsToProcess) {
      try {
        await this.flushPendingVectors(userAddress);
      } catch (error) {
        console.error(`❌ Error processing batch job for user ${userAddress}:`, error);
      }
    }
  }

  /**
   * Insert the user's pending vectors into the HNSW index and persist the index.
   *
   * - Vectors are inserted under their caller-supplied ids when the loaded
   *   binding exposes `addPoints`; `addItems` (library-chosen labels) remains
   *   the fallback.
   * - Only the vectors that were part of this flush are removed afterwards, so
   *   vectors queued while the upload was awaited stay pending.
   * - On failure nothing is removed, so a later flush retries the same vectors.
   *
   * @throws Re-throws any error from index creation, insertion or upload after logging it.
   */
  private async flushPendingVectors(userAddress: string): Promise<void> {
    await this.ensureInitialized();

    const cacheEntry = this.indexCache.get(userAddress);
    if (!cacheEntry || cacheEntry.pendingVectors.size === 0) {
      return;
    }

    console.log(`⚡ Flushing ${cacheEntry.pendingVectors.size} pending vectors for user ${userAddress}`);

    try {
      // Create index if it doesn't exist (placeholder entries carry no index yet)
      if (!cacheEntry.index) {
        const newEntry = await this.createCacheEntry(cacheEntry.dimensions);
        cacheEntry.index = newEntry.index;
      }

      // Snapshot the queue: anything added after this point belongs to the next flush.
      const batch = Array.from(cacheEntry.pendingVectors.entries());
      const labels = batch.map(([vectorId]) => vectorId);
      // Convert to Float32Array[] as required by hnswlib-wasm
      const float32Vectors = batch.map(([, vector]) =>
        vector instanceof Float32Array ? vector : new Float32Array(vector)
      );

      if (float32Vectors.length > 0) {
        if (typeof cacheEntry.index.addPoints === 'function') {
          cacheEntry.index.addPoints(float32Vectors, labels, true);
        } else {
          // Fallback keeps the previous behaviour (labels chosen by the library).
          cacheEntry.index.addItems(float32Vectors, true);
        }
      }

      // Save to Walrus
      await this.saveIndexToWalrus(cacheEntry.index, userAddress);

      // Drop only what was flushed; a vector re-queued under the same id during
      // the upload is a different array instance and therefore stays pending.
      const job = this.batchJobs.get(userAddress);
      for (const [vectorId, vector] of batch) {
        if (cacheEntry.pendingVectors.get(vectorId) === vector) {
          cacheEntry.pendingVectors.delete(vectorId);
        }
        if (job && job.vectors.get(vectorId) === vector) {
          job.vectors.delete(vectorId);
        }
      }

      const nothingLeft = cacheEntry.pendingVectors.size === 0;
      cacheEntry.isDirty = !nothingLeft;
      cacheEntry.lastModified = new Date();
      cacheEntry.version++;

      // Remove batch job only when the queue is really empty
      if (nothingLeft) {
        this.batchJobs.delete(userAddress);
      }

      console.log(`✅ Successfully flushed vectors for user ${userAddress} (version ${cacheEntry.version})`);
    } catch (error) {
      console.error(`❌ Error flushing vectors for user ${userAddress}:`, error);
      throw error;
    }
  }

  /**
   * Serialise an index through the Emscripten filesystem and upload the bytes
   * with the StorageService.
   *
   * @returns The blob id reported by the storage service.
   * @throws Re-throws any serialisation or upload error after logging it.
   */
  private async saveIndexToWalrus(index: HierarchicalNSW, userAddress: string): Promise<string> {
    await this.ensureInitialized();

    try {
      // Serialize index to Emscripten filesystem
      const indexName = `index_${userAddress}_${Date.now()}`;
      await index.writeIndex(indexName);

      // Sync to IndexedDB
      await this.hnswlib!.EmscriptenFileSystemManager.syncFS(false, () => {});

      // Read serialized data from filesystem
      const serialized = (this.hnswlib!.EmscriptenFileSystemManager as any).readFile(indexName) as Uint8Array;

      // Upload to Walrus via StorageService
      const metadata: MemoryMetadata = {
        contentType: 'application/hnsw-index-wasm',
        contentSize: serialized.byteLength,
        contentHash: '', // TODO: Calculate hash
        category: 'vector-index',
        topic: 'hnsw-wasm',
        importance: 8,
        embeddingDimension: this.indexConfig.dimension,
        createdTimestamp: Date.now(),
        customMetadata: {
          'user-address': userAddress,
          'version': '1.0',
          'wasm': 'true'
        }
      };

      const result = await this.storageService.upload(serialized, metadata);

      console.log(`💾 Index saved to Walrus: ${result.blobId}`);
      return result.blobId;
    } catch (error) {
      console.error('❌ Failed to save index to Walrus:', error);
      throw error;
    }
  }

  /**
   * Keep the hits whose stored metadata satisfies `filter`. Hits without
   * stored metadata are always kept.
   *
   * @returns The surviving ids and their distances, in the original order.
   */
  private applyMetadataFilter(
    ids: number[],
    distances: number[],
    metadata: Map<number, any>,
    filter: (metadata: any) => boolean
  ): { ids: number[]; distances: number[] } {
    const filteredIds: number[] = [];
    const filteredDistances: number[] = [];

    for (let i = 0; i < ids.length; i++) {
      const vectorId = ids[i];
      const vectorMetadata = metadata.get(vectorId);

      if (!vectorMetadata || filter(vectorMetadata)) {
        filteredIds.push(vectorId);
        filteredDistances.push(distances[i]);
      }
    }

    return { ids: filteredIds, distances: filteredDistances };
  }

  // Note: cleanupCache is now handled by LRUCache internally

  /**
   * Check that a vector is a non-empty array of finite numbers whose length
   * equals `indexConfig.dimension`.
   *
   * @throws Error describing the first violated rule.
   */
  private validateVector(vector: number[]): void {
    if (!Array.isArray(vector) || vector.length === 0) {
      throw new Error('Vector must be a non-empty array');
    }

    if (vector.some(v => typeof v !== 'number' || !isFinite(v))) {
      throw new Error('Vector must contain only finite numbers');
    }

    if (vector.length !== this.indexConfig.dimension) {
      throw new Error(`Vector dimension mismatch: expected ${this.indexConfig.dimension}, got ${vector.length}`);
    }
  }

  /**
   * Create an Error decorated with the VectorError `code` and `details` fields.
   * The caller is responsible for throwing it.
   */
  private createVectorError(code: VectorError['code'], message: string, details?: any): VectorError {
    const error = new Error(message) as VectorError;
    error.code = code;
    error.details = details;
    return error;
  }
}
730
+
731
+ export default HnswWasmService;