@cmdoss/memwal-sdk 0.6.2 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (247) hide show
  1. package/ARCHITECTURE.md +547 -547
  2. package/BENCHMARKS.md +238 -238
  3. package/README.md +310 -181
  4. package/dist/ai-sdk/tools.d.ts +2 -2
  5. package/dist/ai-sdk/tools.js +2 -2
  6. package/dist/client/ClientMemoryManager.js +2 -2
  7. package/dist/client/ClientMemoryManager.js.map +1 -1
  8. package/dist/client/PersonalDataWallet.d.ts.map +1 -1
  9. package/dist/client/SimplePDWClient.d.ts +29 -1
  10. package/dist/client/SimplePDWClient.d.ts.map +1 -1
  11. package/dist/client/SimplePDWClient.js +45 -13
  12. package/dist/client/SimplePDWClient.js.map +1 -1
  13. package/dist/client/namespaces/EmbeddingsNamespace.d.ts +1 -1
  14. package/dist/client/namespaces/EmbeddingsNamespace.js +1 -1
  15. package/dist/client/namespaces/MemoryNamespace.d.ts +31 -0
  16. package/dist/client/namespaces/MemoryNamespace.d.ts.map +1 -1
  17. package/dist/client/namespaces/MemoryNamespace.js +272 -39
  18. package/dist/client/namespaces/MemoryNamespace.js.map +1 -1
  19. package/dist/client/namespaces/consolidated/AINamespace.d.ts +2 -2
  20. package/dist/client/namespaces/consolidated/AINamespace.js +2 -2
  21. package/dist/client/namespaces/consolidated/BlockchainNamespace.d.ts +12 -2
  22. package/dist/client/namespaces/consolidated/BlockchainNamespace.d.ts.map +1 -1
  23. package/dist/client/namespaces/consolidated/BlockchainNamespace.js +62 -4
  24. package/dist/client/namespaces/consolidated/BlockchainNamespace.js.map +1 -1
  25. package/dist/client/namespaces/consolidated/StorageNamespace.d.ts +67 -2
  26. package/dist/client/namespaces/consolidated/StorageNamespace.d.ts.map +1 -1
  27. package/dist/client/namespaces/consolidated/StorageNamespace.js +549 -16
  28. package/dist/client/namespaces/consolidated/StorageNamespace.js.map +1 -1
  29. package/dist/config/ConfigurationHelper.js +61 -61
  30. package/dist/config/defaults.js +2 -2
  31. package/dist/config/defaults.js.map +1 -1
  32. package/dist/graph/GraphService.js +21 -21
  33. package/dist/graph/GraphService.js.map +1 -1
  34. package/dist/index.d.ts +3 -1
  35. package/dist/index.d.ts.map +1 -1
  36. package/dist/index.js +3 -1
  37. package/dist/index.js.map +1 -1
  38. package/dist/infrastructure/seal/EncryptionService.d.ts +9 -5
  39. package/dist/infrastructure/seal/EncryptionService.d.ts.map +1 -1
  40. package/dist/infrastructure/seal/EncryptionService.js +37 -15
  41. package/dist/infrastructure/seal/EncryptionService.js.map +1 -1
  42. package/dist/infrastructure/seal/SealService.d.ts +13 -5
  43. package/dist/infrastructure/seal/SealService.d.ts.map +1 -1
  44. package/dist/infrastructure/seal/SealService.js +36 -34
  45. package/dist/infrastructure/seal/SealService.js.map +1 -1
  46. package/dist/langchain/createPDWRAG.js +30 -30
  47. package/dist/retrieval/MemoryDecryptionPipeline.d.ts.map +1 -1
  48. package/dist/retrieval/MemoryDecryptionPipeline.js +2 -1
  49. package/dist/retrieval/MemoryDecryptionPipeline.js.map +1 -1
  50. package/dist/retrieval/MemoryRetrievalService.d.ts +31 -0
  51. package/dist/retrieval/MemoryRetrievalService.d.ts.map +1 -1
  52. package/dist/retrieval/MemoryRetrievalService.js +44 -4
  53. package/dist/retrieval/MemoryRetrievalService.js.map +1 -1
  54. package/dist/services/CapabilityService.d.ts.map +1 -1
  55. package/dist/services/CapabilityService.js +30 -14
  56. package/dist/services/CapabilityService.js.map +1 -1
  57. package/dist/services/CrossContextPermissionService.d.ts.map +1 -1
  58. package/dist/services/CrossContextPermissionService.js +9 -7
  59. package/dist/services/CrossContextPermissionService.js.map +1 -1
  60. package/dist/services/EmbeddingService.d.ts +28 -1
  61. package/dist/services/EmbeddingService.d.ts.map +1 -1
  62. package/dist/services/EmbeddingService.js +54 -0
  63. package/dist/services/EmbeddingService.js.map +1 -1
  64. package/dist/services/EncryptionService.d.ts.map +1 -1
  65. package/dist/services/EncryptionService.js +6 -5
  66. package/dist/services/EncryptionService.js.map +1 -1
  67. package/dist/services/GeminiAIService.js +309 -309
  68. package/dist/services/IndexManager.d.ts +5 -1
  69. package/dist/services/IndexManager.d.ts.map +1 -1
  70. package/dist/services/IndexManager.js +17 -40
  71. package/dist/services/IndexManager.js.map +1 -1
  72. package/dist/services/QueryService.js +1 -1
  73. package/dist/services/QueryService.js.map +1 -1
  74. package/dist/services/StorageService.d.ts +11 -0
  75. package/dist/services/StorageService.d.ts.map +1 -1
  76. package/dist/services/StorageService.js +73 -10
  77. package/dist/services/StorageService.js.map +1 -1
  78. package/dist/services/TransactionService.d.ts +20 -0
  79. package/dist/services/TransactionService.d.ts.map +1 -1
  80. package/dist/services/TransactionService.js +43 -0
  81. package/dist/services/TransactionService.js.map +1 -1
  82. package/dist/services/ViewService.js +2 -2
  83. package/dist/services/ViewService.js.map +1 -1
  84. package/dist/services/storage/QuiltBatchManager.d.ts +101 -1
  85. package/dist/services/storage/QuiltBatchManager.d.ts.map +1 -1
  86. package/dist/services/storage/QuiltBatchManager.js +410 -20
  87. package/dist/services/storage/QuiltBatchManager.js.map +1 -1
  88. package/dist/services/storage/index.d.ts +1 -1
  89. package/dist/services/storage/index.d.ts.map +1 -1
  90. package/dist/services/storage/index.js.map +1 -1
  91. package/dist/utils/LRUCache.d.ts +106 -0
  92. package/dist/utils/LRUCache.d.ts.map +1 -0
  93. package/dist/utils/LRUCache.js +281 -0
  94. package/dist/utils/LRUCache.js.map +1 -0
  95. package/dist/utils/index.d.ts +1 -0
  96. package/dist/utils/index.d.ts.map +1 -1
  97. package/dist/utils/index.js +2 -0
  98. package/dist/utils/index.js.map +1 -1
  99. package/dist/utils/memoryIndexOnChain.d.ts +212 -0
  100. package/dist/utils/memoryIndexOnChain.d.ts.map +1 -0
  101. package/dist/utils/memoryIndexOnChain.js +312 -0
  102. package/dist/utils/memoryIndexOnChain.js.map +1 -0
  103. package/dist/utils/rebuildIndexNode.d.ts +29 -0
  104. package/dist/utils/rebuildIndexNode.d.ts.map +1 -1
  105. package/dist/utils/rebuildIndexNode.js +366 -98
  106. package/dist/utils/rebuildIndexNode.js.map +1 -1
  107. package/dist/vector/HnswWasmService.d.ts +20 -5
  108. package/dist/vector/HnswWasmService.d.ts.map +1 -1
  109. package/dist/vector/HnswWasmService.js +73 -40
  110. package/dist/vector/HnswWasmService.js.map +1 -1
  111. package/dist/vector/IHnswService.d.ts +10 -1
  112. package/dist/vector/IHnswService.d.ts.map +1 -1
  113. package/dist/vector/IHnswService.js.map +1 -1
  114. package/dist/vector/NodeHnswService.d.ts +16 -0
  115. package/dist/vector/NodeHnswService.d.ts.map +1 -1
  116. package/dist/vector/NodeHnswService.js +84 -5
  117. package/dist/vector/NodeHnswService.js.map +1 -1
  118. package/dist/vector/createHnswService.d.ts +1 -1
  119. package/dist/vector/createHnswService.js +1 -1
  120. package/dist/vector/index.d.ts +1 -1
  121. package/dist/vector/index.js +1 -1
  122. package/package.json +157 -157
  123. package/src/access/PermissionService.ts +635 -635
  124. package/src/aggregation/AggregationService.ts +389 -389
  125. package/src/ai-sdk/PDWVectorStore.ts +715 -715
  126. package/src/ai-sdk/index.ts +65 -65
  127. package/src/ai-sdk/tools.ts +460 -460
  128. package/src/ai-sdk/types.ts +404 -404
  129. package/src/batch/BatchManager.ts +597 -597
  130. package/src/batch/BatchingService.ts +429 -429
  131. package/src/batch/MemoryProcessingCache.ts +492 -492
  132. package/src/batch/index.ts +30 -30
  133. package/src/browser.ts +200 -200
  134. package/src/client/ClientMemoryManager.ts +987 -987
  135. package/src/client/PersonalDataWallet.ts +345 -345
  136. package/src/client/SimplePDWClient.ts +1289 -1222
  137. package/src/client/factory.ts +154 -154
  138. package/src/client/namespaces/AnalyticsNamespace.ts +377 -377
  139. package/src/client/namespaces/BatchNamespace.ts +356 -356
  140. package/src/client/namespaces/CacheNamespace.ts +123 -123
  141. package/src/client/namespaces/CapabilityNamespace.ts +217 -217
  142. package/src/client/namespaces/ClassifyNamespace.ts +169 -169
  143. package/src/client/namespaces/ContextNamespace.ts +297 -297
  144. package/src/client/namespaces/EmbeddingsNamespace.ts +99 -99
  145. package/src/client/namespaces/EncryptionNamespace.ts +221 -221
  146. package/src/client/namespaces/GraphNamespace.ts +468 -468
  147. package/src/client/namespaces/IndexNamespace.ts +361 -361
  148. package/src/client/namespaces/MemoryNamespace.ts +1422 -1135
  149. package/src/client/namespaces/PermissionsNamespace.ts +254 -254
  150. package/src/client/namespaces/PipelineNamespace.ts +220 -220
  151. package/src/client/namespaces/SearchNamespace.ts +1049 -1049
  152. package/src/client/namespaces/StorageNamespace.ts +458 -458
  153. package/src/client/namespaces/TxNamespace.ts +260 -260
  154. package/src/client/namespaces/WalletNamespace.ts +243 -243
  155. package/src/client/namespaces/consolidated/AINamespace.ts +449 -449
  156. package/src/client/namespaces/consolidated/BlockchainNamespace.ts +607 -546
  157. package/src/client/namespaces/consolidated/SecurityNamespace.ts +648 -648
  158. package/src/client/namespaces/consolidated/StorageNamespace.ts +1141 -497
  159. package/src/client/namespaces/consolidated/index.ts +39 -39
  160. package/src/client/signers/KeypairSigner.ts +108 -108
  161. package/src/client/signers/UnifiedSigner.ts +110 -110
  162. package/src/client/signers/WalletAdapterSigner.ts +159 -159
  163. package/src/client/signers/index.ts +26 -26
  164. package/src/config/ConfigurationHelper.ts +412 -412
  165. package/src/config/defaults.ts +51 -51
  166. package/src/config/index.ts +8 -8
  167. package/src/config/validation.ts +70 -70
  168. package/src/core/index.ts +14 -14
  169. package/src/core/interfaces/IService.ts +307 -307
  170. package/src/core/interfaces/index.ts +8 -8
  171. package/src/core/types/capability.ts +297 -297
  172. package/src/core/types/index.ts +870 -870
  173. package/src/core/types/wallet.ts +270 -270
  174. package/src/core/types.ts +9 -9
  175. package/src/core/wallet.ts +222 -222
  176. package/src/embedding/index.ts +19 -19
  177. package/src/embedding/types.ts +357 -357
  178. package/src/errors/index.ts +602 -602
  179. package/src/errors/recovery.ts +461 -461
  180. package/src/errors/validation.ts +567 -567
  181. package/src/generated/pdw/capability.ts +319 -319
  182. package/src/graph/GraphService.ts +887 -887
  183. package/src/graph/KnowledgeGraphManager.ts +728 -728
  184. package/src/graph/index.ts +25 -25
  185. package/src/index.ts +498 -474
  186. package/src/infrastructure/index.ts +22 -22
  187. package/src/infrastructure/seal/EncryptionService.ts +628 -603
  188. package/src/infrastructure/seal/SealService.ts +613 -615
  189. package/src/infrastructure/seal/index.ts +9 -9
  190. package/src/infrastructure/sui/BlockchainManager.ts +627 -627
  191. package/src/infrastructure/sui/SuiService.ts +888 -888
  192. package/src/infrastructure/sui/index.ts +9 -9
  193. package/src/infrastructure/walrus/StorageManager.ts +604 -604
  194. package/src/infrastructure/walrus/WalrusStorageService.ts +612 -612
  195. package/src/infrastructure/walrus/index.ts +9 -9
  196. package/src/langchain/PDWEmbeddings.ts +145 -145
  197. package/src/langchain/PDWVectorStore.ts +456 -456
  198. package/src/langchain/createPDWRAG.ts +303 -303
  199. package/src/langchain/index.ts +47 -47
  200. package/src/permissions/ConsentRepository.browser.ts +249 -249
  201. package/src/permissions/ConsentRepository.ts +364 -364
  202. package/src/pipeline/MemoryPipeline.ts +862 -862
  203. package/src/pipeline/PipelineManager.ts +683 -683
  204. package/src/pipeline/index.ts +26 -26
  205. package/src/retrieval/AdvancedSearchService.ts +629 -629
  206. package/src/retrieval/MemoryAnalyticsService.ts +711 -711
  207. package/src/retrieval/MemoryDecryptionPipeline.ts +825 -824
  208. package/src/retrieval/MemoryRetrievalService.ts +904 -830
  209. package/src/retrieval/index.ts +42 -42
  210. package/src/services/BatchService.ts +352 -352
  211. package/src/services/CapabilityService.ts +464 -448
  212. package/src/services/ClassifierService.ts +465 -465
  213. package/src/services/CrossContextPermissionService.ts +486 -484
  214. package/src/services/EmbeddingService.ts +771 -706
  215. package/src/services/EncryptionService.ts +712 -711
  216. package/src/services/GeminiAIService.ts +753 -753
  217. package/src/services/IndexManager.ts +977 -1004
  218. package/src/services/MemoryIndexService.ts +1003 -1003
  219. package/src/services/MemoryService.ts +369 -369
  220. package/src/services/QueryService.ts +890 -890
  221. package/src/services/StorageService.ts +1182 -1111
  222. package/src/services/TransactionService.ts +838 -790
  223. package/src/services/VectorService.ts +462 -462
  224. package/src/services/ViewService.ts +484 -484
  225. package/src/services/index.ts +25 -25
  226. package/src/services/storage/BlobAttributesManager.ts +333 -333
  227. package/src/services/storage/KnowledgeGraphManager.ts +425 -425
  228. package/src/services/storage/MemorySearchManager.ts +387 -387
  229. package/src/services/storage/QuiltBatchManager.ts +1130 -660
  230. package/src/services/storage/WalrusMetadataManager.ts +268 -268
  231. package/src/services/storage/WalrusStorageManager.ts +287 -287
  232. package/src/services/storage/index.ts +57 -52
  233. package/src/types/index.ts +13 -13
  234. package/src/utils/LRUCache.ts +378 -0
  235. package/src/utils/index.ts +76 -68
  236. package/src/utils/memoryIndexOnChain.ts +507 -0
  237. package/src/utils/rebuildIndex.ts +290 -290
  238. package/src/utils/rebuildIndexNode.ts +771 -424
  239. package/src/vector/BrowserHnswIndexService.ts +758 -758
  240. package/src/vector/HnswWasmService.ts +731 -679
  241. package/src/vector/IHnswService.ts +233 -224
  242. package/src/vector/NodeHnswService.ts +833 -735
  243. package/src/vector/VectorManager.ts +478 -478
  244. package/src/vector/createHnswService.ts +135 -135
  245. package/src/vector/index.ts +56 -56
  246. package/src/wallet/ContextWalletService.ts +656 -656
  247. package/src/wallet/MainWalletService.ts +317 -317
@@ -1,424 +1,771 @@
1
- /**
2
- * Rebuild HNSW Index from Blockchain + Walrus (Node.js)
3
- *
4
- * This utility fetches all existing memories from the Sui blockchain,
5
- * downloads embeddings from Walrus using the Walrus SDK, and rebuilds the local HNSW index.
6
- *
7
- * Use this when:
8
- * 1. User logs in on a new device
9
- * 2. Local index file was deleted/corrupted
10
- * 3. Need to sync with latest on-chain state
11
- *
12
- * @example
13
- * ```typescript
14
- * import { rebuildIndexNode } from 'personal-data-wallet-sdk';
15
- * import { getFullnodeUrl, SuiClient } from '@mysten/sui/client';
16
- *
17
- * const client = new SuiClient({ url: getFullnodeUrl('testnet') });
18
- *
19
- * await rebuildIndexNode({
20
- * userAddress: '0x...',
21
- * client,
22
- * packageId: process.env.PACKAGE_ID!,
23
- * network: 'testnet',
24
- * onProgress: (current, total, status) => console.log(`${current}/${total}: ${status}`)
25
- * });
26
- * ```
27
- */
28
-
29
- import type { SuiClient } from '@mysten/sui/client';
30
- import { WalrusClient, WalrusFile } from '@mysten/walrus';
31
-
32
- export interface RebuildIndexNodeOptions {
33
- /** User's blockchain address */
34
- userAddress: string;
35
-
36
- /** Sui client instance */
37
- client: SuiClient;
38
-
39
- /** Package ID for the PDW smart contract */
40
- packageId: string;
41
-
42
- /** Walrus network (testnet or mainnet) */
43
- network?: 'testnet' | 'mainnet';
44
-
45
- /** @deprecated Use network instead. Walrus aggregator URL (fallback) */
46
- walrusAggregator?: string;
47
-
48
- /** Index directory (default: .pdw-indexes) */
49
- indexDirectory?: string;
50
-
51
- /** Progress callback */
52
- onProgress?: (current: number, total: number, status: string) => void;
53
-
54
- /** Whether to force re-index even if index exists */
55
- force?: boolean;
56
- }
57
-
58
- export interface RebuildIndexNodeResult {
59
- success: boolean;
60
- totalMemories: number;
61
- indexedMemories: number;
62
- failedMemories: number;
63
- errors: Array<{ blobId: string; error: string }>;
64
- duration: number;
65
- }
66
-
67
- interface MemoryContent {
68
- content: string;
69
- embedding: number[];
70
- metadata: {
71
- category: string;
72
- importance: number;
73
- topic: string;
74
- };
75
- timestamp: number;
76
- }
77
-
78
- /**
79
- * Rebuild HNSW index from blockchain + Walrus (Node.js)
80
- */
81
- export async function rebuildIndexNode(options: RebuildIndexNodeOptions): Promise<RebuildIndexNodeResult> {
82
- const {
83
- userAddress,
84
- client,
85
- packageId,
86
- network = (process.env.WALRUS_NETWORK as 'testnet' | 'mainnet') || 'testnet',
87
- walrusAggregator,
88
- indexDirectory = './.pdw-indexes',
89
- onProgress,
90
- force = false
91
- } = options;
92
-
93
- const startTime = Date.now();
94
- const errors: Array<{ blobId: string; error: string }> = [];
95
-
96
- console.log('[rebuildIndexNode] Starting index rebuild...');
97
- onProgress?.(0, 0, 'Initializing...');
98
-
99
- try {
100
- // Dynamic imports for Node.js modules
101
- const { NodeHnswService } = await import('../vector/NodeHnswService');
102
- const fs = await import('fs/promises');
103
-
104
- // Initialize Walrus client
105
- const walrusClient = client.$extend(
106
- WalrusClient.experimental_asClientExtension({
107
- network,
108
- storageNodeClientOptions: {
109
- timeout: 60_000,
110
- },
111
- })
112
- );
113
-
114
- // Initialize HNSW service
115
- const hnswService = new NodeHnswService({
116
- indexDirectory,
117
- indexConfig: {
118
- dimension: 3072,
119
- maxElements: 10000,
120
- m: 16,
121
- efConstruction: 200
122
- }
123
- });
124
-
125
- await hnswService.initialize();
126
-
127
- // Check if index exists
128
- const indexPath = `${indexDirectory}/${userAddress.replace(/[^a-zA-Z0-9]/g, '_')}.hnsw`;
129
- let indexExists = false;
130
- try {
131
- await fs.access(indexPath);
132
- indexExists = true;
133
- } catch {
134
- // Index doesn't exist
135
- }
136
-
137
- if (indexExists && !force) {
138
- console.log('[rebuildIndexNode] Index already exists. Use force=true to rebuild.');
139
- return {
140
- success: false,
141
- totalMemories: 0,
142
- indexedMemories: 0,
143
- failedMemories: 0,
144
- errors: [{ blobId: '', error: 'Index already exists. Use force=true to rebuild.' }],
145
- duration: Date.now() - startTime
146
- };
147
- }
148
-
149
- if (indexExists && force) {
150
- await hnswService.deleteIndex(userAddress);
151
- console.log('[rebuildIndexNode] Deleted existing index for rebuild');
152
- }
153
-
154
- // Fetch all memories from blockchain
155
- console.log('[rebuildIndexNode] Fetching memories from blockchain...');
156
- onProgress?.(0, 0, 'Fetching memories from blockchain...');
157
-
158
- const memories: Array<{
159
- id: string;
160
- blobId: string;
161
- vectorId: number;
162
- category: string;
163
- importance: number;
164
- }> = [];
165
-
166
- let cursor: string | null | undefined = undefined;
167
- let hasMore = true;
168
-
169
- while (hasMore) {
170
- const response = await client.getOwnedObjects({
171
- owner: userAddress,
172
- filter: {
173
- StructType: `${packageId}::memory::Memory`,
174
- },
175
- options: {
176
- showContent: true,
177
- showType: true,
178
- },
179
- cursor,
180
- limit: 50
181
- });
182
-
183
- for (const obj of response.data) {
184
- if (obj.data?.content && 'fields' in obj.data.content) {
185
- const fields = obj.data.content.fields as any;
186
- memories.push({
187
- id: obj.data.objectId,
188
- blobId: fields.blob_id || '',
189
- vectorId: parseInt(fields.vector_id || '0'),
190
- category: fields.category || 'general',
191
- importance: parseInt(fields.importance || '5')
192
- });
193
- }
194
- }
195
-
196
- cursor = response.nextCursor;
197
- hasMore = response.hasNextPage;
198
- }
199
-
200
- const totalMemories = memories.length;
201
- console.log(`[rebuildIndexNode] Found ${totalMemories} memories on-chain`);
202
-
203
- if (totalMemories === 0) {
204
- console.log('[rebuildIndexNode] No memories to index');
205
- return {
206
- success: true,
207
- totalMemories: 0,
208
- indexedMemories: 0,
209
- failedMemories: 0,
210
- errors: [],
211
- duration: Date.now() - startTime
212
- };
213
- }
214
-
215
- // Process memories grouped by blobId (for Quilt support)
216
- // In a Quilt, multiple memories share the same blobId
217
- const memoriesByBlobId = new Map<string, typeof memories>();
218
- for (const memory of memories) {
219
- const list = memoriesByBlobId.get(memory.blobId) || [];
220
- list.push(memory);
221
- memoriesByBlobId.set(memory.blobId, list);
222
- }
223
-
224
- console.log(`[rebuildIndexNode] Unique blobIds: ${memoriesByBlobId.size} (${memoriesByBlobId.size < totalMemories ? 'Quilt detected' : 'individual blobs'})`);
225
-
226
- let indexedCount = 0;
227
- let failedCount = 0;
228
- let processedCount = 0;
229
-
230
- // Cache for Quilt files to avoid re-fetching
231
- const quiltFileCache = new Map<string, WalrusFile[]>();
232
-
233
- for (const [blobId, memoriesInBlob] of memoriesByBlobId) {
234
- console.log(`[rebuildIndexNode] Processing blobId ${blobId.substring(0, 20)}... (${memoriesInBlob.length} memories)`);
235
-
236
- try {
237
- // Use getBlob().files() to correctly parse Quilt structure
238
- // For regular blob: returns [singleFile]
239
- // For Quilt: returns [file1, file2, ...] - all files in the quilt
240
- let files: WalrusFile[];
241
-
242
- if (quiltFileCache.has(blobId)) {
243
- files = quiltFileCache.get(blobId)!;
244
- console.log(`[rebuildIndexNode] ♻️ Using cached files (${files.length} files)`);
245
- } else {
246
- const blob = await walrusClient.walrus.getBlob({ blobId });
247
- files = await blob.files();
248
- quiltFileCache.set(blobId, files);
249
- console.log(`[rebuildIndexNode] 📥 Fetched ${files.length} file(s) from Walrus`);
250
- }
251
-
252
- // For each memory in this blobId
253
- for (let i = 0; i < memoriesInBlob.length; i++) {
254
- const memory = memoriesInBlob[i];
255
- processedCount++;
256
- const progress = `Memory ${processedCount}/${totalMemories}`;
257
-
258
- console.log(`[rebuildIndexNode] Processing ${progress}: vectorId=${memory.vectorId}`);
259
- onProgress?.(processedCount, totalMemories, `Processing ${progress}...`);
260
-
261
- try {
262
- // Determine which file to use
263
- // For Quilt: match by index
264
- // For single blob: use the only file
265
- const fileIndex = files.length === 1 ? 0 : Math.min(i, files.length - 1);
266
- const file = files[fileIndex];
267
-
268
- if (!file) {
269
- throw new Error(`No file found at index ${fileIndex}`);
270
- }
271
-
272
- // Get file content
273
- const rawBytes = await file.bytes();
274
- const rawText = new TextDecoder().decode(rawBytes);
275
- const trimmedText = rawText.trim();
276
-
277
- // Get file identifier and tags if available (for Quilts)
278
- const identifier = await file.getIdentifier();
279
- const tags = await file.getTags();
280
-
281
- if (identifier) {
282
- console.log(`[rebuildIndexNode] 📎 File identifier: ${identifier}`);
283
- }
284
-
285
- let content: string;
286
- let embedding: number[];
287
- let metadata: { category?: string; importance?: number; topic?: string } = {};
288
- let timestamp = Date.now();
289
-
290
- if (trimmedText.startsWith('{') && trimmedText.endsWith('}')) {
291
- // JSON package format (correct format)
292
- try {
293
- const memoryData: MemoryContent = JSON.parse(trimmedText);
294
- content = memoryData.content;
295
- embedding = memoryData.embedding;
296
- metadata = memoryData.metadata || {};
297
- timestamp = memoryData.timestamp || Date.now();
298
-
299
- if (!embedding || embedding.length !== 3072) {
300
- throw new Error(`Invalid embedding in JSON: length=${embedding?.length || 0}`);
301
- }
302
-
303
- console.log(`[rebuildIndexNode] 📦 Format: JSON package`);
304
- } catch (jsonError) {
305
- throw new Error(`Invalid JSON structure: ${(jsonError as Error).message}`);
306
- }
307
- } else if (trimmedText.length > 0 && !trimmedText.includes('\x00') && trimmedText.length < 10000) {
308
- // Plain text format - cannot index without embedding
309
- throw new Error('Plain text format detected but no embedding available - skip');
310
- } else {
311
- throw new Error('Binary, encrypted, or empty content - cannot index');
312
- }
313
-
314
- // Add to HNSW index
315
- await hnswService.addVector(
316
- userAddress,
317
- memory.vectorId,
318
- embedding,
319
- {
320
- blobId: memory.blobId,
321
- memoryObjectId: memory.id,
322
- category: metadata.category || memory.category || tags?.['category'],
323
- importance: metadata.importance || memory.importance || parseInt(tags?.['importance'] || '5'),
324
- topic: metadata.topic || tags?.['topic'] || '',
325
- timestamp,
326
- content,
327
- isEncrypted: false
328
- }
329
- );
330
-
331
- indexedCount++;
332
- console.log(`[rebuildIndexNode] ✓ Indexed: "${content.substring(0, 30)}..."`);
333
-
334
- } catch (error: any) {
335
- failedCount++;
336
- const errorMsg = error.message || String(error);
337
- errors.push({ blobId: memory.blobId, error: errorMsg });
338
- console.error(`[rebuildIndexNode] ✗ Failed: ${errorMsg}`);
339
- }
340
- }
341
-
342
- } catch (error: any) {
343
- // Failed to fetch files for this blobId
344
- const errorMsg = error.message || String(error);
345
- console.error(`[rebuildIndexNode] ✗ Failed to fetch blobId: ${errorMsg}`);
346
-
347
- for (const memory of memoriesInBlob) {
348
- processedCount++;
349
- failedCount++;
350
- errors.push({ blobId: memory.blobId, error: `Failed to fetch blob: ${errorMsg}` });
351
- }
352
- }
353
- }
354
-
355
- // Force save index
356
- console.log('[rebuildIndexNode] Saving index to disk...');
357
- onProgress?.(totalMemories, totalMemories, 'Saving index...');
358
- await hnswService.flushBatch(userAddress);
359
-
360
- const duration = Date.now() - startTime;
361
- console.log('[rebuildIndexNode] Index rebuild complete!');
362
- console.log(`[rebuildIndexNode] Total: ${totalMemories}, Indexed: ${indexedCount}, Failed: ${failedCount}`);
363
- console.log(`[rebuildIndexNode] Duration: ${(duration / 1000).toFixed(2)}s`);
364
-
365
- return {
366
- success: true,
367
- totalMemories,
368
- indexedMemories: indexedCount,
369
- failedMemories: failedCount,
370
- errors,
371
- duration
372
- };
373
-
374
- } catch (error: any) {
375
- console.error('[rebuildIndexNode] Index rebuild failed:', error);
376
- return {
377
- success: false,
378
- totalMemories: 0,
379
- indexedMemories: 0,
380
- failedMemories: 0,
381
- errors: [{ blobId: '', error: error.message || String(error) }],
382
- duration: Date.now() - startTime
383
- };
384
- }
385
- }
386
-
387
- /**
388
- * Check if index exists for a user (Node.js)
389
- */
390
- export async function hasExistingIndexNode(
391
- userAddress: string,
392
- indexDirectory = './.pdw-indexes'
393
- ): Promise<boolean> {
394
- try {
395
- const fs = await import('fs/promises');
396
- const indexPath = `${indexDirectory}/${userAddress.replace(/[^a-zA-Z0-9]/g, '_')}.hnsw`;
397
- await fs.access(indexPath);
398
- return true;
399
- } catch {
400
- return false;
401
- }
402
- }
403
-
404
- /**
405
- * Clear index for a user (Node.js)
406
- */
407
- export async function clearIndexNode(
408
- userAddress: string,
409
- indexDirectory = './.pdw-indexes'
410
- ): Promise<void> {
411
- try {
412
- const fs = await import('fs/promises');
413
- const safeAddress = userAddress.replace(/[^a-zA-Z0-9]/g, '_');
414
- const indexPath = `${indexDirectory}/${safeAddress}.hnsw`;
415
- const metaPath = `${indexDirectory}/${safeAddress}.hnsw.meta.json`;
416
-
417
- await fs.unlink(indexPath).catch(() => {});
418
- await fs.unlink(metaPath).catch(() => {});
419
-
420
- console.log(`[clearIndexNode] Cleared index for user ${userAddress}`);
421
- } catch (error) {
422
- console.warn('[clearIndexNode] Error clearing index:', error);
423
- }
424
- }
1
+ /**
2
+ * Rebuild HNSW Index from Blockchain + Walrus (Node.js)
3
+ *
4
+ * This utility fetches all existing memories from the Sui blockchain,
5
+ * downloads embeddings from Walrus using the Walrus SDK, and rebuilds the local HNSW index.
6
+ *
7
+ * Use this when:
8
+ * 1. User logs in on a new device
9
+ * 2. Local index file was deleted/corrupted
10
+ * 3. Need to sync with latest on-chain state
11
+ *
12
+ * @example
13
+ * ```typescript
14
+ * import { rebuildIndexNode } from 'personal-data-wallet-sdk';
15
+ * import { getFullnodeUrl, SuiClient } from '@mysten/sui/client';
16
+ *
17
+ * const client = new SuiClient({ url: getFullnodeUrl('testnet') });
18
+ *
19
+ * await rebuildIndexNode({
20
+ * userAddress: '0x...',
21
+ * client,
22
+ * packageId: process.env.PACKAGE_ID!,
23
+ * network: 'testnet',
24
+ * onProgress: (current, total, status) => console.log(`${current}/${total}: ${status}`)
25
+ * });
26
+ * ```
27
+ */
28
+
29
+ import type { SuiClient } from '@mysten/sui/client';
30
+ import { WalrusClient, WalrusFile } from '@mysten/walrus';
31
+
32
+ export interface RebuildIndexNodeOptions {
33
+ /** User's blockchain address */
34
+ userAddress: string;
35
+
36
+ /** Sui client instance */
37
+ client: SuiClient;
38
+
39
+ /** Package ID for the PDW smart contract */
40
+ packageId: string;
41
+
42
+ /** Walrus network (testnet or mainnet) */
43
+ network?: 'testnet' | 'mainnet';
44
+
45
+ /** @deprecated Use network instead. Walrus aggregator URL (fallback) */
46
+ walrusAggregator?: string;
47
+
48
+ /** Index directory (default: .pdw-indexes) */
49
+ indexDirectory?: string;
50
+
51
+ /** Progress callback */
52
+ onProgress?: (current: number, total: number, status: string) => void;
53
+
54
+ /** Whether to force re-index even if index exists */
55
+ force?: boolean;
56
+
57
+ /**
58
+ * Quilt IDs to include in the rebuild.
59
+ * Quilts contain batch-uploaded memories that may not have on-chain Memory objects.
60
+ * Pass Quilt IDs here to include them in the index rebuild.
61
+ */
62
+ quiltIds?: string[];
63
+
64
+ /**
65
+ * Number of concurrent blob fetches (default: 10)
66
+ * Higher values can speed up rebuilding but may overwhelm the server
67
+ * Benchmark results: 10 is ~1.64x faster than sequential
68
+ */
69
+ fetchConcurrency?: number;
70
+ }
71
+
72
+ export interface RebuildIndexNodeResult {
73
+ success: boolean;
74
+ totalMemories: number;
75
+ indexedMemories: number;
76
+ failedMemories: number;
77
+ errors: Array<{ blobId: string; error: string }>;
78
+ duration: number;
79
+ /** Detailed timing breakdown for performance analysis */
80
+ timing?: {
81
+ /** Time to initialize services (ms) */
82
+ initMs: number;
83
+ /** Time to fetch blockchain data (ms) */
84
+ blockchainFetchMs: number;
85
+ /** Time to fetch all blobs from Walrus (ms) */
86
+ walrusFetchMs: number;
87
+ /** Time to process memories and build index (ms) */
88
+ processingMs: number;
89
+ /** Time to save index to disk (ms) */
90
+ saveMs: number;
91
+ /** Total blobs fetched */
92
+ blobsFetched: number;
93
+ /** Total content bytes downloaded */
94
+ totalBytesDownloaded: number;
95
+ };
96
+ }
97
+
98
+ interface MemoryContent {
99
+ content: string;
100
+ embedding: number[];
101
+ metadata: {
102
+ category: string;
103
+ importance: number;
104
+ topic: string;
105
+ memoryId?: string;
106
+ };
107
+ timestamp: number;
108
+ }
109
+
110
/**
 * Locate the Quilt file that belongs to a given memory.
 *
 * The lookup tries progressively weaker strategies and stops at the first hit
 * (the header comment of the original states this mirrors the SDK's
 * QuiltBatchManager.findMemoryInQuilt()):
 *   1. a `memory_id` tag equal to the stringified vector ID,
 *   2. an identifier equal to `memory-${vectorId}.json`,
 *   3. a JSON body whose `metadata.memoryId` equals the stringified vector ID,
 *   4. the file at position `fallbackIndex`, if that position exists.
 *
 * @param files - Files contained in the Quilt.
 * @param vectorId - Vector ID of the memory being looked up.
 * @param fallbackIndex - Position used when no other strategy matches.
 * @returns The matched file (or `undefined`) plus a human-readable description
 *          of the strategy that produced the match (empty when nothing matched).
 */
async function findMatchingFile(
  files: WalrusFile[],
  vectorId: number,
  fallbackIndex: number
): Promise<{ file: WalrusFile | undefined; matchStrategy: string }> {
  const wantedId = String(vectorId);

  // Strategy 1: the file carries a `memory_id` tag for this vector.
  for (const candidate of files) {
    const tags = await candidate.getTags();
    if (tags?.['memory_id'] !== wantedId) {
      continue;
    }
    const identifier = await candidate.getIdentifier();
    return {
      file: candidate,
      matchStrategy: `memory_id tag: ${tags['memory_id']} (${identifier})`,
    };
  }

  // Strategy 2: the identifier follows the "memory-{vectorId}.json" convention.
  const wantedName = `memory-${vectorId}.json`;
  for (const candidate of files) {
    const identifier = await candidate.getIdentifier();
    if (identifier === wantedName) {
      return { file: candidate, matchStrategy: `identifier: ${identifier}` };
    }
  }

  // Strategy 3: look inside the JSON body. Any failure while inspecting a file
  // (including after a match was recorded) is swallowed and the scan moves on,
  // so the match and its label are tracked separately.
  let jsonMatch: WalrusFile | undefined;
  let jsonLabel = '';
  for (const candidate of files) {
    try {
      const parsed = (await candidate.json()) as MemoryContent;
      if (parsed?.metadata?.memoryId === wantedId) {
        jsonMatch = candidate;
        const identifier = await candidate.getIdentifier();
        jsonLabel = `JSON metadata.memoryId: ${parsed.metadata.memoryId} (${identifier})`;
        break;
      }
    } catch {
      // Not valid JSON, continue
    }
  }
  if (jsonMatch) {
    return { file: jsonMatch, matchStrategy: jsonLabel };
  }

  // Strategy 4: positional fallback.
  if (fallbackIndex < files.length) {
    const positional = files[fallbackIndex];
    const identifier = await positional.getIdentifier();
    return {
      file: positional,
      matchStrategy: `index fallback (${fallbackIndex}): ${identifier || 'no identifier'}`,
    };
  }

  return { file: undefined, matchStrategy: '' };
}
177
+
178
/**
 * Rebuild HNSW index from blockchain + Walrus (Node.js)
 *
 * Steps:
 *   1. Initialise the Walrus client extension and the Node HNSW service.
 *   2. Refuse to continue when an index file already exists and `force` is not
 *      set; with `force` the existing index is deleted first.
 *   3. Page through the user's on-chain `memory::Memory` objects.
 *   4. Fetch every referenced blob from Walrus in batches of `fetchConcurrency`.
 *   5. Parse each JSON memory package and add its embedding to the index.
 *   6. Index every file of the additional Quilts listed in `quiltIds`. This
 *      also runs when the user has no on-chain memories at all.
 *   7. Flush the index to disk and report counters plus a timing breakdown.
 *
 * Per-memory and per-file failures are collected in `errors` and never abort
 * the rebuild. Unexpected failures are caught and reported through
 * `success: false`; the function does not reject.
 *
 * Note: the deprecated `walrusAggregator` option is accepted by the options
 * type but is not read here.
 *
 * @param options - Rebuild options (user, Sui client, package ID, network, ...).
 * @returns Summary of the rebuild; `timing` is present only on a completed run.
 */
export async function rebuildIndexNode(options: RebuildIndexNodeOptions): Promise<RebuildIndexNodeResult> {
  const {
    userAddress,
    client,
    packageId,
    network = (process.env.WALRUS_NETWORK as 'testnet' | 'mainnet') || 'testnet',
    indexDirectory = './.pdw-indexes',
    onProgress,
    force = false,
    quiltIds = [],
    fetchConcurrency = 10
  } = options;

  // Embedding width the index is created with; JSON packages must match it.
  const EMBEDDING_DIMENSION = 3072;
  // Quilt vector IDs are kept below this bound (same modulus the IDs were always derived with).
  const MAX_VECTOR_ID = 4294967295;

  // A batch size below 1 (or NaN) would never advance the fetch loop, and a
  // fractional one would produce overlapping slices, so normalise it here.
  const batchSize = fetchConcurrency >= 1 ? Math.floor(fetchConcurrency) : 1;

  // Extract a printable message from whatever was thrown.
  const toMessage = (error: unknown): string =>
    (error as { message?: string } | null | undefined)?.message || String(error);

  const startTime = Date.now();
  const errors: Array<{ blobId: string; error: string }> = [];

  // Detailed timing
  const timing = {
    initMs: 0,
    blockchainFetchMs: 0,
    walrusFetchMs: 0,
    processingMs: 0,
    saveMs: 0,
    blobsFetched: 0,
    totalBytesDownloaded: 0,
  };

  console.log('[rebuildIndexNode] Starting index rebuild...');
  onProgress?.(0, 0, 'Initializing...');

  try {
    // Dynamic imports for Node.js modules
    const { NodeHnswService } = await import('../vector/NodeHnswService');
    const fs = await import('fs/promises');

    // Initialize Walrus client
    const walrusClient = client.$extend(
      WalrusClient.experimental_asClientExtension({
        network,
        storageNodeClientOptions: {
          timeout: 60_000,
        },
      })
    );

    // Initialize HNSW service
    const hnswService = new NodeHnswService({
      indexDirectory,
      indexConfig: {
        dimension: EMBEDDING_DIMENSION,
        maxElements: 10000,
        m: 16,
        efConstruction: 200
      }
    });

    await hnswService.initialize();
    timing.initMs = Date.now() - startTime;
    console.log(`[rebuildIndexNode] ⏱️ Init: ${timing.initMs}ms`);

    // Check if index exists
    const indexPath = `${indexDirectory}/${userAddress.replace(/[^a-zA-Z0-9]/g, '_')}.hnsw`;
    let indexExists = false;
    try {
      await fs.access(indexPath);
      indexExists = true;
    } catch {
      // Index doesn't exist
    }

    if (indexExists && !force) {
      console.log('[rebuildIndexNode] Index already exists. Use force=true to rebuild.');
      return {
        success: false,
        totalMemories: 0,
        indexedMemories: 0,
        failedMemories: 0,
        errors: [{ blobId: '', error: 'Index already exists. Use force=true to rebuild.' }],
        duration: Date.now() - startTime
      };
    }

    if (indexExists && force) {
      await hnswService.deleteIndex(userAddress);
      console.log('[rebuildIndexNode] Deleted existing index for rebuild');
    }

    // Fetch all memories from blockchain
    const blockchainFetchStart = Date.now();
    console.log('[rebuildIndexNode] Fetching memories from blockchain...');
    onProgress?.(0, 0, 'Fetching memories from blockchain...');

    const memories: Array<{
      id: string;
      blobId: string;
      vectorId: number;
      category: string;
      importance: number;
    }> = [];

    let cursor: string | null | undefined = undefined;
    let hasMore = true;

    while (hasMore) {
      const response = await client.getOwnedObjects({
        owner: userAddress,
        filter: {
          StructType: `${packageId}::memory::Memory`,
        },
        options: {
          showContent: true,
          showType: true,
        },
        cursor,
        limit: 50
      });

      for (const obj of response.data) {
        if (obj.data?.content && 'fields' in obj.data.content) {
          const fields = obj.data.content.fields as any;
          memories.push({
            id: obj.data.objectId,
            blobId: fields.blob_id || '',
            vectorId: parseInt(fields.vector_id || '0', 10),
            category: fields.category || 'general',
            importance: parseInt(fields.importance || '5', 10)
          });
        }
      }

      cursor = response.nextCursor;
      hasMore = response.hasNextPage;
    }

    const totalMemories = memories.length;
    console.log(`[rebuildIndexNode] Found ${totalMemories} memories on-chain`);

    // Only stop early when there is truly nothing to do: explicitly supplied
    // Quilts must still be indexed even if no on-chain Memory objects exist.
    if (totalMemories === 0 && quiltIds.length === 0) {
      console.log('[rebuildIndexNode] No memories to index');
      return {
        success: true,
        totalMemories: 0,
        indexedMemories: 0,
        failedMemories: 0,
        errors: [],
        duration: Date.now() - startTime
      };
    }

    // Process memories grouped by blobId (for Quilt support)
    // In a Quilt, multiple memories share the same blobId
    const memoriesByBlobId = new Map<string, typeof memories>();
    for (const memory of memories) {
      const list = memoriesByBlobId.get(memory.blobId) || [];
      list.push(memory);
      memoriesByBlobId.set(memory.blobId, list);
    }

    console.log(`[rebuildIndexNode] Unique blobIds: ${memoriesByBlobId.size} (${memoriesByBlobId.size < totalMemories ? 'Quilt detected' : 'individual blobs'})`);
    timing.blockchainFetchMs = Date.now() - blockchainFetchStart;
    console.log(`[rebuildIndexNode] ⏱️ Blockchain fetch: ${timing.blockchainFetchMs}ms`);

    let indexedCount = 0;
    let failedCount = 0;
    let processedCount = 0;

    // ==================== PARALLEL BLOB FETCHING + CONTENT ====================
    // Step 1: Check blob types (Quilt vs regular) in parallel
    // Step 2: Fetch content in parallel (patches for Quilt, bytes for regular)
    const blobIds = Array.from(memoriesByBlobId.keys());

    console.log(`[rebuildIndexNode] Fetching ${blobIds.length} blobs (concurrency: ${batchSize})...`);
    const fetchStartTime = Date.now();

    const quiltFileCache = new Map<string, WalrusFile[]>();
    const contentCache = new Map<string, Uint8Array>(); // blobId or blobId:identifier -> content
    const fetchErrors = new Map<string, string>(); // blobId -> error message

    // Process in batches to control concurrency
    for (let i = 0; i < blobIds.length; i += batchSize) {
      const batch = blobIds.slice(i, i + batchSize);
      const batchNum = Math.floor(i / batchSize) + 1;
      const totalBatches = Math.ceil(blobIds.length / batchSize);

      console.log(`[rebuildIndexNode] 📥 Batch ${batchNum}/${totalBatches}: ${batch.length} blobs...`);
      onProgress?.(i, blobIds.length, `Fetching batch ${batchNum}/${totalBatches}...`);

      // Parallel fetch: check type + fetch content for each blob
      const results = await Promise.all(
        batch.map(async (blobId) => {
          try {
            // Try as Quilt first (getBlob + files)
            try {
              const blob = await walrusClient.walrus.getBlob({ blobId });
              const quiltFiles = await blob.files();

              if (quiltFiles.length > 1) {
                // It's a Quilt with multiple patches - fetch all content in parallel
                const patchResults = await Promise.all(
                  quiltFiles.map(async (file) => {
                    const identifier = await file.getIdentifier();
                    const tags = await file.getTags();
                    const bytes = await file.bytes();
                    return { file, identifier, tags, bytes };
                  })
                );

                return {
                  blobId,
                  success: true,
                  isQuilt: true,
                  files: quiltFiles,
                  patches: patchResults,
                };
              } else {
                // Single file in blob - fetch content
                const bytes = await quiltFiles[0].bytes();
                return {
                  blobId,
                  success: true,
                  isQuilt: false,
                  files: quiltFiles,
                  bytes,
                };
              }
            } catch {
              // Not a Quilt - try as regular blob
              const files = await walrusClient.walrus.getFiles({ ids: [blobId] });
              if (files[0]) {
                const bytes = await files[0].bytes();
                return {
                  blobId,
                  success: true,
                  isQuilt: false,
                  files,
                  bytes,
                };
              }
              return { blobId, success: false, error: 'No file found' };
            }
          } catch (error: unknown) {
            return { blobId, success: false, error: toMessage(error) };
          }
        })
      );

      // Process results into caches
      for (const result of results) {
        if (!result.success) {
          fetchErrors.set(result.blobId, result.error || 'Unknown error');
          console.error(`[rebuildIndexNode] ✗ ${result.blobId.substring(0, 16)}...: ${result.error}`);
          continue;
        }

        if (result.isQuilt && result.patches) {
          // Quilt: cache files and patch contents
          quiltFileCache.set(result.blobId, result.files!);
          for (const patch of result.patches) {
            const cacheKey = patch.identifier
              ? `${result.blobId}:${patch.identifier}`
              : result.blobId;
            contentCache.set(cacheKey, patch.bytes);
          }
          console.log(`[rebuildIndexNode] ✓ ${result.blobId.substring(0, 16)}... (Quilt: ${result.patches.length} patches)`);
        } else if (result.bytes) {
          // Regular blob: cache file and content
          quiltFileCache.set(result.blobId, result.files!);
          contentCache.set(result.blobId, result.bytes);
          console.log(`[rebuildIndexNode] ✓ ${result.blobId.substring(0, 16)}... (${result.bytes.length} bytes)`);
        }
      }
    }

    timing.walrusFetchMs = Date.now() - fetchStartTime;
    timing.blobsFetched = quiltFileCache.size;
    // Calculate total bytes downloaded
    for (const bytes of contentCache.values()) {
      timing.totalBytesDownloaded += bytes.length;
    }
    console.log(`[rebuildIndexNode] ⏱️ Walrus fetch: ${timing.walrusFetchMs}ms (${quiltFileCache.size} blobs, ${contentCache.size} contents, ${(timing.totalBytesDownloaded / 1024).toFixed(1)}KB)`);

    const processingStart = Date.now();

    // ==================== PROCESS MEMORIES ====================
    for (const [blobId, memoriesInBlob] of memoriesByBlobId) {
      console.log(`[rebuildIndexNode] Processing blobId ${blobId.substring(0, 20)}... (${memoriesInBlob.length} memories)`);

      // Get pre-fetched files from cache
      const files = quiltFileCache.get(blobId);

      if (!files) {
        // Blob fetch failed - mark all memories in this blob as failed
        const errorMsg = fetchErrors.get(blobId) || 'Failed to fetch blob';
        console.error(`[rebuildIndexNode] ✗ No files available: ${errorMsg}`);

        for (const memory of memoriesInBlob) {
          processedCount++;
          failedCount++;
          errors.push({ blobId: memory.blobId, error: `Blob fetch failed: ${errorMsg}` });
        }
        continue;
      }

      console.log(`[rebuildIndexNode] 📦 Using ${files.length} pre-fetched file(s)`);

      // For each memory in this blobId
      for (let i = 0; i < memoriesInBlob.length; i++) {
        const memory = memoriesInBlob[i];
        processedCount++;
        const progress = `Memory ${processedCount}/${totalMemories}`;

        console.log(`[rebuildIndexNode] Processing ${progress}: vectorId=${memory.vectorId}`);
        onProgress?.(processedCount, totalMemories, `Processing ${progress}...`);

        try {
          // Find matching file using helper function (mirrors SDK's QuiltBatchManager.findMemoryInQuilt)
          let file: WalrusFile | undefined;

          if (files.length === 1) {
            // Single file - use it directly
            file = files[0];
          } else if (files.length > 1) {
            // Multiple files in Quilt - use matching strategies
            const { file: matchedFile, matchStrategy } = await findMatchingFile(files, memory.vectorId, i);
            file = matchedFile;
            if (matchStrategy) {
              console.log(`[rebuildIndexNode] 🎯 Matched by ${matchStrategy}`);
            }
          }

          if (!file) {
            throw new Error(`No file found for memory vectorId=${memory.vectorId} (blob has ${files.length} files)`);
          }

          // Get file identifier and tags if available (for Quilts)
          const identifier = await file.getIdentifier();
          const tags = await file.getTags();

          // Get content from cache (already pre-fetched) or fetch if not cached
          const cacheKey = identifier ? `${blobId}:${identifier}` : blobId;
          let rawBytes = contentCache.get(cacheKey);
          if (!rawBytes) {
            // Fallback: fetch content if not in cache
            rawBytes = await file.bytes();
          }
          const rawText = new TextDecoder().decode(rawBytes);
          const trimmedText = rawText.trim();

          if (identifier) {
            console.log(`[rebuildIndexNode] 📎 File identifier: ${identifier}`);
          }

          let content: string;
          let embedding: number[];
          let metadata: { category?: string; importance?: number; topic?: string } = {};
          let timestamp = Date.now();

          if (trimmedText.startsWith('{') && trimmedText.endsWith('}')) {
            // JSON package format (correct format)
            try {
              const memoryData: MemoryContent = JSON.parse(trimmedText);
              content = memoryData.content;
              embedding = memoryData.embedding;
              metadata = memoryData.metadata || {};
              timestamp = memoryData.timestamp || Date.now();

              if (!embedding || embedding.length !== EMBEDDING_DIMENSION) {
                throw new Error(`Invalid embedding in JSON: length=${embedding?.length || 0}`);
              }

              console.log(`[rebuildIndexNode] 📦 Format: JSON package`);
            } catch (jsonError) {
              throw new Error(`Invalid JSON structure: ${(jsonError as Error).message}`);
            }
          } else if (trimmedText.length > 0 && !trimmedText.includes('\x00') && trimmedText.length < 10000) {
            // Plain text format - cannot index without embedding
            throw new Error('Plain text format detected but no embedding available - skip');
          } else {
            throw new Error('Binary, encrypted, or empty content - cannot index');
          }

          // Add to HNSW index
          await hnswService.addVector(
            userAddress,
            memory.vectorId,
            embedding,
            {
              blobId: memory.blobId,
              memoryObjectId: memory.id,
              category: metadata.category || memory.category || tags?.['category'],
              importance: metadata.importance || memory.importance || parseInt(tags?.['importance'] || '5', 10),
              topic: metadata.topic || tags?.['topic'] || '',
              timestamp,
              content,
              isEncrypted: false
            }
          );

          indexedCount++;
          console.log(`[rebuildIndexNode] ✓ Indexed: "${content.substring(0, 30)}..."`);

        } catch (error: unknown) {
          failedCount++;
          const errorMsg = toMessage(error);
          errors.push({ blobId: memory.blobId, error: errorMsg });
          console.error(`[rebuildIndexNode] ✗ Failed: ${errorMsg}`);
        }
      }
    }

    // ==================== QUILT MEMORIES ====================
    // Process additional Quilts that may not have on-chain Memory objects
    let quiltMemoriesTotal = 0;
    let quiltMemoriesIndexed = 0;

    if (quiltIds.length > 0) {
      console.log(`\n[rebuildIndexNode] Processing ${quiltIds.length} additional Quilt(s)...`);
      onProgress?.(processedCount, totalMemories + quiltIds.length, 'Processing Quilts...');

      // Quilt files have no on-chain vector ID, so synthesise one. The counter
      // is seeded once and advanced per file, skipping IDs already taken by
      // on-chain memories or earlier Quilt files, so IDs cannot collide within
      // this rebuild and always stay below MAX_VECTOR_ID.
      const usedVectorIds = new Set<number>(memories.map((memory) => memory.vectorId));
      let nextQuiltVectorId = Date.now() % MAX_VECTOR_ID;
      const allocateQuiltVectorId = (): number => {
        while (usedVectorIds.has(nextQuiltVectorId)) {
          nextQuiltVectorId = (nextQuiltVectorId + 1) % MAX_VECTOR_ID;
        }
        usedVectorIds.add(nextQuiltVectorId);
        return nextQuiltVectorId;
      };

      for (const quiltId of quiltIds) {
        console.log(`[rebuildIndexNode] Processing Quilt: ${quiltId.substring(0, 30)}...`);

        try {
          // Fetch Quilt files
          const blob = await walrusClient.walrus.getBlob({ blobId: quiltId });
          const files = await blob.files();
          console.log(`[rebuildIndexNode] 📥 Fetched Quilt: ${files.length} file(s)`);

          // Process each file in the Quilt
          for (let fileIdx = 0; fileIdx < files.length; fileIdx++) {
            const file = files[fileIdx];
            quiltMemoriesTotal++;

            try {
              const identifier = await file.getIdentifier() || `quilt-file-${fileIdx}`;
              const tags = await file.getTags();

              // Parse JSON content
              const rawBytes = await file.bytes();
              let rawText = new TextDecoder().decode(rawBytes);

              // Trim trailing null bytes (Quilt corruption workaround)
              let lastValidIndex = rawText.length - 1;
              while (lastValidIndex >= 0 && rawText.charCodeAt(lastValidIndex) === 0) {
                lastValidIndex--;
              }
              rawText = rawText.slice(0, lastValidIndex + 1);

              if (!rawText.startsWith('{') || !rawText.endsWith('}')) {
                throw new Error('Not a JSON file');
              }

              const memoryData: MemoryContent = JSON.parse(rawText);

              if (!memoryData.embedding || memoryData.embedding.length === 0) {
                throw new Error('No embedding in package');
              }

              // Generate unique vector ID for Quilt memory
              const vectorId = allocateQuiltVectorId();
              const memoryId = (memoryData as any).metadata?.memoryId || identifier.replace('.json', '');

              // Add to HNSW index
              await hnswService.addVector(
                userAddress,
                vectorId,
                memoryData.embedding,
                {
                  blobId: quiltId,
                  memoryObjectId: memoryId,
                  category: memoryData.metadata?.category || tags?.['category'] || 'general',
                  importance: memoryData.metadata?.importance || parseInt(tags?.['importance'] || '3', 10),
                  topic: memoryData.metadata?.topic || tags?.['topic'] || '',
                  timestamp: memoryData.timestamp || Date.now(),
                  content: memoryData.content || '[encrypted]',
                  isEncrypted: (memoryData as any).encrypted === true,
                  quiltId,
                  identifier
                }
              );

              quiltMemoriesIndexed++;
              console.log(`[rebuildIndexNode] ✓ Indexed Quilt file: ${identifier}`);

            } catch (fileError: unknown) {
              const errorMsg = toMessage(fileError);
              errors.push({ blobId: quiltId, error: `File ${fileIdx}: ${errorMsg}` });
              console.error(`[rebuildIndexNode] ✗ Failed file ${fileIdx}: ${errorMsg}`);
            }
          }

        } catch (quiltError: unknown) {
          const errorMsg = toMessage(quiltError);
          errors.push({ blobId: quiltId, error: `Quilt fetch failed: ${errorMsg}` });
          console.error(`[rebuildIndexNode] ✗ Failed to fetch Quilt: ${errorMsg}`);
        }
      }

      console.log(`[rebuildIndexNode] Quilt indexing complete: ${quiltMemoriesIndexed}/${quiltMemoriesTotal}`);
    }

    // Update totals
    const finalTotal = totalMemories + quiltMemoriesTotal;
    const finalIndexed = indexedCount + quiltMemoriesIndexed;
    const finalFailed = failedCount + (quiltMemoriesTotal - quiltMemoriesIndexed);

    timing.processingMs = Date.now() - processingStart;
    console.log(`[rebuildIndexNode] ⏱️ Processing: ${timing.processingMs}ms`);

    // Force save index
    const saveStart = Date.now();
    console.log('[rebuildIndexNode] Saving index to disk...');
    onProgress?.(finalTotal, finalTotal, 'Saving index...');
    await hnswService.flushBatch(userAddress);
    timing.saveMs = Date.now() - saveStart;
    console.log(`[rebuildIndexNode] ⏱️ Save: ${timing.saveMs}ms`);

    const duration = Date.now() - startTime;
    console.log('[rebuildIndexNode] Index rebuild complete!');
    console.log(`[rebuildIndexNode] On-chain: ${totalMemories}, Quilts: ${quiltMemoriesTotal}, Total indexed: ${finalIndexed}, Failed: ${finalFailed}`);
    console.log(`[rebuildIndexNode] Duration: ${(duration / 1000).toFixed(2)}s`);
    console.log(`[rebuildIndexNode] ⏱️ TIMING BREAKDOWN:`);
    console.log(` Init: ${timing.initMs}ms (${((timing.initMs / duration) * 100).toFixed(1)}%)`);
    console.log(` Blockchain: ${timing.blockchainFetchMs}ms (${((timing.blockchainFetchMs / duration) * 100).toFixed(1)}%)`);
    console.log(` Walrus: ${timing.walrusFetchMs}ms (${((timing.walrusFetchMs / duration) * 100).toFixed(1)}%)`);
    console.log(` Processing: ${timing.processingMs}ms (${((timing.processingMs / duration) * 100).toFixed(1)}%)`);
    console.log(` Save: ${timing.saveMs}ms (${((timing.saveMs / duration) * 100).toFixed(1)}%)`);

    return {
      success: true,
      totalMemories: finalTotal,
      indexedMemories: finalIndexed,
      failedMemories: finalFailed,
      errors,
      duration,
      timing
    };

  } catch (error: unknown) {
    console.error('[rebuildIndexNode] Index rebuild failed:', error);
    return {
      success: false,
      totalMemories: 0,
      indexedMemories: 0,
      failedMemories: 0,
      errors: [{ blobId: '', error: toMessage(error) }],
      duration: Date.now() - startTime
    };
  }
}
733
+
734
/**
 * Report whether an HNSW index file is present on disk for a user (Node.js).
 *
 * The file is looked up as `<indexDirectory>/<sanitized address>.hnsw`, where
 * every non-alphanumeric character of the address is replaced by `_`.
 *
 * @param userAddress - Address whose index file is probed.
 * @param indexDirectory - Directory holding index files (default: `./.pdw-indexes`).
 * @returns `true` when the file is accessible; `false` on any failure.
 */
export async function hasExistingIndexNode(
  userAddress: string,
  indexDirectory = './.pdw-indexes'
): Promise<boolean> {
  let found = false;
  try {
    const { access } = await import('fs/promises');
    const fileName = userAddress.replace(/[^a-zA-Z0-9]/g, '_');
    await access(`${indexDirectory}/${fileName}.hnsw`);
    found = true;
  } catch {
    // Missing file, unreadable directory, etc. all count as "no index".
  }
  return found;
}
750
+
751
/**
 * Remove a user's on-disk index files (Node.js).
 *
 * Deletes `<sanitized address>.hnsw` and its `.hnsw.meta.json` companion from
 * `indexDirectory`. Files that cannot be removed are skipped silently; any
 * other failure is logged as a warning. The function never throws.
 *
 * @param userAddress - Address whose index files are removed.
 * @param indexDirectory - Directory holding index files (default: `./.pdw-indexes`).
 */
export async function clearIndexNode(
  userAddress: string,
  indexDirectory = './.pdw-indexes'
): Promise<void> {
  try {
    const { unlink } = await import('fs/promises');
    const basePath = `${indexDirectory}/${userAddress.replace(/[^a-zA-Z0-9]/g, '_')}.hnsw`;

    // Index file first, then its metadata companion; failures are ignored on purpose.
    for (const target of [basePath, `${basePath}.meta.json`]) {
      await unlink(target).catch(() => {});
    }

    console.log(`[clearIndexNode] Cleared index for user ${userAddress}`);
  } catch (error) {
    console.warn('[clearIndexNode] Error clearing index:', error);
  }
}