@enbox/agent 0.5.0 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,5 @@
1
1
  import type { AbstractLevel } from 'abstract-level';
2
- import type { GenericMessage, MessageEvent, MessagesSubscribeReply, MessagesSyncReply, SubscriptionMessage } from '@enbox/dwn-sdk-js';
2
+ import type { GenericMessage, MessageEvent, MessagesSubscribeReply, MessagesSyncDiffEntry, MessagesSyncReply, StateIndex, SubscriptionMessage } from '@enbox/dwn-sdk-js';
3
3
 
4
4
  import ms from 'ms';
5
5
 
@@ -23,12 +23,71 @@ export type SyncEngineLevelParams = {
23
23
  };
24
24
 
25
25
  /**
26
- * Maximum bit prefix depth before falling back to leaf enumeration.
27
- * At depth 16, each subtree covers ~1/65536 of the key space, which is a good
28
- * balance between round-trip count and leaf-set size.
26
+ * Maximum bit prefix depth for the per-node tree walk (legacy fallback).
27
+ * At depth 16, each subtree covers ~1/65536 of the key space.
29
28
  */
30
29
  const MAX_DIFF_DEPTH = 16;
31
30
 
31
+ /**
32
+ * Bit depth for the batched diff protocol.
33
+ * Lower than MAX_DIFF_DEPTH because the batched diff sends all subtree hashes
34
+ * in a single request — fine granularity comes from the server-side leaf
35
+ * enumeration, not from deeper prefixes. Depth 8 = 256 buckets, which is
36
+ * a good balance between hash map size and leaf-set resolution.
37
+ */
38
+ const BATCHED_DIFF_DEPTH = 8;
39
+
40
+ /**
41
+ * Maximum number of concurrent remote HTTP requests during a tree diff.
42
+ * The binary tree walk fans out in parallel — without a limit, depth N
43
+ * produces 2^N concurrent requests, which can exhaust server rate limits.
44
+ */
45
+ const REMOTE_CONCURRENCY = 4;
46
+
47
+ /**
48
+ * Counting semaphore for bounding concurrent async operations.
49
+ * Used by the tree walk to limit in-flight remote HTTP requests.
50
+ */
51
+ class Semaphore {
52
+ private _permits: number;
53
+ private readonly _waiting: (() => void)[] = [];
54
+
55
+ constructor(permits: number) {
56
+ this._permits = permits;
57
+ }
58
+
59
+ /** Wait until a permit is available, then consume one. */
60
+ async acquire(): Promise<void> {
61
+ if (this._permits > 0) {
62
+ this._permits--;
63
+ return;
64
+ }
65
+ return new Promise<void>((resolve) => {
66
+ this._waiting.push(resolve);
67
+ });
68
+ }
69
+
70
+ /** Release a permit, waking the next waiter if any. */
71
+ release(): void {
72
+ const next = this._waiting.shift();
73
+ if (next) {
74
+ next();
75
+ } else {
76
+ this._permits++;
77
+ }
78
+ }
79
+
80
+ /** Acquire a permit, run the task, then release regardless of outcome. */
81
+ async run<T>(fn: () => Promise<T>): Promise<T> {
82
+ await this.acquire();
83
+ try {
84
+ return await fn();
85
+ } finally {
86
+ this.release();
87
+ }
88
+ }
89
+ }
90
+
32
91
  /**
33
92
  * Key for the subscription cursor sublevel. Cursors are keyed by
34
93
  * `{did}^{dwnUrl}[^{protocol}]` and store an opaque EventLog cursor string.
@@ -238,15 +297,48 @@ export class SyncEngineLevel implements SyncEngine {
238
297
  continue;
239
298
  }
240
299
 
241
- // Phase 2: Walk the tree to find differing subtrees.
242
- const diff = await this.walkTreeDiff({
300
+ // Phase 2: Compute the diff in a single round-trip using the
301
+ // batched 'diff' action. This replaces the per-node tree walk
302
+ // that previously required dozens of HTTP requests.
303
+ const diff = await this.diffWithRemote({
243
304
  did, dwnUrl, delegateDid, protocol,
244
305
  });
245
306
 
246
307
  // Phase 3: Pull missing messages (remote has, local doesn't).
308
+ // The diff response may include inline message data — use it
309
+ // directly instead of re-fetching via individual MessagesRead calls.
247
310
  if (!direction || direction === 'pull') {
248
311
  if (diff.onlyRemote.length > 0) {
249
- await this.pullMessages({ did, dwnUrl, delegateDid, protocol, messageCids: diff.onlyRemote });
312
+ // Separate entries into three categories:
313
+ // 1. Fully prefetched: have message + inline data (or no data needed)
314
+ // 2. Need data fetch: have message but missing data for RecordsWrite
315
+ // 3. Need full fetch: no message at all
316
+ const prefetched: (MessagesSyncDiffEntry & { message: GenericMessage })[] = [];
317
+ const needsFetchCids: string[] = [];
318
+
319
+ for (const entry of diff.onlyRemote) {
320
+ if (!entry.message) {
321
+ // No message at all — need full fetch.
322
+ needsFetchCids.push(entry.messageCid);
323
+ } else if (
324
+ entry.message.descriptor.interface === 'Records' &&
325
+ entry.message.descriptor.method === 'Write' &&
326
+ (entry.message.descriptor as any).dataCid &&
327
+ !entry.encodedData
328
+ ) {
329
+ // RecordsWrite with data but data wasn't inlined (too large).
330
+ // Need to fetch individually to get the data stream.
331
+ needsFetchCids.push(entry.messageCid);
332
+ } else {
333
+ // Fully prefetched (message + data or no data needed).
334
+ prefetched.push(entry as MessagesSyncDiffEntry & { message: GenericMessage });
335
+ }
336
+ }
337
+ await this.pullMessages({
338
+ did, dwnUrl, delegateDid, protocol,
339
+ messageCids: needsFetchCids,
340
+ prefetched,
341
+ });
250
342
  }
251
343
  }
252
344
 
@@ -756,17 +848,55 @@ export class SyncEngineLevel implements SyncEngine {
756
848
  return this._defaultHashHex.get(depth) ?? '';
757
849
  }
758
850
 
851
+ /**
852
+ * Parse a bit prefix string (e.g. "0110101") into a boolean array
853
+ * for the StateIndex API. Each '1' maps to `true` (right child),
854
+ * each '0' maps to `false` (left child).
855
+ */
856
+ private static parseBitPrefix(prefix: string): boolean[] {
857
+ return Array.from(prefix, (ch): boolean => ch === '1');
858
+ }
859
+
759
860
  // ---------------------------------------------------------------------------
760
861
  // SMT Root Comparison
761
862
  // ---------------------------------------------------------------------------
762
863
 
763
864
  /**
764
- * Get the SMT root hash from the local DWN via a MessagesSync 'root' action.
865
+ * Access the local DWN's StateIndex directly, bypassing the `processMessage`
866
+ * pipeline. The sync engine runs in the same process as the local DWN, so
867
+ * there is no need for message signing, schema validation, or authentication
868
+ * when querying our own state.
869
+ *
870
+ * Returns `undefined` in remote mode (no in-process DWN). The local methods
871
+ * fall back to `processRequest` in that case, routing through RPC to the
872
+ * local DWN server.
873
+ */
874
+ private get stateIndex(): StateIndex | undefined {
875
+ if (this.agent.dwn.isRemoteMode) {
876
+ return undefined;
877
+ }
878
+ return this.agent.dwn.node.storage.stateIndex;
879
+ }
880
+
881
+ /**
882
+ * Get the SMT root hash from the local DWN.
883
+ *
884
+ * In local mode: queries the StateIndex directly (fast, no processMessage overhead).
885
+ * In remote mode: constructs a signed MessagesSync message and routes through RPC.
886
+ *
765
887
  * Returns a hex-encoded root hash string.
766
888
  */
767
889
  private async getLocalRoot(did: string, delegateDid?: string, protocol?: string): Promise<string> {
768
- const permissionGrantId = await this.getSyncPermissionGrantId(did, delegateDid, protocol);
890
+ const si = this.stateIndex;
891
+ if (si) {
892
+ const rootHash = protocol !== undefined
893
+ ? await si.getProtocolRoot(did, protocol)
894
+ : await si.getRoot(did);
895
+ return hashToHex(rootHash);
896
+ }
769
897
 
898
+ // Remote mode fallback: go through processRequest → RPC.
899
+ const permissionGrantId = await this.getSyncPermissionGrantId(did, delegateDid, protocol);
770
900
  const response = await this.agent.dwn.processRequest({
771
901
  author : did,
772
902
  target : did,
@@ -778,7 +908,6 @@ export class SyncEngineLevel implements SyncEngine {
778
908
  permissionGrantId
779
909
  }
780
910
  });
781
-
782
911
  const reply = response.reply as MessagesSyncReply;
783
912
  return reply.root ?? '';
784
913
  }
@@ -834,11 +963,17 @@ export class SyncEngineLevel implements SyncEngine {
834
963
  // Hoist permission grant lookup — resolved once and reused for all subtree/leaf requests.
835
964
  const permissionGrantId = await this.getSyncPermissionGrantId(did, delegateDid, protocol);
836
965
 
966
+ // Gate remote HTTP requests through a semaphore so the binary tree walk
967
+ // doesn't produce an exponential burst of concurrent requests. Local
968
+ // DWN requests (in-process) are not gated.
969
+ const remoteSemaphore = new Semaphore(REMOTE_CONCURRENCY);
970
+
837
971
  const walk = async (prefix: string): Promise<void> => {
838
972
  // Get subtree hashes for this prefix from local and remote.
973
+ // Only the remote request is gated by the semaphore.
839
974
  const [localHash, remoteHash] = await Promise.all([
840
975
  this.getLocalSubtreeHash(did, prefix, delegateDid, protocol, permissionGrantId),
841
- this.getRemoteSubtreeHash(did, dwnUrl, prefix, delegateDid, protocol, permissionGrantId),
976
+ remoteSemaphore.run(() => this.getRemoteSubtreeHash(did, dwnUrl, prefix, delegateDid, protocol, permissionGrantId)),
842
977
  ]);
843
978
 
844
979
  // If hashes match, this subtree is identical — skip.
@@ -857,7 +992,9 @@ export class SyncEngineLevel implements SyncEngine {
857
992
  return;
858
993
  }
859
994
  if (localHash === emptyHash && remoteHash !== emptyHash) {
860
- const remoteLeaves = await this.getRemoteLeaves(did, dwnUrl, prefix, delegateDid, protocol, permissionGrantId);
995
+ const remoteLeaves = await remoteSemaphore.run(
996
+ () => this.getRemoteLeaves(did, dwnUrl, prefix, delegateDid, protocol, permissionGrantId),
997
+ );
861
998
  onlyRemote.push(...remoteLeaves);
862
999
  return;
863
1000
  }
@@ -866,7 +1003,7 @@ export class SyncEngineLevel implements SyncEngine {
866
1003
  if (prefix.length >= MAX_DIFF_DEPTH) {
867
1004
  const [localLeaves, remoteLeaves] = await Promise.all([
868
1005
  this.getLocalLeaves(did, prefix, delegateDid, protocol, permissionGrantId),
869
- this.getRemoteLeaves(did, dwnUrl, prefix, delegateDid, protocol, permissionGrantId),
1006
+ remoteSemaphore.run(() => this.getRemoteLeaves(did, dwnUrl, prefix, delegateDid, protocol, permissionGrantId)),
870
1007
  ]);
871
1008
 
872
1009
  const localSet = new Set(localLeaves);
@@ -896,9 +1033,146 @@ export class SyncEngineLevel implements SyncEngine {
896
1033
  return { onlyLocal, onlyRemote };
897
1034
  }
898
1035
 
1036
+ // ---------------------------------------------------------------------------
1037
+ // Batched Diff — single round-trip set reconciliation
1038
+ // ---------------------------------------------------------------------------
1039
+
1040
+ /**
1041
+ * Compute the diff between local and remote in a single HTTP round-trip.
1042
+ *
1043
+ * 1. Walk the local SMT directly (no processMessage) to collect subtree
1044
+ * hashes at `BATCHED_DIFF_DEPTH`.
1045
+ * 2. Send a single `MessagesSync action:'diff'` to the remote with all
1046
+ * non-empty subtree hashes.
1047
+ * 3. The remote compares and returns `onlyRemote` (with inline messages)
1048
+ * and `onlyLocal` prefixes.
1049
+ * 4. Enumerate local leaves for the `onlyLocal` prefixes directly.
1050
+ *
1051
+ * This replaces `walkTreeDiff()` which required one HTTP call per tree node.
1052
+ */
1053
+ private async diffWithRemote({ did, dwnUrl, delegateDid, protocol }: {
1054
+ did: string;
1055
+ dwnUrl: string;
1056
+ delegateDid?: string;
1057
+ protocol?: string;
1058
+ }): Promise<{ onlyRemote: MessagesSyncDiffEntry[]; onlyLocal: string[] }> {
1059
+ // Step 1: Collect local subtree hashes at BATCHED_DIFF_DEPTH directly from StateIndex.
1060
+ const localHashes = await this.collectLocalSubtreeHashes(did, protocol, BATCHED_DIFF_DEPTH);
1061
+
1062
+ // Step 2: Send a single 'diff' request to the remote with our hashes.
1063
+ const permissionGrantId = await this.getSyncPermissionGrantId(did, delegateDid, protocol);
1064
+
1065
+ const syncMessage = await this.agent.dwn.processRequest({
1066
+ store : false,
1067
+ author : did,
1068
+ target : did,
1069
+ messageType : DwnInterface.MessagesSync,
1070
+ granteeDid : delegateDid,
1071
+ messageParams : {
1072
+ action : 'diff',
1073
+ protocol,
1074
+ hashes : localHashes,
1075
+ depth : BATCHED_DIFF_DEPTH,
1076
+ permissionGrantId,
1077
+ }
1078
+ });
1079
+
1080
+ const reply = await this.agent.rpc.sendDwnRequest({
1081
+ dwnUrl,
1082
+ targetDid : did,
1083
+ message : syncMessage.message,
1084
+ }) as MessagesSyncReply;
1085
+
1086
+ if (reply.status.code !== 200) {
1087
+ throw new Error(`SyncEngineLevel: diff failed with ${reply.status.code}: ${reply.status.detail}`);
1088
+ }
1089
+
1090
+ // Step 3: Enumerate local leaves for prefixes the remote reported as onlyLocal.
1091
+ const permissionGrantIdForLeaves = await this.getSyncPermissionGrantId(did, delegateDid, protocol);
1092
+ const onlyLocalCids: string[] = [];
1093
+ for (const prefix of reply.onlyLocal ?? []) {
1094
+ const leaves = await this.getLocalLeaves(did, prefix, delegateDid, protocol, permissionGrantIdForLeaves);
1095
+ onlyLocalCids.push(...leaves);
1096
+ }
1097
+
1098
+ return {
1099
+ onlyRemote : reply.onlyRemote ?? [],
1100
+ onlyLocal : onlyLocalCids,
1101
+ };
1102
+ }
1103
+
1104
+ /**
1105
+ * Walk the local SMT to a given depth and collect non-empty subtree hashes.
1106
+ * Returns a `{ prefix: hexHash }` map. Empty subtrees (matching the default
1107
+ * hash) are omitted.
1108
+ *
1109
+ * Uses direct StateIndex access in local mode. In remote mode, falls back
1110
+ * to `getLocalSubtreeHash` which routes through RPC.
1111
+ */
1112
+ private async collectLocalSubtreeHashes(
1113
+ did: string,
1114
+ protocol: string | undefined,
1115
+ depth: number,
1116
+ ): Promise<Record<string, string>> {
1117
+ const result: Record<string, string> = {};
1118
+ const defaultHash = await this.getDefaultHashHex(depth);
1119
+ const si = this.stateIndex;
1120
+
1121
+ const walk = async (prefix: string, currentDepth: number): Promise<void> => {
1122
+ let hexHash: string;
1123
+
1124
+ if (si) {
1125
+ // Fast path: direct StateIndex access (local mode).
1126
+ const bitPath = SyncEngineLevel.parseBitPrefix(prefix);
1127
+ const hash = protocol !== undefined
1128
+ ? await si.getProtocolSubtreeHash(did, protocol, bitPath)
1129
+ : await si.getSubtreeHash(did, bitPath);
1130
+ hexHash = hashToHex(hash);
1131
+ } else {
1132
+ // Remote mode fallback.
1133
+ hexHash = await this.getLocalSubtreeHash(did, prefix, undefined, protocol);
1134
+ }
1135
+
1136
+ if (hexHash === defaultHash) {
1137
+ // Empty subtree — omit from the map.
1138
+ return;
1139
+ }
1140
+
1141
+ if (currentDepth >= depth) {
1142
+ result[prefix] = hexHash;
1143
+ return;
1144
+ }
1145
+
1146
+ // Recurse into children.
1147
+ await Promise.all([
1148
+ walk(prefix + '0', currentDepth + 1),
1149
+ walk(prefix + '1', currentDepth + 1),
1150
+ ]);
1151
+ };
1152
+
1153
+ await walk('', 0);
1154
+ return result;
1155
+ }
1156
+
1157
+ /**
1158
+ * Get the subtree hash at a given bit prefix from the local DWN.
1159
+ *
1160
+ * In local mode: queries the StateIndex directly.
1161
+ * In remote mode: constructs a signed MessagesSync message and routes through RPC.
1162
+ */
899
1163
  private async getLocalSubtreeHash(
900
1164
  did: string, prefix: string, delegateDid?: string, protocol?: string, permissionGrantId?: string
901
1165
  ): Promise<string> {
1166
+ const si = this.stateIndex;
1167
+ if (si) {
1168
+ const bitPath = SyncEngineLevel.parseBitPrefix(prefix);
1169
+ const hash = protocol !== undefined
1170
+ ? await si.getProtocolSubtreeHash(did, protocol, bitPath)
1171
+ : await si.getSubtreeHash(did, bitPath);
1172
+ return hashToHex(hash);
1173
+ }
1174
+
1175
+ // Remote mode fallback.
902
1176
  const response = await this.agent.dwn.processRequest({
903
1177
  author : did,
904
1178
  target : did,
@@ -911,7 +1185,6 @@ export class SyncEngineLevel implements SyncEngine {
911
1185
  permissionGrantId
912
1186
  }
913
1187
  });
914
-
915
1188
  const reply = response.reply as MessagesSyncReply;
916
1189
  return reply.hash ?? '';
917
1190
  }
@@ -942,9 +1215,24 @@ export class SyncEngineLevel implements SyncEngine {
942
1215
  return reply.hash ?? '';
943
1216
  }
944
1217
 
1218
+ /**
1219
+ * Get all leaf messageCids under a given prefix from the local DWN.
1220
+ *
1221
+ * In local mode: queries the StateIndex directly.
1222
+ * In remote mode: constructs a signed MessagesSync message and routes through RPC.
1223
+ */
945
1224
  private async getLocalLeaves(
946
1225
  did: string, prefix: string, delegateDid?: string, protocol?: string, permissionGrantId?: string
947
1226
  ): Promise<string[]> {
1227
+ const si = this.stateIndex;
1228
+ if (si) {
1229
+ const bitPath = SyncEngineLevel.parseBitPrefix(prefix);
1230
+ return protocol !== undefined
1231
+ ? await si.getProtocolLeaves(did, protocol, bitPath)
1232
+ : await si.getLeaves(did, bitPath);
1233
+ }
1234
+
1235
+ // Remote mode fallback.
948
1236
  const response = await this.agent.dwn.processRequest({
949
1237
  author : did,
950
1238
  target : did,
@@ -957,7 +1245,6 @@ export class SyncEngineLevel implements SyncEngine {
957
1245
  permissionGrantId
958
1246
  }
959
1247
  });
960
-
961
1248
  const reply = response.reply as MessagesSyncReply;
962
1249
  return reply.entries ?? [];
963
1250
  }
@@ -995,16 +1282,21 @@ export class SyncEngineLevel implements SyncEngine {
995
1282
  /**
996
1283
  * Fetches missing messages from the remote DWN and processes them locally
997
1284
  * in dependency order (topological sort).
1285
+ *
1286
+ * When prefetched entries are provided (from the batched diff response),
1287
+ * they are processed directly without additional HTTP round-trips.
1288
+ * Only `messageCids` that were NOT prefetched are fetched individually.
998
1289
  */
999
- private async pullMessages({ did, dwnUrl, delegateDid, protocol, messageCids }: {
1290
+ private async pullMessages({ did, dwnUrl, delegateDid, protocol, messageCids, prefetched }: {
1000
1291
  did: string;
1001
1292
  dwnUrl: string;
1002
1293
  delegateDid?: string;
1003
1294
  protocol?: string;
1004
1295
  messageCids: string[];
1296
+ prefetched?: MessagesSyncDiffEntry[];
1005
1297
  }): Promise<void> {
1006
1298
  return pullMessages({
1007
- did, dwnUrl, delegateDid, protocol, messageCids,
1299
+ did, dwnUrl, delegateDid, protocol, messageCids, prefetched,
1008
1300
  agent : this.agent,
1009
1301
  permissionsApi : this._permissionsApi,
1010
1302
  });
@@ -1,15 +1,23 @@
1
1
  import type { EnboxPlatformAgent } from './types/agent.js';
2
2
  import type { PermissionsApi } from './types/permissions.js';
3
- import type { GenericMessage, MessagesReadReply, UnionMessageReply } from '@enbox/dwn-sdk-js';
3
+ import type { GenericMessage, MessagesReadReply, MessagesSyncDiffEntry, UnionMessageReply } from '@enbox/dwn-sdk-js';
4
4
 
5
- import { DwnInterfaceName, DwnMethodName, Message } from '@enbox/dwn-sdk-js';
5
+ import { DwnInterfaceName, DwnMethodName, Encoder, Message } from '@enbox/dwn-sdk-js';
6
6
 
7
7
  import { DwnInterface } from './types/dwn.js';
8
8
  import { isRecordsWrite } from './utils.js';
9
9
  import { topologicalSort } from './sync-topological-sort.js';
10
10
 
11
- /** Entry type for fetched messages with optional data stream. */
12
- export type SyncMessageEntry = { message: GenericMessage; dataStream?: ReadableStream<Uint8Array> };
11
+ /** Maximum data size (in bytes) to buffer in memory for retry. Larger payloads are re-fetched. */
12
+ const MAX_BUFFER_SIZE = 1_048_576; // 1 MB
13
+
14
+ /** Entry type for fetched messages with optional data stream and retry buffer. */
15
+ export type SyncMessageEntry = {
16
+ message: GenericMessage;
17
+ dataStream?: ReadableStream<Uint8Array>;
18
+ /** Buffered data bytes for retry — avoids re-fetching from remote when stream is consumed. */
19
+ bufferedData?: Uint8Array;
20
+ };
13
21
 
14
22
  /**
15
23
  * 202: message was successfully written to the remote DWN
@@ -43,55 +51,165 @@ export async function getMessageCid(message: GenericMessage): Promise<string> {
43
51
  * Fetches missing messages from the remote DWN and processes them on the local DWN
44
52
  * in dependency order (topological sort).
45
53
  *
46
- * Messages that fail processing are re-fetched from the remote before each retry
47
- * pass rather than buffered in memory. ReadableStream is single-use, so a failed
48
- * message's data stream is consumed on the first attempt. Re-fetching provides a
49
- * fresh stream without holding all record data in memory simultaneously.
54
+ * Small data payloads (≤ 1 MB) are buffered during the initial fetch so that
55
+ * retries can replay the data from memory instead of re-fetching from remote.
56
+ * Large payloads are re-fetched on retry since buffering them would consume
57
+ * too much memory.
50
58
  */
51
- export async function pullMessages({ did, dwnUrl, delegateDid, protocol, messageCids, agent, permissionsApi }: {
59
+ export async function pullMessages({ did, dwnUrl, delegateDid, protocol, messageCids, prefetched, agent, permissionsApi }: {
52
60
  did: string;
53
61
  dwnUrl: string;
54
62
  delegateDid?: string;
55
63
  protocol?: string;
56
64
  messageCids: string[];
65
+ /** Pre-fetched message entries from the batched diff response (already have message + data). */
66
+ prefetched?: MessagesSyncDiffEntry[];
57
67
  agent: EnboxPlatformAgent;
58
68
  permissionsApi: PermissionsApi;
59
69
  }): Promise<void> {
60
- // Step 1: Fetch all missing messages from the remote in parallel.
61
- const fetched = await fetchRemoteMessages({ did, dwnUrl, delegateDid, protocol, messageCids, agent, permissionsApi });
70
+ // Convert prefetched diff entries into SyncMessageEntry format.
71
+ const prefetchedEntries: SyncMessageEntry[] = [];
72
+ if (prefetched) {
73
+ for (const entry of prefetched) {
74
+ if (!entry.message) { continue; }
75
+ const syncEntry: SyncMessageEntry = { message: entry.message };
76
+ if (entry.encodedData) {
77
+ // Convert base64url-encoded data to a ReadableStream.
78
+ const bytes = Encoder.base64UrlToBytes(entry.encodedData);
79
+ syncEntry.bufferedData = bytes;
80
+ syncEntry.dataStream = new ReadableStream<Uint8Array>({
81
+ start(controller): void {
82
+ controller.enqueue(bytes);
83
+ controller.close();
84
+ }
85
+ });
86
+ }
87
+ prefetchedEntries.push(syncEntry);
88
+ }
89
+ }
90
+
91
+ // Step 1: Fetch remaining messages (not prefetched) from the remote.
92
+ const fetched = messageCids.length > 0
93
+ ? await fetchRemoteMessages({ did, dwnUrl, delegateDid, protocol, messageCids, agent, permissionsApi })
94
+ : [];
95
+
96
+ // Merge prefetched entries with remotely fetched ones.
97
+ const allFetched = [...prefetchedEntries, ...fetched];
62
98
 
63
99
  // Step 2: Build dependency graph and topological sort.
64
- const sorted = topologicalSort(fetched);
100
+ const sorted = topologicalSort(allFetched);
101
+
102
+ // Step 3: Buffer small data streams so they can be replayed on retry.
103
+ await bufferSmallStreams(sorted);
65
104
 
66
- // Step 3: Process messages in dependency order with multi-pass retry.
67
- // Retry up to MAX_RETRY_PASSES times for messages that fail due to
68
- // dependency ordering issues (e.g., a RecordsWrite whose ProtocolsConfigure
69
- // hasn't committed yet). Failed messages are re-fetched from the remote
70
- // to obtain a fresh data stream, since ReadableStream is single-use.
105
+ // Step 4: Process messages in dependency order with multi-pass retry.
71
106
  const MAX_RETRY_PASSES = 3;
72
107
  let pending = sorted;
73
108
 
74
109
  for (let pass = 0; pass <= MAX_RETRY_PASSES && pending.length > 0; pass++) {
75
- const failedCids: string[] = [];
110
+ const failed: SyncMessageEntry[] = [];
76
111
 
77
112
  for (const entry of pending) {
78
- const pullReply = await agent.dwn.processRawMessage(did, entry.message, { dataStream: entry.dataStream });
113
+ // Create a fresh ReadableStream from the buffer if available (stream is single-use).
114
+ const dataStream = entry.bufferedData
115
+ ? new ReadableStream<Uint8Array>({ start(c): void { c.enqueue(entry.bufferedData!); c.close(); } })
116
+ : entry.dataStream;
117
+
118
+ const pullReply = await agent.dwn.processRawMessage(did, entry.message, { dataStream });
119
+
79
120
  if (!syncMessageReplyIsSuccessful(pullReply)) {
80
- const cid = await getMessageCid(entry.message);
81
- failedCids.push(cid);
121
+ failed.push(entry);
82
122
  }
83
123
  }
84
124
 
85
- // Re-fetch failed messages from the remote to get fresh data streams.
86
- if (failedCids.length > 0) {
87
- const reFetched = await fetchRemoteMessages({ did, dwnUrl, delegateDid, protocol, messageCids: failedCids, agent, permissionsApi });
88
- pending = topologicalSort(reFetched);
125
+ if (failed.length > 0) {
126
+ // Separate entries that have a buffer (can retry locally) from those
127
+ // that need a fresh fetch (large payloads whose stream was consumed).
128
+ const needsRefetch: string[] = [];
129
+ const canRetry: SyncMessageEntry[] = [];
130
+
131
+ for (const entry of failed) {
132
+ if (entry.bufferedData || !entry.dataStream) {
133
+ // Has a buffer or has no data — can retry without re-fetching.
134
+ canRetry.push(entry);
135
+ } else {
136
+ // Large payload whose stream was consumed — must re-fetch.
137
+ const cid = await getMessageCid(entry.message);
138
+ needsRefetch.push(cid);
139
+ }
140
+ }
141
+
142
+ // Re-fetch only the large-payload messages that we couldn't buffer.
143
+ if (needsRefetch.length > 0) {
144
+ const reFetched = await fetchRemoteMessages({ did, dwnUrl, delegateDid, protocol, messageCids: needsRefetch, agent, permissionsApi });
145
+ canRetry.push(...reFetched);
146
+ }
147
+
148
+ pending = topologicalSort(canRetry);
89
149
  } else {
90
150
  pending = [];
91
151
  }
92
152
  }
93
153
  }
94
154
 
155
+ /**
156
+ * Buffers small data streams into `Uint8Array` so they can be replayed on retry.
157
+ * Streams larger than `MAX_BUFFER_SIZE` are not buffered: reading stops at the threshold and the entry's `dataStream` is cleared.
158
+ */
159
+ async function bufferSmallStreams(entries: SyncMessageEntry[]): Promise<void> {
160
+ for (const entry of entries) {
161
+ if (!entry.dataStream) {
162
+ continue;
163
+ }
164
+
165
+ // Read the stream into memory. If it exceeds the threshold, stop and
166
+ // leave the entry without a buffer (it will be re-fetched on retry).
167
+ const chunks: Uint8Array[] = [];
168
+ let totalSize = 0;
169
+ let exceededThreshold = false;
170
+ const reader = entry.dataStream.getReader();
171
+
172
+ try {
173
+ for (;;) {
174
+ const { done, value } = await reader.read();
175
+ if (done) { break; }
176
+ totalSize += value.byteLength;
177
+ if (totalSize > MAX_BUFFER_SIZE) {
178
+ exceededThreshold = true;
179
+ break;
180
+ }
181
+ chunks.push(value);
182
+ }
183
+ } finally {
184
+ reader.releaseLock();
185
+ }
186
+
187
+ if (exceededThreshold) {
188
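+ // Stream exceeded the buffer threshold and is now partially consumed, so
189
+ // drop it. NOTE(review): pullMessages treats a missing dataStream as "no data" and retries without re-fetching — confirm large payloads are actually re-fetched.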
190
+ entry.dataStream = undefined;
191
+ continue;
192
+ }
193
+
194
+ // Combine chunks into a single Uint8Array buffer.
195
+ const buffer = new Uint8Array(totalSize);
196
+ let offset = 0;
197
+ for (const chunk of chunks) {
198
+ buffer.set(chunk, offset);
199
+ offset += chunk.byteLength;
200
+ }
201
+
202
+ entry.bufferedData = buffer;
203
+ // Create a fresh ReadableStream from the buffer for the first processing attempt.
204
+ entry.dataStream = new ReadableStream<Uint8Array>({
205
+ start(controller): void {
206
+ controller.enqueue(buffer);
207
+ controller.close();
208
+ }
209
+ });
210
+ }
211
+ }
212
+
95
213
  /**
96
214
  * Fetches messages from a remote DWN by their CIDs using MessagesRead.
97
215
  */
@@ -123,8 +241,9 @@ export async function fetchRemoteMessages({ did, dwnUrl, delegateDid, protocol,
123
241
  }
124
242
  }
125
243
 
126
- // Fetch messages in parallel with bounded concurrency.
127
- const CONCURRENCY = 10;
244
+ // Fetch messages in parallel with bounded concurrency. Keep this low
245
+ // to avoid bursting through the remote server's rate limits during sync.
246
+ const CONCURRENCY = 4;
128
247
  let cursor = 0;
129
248
 
130
249
  while (cursor < messageCids.length) {
package/src/utils.ts CHANGED
@@ -168,4 +168,4 @@ export function concatenateUrl(baseUrl: string, path: string): string {
168
168
  }
169
169
 
170
170
  return `${baseUrl}/${path}`;
171
- }
171
+ }