@enbox/agent 0.6.4 → 0.6.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113) hide show
  1. package/README.md +18 -5
  2. package/dist/browser.mjs +11 -11
  3. package/dist/browser.mjs.map +4 -4
  4. package/dist/esm/agent-did-resolver-cache.js +5 -5
  5. package/dist/esm/agent-did-resolver-cache.js.map +1 -1
  6. package/dist/esm/crypto-api.js.map +1 -1
  7. package/dist/esm/did-api.js +1 -1
  8. package/dist/esm/did-api.js.map +1 -1
  9. package/dist/esm/dwn-api.js +93 -53
  10. package/dist/esm/dwn-api.js.map +1 -1
  11. package/dist/esm/dwn-discovery-payload.js +7 -4
  12. package/dist/esm/dwn-discovery-payload.js.map +1 -1
  13. package/dist/esm/dwn-key-delivery.js +8 -3
  14. package/dist/esm/dwn-key-delivery.js.map +1 -1
  15. package/dist/esm/enbox-connect-protocol.js +34 -14
  16. package/dist/esm/enbox-connect-protocol.js.map +1 -1
  17. package/dist/esm/enbox-user-agent.js +11 -3
  18. package/dist/esm/enbox-user-agent.js.map +1 -1
  19. package/dist/esm/hd-identity-vault.js +33 -18
  20. package/dist/esm/hd-identity-vault.js.map +1 -1
  21. package/dist/esm/identity-api.js +5 -4
  22. package/dist/esm/identity-api.js.map +1 -1
  23. package/dist/esm/index.js +1 -0
  24. package/dist/esm/index.js.map +1 -1
  25. package/dist/esm/local-dwn.js.map +1 -1
  26. package/dist/esm/local-key-manager.js.map +1 -1
  27. package/dist/esm/permissions-api.js +9 -5
  28. package/dist/esm/permissions-api.js.map +1 -1
  29. package/dist/esm/prototyping/crypto/jose/jwe-flattened.js +9 -9
  30. package/dist/esm/prototyping/crypto/jose/jwe-flattened.js.map +1 -1
  31. package/dist/esm/secret-store.js +106 -0
  32. package/dist/esm/secret-store.js.map +1 -0
  33. package/dist/esm/store-data.js +32 -11
  34. package/dist/esm/store-data.js.map +1 -1
  35. package/dist/esm/sync-closure-resolver.js +1 -1
  36. package/dist/esm/sync-closure-resolver.js.map +1 -1
  37. package/dist/esm/sync-engine-level.js +418 -141
  38. package/dist/esm/sync-engine-level.js.map +1 -1
  39. package/dist/esm/sync-replication-ledger.js +25 -0
  40. package/dist/esm/sync-replication-ledger.js.map +1 -1
  41. package/dist/esm/test-harness.js +32 -5
  42. package/dist/esm/test-harness.js.map +1 -1
  43. package/dist/esm/types/sync.js +9 -3
  44. package/dist/esm/types/sync.js.map +1 -1
  45. package/dist/esm/utils.js.map +1 -1
  46. package/dist/types/agent-did-resolver-cache.d.ts +1 -1
  47. package/dist/types/agent-did-resolver-cache.d.ts.map +1 -1
  48. package/dist/types/anonymous-dwn-api.d.ts +2 -2
  49. package/dist/types/anonymous-dwn-api.d.ts.map +1 -1
  50. package/dist/types/crypto-api.d.ts +1 -1
  51. package/dist/types/crypto-api.d.ts.map +1 -1
  52. package/dist/types/did-api.d.ts +2 -2
  53. package/dist/types/did-api.d.ts.map +1 -1
  54. package/dist/types/dwn-api.d.ts +51 -11
  55. package/dist/types/dwn-api.d.ts.map +1 -1
  56. package/dist/types/dwn-key-delivery.d.ts +4 -1
  57. package/dist/types/dwn-key-delivery.d.ts.map +1 -1
  58. package/dist/types/enbox-connect-protocol.d.ts +3 -2
  59. package/dist/types/enbox-connect-protocol.d.ts.map +1 -1
  60. package/dist/types/enbox-user-agent.d.ts +5 -1
  61. package/dist/types/enbox-user-agent.d.ts.map +1 -1
  62. package/dist/types/hd-identity-vault.d.ts +9 -2
  63. package/dist/types/hd-identity-vault.d.ts.map +1 -1
  64. package/dist/types/identity-api.d.ts +1 -1
  65. package/dist/types/identity-api.d.ts.map +1 -1
  66. package/dist/types/index.d.ts +1 -0
  67. package/dist/types/index.d.ts.map +1 -1
  68. package/dist/types/local-dwn.d.ts +3 -3
  69. package/dist/types/local-dwn.d.ts.map +1 -1
  70. package/dist/types/local-key-manager.d.ts +2 -2
  71. package/dist/types/local-key-manager.d.ts.map +1 -1
  72. package/dist/types/permissions-api.d.ts +1 -1
  73. package/dist/types/permissions-api.d.ts.map +1 -1
  74. package/dist/types/secret-store.d.ts +81 -0
  75. package/dist/types/secret-store.d.ts.map +1 -0
  76. package/dist/types/store-data.d.ts +15 -3
  77. package/dist/types/store-data.d.ts.map +1 -1
  78. package/dist/types/sync-engine-level.d.ts +52 -16
  79. package/dist/types/sync-engine-level.d.ts.map +1 -1
  80. package/dist/types/sync-replication-ledger.d.ts +10 -1
  81. package/dist/types/sync-replication-ledger.d.ts.map +1 -1
  82. package/dist/types/test-harness.d.ts +3 -0
  83. package/dist/types/test-harness.d.ts.map +1 -1
  84. package/dist/types/types/agent.d.ts +3 -0
  85. package/dist/types/types/agent.d.ts.map +1 -1
  86. package/dist/types/types/sync.d.ts +27 -4
  87. package/dist/types/types/sync.d.ts.map +1 -1
  88. package/package.json +3 -3
  89. package/src/agent-did-resolver-cache.ts +5 -5
  90. package/src/anonymous-dwn-api.ts +2 -2
  91. package/src/crypto-api.ts +1 -1
  92. package/src/did-api.ts +3 -3
  93. package/src/dwn-api.ts +107 -69
  94. package/src/dwn-discovery-payload.ts +5 -4
  95. package/src/dwn-key-delivery.ts +8 -2
  96. package/src/enbox-connect-protocol.ts +38 -21
  97. package/src/enbox-user-agent.ts +15 -3
  98. package/src/hd-identity-vault.ts +47 -21
  99. package/src/identity-api.ts +6 -5
  100. package/src/index.ts +1 -0
  101. package/src/local-dwn.ts +3 -3
  102. package/src/local-key-manager.ts +2 -2
  103. package/src/permissions-api.ts +12 -8
  104. package/src/prototyping/crypto/jose/jwe-flattened.ts +8 -8
  105. package/src/secret-store.ts +173 -0
  106. package/src/store-data.ts +40 -14
  107. package/src/sync-closure-resolver.ts +2 -2
  108. package/src/sync-engine-level.ts +423 -162
  109. package/src/sync-replication-ledger.ts +26 -1
  110. package/src/test-harness.ts +40 -5
  111. package/src/types/agent.ts +3 -0
  112. package/src/types/sync.ts +35 -7
  113. package/src/utils.ts +1 -1
@@ -175,7 +175,7 @@ export class SyncEngineLevel implements SyncEngine {
175
175
  */
176
176
  private _permissionsApi: PermissionsApi;
177
177
 
178
- private _db: AbstractLevel<string | Buffer | Uint8Array>;
178
+ private readonly _db: AbstractLevel<string | Buffer | Uint8Array>;
179
179
  private _syncIntervalId?: ReturnType<typeof setInterval>;
180
180
  private _syncLock = false;
181
181
 
@@ -191,7 +191,7 @@ export class SyncEngineLevel implements SyncEngine {
191
191
  * Populated from the ledger on `startLiveSync`, used by subscription handlers
192
192
  * to avoid async ledger lookups on every event.
193
193
  */
194
- private _activeLinks: Map<string, ReplicationLinkState> = new Map();
194
+ private readonly _activeLinks: Map<string, ReplicationLinkState> = new Map();
195
195
 
196
196
  /**
197
197
  * Per-link in-memory delivery-order tracking for the pull path. Keyed by
@@ -199,7 +199,7 @@ export class SyncEngineLevel implements SyncEngine {
199
199
  * restarts from `contiguousAppliedToken` and idempotent apply handles
200
200
  * re-delivered events.
201
201
  */
202
- private _linkRuntimes: Map<string, LinkRuntimeState> = new Map();
202
+ private readonly _linkRuntimes: Map<string, LinkRuntimeState> = new Map();
203
203
 
204
204
  /**
205
205
  * Hex-encoded default hashes for empty subtrees at each depth, keyed by depth.
@@ -212,8 +212,8 @@ export class SyncEngineLevel implements SyncEngine {
212
212
  // Live sync state
213
213
  // ---------------------------------------------------------------------------
214
214
 
215
- /** Current sync mode, set by `startSync`. */
216
- private _syncMode: SyncMode = 'poll';
215
+ /** Current sync mode, set by `startSync`. Reset to `undefined` by `stopSync`/`clear`. */
216
+ private _syncMode: SyncMode | undefined = 'poll';
217
217
 
218
218
  /**
219
219
  * Monotonic session generation counter. Incremented on every teardown.
@@ -233,10 +233,10 @@ export class SyncEngineLevel implements SyncEngine {
233
233
  private _connectivityState: SyncConnectivityState = 'unknown';
234
234
 
235
235
  /** Registered event listeners for observability. */
236
- private _eventListeners: Set<SyncEventListener> = new Set();
236
+ private readonly _eventListeners: Set<SyncEventListener> = new Set();
237
237
 
238
238
  /** Per-link push runtime: queue, debounce timer, retry state. */
239
- private _pushRuntimes: Map<string, PushRuntimeState> = new Map();
239
+ private readonly _pushRuntimes: Map<string, PushRuntimeState> = new Map();
240
240
 
241
241
  /**
242
242
  * CIDs recently received via pull subscription, keyed by `cid|dwnUrl` to
@@ -244,7 +244,7 @@ export class SyncEngineLevel implements SyncEngine {
244
244
  * is only suppressed for push back to Provider A — it still fans out to
245
245
  * Provider B and C. TTL: 60 seconds. Cap: 10,000 entries.
246
246
  */
247
- private _recentlyPulledCids: Map<string, number> = new Map();
247
+ private readonly _recentlyPulledCids: Map<string, number> = new Map();
248
248
 
249
249
  /** TTL for echo-loop suppression entries (60 seconds). */
250
250
  private static readonly ECHO_SUPPRESS_TTL_MS = 60_000;
@@ -254,11 +254,46 @@ export class SyncEngineLevel implements SyncEngine {
254
254
  * Caches ProtocolsConfigure and grant lookups across events for the same
255
255
  * tenant. Keyed by tenantDid to prevent cross-tenant cache pollution.
256
256
  */
257
- private _closureContexts: Map<string, ClosureEvaluationContext> = new Map();
257
+ private readonly _closureContexts: Map<string, ClosureEvaluationContext> = new Map();
258
258
 
259
259
  /** Maximum entries in the echo-loop suppression cache. */
260
260
  private static readonly ECHO_SUPPRESS_MAX_ENTRIES = 10_000;
261
261
 
262
+ /** Validate `SyncIdentityOptions` for `registerIdentity` and `updateIdentityOptions`. */
263
+ private static validateSyncIdentityOptions(options: SyncIdentityOptions): void {
264
+ if (!options || !('protocols' in options)) {
265
+ throw new Error('SyncEngineLevel: options.protocols is required — pass \'all\' for a full replica or a non-empty protocol list.');
266
+ }
267
+ if (options.protocols !== 'all' && !Array.isArray(options.protocols)) {
268
+ throw new Error('SyncEngineLevel: protocols must be \'all\' or a non-empty string array.');
269
+ }
270
+ if (Array.isArray(options.protocols) && options.protocols.length === 0) {
271
+ throw new Error('SyncEngineLevel: protocols must be \'all\' or a non-empty array of protocol URIs. An empty array is ambiguous.');
272
+ }
273
+ }
274
+
275
+ /**
276
+ * Cached sync targets result from the last {@link getSyncTargets} call.
277
+ * Invalidated on identity registration/unregistration/update.
278
+ * TTL-based: cleared after 30 seconds to pick up DID document changes.
279
+ */
280
+ private _syncTargetsCache?: {
281
+ targets: { did: string; dwnUrl: string; delegateDid?: string; protocol?: string }[];
282
+ timestamp: number;
283
+ };
284
+
285
+ /**
286
+ * Monotonic generation counter for sync target cache invalidation.
287
+ * Bumped on every invalidation (register/unregister/update/clear/close/stopSync).
288
+ * An in-flight `getSyncTargets()` captures the generation before awaiting
289
+ * and only writes to the cache if it hasn't changed, preventing a
290
+ * concurrent mutation from being masked by stale data.
291
+ */
292
+ private _syncTargetsCacheGeneration = 0;
293
+
294
+ /** TTL for the sync targets cache (30 seconds). */
295
+ private static readonly SYNC_TARGETS_CACHE_TTL_MS = 30_000;
296
+
262
297
  /** Count of consecutive SMT sync failures (for backoff in poll mode). */
263
298
  private _consecutiveFailures = 0;
264
299
 
@@ -312,6 +347,16 @@ export class SyncEngineLevel implements SyncEngine {
312
347
  set agent(agent: EnboxPlatformAgent) {
313
348
  this._agent = agent;
314
349
  this._permissionsApi = new AgentPermissionsApi({ agent: agent as EnboxAgent });
350
+ // Cached sync targets were resolved through the previous agent's
351
+ // DID resolver / endpoint lookup — invalidate so the next sync
352
+ // tick re-resolves through the new agent.
353
+ this._syncTargetsCache = undefined;
354
+ this._syncTargetsCacheGeneration++;
355
+ }
356
+
357
+ get hasActiveSubscriptions(): boolean {
358
+ return this._liveSubscriptions.length > 0 ||
359
+ this._localSubscriptions.length > 0;
315
360
  }
316
361
 
317
362
  get connectivityState(): SyncConnectivityState {
@@ -351,17 +396,24 @@ export class SyncEngineLevel implements SyncEngine {
351
396
  }
352
397
 
353
398
  public async clear(): Promise<void> {
399
+ this._syncTargetsCache = undefined;
400
+ this._syncTargetsCacheGeneration++;
354
401
  await this.teardownLiveSync();
402
+ this._syncMode = undefined;
355
403
  await this._permissionsApi.clear();
356
404
  await this._db.clear();
357
405
  }
358
406
 
359
407
  public async close(): Promise<void> {
408
+ this._syncTargetsCache = undefined;
409
+ this._syncTargetsCacheGeneration++;
360
410
  await this.teardownLiveSync();
361
411
  await this._db.close();
362
412
  }
363
413
 
364
- public async registerIdentity({ did, options }: { did: string; options?: SyncIdentityOptions }): Promise<void> {
414
+ public async registerIdentity({ did, options }: { did: string; options: SyncIdentityOptions }): Promise<void> {
415
+ SyncEngineLevel.validateSyncIdentityOptions(options);
416
+
365
417
  const registeredIdentities = this._db.sublevel('registeredIdentities');
366
418
 
367
419
  const existing = await this.getIdentityOptions(did);
@@ -369,10 +421,14 @@ export class SyncEngineLevel implements SyncEngine {
369
421
  throw new Error(`SyncEngineLevel: Identity with DID ${did} is already registered.`);
370
422
  }
371
423
 
372
- // if no options are provided, we default to no delegateDid and all protocols (empty array)
373
- options ??= { protocols: [] };
374
-
375
424
  await registeredIdentities.put(did, JSON.stringify(options));
425
+ this._syncTargetsCache = undefined;
426
+ this._syncTargetsCacheGeneration++;
427
+
428
+ // If live sync is active, hot-add subscriptions for this identity.
429
+ if (this._syncMode === 'live') {
430
+ await this.addIdentityToLiveSync(did, options);
431
+ }
376
432
  }
377
433
 
378
434
  public async unregisterIdentity(did: string): Promise<void> {
@@ -382,7 +438,14 @@ export class SyncEngineLevel implements SyncEngine {
382
438
  throw new Error(`SyncEngineLevel: Identity with DID ${did} is not registered.`);
383
439
  }
384
440
 
441
+ // If live sync is active, hot-remove subscriptions for this identity.
442
+ if (this._syncMode === 'live') {
443
+ await this.removeIdentityFromLiveSync(did);
444
+ }
445
+
385
446
  await registeredIdentities.del(did);
447
+ this._syncTargetsCache = undefined;
448
+ this._syncTargetsCacheGeneration++;
386
449
  }
387
450
 
388
451
  public async getIdentityOptions(did: string): Promise<SyncIdentityOptions | undefined> {
@@ -404,6 +467,8 @@ export class SyncEngineLevel implements SyncEngine {
404
467
  }
405
468
 
406
469
  public async updateIdentityOptions({ did, options }: { did: string, options: SyncIdentityOptions }): Promise<void> {
470
+ SyncEngineLevel.validateSyncIdentityOptions(options);
471
+
407
472
  const registeredIdentities = this._db.sublevel('registeredIdentities');
408
473
  const existingOptions = await this.getIdentityOptions(did);
409
474
  if (!existingOptions) {
@@ -411,6 +476,23 @@ export class SyncEngineLevel implements SyncEngine {
411
476
  }
412
477
 
413
478
  await registeredIdentities.put(did, JSON.stringify(options));
479
+ this._syncTargetsCache = undefined;
480
+ this._syncTargetsCacheGeneration++;
481
+
482
+ // Always persist the new delegate to durable links, regardless of
483
+ // sync mode. If sync is stopped or polling, existing persisted links
484
+ // would otherwise keep the old delegateDid. When live sync starts
485
+ // later, initializeLinkTarget() loads the link from LevelDB without
486
+ // normalizing delegateDid, so repair/reconcile paths could use stale
487
+ // delegate data.
488
+ await this.ledger.updateDelegateDid(did, options.delegateDid);
489
+
490
+ // If live sync is active, tear down and rebuild subscriptions with
491
+ // the new options.
492
+ if (this._syncMode === 'live' && this.hasActiveLinksForDid(did)) {
493
+ await this.removeIdentityFromLiveSync(did);
494
+ await this.addIdentityToLiveSync(did, options);
495
+ }
414
496
  }
415
497
 
416
498
  // ---------------------------------------------------------------------------
@@ -534,7 +616,10 @@ export class SyncEngineLevel implements SyncEngine {
534
616
  this._syncIntervalId = undefined;
535
617
  }
536
618
 
619
+ this._syncTargetsCache = undefined;
620
+ this._syncTargetsCacheGeneration++;
537
621
  await this.teardownLiveSync();
622
+ this._syncMode = undefined;
538
623
  }
539
624
 
540
625
  // ---------------------------------------------------------------------------
@@ -611,95 +696,7 @@ export class SyncEngineLevel implements SyncEngine {
611
696
  // Step 2: Initialize replication links and open live subscriptions.
612
697
  // Each target's link initialization is independent — process concurrently.
613
698
  const syncTargets = await this.getSyncTargets();
614
- await Promise.allSettled(syncTargets.map(async (target) => {
615
- let link: ReplicationLinkState | undefined;
616
- try {
617
- // Get or create the link in the durable ledger.
618
- // Use protocol-scoped scope when a protocol is specified, otherwise full-tenant.
619
- const linkScope: SyncScope = target.protocol
620
- ? { kind: 'protocol', protocol: target.protocol }
621
- : { kind: 'full' };
622
- link = await this.ledger.getOrCreateLink({
623
- tenantDid : target.did,
624
- remoteEndpoint : target.dwnUrl,
625
- scope : linkScope,
626
- delegateDid : target.delegateDid,
627
- protocol : target.protocol,
628
- });
629
-
630
- // Cache the link for fast access by subscription handlers.
631
- // Use scopeId from the link for consistent runtime identity.
632
- const linkKey = this.buildLinkKey(target.did, target.dwnUrl, link.scopeId);
633
-
634
- // One-time migration: if the link has no pull checkpoint, check for
635
- // a legacy cursor in the old syncCursors sublevel. The legacy key
636
- // used protocol, not scopeId, so we must build it the old way.
637
- if (!link.pull.contiguousAppliedToken) {
638
- const legacyKey = buildLegacyCursorKey(target.did, target.dwnUrl, target.protocol);
639
- const legacyCursor = await this.getCursor(legacyKey);
640
- if (legacyCursor) {
641
- ReplicationLedger.resetCheckpoint(link.pull, legacyCursor);
642
- await this.ledger.saveLink(link);
643
- await this.deleteLegacyCursor(legacyKey);
644
- }
645
- }
646
-
647
- this._activeLinks.set(linkKey, link);
648
-
649
- // Open subscriptions — only transition to live if both succeed.
650
- // If pull succeeds but push fails, close the pull subscription to
651
- // avoid a resource leak with inconsistent state.
652
- const targetWithKey = { ...target, linkKey };
653
- await this.openLivePullSubscription(targetWithKey);
654
- try {
655
- await this.openLocalPushSubscription(targetWithKey);
656
- } catch (pushError) {
657
- // Close the already-opened pull subscription.
658
- const pullSub = this._liveSubscriptions.find((s) => s.linkKey === linkKey);
659
- if (pullSub) {
660
- try { await pullSub.close(); } catch { /* best effort */ }
661
- this._liveSubscriptions = this._liveSubscriptions.filter(s => s !== pullSub);
662
- }
663
- throw pushError;
664
- }
665
-
666
- this.emitEvent({ type: 'link:status-change', tenantDid: target.did, remoteEndpoint: target.dwnUrl, protocol: target.protocol, from: 'initializing', to: 'live' });
667
- await this.ledger.setStatus(link!, 'live');
668
-
669
- // If the link was marked dirty in a previous session, schedule
670
- // immediate reconciliation now that subscriptions are open.
671
- if (link!.needsReconcile) {
672
- this.scheduleReconcile(linkKey, 1000);
673
- }
674
- } catch (error: any) {
675
- const linkKey = link
676
- ? this.buildLinkKey(target.did, target.dwnUrl, link.scopeId)
677
- : buildLegacyCursorKey(target.did, target.dwnUrl, target.protocol);
678
-
679
- // Detect ProgressGap (410) — the cursor is stale, link needs SMT repair.
680
- if ((error as any).isProgressGap && link) {
681
- console.warn(`SyncEngineLevel: ProgressGap detected for ${target.did} -> ${target.dwnUrl}, initiating repair`);
682
- this.emitEvent({ type: 'gap:detected', tenantDid: target.did, remoteEndpoint: target.dwnUrl, protocol: target.protocol, reason: 'ProgressGap' });
683
- const gapInfo = (error as any).gapInfo;
684
- await this.transitionToRepairing(linkKey, link, {
685
- resumeToken: gapInfo?.latestAvailable,
686
- });
687
- return;
688
- }
689
-
690
- console.error(`SyncEngineLevel: Failed to open live subscription for ${target.did} -> ${target.dwnUrl}`, error);
691
-
692
- // Clean up in-memory state for the failed link so it doesn't appear
693
- // active to later code. The durable link remains at 'initializing'.
694
- this._activeLinks.delete(linkKey);
695
- this._linkRuntimes.delete(linkKey);
696
-
697
- // Recompute connectivity — if no live subscriptions remain, reset to unknown.
698
- if (this._liveSubscriptions.length === 0) {
699
- this._connectivityState = 'unknown';
700
- }
701
- }
702
- }));
699
+ await Promise.allSettled(syncTargets.map(t => this.initializeLinkTarget(t)));
703
700
 
704
701
  // Step 3: Schedule infrequent SMT integrity check.
705
702
  const integrityCheck = async (): Promise<void> => {
@@ -742,7 +739,7 @@ export class SyncEngineLevel implements SyncEngine {
742
739
  let drained = 0;
743
740
  while (true) {
744
741
  const entry = rt.inflight.get(rt.nextCommitOrdinal);
745
- if (!entry || !entry.committed) { break; }
742
+ if (!entry?.committed) { break; }
746
743
 
747
744
  // This ordinal is committed — advance the durable checkpoint.
748
745
  ReplicationLedger.commitContiguousToken(link.pull, entry.token);
@@ -765,16 +762,16 @@ export class SyncEngineLevel implements SyncEngine {
765
762
  private static readonly MAX_REPAIR_ATTEMPTS = 3;
766
763
 
767
764
  /** Per-link degraded-poll interval timers. */
768
- private _degradedPollTimers: Map<string, ReturnType<typeof setInterval>> = new Map();
765
+ private readonly _degradedPollTimers: Map<string, ReturnType<typeof setInterval>> = new Map();
769
766
 
770
767
  /** Per-link repair attempt counters. */
771
- private _repairAttempts: Map<string, number> = new Map();
768
+ private readonly _repairAttempts: Map<string, number> = new Map();
772
769
 
773
770
  /** Per-link active repair promises — prevents concurrent repair for the same link. */
774
- private _activeRepairs: Map<string, Promise<void>> = new Map();
771
+ private readonly _activeRepairs: Map<string, Promise<void>> = new Map();
775
772
 
776
773
  /** Per-link retry timers for failed repairs below max attempts. */
777
- private _repairRetryTimers: Map<string, ReturnType<typeof setTimeout>> = new Map();
774
+ private readonly _repairRetryTimers: Map<string, ReturnType<typeof setTimeout>> = new Map();
778
775
 
779
776
  /** Backoff schedule for repair retries (milliseconds). */
780
777
  private static readonly REPAIR_BACKOFF_MS = [1_000, 3_000, 10_000];
@@ -785,7 +782,7 @@ export class SyncEngineLevel implements SyncEngine {
785
782
  * the post-repair checkpoint so the reopened subscription replays from
786
783
  * a valid boundary instead of starting live-only.
787
784
  */
788
- private _repairContext: Map<string, { resumeToken?: ProgressToken }> = new Map();
785
+ private readonly _repairContext: Map<string, { resumeToken?: ProgressToken }> = new Map();
789
786
 
790
787
  /**
791
788
  * Central helper for transitioning a link to `repairing`. Encapsulates:
@@ -855,7 +852,7 @@ export class SyncEngineLevel implements SyncEngine {
855
852
 
856
853
  // Verify link still exists and is still repairing.
857
854
  const currentLink = this._activeLinks.get(linkKey);
858
- if (!currentLink || currentLink.status !== 'repairing') { return; }
855
+ if (currentLink?.status !== 'repairing') { return; }
859
856
 
860
857
  try {
861
858
  await this.repairLink(linkKey);
@@ -910,6 +907,11 @@ export class SyncEngineLevel implements SyncEngine {
910
907
  // mutating state — preventing the race where repair continues after teardown.
911
908
  const generation = this._engineGeneration;
912
909
 
910
+ // Identity guard helper: if the DID was hot-removed and quickly re-added,
911
+ // `_activeLinks` may contain a *different* link object for the same key.
912
+ // The old repair closure must not mutate the replacement link's state.
913
+ const isStaleLink = (): boolean => this._activeLinks.get(linkKey) !== link;
914
+
913
915
  const { tenantDid: did, remoteEndpoint: dwnUrl, delegateDid, protocol } = link;
914
916
 
915
917
  this.emitEvent({ type: 'repair:started', tenantDid: did, remoteEndpoint: dwnUrl, protocol, attempt: (this._repairAttempts.get(linkKey) ?? 0) + 1 });
@@ -919,7 +921,7 @@ export class SyncEngineLevel implements SyncEngine {
919
921
  // Step 1: Close existing subscriptions FIRST to stop old events from
920
922
  // mutating local state while repair runs.
921
923
  await this.closeLinkSubscriptions(link);
922
- if (this._engineGeneration !== generation) { return; } // Teardown occurred.
924
+ if (this._engineGeneration !== generation || isStaleLink()) { return; }
923
925
 
924
926
  // Step 2: Clear runtime ordinals immediately — stale state must not
925
927
  // persist across repair attempts (successful or failed).
@@ -931,7 +933,7 @@ export class SyncEngineLevel implements SyncEngine {
931
933
  try {
932
934
  // Step 3: Run SMT reconciliation for this link.
933
935
  const reconcileOutcome = await this.createLinkReconciler(
934
- () => this._engineGeneration === generation
936
+ () => this._engineGeneration === generation && !isStaleLink()
935
937
  ).reconcile({ did, dwnUrl, delegateDid, protocol });
936
938
  if (reconcileOutcome.aborted) { return; }
937
939
 
@@ -945,7 +947,7 @@ export class SyncEngineLevel implements SyncEngine {
945
947
  const resumeToken = repairCtx?.resumeToken ?? link.pull.contiguousAppliedToken;
946
948
  ReplicationLedger.resetCheckpoint(link.pull, resumeToken);
947
949
  await this.ledger.saveLink(link);
948
- if (this._engineGeneration !== generation) { return; }
950
+ if (this._engineGeneration !== generation || isStaleLink()) { return; }
949
951
 
950
952
  // Step 5: Reopen subscriptions.
951
953
  // Mark needsReconcile BEFORE reopening — local push starts from "now",
@@ -955,7 +957,7 @@ export class SyncEngineLevel implements SyncEngine {
955
957
  // if roots already match).
956
958
  link.needsReconcile = true;
957
959
  await this.ledger.saveLink(link);
958
- if (this._engineGeneration !== generation) { return; }
960
+ if (this._engineGeneration !== generation || isStaleLink()) { return; }
959
961
 
960
962
  const target = { did, dwnUrl, delegateDid, protocol, linkKey };
961
963
  try {
@@ -965,13 +967,13 @@ export class SyncEngineLevel implements SyncEngine {
965
967
  console.warn(`SyncEngineLevel: Stale pull resume token for ${did} -> ${dwnUrl}, resetting to start fresh`);
966
968
  ReplicationLedger.resetCheckpoint(link.pull);
967
969
  await this.ledger.saveLink(link);
968
- if (this._engineGeneration !== generation) { return; }
970
+ if (this._engineGeneration !== generation || isStaleLink()) { return; }
969
971
  await this.openLivePullSubscription(target);
970
972
  } else {
971
973
  throw pullErr;
972
974
  }
973
975
  }
974
- if (this._engineGeneration !== generation) { return; }
976
+ if (this._engineGeneration !== generation || isStaleLink()) { return; }
975
977
  try {
976
978
  await this.openLocalPushSubscription(target);
977
979
  } catch (pushError) {
@@ -982,7 +984,7 @@ export class SyncEngineLevel implements SyncEngine {
982
984
  }
983
985
  throw pushError;
984
986
  }
985
- if (this._engineGeneration !== generation) { return; }
987
+ if (this._engineGeneration !== generation || isStaleLink()) { return; }
986
988
 
987
989
  // Note: post-repair reconcile to close the repair-window gap is
988
990
  // scheduled by repairLink() AFTER _activeRepairs is cleared — not
@@ -1010,8 +1012,9 @@ export class SyncEngineLevel implements SyncEngine {
1010
1012
  this.emitEvent({ type: 'link:status-change', tenantDid: did, remoteEndpoint: dwnUrl, protocol, from: 'repairing', to: 'live' });
1011
1013
 
1012
1014
  } catch (error: any) {
1013
- // If teardown occurred during repair, don't retry or enter degraded_poll.
1014
- if (this._engineGeneration !== generation) { return; }
1015
+ // If teardown occurred during repair or the link was replaced by a
1016
+ // hot-remove + re-add, don't retry or enter degraded_poll.
1017
+ if (this._engineGeneration !== generation || isStaleLink()) { return; }
1015
1018
 
1016
1019
  console.error(`SyncEngineLevel: Repair failed for ${did} -> ${dwnUrl} (attempt ${attempts})`, error);
1017
1020
  this.emitEvent({ type: 'repair:failed', tenantDid: did, remoteEndpoint: dwnUrl, protocol, attempt: attempts, error: String(error.message ?? error) });
@@ -1070,8 +1073,14 @@ export class SyncEngineLevel implements SyncEngine {
1070
1073
  if (existing) { clearInterval(existing); }
1071
1074
 
1072
1075
  // Schedule per-link polling with jitter (15-30 seconds).
1076
+ // Rejection sampling: mask to 14 bits ([0, 16383]), reject >= 15000.
1073
1077
  const baseInterval = 15_000;
1074
- const jitter = Math.floor(Math.random() * 15_000);
1078
+ const randomBuf = new Uint32Array(1);
1079
+ let jitter: number;
1080
+ do {
1081
+ crypto.getRandomValues(randomBuf);
1082
+ jitter = randomBuf[0] & 0x3FFF;
1083
+ } while (jitter >= baseInterval);
1075
1084
  const interval = baseInterval + jitter;
1076
1085
 
1077
1086
  const pollGeneration = this._engineGeneration;
@@ -1083,9 +1092,12 @@ export class SyncEngineLevel implements SyncEngine {
1083
1092
  return;
1084
1093
  }
1085
1094
 
1086
- // If the link was transitioned out of degraded_poll externally (e.g.,
1087
- // by teardown or manual intervention), stop polling.
1088
- if (link.status !== 'degraded_poll') {
1095
+ // Resolve the *current* link from _activeLinks on each tick, not the
1096
+ // captured closure reference. After hot-remove + re-add, the captured
1097
+ // `link` object is stale and must not be used for status checks or
1098
+ // ledger writes.
1099
+ const currentLink = this._activeLinks.get(linkKey);
1100
+ if (currentLink?.status !== 'degraded_poll') {
1089
1101
  clearInterval(timer);
1090
1102
  this._degradedPollTimers.delete(linkKey);
1091
1103
  return;
@@ -1095,11 +1107,11 @@ export class SyncEngineLevel implements SyncEngine {
1095
1107
  // Attempt repair. Reset attempt counter so repairLink doesn't
1096
1108
  // immediately re-enter degraded_poll on failure.
1097
1109
  this._repairAttempts.set(linkKey, 0);
1098
- await this.ledger.setStatus(link, 'repairing');
1110
+ await this.ledger.setStatus(currentLink, 'repairing');
1099
1111
  await this.repairLink(linkKey);
1100
1112
 
1101
1113
  // If repairLink succeeded, link is now 'live' — stop polling.
1102
- if ((link.status as string) === 'live') {
1114
+ if ((currentLink.status as string) === 'live') {
1103
1115
  clearInterval(timer);
1104
1116
  this._degradedPollTimers.delete(linkKey);
1105
1117
  }
@@ -1108,7 +1120,9 @@ export class SyncEngineLevel implements SyncEngine {
1108
1120
  // This is critical: repairLink sets status to 'repairing' internally,
1109
1121
  // and if we don't restore degraded_poll, the next tick would see
1110
1122
  // status !== 'degraded_poll' and stop the timer permanently.
1111
- await this.ledger.setStatus(link, 'degraded_poll');
1123
+ if (this._activeLinks.get(linkKey) === currentLink) {
1124
+ await this.ledger.setStatus(currentLink, 'degraded_poll');
1125
+ }
1112
1126
  }
1113
1127
  }, interval);
1114
1128
 
@@ -1286,6 +1300,172 @@ export class SyncEngineLevel implements SyncEngine {
1286
1300
  this._linkRuntimes.clear();
1287
1301
  }
1288
1302
 
1303
+ // ---------------------------------------------------------------------------
1304
+ // Per-target link initialization (shared by startLiveSync + addIdentityToLiveSync)
1305
+ // ---------------------------------------------------------------------------
1306
+
1307
+ /**
1308
+ * Initialize a single replication link target: create or resume the durable
1309
+ * link, migrate legacy cursors, open pull + push subscriptions, and
1310
+ * transition the link to `'live'`.
1311
+ */
1312
+ private async initializeLinkTarget(target: {
1313
+ did: string; dwnUrl: string; delegateDid?: string; protocol?: string;
1314
+ }): Promise<void> {
1315
+ let link: ReplicationLinkState | undefined;
1316
+ try {
1317
+ const linkScope: SyncScope = target.protocol
1318
+ ? { kind: 'protocol', protocol: target.protocol }
1319
+ : { kind: 'full' };
1320
+ link = await this.ledger.getOrCreateLink({
1321
+ tenantDid : target.did,
1322
+ remoteEndpoint : target.dwnUrl,
1323
+ scope : linkScope,
1324
+ delegateDid : target.delegateDid,
1325
+ protocol : target.protocol,
1326
+ });
1327
+
1328
+ const linkKey = this.buildLinkKey(target.did, target.dwnUrl, link.scopeId);
1329
+
1330
+ if (!link.pull.contiguousAppliedToken) {
1331
+ const legacyKey = buildLegacyCursorKey(target.did, target.dwnUrl, target.protocol);
1332
+ const legacyCursor = await this.getCursor(legacyKey);
1333
+ if (legacyCursor) {
1334
+ ReplicationLedger.resetCheckpoint(link.pull, legacyCursor);
1335
+ await this.ledger.saveLink(link);
1336
+ await this.deleteLegacyCursor(legacyKey);
1337
+ }
1338
+ }
1339
+
1340
+ this._activeLinks.set(linkKey, link);
1341
+
1342
+ const targetWithKey = { ...target, linkKey };
1343
+ await this.openLivePullSubscription(targetWithKey);
1344
+ try {
1345
+ await this.openLocalPushSubscription(targetWithKey);
1346
+ } catch (pushError) {
1347
+ const pullSub = this._liveSubscriptions.find((s) => s.linkKey === linkKey);
1348
+ if (pullSub) {
1349
+ try { await pullSub.close(); } catch { /* best effort */ }
1350
+ this._liveSubscriptions = this._liveSubscriptions.filter(s => s !== pullSub);
1351
+ }
1352
+ throw pushError;
1353
+ }
1354
+
1355
+ this.emitEvent({ type: 'link:status-change', tenantDid: target.did, remoteEndpoint: target.dwnUrl, protocol: target.protocol, from: 'initializing', to: 'live' });
1356
+ await this.ledger.setStatus(link, 'live');
1357
+
1358
+ if (link.needsReconcile) {
1359
+ this.scheduleReconcile(linkKey, 1000);
1360
+ }
1361
+ } catch (error: any) {
1362
+ const linkKey = link
1363
+ ? this.buildLinkKey(target.did, target.dwnUrl, link.scopeId)
1364
+ : buildLegacyCursorKey(target.did, target.dwnUrl, target.protocol);
1365
+
1366
+ if (error.isProgressGap && link) {
1367
+ console.warn(`SyncEngineLevel: ProgressGap detected for ${target.did} -> ${target.dwnUrl}, initiating repair`);
1368
+ this.emitEvent({ type: 'gap:detected', tenantDid: target.did, remoteEndpoint: target.dwnUrl, protocol: target.protocol, reason: 'ProgressGap' });
1369
+ await this.transitionToRepairing(linkKey, link, {
1370
+ resumeToken: error.gapInfo?.latestAvailable,
1371
+ });
1372
+ return;
1373
+ }
1374
+
1375
+ console.error(`SyncEngineLevel: Failed to open live subscription for ${target.did} -> ${target.dwnUrl}`, error);
1376
+
1377
+ this._activeLinks.delete(linkKey);
1378
+ this._linkRuntimes.delete(linkKey);
1379
+
1380
+ if (this._liveSubscriptions.length === 0) {
1381
+ this._connectivityState = 'unknown';
1382
+ }
1383
+ }
1384
+ }
1385
+
1386
+ // ---------------------------------------------------------------------------
1387
+ // Hot-add / hot-remove: per-identity live sync management
1388
+ // ---------------------------------------------------------------------------
1389
+
1390
+ /** Check whether a link key belongs to a given DID. */
1391
+ private isLinkKeyForDid(key: string, did: string): boolean {
1392
+ return key.startsWith(did + '^') || key.startsWith(did + '_');
1393
+ }
1394
+
1395
+ /** Check whether this DID has any active links. */
1396
+ private hasActiveLinksForDid(did: string): boolean {
1397
+ for (const key of this._activeLinks.keys()) {
1398
+ if (this.isLinkKeyForDid(key, did)) { return true; }
1399
+ }
1400
+ return false;
1401
+ }
1402
+
1403
+ /** Hot-add a single identity to the active live sync session. */
1404
+ private async addIdentityToLiveSync(did: string, options: SyncIdentityOptions): Promise<void> {
1405
+ const { protocols, delegateDid } = options;
1406
+ const dwnEndpointUrls = await this.agent.dwn.getDwnEndpointUrlsForTarget(did);
1407
+ if (dwnEndpointUrls.length === 0) { return; }
1408
+
1409
+ const targets: { did: string; dwnUrl: string; delegateDid?: string; protocol?: string }[] = [];
1410
+ for (const dwnUrl of dwnEndpointUrls) {
1411
+ if (protocols === 'all') {
1412
+ targets.push({ did, delegateDid, dwnUrl });
1413
+ } else {
1414
+ for (const protocol of protocols) {
1415
+ targets.push({ did, delegateDid, dwnUrl, protocol });
1416
+ }
1417
+ }
1418
+ }
1419
+
1420
+ await Promise.allSettled(targets.map(t => this.initializeLinkTarget(t)));
1421
+ }
1422
+
1423
+ /** Hot-remove a single identity from the active live sync session. */
1424
+ private async removeIdentityFromLiveSync(did: string): Promise<void> {
1425
+ for (const sub of this._liveSubscriptions.filter(s => s.did === did)) {
1426
+ try { await sub.close(); } catch { /* best effort */ }
1427
+ }
1428
+ this._liveSubscriptions = this._liveSubscriptions.filter(s => s.did !== did);
1429
+
1430
+ for (const sub of this._localSubscriptions.filter(s => s.did === did)) {
1431
+ try { await sub.close(); } catch { /* best effort */ }
1432
+ }
1433
+ this._localSubscriptions = this._localSubscriptions.filter(s => s.did !== did);
1434
+
1435
+ for (const [key, runtime] of this._pushRuntimes) {
1436
+ if (runtime.did === did) {
1437
+ if (runtime.timer) { clearTimeout(runtime.timer); }
1438
+ this._pushRuntimes.delete(key);
1439
+ }
1440
+ }
1441
+
1442
+ for (const [key, timer] of this._degradedPollTimers) {
1443
+ if (this.isLinkKeyForDid(key, did)) { clearInterval(timer); this._degradedPollTimers.delete(key); }
1444
+ }
1445
+ for (const key of this._repairAttempts.keys()) {
1446
+ if (this.isLinkKeyForDid(key, did)) { this._repairAttempts.delete(key); }
1447
+ }
1448
+ for (const key of this._activeRepairs.keys()) {
1449
+ if (this.isLinkKeyForDid(key, did)) { this._activeRepairs.delete(key); }
1450
+ }
1451
+ for (const key of this._repairContext.keys()) {
1452
+ if (this.isLinkKeyForDid(key, did)) { this._repairContext.delete(key); }
1453
+ }
1454
+ for (const [key, timer] of this._repairRetryTimers) {
1455
+ if (this.isLinkKeyForDid(key, did)) { clearTimeout(timer); this._repairRetryTimers.delete(key); }
1456
+ }
1457
+ for (const [key, timer] of this._reconcileTimers) {
1458
+ if (this.isLinkKeyForDid(key, did)) { clearTimeout(timer); this._reconcileTimers.delete(key); }
1459
+ }
1460
+ for (const key of this._reconcileInFlight.keys()) {
1461
+ if (this.isLinkKeyForDid(key, did)) { this._reconcileInFlight.delete(key); }
1462
+ }
1463
+ for (const key of this._activeLinks.keys()) {
1464
+ if (this.isLinkKeyForDid(key, did)) { this._activeLinks.delete(key); this._linkRuntimes.delete(key); }
1465
+ }
1466
+ this._closureContexts.delete(did);
1467
+ }
1468
+
1289
1469
  // ---------------------------------------------------------------------------
1290
1470
  // Live pull: MessagesSubscribe to remote DWN
1291
1471
  // ---------------------------------------------------------------------------
@@ -1354,8 +1534,16 @@ export class SyncEngineLevel implements SyncEngine {
1354
1534
  // NOTE: The WebSocket client fires handlers without awaiting (fire-and-forget),
1355
1535
  // so multiple handlers can be in-flight concurrently. The ordinal tracker
1356
1536
  // ensures the checkpoint advances only when all earlier deliveries are committed.
1537
+ // Capture the link reference at subscription-open time so we can
1538
+ // detect remove+re-add via object identity, not just key existence.
1539
+ const capturedLink = link;
1540
+ const isStale = (): boolean =>
1541
+ this._engineGeneration !== handlerGeneration ||
1542
+ !this._activeLinks.has(cursorKey) ||
1543
+ (capturedLink !== undefined && this._activeLinks.get(cursorKey) !== capturedLink);
1544
+
1357
1545
  const subscriptionHandler = async (subMessage: SubscriptionMessage): Promise<void> => {
1358
- if (this._engineGeneration !== handlerGeneration) {
1546
+ if (isStale()) {
1359
1547
  return;
1360
1548
  }
1361
1549
 
@@ -1370,15 +1558,12 @@ export class SyncEngineLevel implements SyncEngine {
1370
1558
 
1371
1559
  if (!ReplicationLedger.validateTokenDomain(link.pull, subMessage.cursor)) {
1372
1560
  console.warn(`SyncEngineLevel: Token domain mismatch on EOSE for ${did} -> ${dwnUrl}, transitioning to repairing`);
1373
- await this.transitionToRepairing(cursorKey, link);
1561
+ if (!isStale()) { await this.transitionToRepairing(cursorKey, link); }
1374
1562
  return;
1375
1563
  }
1376
1564
  ReplicationLedger.setReceivedToken(link.pull, subMessage.cursor);
1377
- // Drain committed entries. Do NOT unconditionally advance to the
1378
- // EOSE cursor — earlier stored events may still be in-flight
1379
- // (handlers are fire-and-forget). The checkpoint advances only as
1380
- // far as the contiguous drain reaches.
1381
1565
  this.drainCommittedPull(cursorKey);
1566
+ if (isStale()) { return; }
1382
1567
  await this.ledger.saveLink(link);
1383
1568
  }
1384
1569
  // Transport is reachable — set connectivity to online.
@@ -1412,7 +1597,7 @@ export class SyncEngineLevel implements SyncEngine {
1412
1597
  // Domain validation: reject tokens from a different stream/epoch.
1413
1598
  if (link && !ReplicationLedger.validateTokenDomain(link.pull, subMessage.cursor)) {
1414
1599
  console.warn(`SyncEngineLevel: Token domain mismatch for ${did} -> ${dwnUrl}, transitioning to repairing`);
1415
- await this.transitionToRepairing(cursorKey, link);
1600
+ if (!isStale()) { await this.transitionToRepairing(cursorKey, link); }
1416
1601
  return;
1417
1602
  }
1418
1603
 
@@ -1425,9 +1610,11 @@ export class SyncEngineLevel implements SyncEngine {
1425
1610
  // reconnect/repair. This is safe because the event is intentionally
1426
1611
  // excluded from this scope and doesn't need processing.
1427
1612
  if (link && !isEventInScope(event.message, link.scope)) {
1428
- ReplicationLedger.setReceivedToken(link.pull, subMessage.cursor);
1429
- ReplicationLedger.commitContiguousToken(link.pull, subMessage.cursor);
1430
- await this.ledger.saveLink(link);
1613
+ if (!isStale()) {
1614
+ ReplicationLedger.setReceivedToken(link.pull, subMessage.cursor);
1615
+ ReplicationLedger.commitContiguousToken(link.pull, subMessage.cursor);
1616
+ await this.ledger.saveLink(link);
1617
+ }
1431
1618
  return;
1432
1619
  }
1433
1620
 
@@ -1459,6 +1646,7 @@ export class SyncEngineLevel implements SyncEngine {
1459
1646
  }
1460
1647
 
1461
1648
  await this.agent.dwn.processRawMessage(did, event.message, { dataStream });
1649
+ if (isStale()) { return; }
1462
1650
 
1463
1651
  // Invalidate closure cache entries that may be affected by this message.
1464
1652
  // Must run before closure validation so subsequent evaluations in the
@@ -1472,7 +1660,7 @@ export class SyncEngineLevel implements SyncEngine {
1472
1660
  // For protocol-scoped links, verify that all hard dependencies for
1473
1661
  // this operation are locally present before considering it committed.
1474
1662
  // Full-tenant scope bypasses this entirely (returns complete with 0 queries).
1475
- if (link && link.scope.kind === 'protocol') {
1663
+ if (link?.scope.kind === 'protocol') {
1476
1664
  const messageStore = this.agent.dwn.node.storage.messageStore;
1477
1665
  let closureCtx = this._closureContexts.get(did);
1478
1666
  if (!closureCtx) {
@@ -1486,6 +1674,8 @@ export class SyncEngineLevel implements SyncEngine {
1486
1674
  event.message, messageStore, link.scope, closureCtx
1487
1675
  );
1488
1676
 
1677
+ if (isStale()) { return; }
1678
+
1489
1679
  if (!closureResult.complete) {
1490
1680
  const failureCode = closureResult.failure!.code;
1491
1681
  const failureDetail = closureResult.failure!.detail;
@@ -1506,7 +1696,7 @@ export class SyncEngineLevel implements SyncEngine {
1506
1696
  errorDetail : failureDetail,
1507
1697
  });
1508
1698
 
1509
- await this.transitionToRepairing(cursorKey, link);
1699
+ if (!isStale()) { await this.transitionToRepairing(cursorKey, link); }
1510
1700
  return;
1511
1701
  }
1512
1702
  }
@@ -1533,7 +1723,7 @@ export class SyncEngineLevel implements SyncEngine {
1533
1723
  // Guard: if the link transitioned to repairing while this handler was
1534
1724
  // in-flight (e.g., an earlier ordinal's handler failed concurrently),
1535
1725
  // skip all state mutations — the repair process owns progression now.
1536
- if (link && rt && link.status === 'live') {
1726
+ if (link && rt && link.status === 'live' && !isStale()) {
1537
1727
  const entry = rt.inflight.get(ordinal);
1538
1728
  if (entry) { entry.committed = true; }
1539
1729
 
@@ -1584,7 +1774,7 @@ export class SyncEngineLevel implements SyncEngine {
1584
1774
  // Transition to repairing immediately — do NOT advance the checkpoint
1585
1775
  // past this failure or let later ordinals commit past it. SMT
1586
1776
  // reconciliation will discover and fill the gap.
1587
- if (link) {
1777
+ if (link && !isStale()) {
1588
1778
  await this.transitionToRepairing(cursorKey, link);
1589
1779
  }
1590
1780
  }
@@ -1706,9 +1896,16 @@ export class SyncEngineLevel implements SyncEngine {
1706
1896
 
1707
1897
  const handlerGeneration = this._engineGeneration;
1708
1898
 
1899
+ // Capture the link for identity-based staleness detection.
1900
+ const capturedPushLink = this._activeLinks.get(target.linkKey);
1901
+ const isPushStale = (): boolean =>
1902
+ this._engineGeneration !== handlerGeneration ||
1903
+ !this._activeLinks.has(target.linkKey) ||
1904
+ (capturedPushLink !== undefined && this._activeLinks.get(target.linkKey) !== capturedPushLink);
1905
+
1709
1906
  // Subscribe to the local DWN's EventLog.
1710
1907
  const subscriptionHandler = async (subMessage: SubscriptionMessage): Promise<void> => {
1711
- if (this._engineGeneration !== handlerGeneration) {
1908
+ if (isPushStale()) {
1712
1909
  return;
1713
1910
  }
1714
1911
 
@@ -1727,7 +1924,7 @@ export class SyncEngineLevel implements SyncEngine {
1727
1924
  // Accumulate the message CID for a debounced push.
1728
1925
  const targetKey = pushLinkKey;
1729
1926
  const cid = await Message.getCid(subMessage.event.message);
1730
- if (cid === undefined) {
1927
+ if (cid === undefined || isPushStale()) {
1731
1928
  return;
1732
1929
  }
1733
1930
 
@@ -1788,11 +1985,25 @@ export class SyncEngineLevel implements SyncEngine {
1788
1985
  }
1789
1986
 
1790
1987
  private async flushPendingPushesForLink(linkKey: string): Promise<void> {
1988
+ // Guard: bail if this link was hot-removed. Without this, a stale
1989
+ // debounce timer or retry callback could send pushes after the DID
1990
+ // was removed.
1991
+ if (!this._activeLinks.has(linkKey)) {
1992
+ return;
1993
+ }
1994
+
1791
1995
  const pushRuntime = this._pushRuntimes.get(linkKey);
1792
1996
  if (!pushRuntime) {
1793
1997
  return;
1794
1998
  }
1795
1999
 
2000
+ // Capture the current active link identity so we can detect
2001
+ // remove+re-add during the await pushMessages() call.
2002
+ const flushLink = this._activeLinks.get(linkKey);
2003
+ const isFlushStale = (): boolean =>
2004
+ !this._activeLinks.has(linkKey) ||
2005
+ (flushLink !== undefined && this._activeLinks.get(linkKey) !== flushLink);
2006
+
1796
2007
  const { did, dwnUrl, delegateDid, protocol, entries: pushEntries, retryCount } = pushRuntime;
1797
2008
  pushRuntime.entries = [];
1798
2009
 
@@ -1814,6 +2025,10 @@ export class SyncEngineLevel implements SyncEngine {
1814
2025
  permissionsApi : this._permissionsApi,
1815
2026
  });
1816
2027
 
2028
+ // If the link was replaced during pushMessages, abandon all
2029
+ // post-push state mutations — the replacement session owns this key.
2030
+ if (isFlushStale()) { return; }
2031
+
1817
2032
  // Auto-clear dead letters for CIDs that succeeded — a previously
1818
2033
  // failed message may have been repaired by reconciliation.
1819
2034
  for (const cid of result.succeeded) {
@@ -1834,6 +2049,7 @@ export class SyncEngineLevel implements SyncEngine {
1834
2049
  }
1835
2050
 
1836
2051
  if (result.failed.length > 0) {
2052
+ if (isFlushStale()) { return; }
1837
2053
  const failedSet = new Set(result.failed);
1838
2054
  const failedEntries = pushEntries.filter((entry) => failedSet.has(entry.cid));
1839
2055
  this.requeueOrReconcile(linkKey, {
@@ -1850,6 +2066,7 @@ export class SyncEngineLevel implements SyncEngine {
1850
2066
  }
1851
2067
  }
1852
2068
  } catch (error: any) {
2069
+ if (isFlushStale()) { return; }
1853
2070
  console.error(`SyncEngineLevel: Push batch failed for ${did} -> ${dwnUrl}`, error);
1854
2071
  this.requeueOrReconcile(linkKey, {
1855
2072
  did, dwnUrl, delegateDid, protocol,
@@ -1944,10 +2161,10 @@ export class SyncEngineLevel implements SyncEngine {
1944
2161
  // ---------------------------------------------------------------------------
1945
2162
 
1946
2163
  /** Active reconcile timers, keyed by link key. */
1947
- private _reconcileTimers: Map<string, ReturnType<typeof setTimeout>> = new Map();
2164
+ private readonly _reconcileTimers: Map<string, ReturnType<typeof setTimeout>> = new Map();
1948
2165
 
1949
2166
  /** Active reconcile operations, keyed by link key (dedup). */
1950
- private _reconcileInFlight: Map<string, Promise<void>> = new Map();
2167
+ private readonly _reconcileInFlight: Map<string, Promise<void>> = new Map();
1951
2168
 
1952
2169
  /**
1953
2170
  * Schedule a per-link reconciliation after a short debounce. Coalesces
@@ -1962,7 +2179,14 @@ export class SyncEngineLevel implements SyncEngine {
1962
2179
  const timer = setTimeout((): void => {
1963
2180
  this._reconcileTimers.delete(linkKey);
1964
2181
  if (this._engineGeneration !== generation) { return; }
1965
- void this.reconcileLink(linkKey);
2182
+ // Guard: bail if this link was hot-removed since the timer was
2183
+ // scheduled. Without this, a stale timer could restart reconcile
2184
+ // work for a DID that is no longer active.
2185
+ if (!this._activeLinks.has(linkKey)) { return; }
2186
+ void this.reconcileLink(linkKey).catch((): void => {
2187
+ // Errors are already logged inside doReconcileLink; swallow here
2188
+ // to prevent unhandled-rejection flakes in the test runner.
2189
+ });
1966
2190
  }, delayMs);
1967
2191
  this._reconcileTimers.set(linkKey, timer);
1968
2192
  }
@@ -2002,13 +2226,19 @@ export class SyncEngineLevel implements SyncEngine {
2002
2226
  }
2003
2227
 
2004
2228
  const generation = this._engineGeneration;
2229
+
2230
+ // Identity guard: if the DID was hot-removed and re-added, this
2231
+ // closure's captured `link` reference may no longer be the active
2232
+ // link object. Bail before mutating the replacement's state.
2233
+ const isStaleLink = (): boolean => this._activeLinks.get(linkKey) !== link;
2234
+
2005
2235
  const { tenantDid: did, remoteEndpoint: dwnUrl, delegateDid, protocol } = link;
2006
2236
 
2007
2237
  try {
2008
2238
  const reconcileOutcome = await this.createLinkReconciler(
2009
- () => this._engineGeneration === generation
2239
+ () => this._engineGeneration === generation && !isStaleLink()
2010
2240
  ).reconcile({ did, dwnUrl, delegateDid, protocol }, { verifyConvergence: true });
2011
- if (reconcileOutcome.aborted) { return; }
2241
+ if (reconcileOutcome.aborted || isStaleLink()) { return; }
2012
2242
 
2013
2243
  if (reconcileOutcome.converged) {
2014
2244
  await this.ledger.clearNeedsReconcile(link);
@@ -2020,9 +2250,10 @@ export class SyncEngineLevel implements SyncEngine {
2020
2250
  // Roots still differ — retry after a delay. This can happen when
2021
2251
  // pushMessages() had permanent failures, pullMessages() partially
2022
2252
  // failed, or new writes arrived during reconciliation.
2023
- this.scheduleReconcile(linkKey, 5000);
2253
+ if (!isStaleLink()) { this.scheduleReconcile(linkKey, 5000); }
2024
2254
  }
2025
2255
  } catch (error: any) {
2256
+ if (isStaleLink()) { return; }
2026
2257
  console.error(`SyncEngineLevel: Reconciliation failed for ${did} -> ${dwnUrl}`, error);
2027
2258
  // Schedule retry with longer delay.
2028
2259
  this.scheduleReconcile(linkKey, 5000);
@@ -2218,9 +2449,9 @@ export class SyncEngineLevel implements SyncEngine {
2218
2449
  private async getLocalRoot(did: string, delegateDid?: string, protocol?: string): Promise<string> {
2219
2450
  const si = this.stateIndex;
2220
2451
  if (si) {
2221
- const rootHash = protocol !== undefined
2222
- ? await si.getProtocolRoot(did, protocol)
2223
- : await si.getRoot(did);
2452
+ const rootHash = protocol === undefined
2453
+ ? await si.getRoot(did)
2454
+ : await si.getProtocolRoot(did, protocol);
2224
2455
  return hashToHex(rootHash);
2225
2456
  }
2226
2457
 
@@ -2363,9 +2594,9 @@ export class SyncEngineLevel implements SyncEngine {
2363
2594
  if (si) {
2364
2595
  // Fast path: direct StateIndex access (local mode).
2365
2596
  const bitPath = SyncEngineLevel.parseBitPrefix(prefix);
2366
- const hash = protocol !== undefined
2367
- ? await si.getProtocolSubtreeHash(did, protocol, bitPath)
2368
- : await si.getSubtreeHash(did, bitPath);
2597
+ const hash = protocol === undefined
2598
+ ? await si.getSubtreeHash(did, bitPath)
2599
+ : await si.getProtocolSubtreeHash(did, protocol, bitPath);
2369
2600
  hexHash = hashToHex(hash);
2370
2601
  } else {
2371
2602
  // Remote mode fallback.
@@ -2405,9 +2636,9 @@ export class SyncEngineLevel implements SyncEngine {
2405
2636
  const si = this.stateIndex;
2406
2637
  if (si) {
2407
2638
  const bitPath = SyncEngineLevel.parseBitPrefix(prefix);
2408
- const hash = protocol !== undefined
2409
- ? await si.getProtocolSubtreeHash(did, protocol, bitPath)
2410
- : await si.getSubtreeHash(did, bitPath);
2639
+ const hash = protocol === undefined
2640
+ ? await si.getSubtreeHash(did, bitPath)
2641
+ : await si.getProtocolSubtreeHash(did, protocol, bitPath);
2411
2642
  return hashToHex(hash);
2412
2643
  }
2413
2644
 
@@ -2440,9 +2671,9 @@ export class SyncEngineLevel implements SyncEngine {
2440
2671
  const si = this.stateIndex;
2441
2672
  if (si) {
2442
2673
  const bitPath = SyncEngineLevel.parseBitPrefix(prefix);
2443
- return protocol !== undefined
2444
- ? await si.getProtocolLeaves(did, protocol, bitPath)
2445
- : await si.getLeaves(did, bitPath);
2674
+ return protocol === undefined
2675
+ ? await si.getLeaves(did, bitPath)
2676
+ : await si.getProtocolLeaves(did, protocol, bitPath);
2446
2677
  }
2447
2678
 
2448
2679
  // Remote mode fallback.
@@ -2735,6 +2966,9 @@ export class SyncEngineLevel implements SyncEngine {
2735
2966
 
2736
2967
  /**
2737
2968
  * Returns the list of sync targets: (did, dwnUrl, delegateDid?, protocol?) tuples.
2969
+ * Results are cached for up to 30 seconds to avoid redundant DID resolution
2970
+ * on every sync tick. The cache is invalidated when identities are registered,
2971
+ * unregistered, or updated.
2738
2972
  */
2739
2973
  private async getSyncTargets(): Promise<{
2740
2974
  did: string;
@@ -2742,25 +2976,41 @@ export class SyncEngineLevel implements SyncEngine {
2742
2976
  delegateDid?: string;
2743
2977
  protocol?: string;
2744
2978
  }[]> {
2979
+ // Return cached targets if still valid.
2980
+ if (this._syncTargetsCache
2981
+ && (Date.now() - this._syncTargetsCache.timestamp) < SyncEngineLevel.SYNC_TARGETS_CACHE_TTL_MS) {
2982
+ return this._syncTargetsCache.targets;
2983
+ }
2984
+
2985
+ // Capture the generation before any async work so we can detect
2986
+ // concurrent invalidations (register/unregister/update) that would
2987
+ // make our result stale.
2988
+ const generationAtStart = this._syncTargetsCacheGeneration;
2989
+
2745
2990
  const targets: { did: string; dwnUrl: string; delegateDid?: string; protocol?: string }[] = [];
2991
+ let hasRegisteredIdentities = false;
2992
+ let anyEndpointMissing = false;
2746
2993
 
2747
2994
  for await (const [did, options] of this._db.sublevel('registeredIdentities').iterator()) {
2995
+ hasRegisteredIdentities = true;
2748
2996
  let parsed: SyncIdentityOptions;
2749
2997
  try {
2750
2998
  parsed = JSON.parse(options) as SyncIdentityOptions;
2751
2999
  } catch (error: unknown) {
2752
- console.warn(`SyncEngineLevel: Corrupt sync options for ${did}, falling back to global sync:`, error);
2753
- parsed = { protocols: [] };
3000
+ console.warn(`SyncEngineLevel: Corrupt sync options for ${did}, skipping identity:`, error);
3001
+ continue;
2754
3002
  }
3003
+
2755
3004
  const { protocols, delegateDid } = parsed;
2756
3005
 
2757
3006
  const dwnEndpointUrls = await this.agent.dwn.getDwnEndpointUrlsForTarget(did);
2758
3007
  if (dwnEndpointUrls.length === 0) {
3008
+ anyEndpointMissing = true;
2759
3009
  continue;
2760
3010
  }
2761
3011
 
2762
3012
  for (const dwnUrl of dwnEndpointUrls) {
2763
- if (protocols.length === 0) {
3013
+ if (protocols === 'all') {
2764
3014
  // Sync all protocols (global tree).
2765
3015
  targets.push({ did, delegateDid, dwnUrl });
2766
3016
  } else {
@@ -2771,6 +3021,17 @@ export class SyncEngineLevel implements SyncEngine {
2771
3021
  }
2772
3022
  }
2773
3023
 
3024
+ // Only cache when:
3025
+ // - The result is non-empty (empty = transient resolution failure).
3026
+ // - All registered identities resolved successfully (partial =
3027
+ // one identity's endpoints failed transiently; caching would
3028
+ // suppress retries for that identity for the full TTL).
3029
+ // - The generation hasn't changed (a concurrent register/unregister
3030
+ // invalidated the cache while we were awaiting).
3031
+ const isComplete = hasRegisteredIdentities && !anyEndpointMissing;
3032
+ if (targets.length > 0 && isComplete && this._syncTargetsCacheGeneration === generationAtStart) {
3033
+ this._syncTargetsCache = { targets, timestamp: Date.now() };
3034
+ }
2774
3035
  return targets;
2775
3036
  }
2776
3037