@enbox/agent 0.5.13 → 0.5.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -24,7 +24,9 @@ import { createClosureContext, invalidateClosureCache } from './sync-closure-typ
24
24
  import { AgentPermissionsApi } from './permissions-api.js';
25
25
  import { DwnInterface } from './types/dwn.js';
26
26
  import { isRecordsWrite } from './utils.js';
27
+ import { SyncLinkReconciler } from './sync-link-reconciler.js';
27
28
  import { topologicalSort } from './sync-topological-sort.js';
29
+ import { buildLegacyCursorKey, buildLinkId } from './sync-link-id.js';
28
30
  import { fetchRemoteMessages, pullMessages, pushMessages } from './sync-messages.js';
29
31
  /**
30
32
  * Maximum bit prefix depth for the per-node tree walk (legacy fallback).
@@ -40,16 +42,12 @@ const MAX_DIFF_DEPTH = 16;
40
42
  */
41
43
  const BATCHED_DIFF_DEPTH = 8;
42
44
  /**
43
- /**
44
- * Key for the subscription cursor sublevel. Cursors are keyed by
45
- * `{did}^{dwnUrl}[^{protocol}]` and store an opaque EventLog cursor string.
46
- */
47
- const CURSOR_SEPARATOR = '^';
48
- /**
49
- * Debounce window for push-on-write. When the local EventLog emits events,
50
- * we batch them and push after this delay to avoid a push per individual write.
45
+ * Debounce window for batching writes that arrive while a push is in flight.
46
+ * The first write in a quiet window triggers an immediate push; subsequent
47
+ * writes arriving during the push are batched and flushed after this delay
48
+ * once the in-flight push completes.
51
49
  */
52
- const PUSH_DEBOUNCE_MS = 250;
50
+ const PUSH_DEBOUNCE_MS = 100;
53
51
  /**
54
52
  * Checks whether a message's protocolPath and contextId match the link's
55
53
  * subset scope prefixes. Returns true if the message is in scope.
@@ -122,7 +120,7 @@ export class SyncEngineLevel {
122
120
  * and bail if it has changed — this prevents stale work from mutating
123
121
  * state after teardown or mode switch.
124
122
  */
125
- this._syncGeneration = 0;
123
+ this._engineGeneration = 0;
126
124
  /** Active live pull subscriptions (remote -> local via MessagesSubscribe). */
127
125
  this._liveSubscriptions = [];
128
126
  /** Active local EventLog subscriptions for push-on-write (local -> remote). */
@@ -131,8 +129,8 @@ export class SyncEngineLevel {
131
129
  this._connectivityState = 'unknown';
132
130
  /** Registered event listeners for observability. */
133
131
  this._eventListeners = new Set();
134
- /** Entry in the pending push queue — a message CID with its local EventLog token. */
135
- this._pendingPushCids = new Map();
132
+ /** Per-link push runtime: queue, debounce timer, retry state. */
133
+ this._pushRuntimes = new Map();
136
134
  /**
137
135
  * CIDs recently received via pull subscription, keyed by `cid|dwnUrl` to
138
136
  * scope suppression per remote endpoint. A message pulled from Provider A
@@ -163,6 +161,13 @@ export class SyncEngineLevel {
163
161
  * a valid boundary instead of starting live-only.
164
162
  */
165
163
  this._repairContext = new Map();
164
+ // ---------------------------------------------------------------------------
165
+ // Per-link reconciliation
166
+ // ---------------------------------------------------------------------------
167
+ /** Active reconcile timers, keyed by link key. */
168
+ this._reconcileTimers = new Map();
169
+ /** Active reconcile operations, keyed by link key (dedup). */
170
+ this._reconcileInFlight = new Map();
166
171
  this._agent = agent;
167
172
  this._permissionsApi = new AgentPermissionsApi({ agent: agent });
168
173
  this._db = (db) ? db : new Level(dataPath !== null && dataPath !== void 0 ? dataPath : 'DATA/AGENT/SYNC_STORE');
@@ -232,12 +237,14 @@ export class SyncEngineLevel {
232
237
  }
233
238
  clear() {
234
239
  return __awaiter(this, void 0, void 0, function* () {
240
+ yield this.teardownLiveSync();
235
241
  yield this._permissionsApi.clear();
236
242
  yield this._db.clear();
237
243
  });
238
244
  }
239
245
  close() {
240
246
  return __awaiter(this, void 0, void 0, function* () {
247
+ yield this.teardownLiveSync();
241
248
  yield this._db.close();
242
249
  });
243
250
  }
@@ -304,91 +311,60 @@ export class SyncEngineLevel {
304
311
  }
305
312
  this._syncLock = true;
306
313
  try {
307
- // Iterate over all registered identities and their DWN endpoints.
314
+ // Group targets by remote endpoint so each URL group can be reconciled
315
+ // concurrently. Within a group, targets are processed sequentially so
316
+ // that a single network failure skips the rest of that group.
308
317
  const syncTargets = yield this.getSyncTargets();
309
- const errored = new Set();
310
- let hadFailure = false;
318
+ const byUrl = new Map();
311
319
  for (const target of syncTargets) {
312
- const { did, delegateDid, dwnUrl, protocol } = target;
313
- if (errored.has(dwnUrl)) {
314
- continue;
320
+ let group = byUrl.get(target.dwnUrl);
321
+ if (!group) {
322
+ group = [];
323
+ byUrl.set(target.dwnUrl, group);
315
324
  }
316
- try {
317
- // Phase 1: Compare SMT roots between local and remote.
318
- const localRoot = yield this.getLocalRoot(did, delegateDid, protocol);
319
- const remoteRoot = yield this.getRemoteRoot(did, dwnUrl, delegateDid, protocol);
320
- if (localRoot === remoteRoot) {
321
- // Trees are identical — nothing to sync for this target.
322
- continue;
323
- }
324
- // Phase 2: Compute the diff in a single round-trip using the
325
- // batched 'diff' action. This replaces the per-node tree walk
326
- // that previously required dozens of HTTP requests.
327
- const diff = yield this.diffWithRemote({
328
- did, dwnUrl, delegateDid, protocol,
329
- });
330
- // Phase 3: Pull missing messages (remote has, local doesn't).
331
- // The diff response may include inline message data — use it
332
- // directly instead of re-fetching via individual MessagesRead calls.
333
- if (!direction || direction === 'pull') {
334
- if (diff.onlyRemote.length > 0) {
335
- // Separate entries into three categories:
336
- // 1. Fully prefetched: have message + inline data (or no data needed)
337
- // 2. Need data fetch: have message but missing data for RecordsWrite
338
- // 3. Need full fetch: no message at all
339
- const prefetched = [];
340
- const needsFetchCids = [];
341
- for (const entry of diff.onlyRemote) {
342
- if (!entry.message) {
343
- // No message at all — need full fetch.
344
- needsFetchCids.push(entry.messageCid);
345
- }
346
- else if (entry.message.descriptor.interface === 'Records' &&
347
- entry.message.descriptor.method === 'Write' &&
348
- entry.message.descriptor.dataCid &&
349
- !entry.encodedData) {
350
- // RecordsWrite with data but data wasn't inlined (too large).
351
- // Need to fetch individually to get the data stream.
352
- needsFetchCids.push(entry.messageCid);
353
- }
354
- else {
355
- // Fully prefetched (message + data or no data needed).
356
- prefetched.push(entry);
357
- }
358
- }
359
- yield this.pullMessages({
360
- did, dwnUrl, delegateDid, protocol,
361
- messageCids: needsFetchCids,
362
- prefetched,
363
- });
364
- }
325
+ group.push(target);
326
+ }
327
+ let groupsSucceeded = 0;
328
+ let groupsFailed = 0;
329
+ const results = yield Promise.allSettled([...byUrl.entries()].map((_a) => __awaiter(this, [_a], void 0, function* ([dwnUrl, targets]) {
330
+ for (const target of targets) {
331
+ const { did, delegateDid, protocol } = target;
332
+ try {
333
+ yield this.createLinkReconciler().reconcile({
334
+ did, dwnUrl, delegateDid, protocol,
335
+ }, { direction });
365
336
  }
366
- // Phase 4: Push missing messages (local has, remote doesn't).
367
- if (!direction || direction === 'push') {
368
- if (diff.onlyLocal.length > 0) {
369
- yield this.pushMessages({ did, dwnUrl, delegateDid, protocol, messageCids: diff.onlyLocal });
370
- }
337
+ catch (error) {
338
+ // Skip remaining targets for this DWN endpoint.
339
+ groupsFailed++;
340
+ console.error(`SyncEngineLevel: Error syncing ${did} with ${dwnUrl}`, error);
341
+ return;
371
342
  }
372
343
  }
373
- catch (error) {
374
- // Skip this DWN endpoint for remaining targets and log the real cause.
375
- errored.add(dwnUrl);
376
- hadFailure = true;
377
- console.error(`SyncEngineLevel: Error syncing ${did} with ${dwnUrl}`, error);
344
+ groupsSucceeded++;
345
+ })));
346
+ // Check for unexpected rejections (should not happen given inner try/catch).
347
+ for (const result of results) {
348
+ if (result.status === 'rejected') {
349
+ groupsFailed++;
378
350
  }
379
351
  }
380
- // Track consecutive failures for backoff in poll mode.
381
- if (hadFailure) {
352
+ // Track connectivity based on per-group outcomes. If at least one
353
+ // group succeeded, stay online — partial reachability is still online.
354
+ if (groupsSucceeded > 0) {
355
+ this._consecutiveFailures = 0;
356
+ this._connectivityState = 'online';
357
+ }
358
+ else if (groupsFailed > 0) {
382
359
  this._consecutiveFailures++;
383
360
  if (this._connectivityState === 'online') {
384
361
  this._connectivityState = 'offline';
385
362
  }
386
363
  }
387
- else {
364
+ else if (syncTargets.length > 0) {
365
+ // All targets had matching roots (no reconciliation needed).
388
366
  this._consecutiveFailures = 0;
389
- if (syncTargets.length > 0) {
390
- this._connectivityState = 'online';
391
- }
367
+ this._connectivityState = 'online';
392
368
  }
393
369
  }
394
370
  finally {
@@ -428,6 +404,7 @@ export class SyncEngineLevel {
428
404
  */
429
405
  stopSync() {
430
406
  return __awaiter(this, arguments, void 0, function* (timeout = 2000) {
407
+ this._engineGeneration++;
431
408
  let elapsedTimeout = 0;
432
409
  while (this._syncLock) {
433
410
  if (elapsedTimeout >= timeout) {
@@ -448,7 +425,11 @@ export class SyncEngineLevel {
448
425
  // ---------------------------------------------------------------------------
449
426
  startPollSync(intervalMilliseconds) {
450
427
  return __awaiter(this, void 0, void 0, function* () {
428
+ const generation = this._engineGeneration;
451
429
  const intervalSync = () => __awaiter(this, void 0, void 0, function* () {
430
+ if (this._engineGeneration !== generation) {
431
+ return;
432
+ }
452
433
  if (this._syncLock) {
453
434
  return;
454
435
  }
@@ -465,6 +446,9 @@ export class SyncEngineLevel {
465
446
  const effectiveInterval = this._consecutiveFailures > 0
466
447
  ? intervalMilliseconds * backoffMultiplier
467
448
  : intervalMilliseconds;
449
+ if (this._engineGeneration !== generation) {
450
+ return;
451
+ }
468
452
  if (!this._syncIntervalId) {
469
453
  this._syncIntervalId = setInterval(intervalSync, effectiveInterval);
470
454
  }
@@ -499,8 +483,9 @@ export class SyncEngineLevel {
499
483
  console.error('SyncEngineLevel: Error during initial live-sync catch-up', error);
500
484
  }
501
485
  // Step 2: Initialize replication links and open live subscriptions.
486
+ // Each target's link initialization is independent — process concurrently.
502
487
  const syncTargets = yield this.getSyncTargets();
503
- for (const target of syncTargets) {
488
+ yield Promise.allSettled(syncTargets.map((target) => __awaiter(this, void 0, void 0, function* () {
504
489
  let link;
505
490
  try {
506
491
  // Get or create the link in the durable ledger.
@@ -516,18 +501,32 @@ export class SyncEngineLevel {
516
501
  protocol: target.protocol,
517
502
  });
518
503
  // Cache the link for fast access by subscription handlers.
519
- const linkKey = this.buildCursorKey(target.did, target.dwnUrl, target.protocol);
504
+ // Use scopeId from the link for consistent runtime identity.
505
+ const linkKey = this.buildLinkKey(target.did, target.dwnUrl, link.scopeId);
506
+ // One-time migration: if the link has no pull checkpoint, check for
507
+ // a legacy cursor in the old syncCursors sublevel. The legacy key
508
+ // used protocol, not scopeId, so we must build it the old way.
509
+ if (!link.pull.contiguousAppliedToken) {
510
+ const legacyKey = buildLegacyCursorKey(target.did, target.dwnUrl, target.protocol);
511
+ const legacyCursor = yield this.getCursor(legacyKey);
512
+ if (legacyCursor) {
513
+ ReplicationLedger.resetCheckpoint(link.pull, legacyCursor);
514
+ yield this.ledger.saveLink(link);
515
+ yield this.deleteLegacyCursor(legacyKey);
516
+ }
517
+ }
520
518
  this._activeLinks.set(linkKey, link);
521
519
  // Open subscriptions — only transition to live if both succeed.
522
520
  // If pull succeeds but push fails, close the pull subscription to
523
521
  // avoid a resource leak with inconsistent state.
524
- yield this.openLivePullSubscription(target);
522
+ const targetWithKey = Object.assign(Object.assign({}, target), { linkKey });
523
+ yield this.openLivePullSubscription(targetWithKey);
525
524
  try {
526
- yield this.openLocalPushSubscription(target);
525
+ yield this.openLocalPushSubscription(targetWithKey);
527
526
  }
528
527
  catch (pushError) {
529
528
  // Close the already-opened pull subscription.
530
- const pullSub = this._liveSubscriptions.find(s => s.did === target.did && s.dwnUrl === target.dwnUrl && s.protocol === target.protocol);
529
+ const pullSub = this._liveSubscriptions.find((s) => s.linkKey === linkKey);
531
530
  if (pullSub) {
532
531
  try {
533
532
  yield pullSub.close();
@@ -539,9 +538,16 @@ export class SyncEngineLevel {
539
538
  }
540
539
  this.emitEvent({ type: 'link:status-change', tenantDid: target.did, remoteEndpoint: target.dwnUrl, protocol: target.protocol, from: 'initializing', to: 'live' });
541
540
  yield this.ledger.setStatus(link, 'live');
541
+ // If the link was marked dirty in a previous session, schedule
542
+ // a prompt reconciliation (1 s delay) now that subscriptions are open.
543
+ if (link.needsReconcile) {
544
+ this.scheduleReconcile(linkKey, 1000);
545
+ }
542
546
  }
543
547
  catch (error) {
544
- const linkKey = this.buildCursorKey(target.did, target.dwnUrl, target.protocol);
548
+ const linkKey = link
549
+ ? this.buildLinkKey(target.did, target.dwnUrl, link.scopeId)
550
+ : buildLegacyCursorKey(target.did, target.dwnUrl, target.protocol);
545
551
  // Detect ProgressGap (410) — the cursor is stale, link needs SMT repair.
546
552
  if (error.isProgressGap && link) {
547
553
  console.warn(`SyncEngineLevel: ProgressGap detected for ${target.did} -> ${target.dwnUrl}, initiating repair`);
@@ -550,7 +556,7 @@ export class SyncEngineLevel {
550
556
  yield this.transitionToRepairing(linkKey, link, {
551
557
  resumeToken: gapInfo === null || gapInfo === void 0 ? void 0 : gapInfo.latestAvailable,
552
558
  });
553
- continue;
559
+ return;
554
560
  }
555
561
  console.error(`SyncEngineLevel: Failed to open live subscription for ${target.did} -> ${target.dwnUrl}`, error);
556
562
  // Clean up in-memory state for the failed link so it doesn't appear
@@ -562,7 +568,7 @@ export class SyncEngineLevel {
562
568
  this._connectivityState = 'unknown';
563
569
  }
564
570
  }
565
- }
571
+ })));
566
572
  // Step 3: Schedule infrequent SMT integrity check.
567
573
  const integrityCheck = () => __awaiter(this, void 0, void 0, function* () {
568
574
  if (this._syncLock) {
@@ -672,11 +678,11 @@ export class SyncEngineLevel {
672
678
  const attempts = (_a = this._repairAttempts.get(linkKey)) !== null && _a !== void 0 ? _a : 1;
673
679
  const backoff = SyncEngineLevel.REPAIR_BACKOFF_MS;
674
680
  const delayMs = backoff[Math.min(attempts - 1, backoff.length - 1)];
675
- const timerGeneration = this._syncGeneration;
681
+ const timerGeneration = this._engineGeneration;
676
682
  const timer = setTimeout(() => __awaiter(this, void 0, void 0, function* () {
677
683
  this._repairRetryTimers.delete(linkKey);
678
684
  // Bail if teardown occurred since this timer was scheduled.
679
- if (this._syncGeneration !== timerGeneration) {
685
+ if (this._engineGeneration !== timerGeneration) {
680
686
  return;
681
687
  }
682
688
  // Verify link still exists and is still repairing.
@@ -708,6 +714,14 @@ export class SyncEngineLevel {
708
714
  }
709
715
  const promise = this.doRepairLink(linkKey).finally(() => {
710
716
  this._activeRepairs.delete(linkKey);
717
+ // Post-repair reconcile: if doRepairLink() marked needsReconcile
718
+ // (to close the gap between diff snapshot and new push subscription),
719
+ // schedule reconciliation NOW — after _activeRepairs is cleared so
720
+ // scheduleReconcile() won't skip it.
721
+ const link = this._activeLinks.get(linkKey);
722
+ if ((link === null || link === void 0 ? void 0 : link.needsReconcile) && link.status === 'live') {
723
+ this.scheduleReconcile(linkKey, 500);
724
+ }
711
725
  });
712
726
  this._activeRepairs.set(linkKey, promise);
713
727
  return promise;
@@ -728,7 +742,7 @@ export class SyncEngineLevel {
728
742
  // Capture the sync generation at repair start. If teardown occurs during
729
743
  // any await, the generation will have incremented and we bail before
730
744
  // mutating state — preventing the race where repair continues after teardown.
731
- const generation = this._syncGeneration;
745
+ const generation = this._engineGeneration;
732
746
  const { tenantDid: did, remoteEndpoint: dwnUrl, delegateDid, protocol } = link;
733
747
  this.emitEvent({ type: 'repair:started', tenantDid: did, remoteEndpoint: dwnUrl, protocol, attempt: ((_a = this._repairAttempts.get(linkKey)) !== null && _a !== void 0 ? _a : 0) + 1 });
734
748
  const attempts = ((_b = this._repairAttempts.get(linkKey)) !== null && _b !== void 0 ? _b : 0) + 1;
@@ -736,7 +750,7 @@ export class SyncEngineLevel {
736
750
  // Step 1: Close existing subscriptions FIRST to stop old events from
737
751
  // mutating local state while repair runs.
738
752
  yield this.closeLinkSubscriptions(link);
739
- if (this._syncGeneration !== generation) {
753
+ if (this._engineGeneration !== generation) {
740
754
  return;
741
755
  } // Teardown occurred.
742
756
  // Step 2: Clear runtime ordinals immediately — stale state must not
@@ -747,71 +761,60 @@ export class SyncEngineLevel {
747
761
  rt.nextCommitOrdinal = 0;
748
762
  try {
749
763
  // Step 3: Run SMT reconciliation for this link.
750
- const localRoot = yield this.getLocalRoot(did, delegateDid, protocol);
751
- if (this._syncGeneration !== generation) {
752
- return;
753
- }
754
- const remoteRoot = yield this.getRemoteRoot(did, dwnUrl, delegateDid, protocol);
755
- if (this._syncGeneration !== generation) {
764
+ const reconcileOutcome = yield this.createLinkReconciler(() => this._engineGeneration === generation).reconcile({ did, dwnUrl, delegateDid, protocol });
765
+ if (reconcileOutcome.aborted) {
756
766
  return;
757
767
  }
758
- if (localRoot !== remoteRoot) {
759
- const diff = yield this.diffWithRemote({ did, dwnUrl, delegateDid, protocol });
760
- if (this._syncGeneration !== generation) {
761
- return;
762
- }
763
- if (diff.onlyRemote.length > 0) {
764
- const prefetched = [];
765
- const needsFetchCids = [];
766
- for (const entry of diff.onlyRemote) {
767
- if (!entry.message || (entry.message.descriptor.interface === 'Records' &&
768
- entry.message.descriptor.method === 'Write' &&
769
- entry.message.descriptor.dataCid && !entry.encodedData)) {
770
- needsFetchCids.push(entry.messageCid);
771
- }
772
- else {
773
- prefetched.push(entry);
774
- }
775
- }
776
- yield this.pullMessages({ did, dwnUrl, delegateDid, protocol, messageCids: needsFetchCids, prefetched });
777
- if (this._syncGeneration !== generation) {
778
- return;
779
- }
780
- }
781
- if (diff.onlyLocal.length > 0) {
782
- yield this.pushMessages({ did, dwnUrl, delegateDid, protocol, messageCids: diff.onlyLocal });
783
- if (this._syncGeneration !== generation) {
784
- return;
785
- }
786
- }
787
- }
788
- // Step 4: Determine the post-repair resume token.
768
+ // Step 4: Determine the post-repair pull resume token.
789
769
  // - If repair was triggered by ProgressGap, use the stored resumeToken
790
770
  // (from gapInfo.latestAvailable) so the reopened subscription replays
791
771
  // from a valid boundary, closing the race window between SMT and resubscribe.
792
772
  // - Otherwise, use the existing contiguousAppliedToken if still valid.
793
- // - Push checkpoint is NOT reset during repair: push frontier tracks what
794
- // the local EventLog has delivered to the remote. SMT repair handles
795
- // pull-side convergence; push-side convergence is handled by the diff's
796
- // onlyLocal push. The push checkpoint remains the local authority.
773
+ // Push is opportunistic — no push checkpoint to reset.
797
774
  const repairCtx = this._repairContext.get(linkKey);
798
775
  const resumeToken = (_c = repairCtx === null || repairCtx === void 0 ? void 0 : repairCtx.resumeToken) !== null && _c !== void 0 ? _c : link.pull.contiguousAppliedToken;
799
776
  ReplicationLedger.resetCheckpoint(link.pull, resumeToken);
800
777
  yield this.ledger.saveLink(link);
801
- if (this._syncGeneration !== generation) {
778
+ if (this._engineGeneration !== generation) {
779
+ return;
780
+ }
781
+ // Step 5: Reopen subscriptions.
782
+ // Mark needsReconcile BEFORE reopening — local push starts from "now",
783
+ // so any writes between the diff snapshot (step 3) and the new push
784
+ // subscription are invisible to both mechanisms. A short post-reopen
785
+ // reconcile will close this gap (cheap: SMT root comparison short-circuits
786
+ // if roots already match).
787
+ link.needsReconcile = true;
788
+ yield this.ledger.saveLink(link);
789
+ if (this._engineGeneration !== generation) {
802
790
  return;
803
791
  }
804
- // Step 5: Reopen subscriptions with the repaired checkpoints.
805
- const target = { did, dwnUrl, delegateDid, protocol };
806
- yield this.openLivePullSubscription(target);
807
- if (this._syncGeneration !== generation) {
792
+ const target = { did, dwnUrl, delegateDid, protocol, linkKey };
793
+ try {
794
+ yield this.openLivePullSubscription(target);
795
+ }
796
+ catch (pullErr) {
797
+ if (pullErr.isProgressGap) {
798
+ console.warn(`SyncEngineLevel: Stale pull resume token for ${did} -> ${dwnUrl}, resetting to start fresh`);
799
+ ReplicationLedger.resetCheckpoint(link.pull);
800
+ yield this.ledger.saveLink(link);
801
+ if (this._engineGeneration !== generation) {
802
+ return;
803
+ }
804
+ yield this.openLivePullSubscription(target);
805
+ }
806
+ else {
807
+ throw pullErr;
808
+ }
809
+ }
810
+ if (this._engineGeneration !== generation) {
808
811
  return;
809
812
  }
810
813
  try {
811
- yield this.openLocalPushSubscription(Object.assign(Object.assign({}, target), { pushCursor: link.push.contiguousAppliedToken }));
814
+ yield this.openLocalPushSubscription(target);
812
815
  }
813
816
  catch (pushError) {
814
- const pullSub = this._liveSubscriptions.find(s => s.did === did && s.dwnUrl === dwnUrl && s.protocol === protocol);
817
+ const pullSub = this._liveSubscriptions.find((s) => s.linkKey === linkKey);
815
818
  if (pullSub) {
816
819
  try {
817
820
  yield pullSub.close();
@@ -821,9 +824,13 @@ export class SyncEngineLevel {
821
824
  }
822
825
  throw pushError;
823
826
  }
824
- if (this._syncGeneration !== generation) {
827
+ if (this._engineGeneration !== generation) {
825
828
  return;
826
829
  }
830
+ // Note: post-repair reconcile to close the repair-window gap is
831
+ // scheduled by repairLink() AFTER _activeRepairs is cleared — not
832
+ // here, because scheduleReconcile() would skip it while _activeRepairs
833
+ // still contains this link.
827
834
  // Step 6: Clean up repair context and transition to live.
828
835
  this._repairContext.delete(linkKey);
829
836
  this._repairAttempts.delete(linkKey);
@@ -843,7 +850,7 @@ export class SyncEngineLevel {
843
850
  }
844
851
  catch (error) {
845
852
  // If teardown occurred during repair, don't retry or enter degraded_poll.
846
- if (this._syncGeneration !== generation) {
853
+ if (this._engineGeneration !== generation) {
847
854
  return;
848
855
  }
849
856
  console.error(`SyncEngineLevel: Repair failed for ${did} -> ${dwnUrl} (attempt ${attempts})`, error);
@@ -863,9 +870,10 @@ export class SyncEngineLevel {
863
870
  */
864
871
  closeLinkSubscriptions(link) {
865
872
  return __awaiter(this, void 0, void 0, function* () {
866
- const { tenantDid: did, remoteEndpoint: dwnUrl, protocol } = link;
873
+ const { tenantDid: did, remoteEndpoint: dwnUrl } = link;
874
+ const linkKey = this.buildLinkKey(did, dwnUrl, link.scopeId);
867
875
  // Close pull subscription.
868
- const pullSub = this._liveSubscriptions.find(s => s.did === did && s.dwnUrl === dwnUrl && s.protocol === protocol);
876
+ const pullSub = this._liveSubscriptions.find((s) => s.linkKey === linkKey);
869
877
  if (pullSub) {
870
878
  try {
871
879
  yield pullSub.close();
@@ -874,7 +882,7 @@ export class SyncEngineLevel {
874
882
  this._liveSubscriptions = this._liveSubscriptions.filter(s => s !== pullSub);
875
883
  }
876
884
  // Close local push subscription.
877
- const pushSub = this._localSubscriptions.find(s => s.did === did && s.dwnUrl === dwnUrl && s.protocol === protocol);
885
+ const pushSub = this._localSubscriptions.find((s) => s.linkKey === linkKey);
878
886
  if (pushSub) {
879
887
  try {
880
888
  yield pushSub.close();
@@ -910,10 +918,10 @@ export class SyncEngineLevel {
910
918
  const baseInterval = 15000;
911
919
  const jitter = Math.floor(Math.random() * 15000);
912
920
  const interval = baseInterval + jitter;
913
- const pollGeneration = this._syncGeneration;
921
+ const pollGeneration = this._engineGeneration;
914
922
  const timer = setInterval(() => __awaiter(this, void 0, void 0, function* () {
915
923
  // Bail if teardown occurred since this timer was created.
916
- if (this._syncGeneration !== pollGeneration) {
924
+ if (this._engineGeneration !== pollGeneration) {
917
925
  clearInterval(timer);
918
926
  this._degradedPollTimers.delete(linkKey);
919
927
  return;
@@ -956,14 +964,14 @@ export class SyncEngineLevel {
956
964
  // Increment generation to invalidate all in-flight async operations
957
965
  // (repairs, retry timers, degraded-poll ticks). Any async work that
958
966
  // captured the previous generation will bail on its next checkpoint.
959
- this._syncGeneration++;
960
- // Clear the push debounce timer.
961
- if (this._pushDebounceTimer) {
962
- clearTimeout(this._pushDebounceTimer);
963
- this._pushDebounceTimer = undefined;
964
- }
965
- // Flush any pending push CIDs.
966
- this._pendingPushCids.clear();
967
+ this._engineGeneration++;
968
+ // Clear per-link push runtime state.
969
+ for (const pushRuntime of this._pushRuntimes.values()) {
970
+ if (pushRuntime.timer) {
971
+ clearTimeout(pushRuntime.timer);
972
+ }
973
+ }
974
+ this._pushRuntimes.clear();
967
975
  // Close all live pull subscriptions.
968
976
  for (const sub of this._liveSubscriptions) {
969
977
  try {
@@ -996,8 +1004,15 @@ export class SyncEngineLevel {
996
1004
  }
997
1005
  this._repairRetryTimers.clear();
998
1006
  this._repairContext.clear();
1007
+ // Clear reconcile timers and in-flight operations.
1008
+ for (const timer of this._reconcileTimers.values()) {
1009
+ clearTimeout(timer);
1010
+ }
1011
+ this._reconcileTimers.clear();
1012
+ this._reconcileInFlight.clear();
999
1013
  // Clear closure evaluation contexts.
1000
1014
  this._closureContexts.clear();
1015
+ this._recentlyPulledCids.clear();
1001
1016
  // Clear the in-memory link and runtime state.
1002
1017
  this._activeLinks.clear();
1003
1018
  this._linkRuntimes.clear();
@@ -1012,12 +1027,13 @@ export class SyncEngineLevel {
1012
1027
  */
1013
1028
  openLivePullSubscription(target) {
1014
1029
  return __awaiter(this, void 0, void 0, function* () {
1015
- var _a, _b;
1030
+ var _a;
1016
1031
  const { did, delegateDid, dwnUrl, protocol } = target;
1017
- // Resolve the cursor from the link's pull checkpoint (preferred) or legacy storage.
1018
- const cursorKey = this.buildCursorKey(did, dwnUrl, protocol);
1032
+ // Resolve the cursor from the link's durable pull checkpoint.
1033
+ // Legacy syncCursors migration happens at link load time in startLiveSync().
1034
+ const cursorKey = target.linkKey;
1019
1035
  const link = this._activeLinks.get(cursorKey);
1020
- let cursor = (_a = link === null || link === void 0 ? void 0 : link.pull.contiguousAppliedToken) !== null && _a !== void 0 ? _a : yield this.getCursor(cursorKey);
1036
+ let cursor = link === null || link === void 0 ? void 0 : link.pull.contiguousAppliedToken;
1021
1037
  // Guard against corrupted tokens with empty fields — these would fail
1022
1038
  // MessagesSubscribe JSON schema validation (minLength: 1). Discard and
1023
1039
  // start from the beginning rather than crash the subscription.
@@ -1037,7 +1053,7 @@ export class SyncEngineLevel {
1037
1053
  // MessagesFilter.protocolPathPrefix is a single string. Multiple prefixes
1038
1054
  // would need multiple filters (OR semantics) — for now we use the first one.
1039
1055
  const protocolPathPrefix = (link === null || link === void 0 ? void 0 : link.scope.kind) === 'protocol'
1040
- ? (_b = link.scope.protocolPathPrefixes) === null || _b === void 0 ? void 0 : _b[0]
1056
+ ? (_a = link.scope.protocolPathPrefixes) === null || _a === void 0 ? void 0 : _a[0]
1041
1057
  : undefined;
1042
1058
  const filters = protocol
1043
1059
  ? [Object.assign({ protocol }, (protocolPathPrefix ? { protocolPathPrefix } : {}))]
@@ -1057,11 +1073,15 @@ export class SyncEngineLevel {
1057
1073
  });
1058
1074
  permissionGrantId = grant.grant.id;
1059
1075
  }
1076
+ const handlerGeneration = this._engineGeneration;
1060
1077
  // Define the subscription handler that processes incoming events.
1061
1078
  // NOTE: The WebSocket client fires handlers without awaiting (fire-and-forget),
1062
1079
  // so multiple handlers can be in-flight concurrently. The ordinal tracker
1063
1080
  // ensures the checkpoint advances only when all earlier deliveries are committed.
1064
1081
  const subscriptionHandler = (subMessage) => __awaiter(this, void 0, void 0, function* () {
1082
+ if (this._engineGeneration !== handlerGeneration) {
1083
+ return;
1084
+ }
1065
1085
  if (subMessage.type === 'eose') {
1066
1086
  // End-of-stored-events — catch-up complete.
1067
1087
  if (link) {
@@ -1083,9 +1103,6 @@ export class SyncEngineLevel {
1083
1103
  this.drainCommittedPull(cursorKey);
1084
1104
  yield this.ledger.saveLink(link);
1085
1105
  }
1086
- else {
1087
- yield this.setCursor(cursorKey, subMessage.cursor);
1088
- }
1089
1106
  // Transport is reachable — set connectivity to online.
1090
1107
  if (link) {
1091
1108
  const prevEoseConnectivity = link.connectivity;
@@ -1093,6 +1110,10 @@ export class SyncEngineLevel {
1093
1110
  if (prevEoseConnectivity !== 'online') {
1094
1111
  this.emitEvent({ type: 'link:connectivity-change', tenantDid: did, remoteEndpoint: dwnUrl, protocol, from: prevEoseConnectivity, to: 'online' });
1095
1112
  }
1113
+ // If the link was marked dirty, schedule reconciliation now that it's healthy.
1114
+ if (link.needsReconcile) {
1115
+ this.scheduleReconcile(cursorKey, 500);
1116
+ }
1096
1117
  }
1097
1118
  else {
1098
1119
  this._connectivityState = 'online';
@@ -1222,10 +1243,6 @@ export class SyncEngineLevel {
1222
1243
  yield this.transitionToRepairing(cursorKey, link);
1223
1244
  }
1224
1245
  }
1225
- else if (!link) {
1226
- // Legacy path: no link available, use simple cursor persistence.
1227
- yield this.setCursor(cursorKey, subMessage.cursor);
1228
- }
1229
1246
  }
1230
1247
  catch (error) {
1231
1248
  console.error(`SyncEngineLevel: Error processing live-pull event for ${did}`, error);
@@ -1296,6 +1313,7 @@ export class SyncEngineLevel {
1296
1313
  throw new Error(`SyncEngineLevel: MessagesSubscribe failed for ${did} -> ${dwnUrl}: ${reply.status.code} ${reply.status.detail}`);
1297
1314
  }
1298
1315
  this._liveSubscriptions.push({
1316
+ linkKey: cursorKey,
1299
1317
  did,
1300
1318
  dwnUrl,
1301
1319
  delegateDid,
@@ -1303,7 +1321,7 @@ export class SyncEngineLevel {
1303
1321
  close: () => __awaiter(this, void 0, void 0, function* () { yield reply.subscription.close(); }),
1304
1322
  });
1305
1323
  // Set per-link connectivity to online after successful subscription setup.
1306
- const pullLink = this._activeLinks.get(this.buildCursorKey(did, dwnUrl, protocol));
1324
+ const pullLink = this._activeLinks.get(cursorKey);
1307
1325
  if (pullLink) {
1308
1326
  const prevPullConnectivity = pullLink.connectivity;
1309
1327
  pullLink.connectivity = 'online';
@@ -1322,19 +1340,8 @@ export class SyncEngineLevel {
1322
1340
  */
1323
1341
  openLocalPushSubscription(target) {
1324
1342
  return __awaiter(this, void 0, void 0, function* () {
1343
+ var _a;
1325
1344
  const { did, delegateDid, dwnUrl, protocol } = target;
1326
- // Guard against corrupted push cursors — same validation as the pull side.
1327
- let pushCursor = target.pushCursor;
1328
- if (pushCursor && (!pushCursor.streamId || !pushCursor.messageCid || !pushCursor.epoch || !pushCursor.position)) {
1329
- console.warn(`SyncEngineLevel: Discarding stored push cursor with empty field(s) for ${did} -> ${dwnUrl}`);
1330
- pushCursor = undefined;
1331
- const cursorKey = this.buildCursorKey(did, dwnUrl, protocol);
1332
- const link = this._activeLinks.get(cursorKey);
1333
- if (link) {
1334
- ReplicationLedger.resetCheckpoint(link.push);
1335
- yield this.ledger.saveLink(link);
1336
- }
1337
- }
1338
1345
  // Build filters scoped to the protocol (if any).
1339
1346
  const filters = protocol ? [{ protocol }] : [];
1340
1347
  // Look up permission grant for local subscription.
@@ -1349,35 +1356,24 @@ export class SyncEngineLevel {
1349
1356
  });
1350
1357
  permissionGrantId = grant.grant.id;
1351
1358
  }
1359
+ const handlerGeneration = this._engineGeneration;
1352
1360
  // Subscribe to the local DWN's EventLog.
1353
1361
  const subscriptionHandler = (subMessage) => __awaiter(this, void 0, void 0, function* () {
1362
+ if (this._engineGeneration !== handlerGeneration) {
1363
+ return;
1364
+ }
1354
1365
  if (subMessage.type !== 'event') {
1355
1366
  return;
1356
1367
  }
1357
- // Subset scope filtering for push: only push events that match the
1358
- // link's scope prefixes. Events outside the scope are not our responsibility.
1359
- // Skipped events MUST advance the push checkpoint to prevent infinite
1360
- // replay after repair/reconnect (same reason as the pull side).
1361
- const pushLink = this._activeLinks.get(this.buildCursorKey(did, dwnUrl, protocol));
1368
+ // Subset scope filtering: only push events that match the link's
1369
+ // scope prefixes. Events outside the scope are not our responsibility.
1370
+ const pushLinkKey = target.linkKey;
1371
+ const pushLink = this._activeLinks.get(pushLinkKey);
1362
1372
  if (pushLink && !isEventInScope(subMessage.event.message, pushLink.scope)) {
1363
- // Guard: only mutate durable state when the link is live/initializing.
1364
- // During repair/degraded_poll, orchestration owns checkpoint progression.
1365
- if (pushLink.status !== 'live' && pushLink.status !== 'initializing') {
1366
- return;
1367
- }
1368
- // Validate token domain before committing — a stream/epoch mismatch
1369
- // on the local EventLog should trigger repair, not silently overwrite.
1370
- if (!ReplicationLedger.validateTokenDomain(pushLink.push, subMessage.cursor)) {
1371
- yield this.transitionToRepairing(this.buildCursorKey(did, dwnUrl, protocol), pushLink);
1372
- return;
1373
- }
1374
- ReplicationLedger.setReceivedToken(pushLink.push, subMessage.cursor);
1375
- ReplicationLedger.commitContiguousToken(pushLink.push, subMessage.cursor);
1376
- yield this.ledger.saveLink(pushLink);
1377
1373
  return;
1378
1374
  }
1379
1375
  // Accumulate the message CID for a debounced push.
1380
- const targetKey = this.buildCursorKey(did, dwnUrl, protocol);
1376
+ const targetKey = pushLinkKey;
1381
1377
  const cid = yield Message.getCid(subMessage.event.message);
1382
1378
  if (cid === undefined) {
1383
1379
  return;
@@ -1388,30 +1384,26 @@ export class SyncEngineLevel {
1388
1384
  if (this.isRecentlyPulled(cid, dwnUrl)) {
1389
1385
  return;
1390
1386
  }
1391
- let pending = this._pendingPushCids.get(targetKey);
1392
- if (!pending) {
1393
- pending = { did, dwnUrl, delegateDid, protocol, entries: [] };
1394
- this._pendingPushCids.set(targetKey, pending);
1395
- }
1396
- pending.entries.push({ cid, localToken: subMessage.cursor });
1397
- // Debounce the push.
1398
- if (this._pushDebounceTimer) {
1399
- clearTimeout(this._pushDebounceTimer);
1387
+ const pushRuntime = this.getOrCreatePushRuntime(targetKey, {
1388
+ did, dwnUrl, delegateDid, protocol,
1389
+ });
1390
+ pushRuntime.entries.push({ cid });
1391
+ // Immediate-first: if no push is in flight and no batch timer is
1392
+ // pending, push immediately. Otherwise, the pending batch timer
1393
+ // or the post-flush drain will pick up the new entry.
1394
+ if (!pushRuntime.flushing && !pushRuntime.timer) {
1395
+ void this.flushPendingPushesForLink(targetKey);
1400
1396
  }
1401
- this._pushDebounceTimer = setTimeout(() => {
1402
- void this.flushPendingPushes();
1403
- }, PUSH_DEBOUNCE_MS);
1404
1397
  });
1405
- // Process the local subscription request.
1406
- // When a push cursor is provided (e.g., after repair), the local subscription
1407
- // replays events from that position, closing the race window where local
1408
- // writes during repair would otherwise be missed by push-on-write.
1398
+ // Subscribe to the local DWN EventLog from "now" — opportunistic push
1399
+ // does not replay from a stored cursor. Any writes missed during outages
1400
+ // are recovered by the post-repair reconciliation path.
1409
1401
  const response = yield this.agent.dwn.processRequest({
1410
1402
  author: did,
1411
1403
  target: did,
1412
1404
  messageType: DwnInterface.MessagesSubscribe,
1413
1405
  granteeDid: delegateDid,
1414
- messageParams: { filters, permissionGrantId, cursor: pushCursor },
1406
+ messageParams: { filters, permissionGrantId },
1415
1407
  subscriptionHandler: subscriptionHandler,
1416
1408
  });
1417
1409
  const reply = response.reply;
@@ -1419,6 +1411,7 @@ export class SyncEngineLevel {
1419
1411
  throw new Error(`SyncEngineLevel: Local MessagesSubscribe failed for ${did}: ${reply.status.code} ${reply.status.detail}`);
1420
1412
  }
1421
1413
  this._localSubscriptions.push({
1414
+ linkKey: (_a = target.linkKey) !== null && _a !== void 0 ? _a : buildLegacyCursorKey(did, dwnUrl, protocol),
1422
1415
  did,
1423
1416
  dwnUrl,
1424
1417
  delegateDid,
@@ -1432,104 +1425,234 @@ export class SyncEngineLevel {
1432
1425
  */
1433
1426
  flushPendingPushes() {
1434
1427
  return __awaiter(this, void 0, void 0, function* () {
1435
- this._pushDebounceTimer = undefined;
1436
- const batches = [...this._pendingPushCids.entries()];
1437
- this._pendingPushCids.clear();
1438
- // Push to all endpoints in parallel — each target is independent.
1439
- yield Promise.all(batches.map((_a) => __awaiter(this, [_a], void 0, function* ([targetKey, pending]) {
1440
- const { did, dwnUrl, delegateDid, protocol, entries: pushEntries } = pending;
1441
- if (pushEntries.length === 0) {
1442
- return;
1428
+ yield Promise.all([...this._pushRuntimes.keys()].map((linkKey) => __awaiter(this, void 0, void 0, function* () {
1429
+ yield this.flushPendingPushesForLink(linkKey);
1430
+ })));
1431
+ });
1432
+ }
1433
+ flushPendingPushesForLink(linkKey) {
1434
+ return __awaiter(this, void 0, void 0, function* () {
1435
+ const pushRuntime = this._pushRuntimes.get(linkKey);
1436
+ if (!pushRuntime) {
1437
+ return;
1438
+ }
1439
+ const { did, dwnUrl, delegateDid, protocol, entries: pushEntries, retryCount } = pushRuntime;
1440
+ pushRuntime.entries = [];
1441
+ if (pushEntries.length === 0) {
1442
+ if (!pushRuntime.timer && !pushRuntime.flushing && retryCount === 0) {
1443
+ this._pushRuntimes.delete(linkKey);
1443
1444
  }
1444
- const cids = pushEntries.map(e => e.cid);
1445
- try {
1446
- const result = yield pushMessages({
1445
+ return;
1446
+ }
1447
+ const cids = pushEntries.map((entry) => entry.cid);
1448
+ pushRuntime.flushing = true;
1449
+ try {
1450
+ const result = yield pushMessages({
1451
+ did, dwnUrl, delegateDid, protocol,
1452
+ messageCids: cids,
1453
+ agent: this.agent,
1454
+ permissionsApi: this._permissionsApi,
1455
+ });
1456
+ if (result.failed.length > 0) {
1457
+ const failedSet = new Set(result.failed);
1458
+ const failedEntries = pushEntries.filter((entry) => failedSet.has(entry.cid));
1459
+ this.requeueOrReconcile(linkKey, {
1447
1460
  did, dwnUrl, delegateDid, protocol,
1448
- messageCids: cids,
1449
- agent: this.agent,
1450
- permissionsApi: this._permissionsApi,
1461
+ entries: failedEntries,
1462
+ retryCount: retryCount + 1,
1451
1463
  });
1452
- // Advance the push checkpoint for successfully pushed entries.
1453
- // Push is sequential (single batch, in-order processing) so we can
1454
- // commit directly without ordinal tracking — there's no concurrent
1455
- // completion to reorder.
1456
- const link = this._activeLinks.get(targetKey);
1457
- if (link) {
1458
- const succeededSet = new Set(result.succeeded);
1459
- // Track highest contiguous success: if a CID fails, we stop advancing.
1460
- let hitFailure = false;
1461
- for (const entry of pushEntries) {
1462
- if (hitFailure) {
1463
- break;
1464
- }
1465
- if (succeededSet.has(entry.cid) && entry.localToken) {
1466
- if (!ReplicationLedger.validateTokenDomain(link.push, entry.localToken)) {
1467
- console.warn(`SyncEngineLevel: Push checkpoint domain mismatch for ${did} -> ${dwnUrl}, transitioning to repairing`);
1468
- yield this.transitionToRepairing(targetKey, link);
1469
- break;
1470
- }
1471
- ReplicationLedger.setReceivedToken(link.push, entry.localToken);
1472
- ReplicationLedger.commitContiguousToken(link.push, entry.localToken);
1473
- }
1474
- else {
1475
- // This CID failed or had no token — stop advancing.
1476
- hitFailure = true;
1477
- }
1478
- }
1479
- yield this.ledger.saveLink(link);
1480
- }
1481
- // Re-queue only TRANSIENT failures for retry. Permanent failures (400/401/403)
1482
- // are dropped — they will never succeed regardless of retry.
1483
- if (result.failed.length > 0) {
1484
- console.error(`SyncEngineLevel: Push-on-write failed for ${did} -> ${dwnUrl}: ` +
1485
- `${result.failed.length} transient failures of ${cids.length} messages`);
1486
- const failedSet = new Set(result.failed);
1487
- const failedEntries = pushEntries.filter(e => failedSet.has(e.cid));
1488
- let requeued = this._pendingPushCids.get(targetKey);
1489
- if (!requeued) {
1490
- requeued = { did, dwnUrl, delegateDid, protocol, entries: [] };
1491
- this._pendingPushCids.set(targetKey, requeued);
1492
- }
1493
- requeued.entries.push(...failedEntries);
1494
- // Schedule a retry after a short delay.
1495
- if (!this._pushDebounceTimer) {
1496
- this._pushDebounceTimer = setTimeout(() => {
1497
- void this.flushPendingPushes();
1498
- }, PUSH_DEBOUNCE_MS * 4);
1499
- }
1500
- }
1501
- // Permanent failures are logged by pushMessages but NOT re-queued.
1502
- // They will be rediscovered by the next SMT integrity check if the
1503
- // local/remote state has changed, but won't spin in a retry loop.
1504
1464
  }
1505
- catch (error) {
1506
- // Truly unexpected error (not per-message failure). Re-queue entire
1507
- // batch so entries aren't silently dropped from the debounce queue.
1508
- console.error(`SyncEngineLevel: Push-on-write failed for ${did} -> ${dwnUrl}`, error);
1509
- let requeued = this._pendingPushCids.get(targetKey);
1510
- if (!requeued) {
1511
- requeued = { did, dwnUrl, delegateDid, protocol, entries: [] };
1512
- this._pendingPushCids.set(targetKey, requeued);
1513
- }
1514
- requeued.entries.push(...pushEntries);
1515
- if (!this._pushDebounceTimer) {
1516
- this._pushDebounceTimer = setTimeout(() => {
1517
- void this.flushPendingPushes();
1518
- }, PUSH_DEBOUNCE_MS * 4);
1465
+ else {
1466
+ // Successful push reset retry count so subsequent unrelated
1467
+ // batches on this link start with a fresh budget.
1468
+ pushRuntime.retryCount = 0;
1469
+ if (!pushRuntime.timer && pushRuntime.entries.length === 0) {
1470
+ this._pushRuntimes.delete(linkKey);
1519
1471
  }
1520
1472
  }
1521
- })));
1473
+ }
1474
+ catch (error) {
1475
+ console.error(`SyncEngineLevel: Push batch failed for ${did} -> ${dwnUrl}`, error);
1476
+ this.requeueOrReconcile(linkKey, {
1477
+ did, dwnUrl, delegateDid, protocol,
1478
+ entries: pushEntries,
1479
+ retryCount: retryCount + 1,
1480
+ });
1481
+ }
1482
+ finally {
1483
+ pushRuntime.flushing = false;
1484
+ // If new entries accumulated while this push was in flight, schedule
1485
+ // a short drain to flush them. This gives a brief batching window
1486
+ // for burst writes while keeping single-write latency low.
1487
+ const rt = this._pushRuntimes.get(linkKey);
1488
+ if (rt && rt.entries.length > 0 && !rt.timer) {
1489
+ rt.timer = setTimeout(() => {
1490
+ rt.timer = undefined;
1491
+ void this.flushPendingPushesForLink(linkKey);
1492
+ }, PUSH_DEBOUNCE_MS);
1493
+ }
1494
+ }
1495
+ });
1496
+ }
1497
+ /**
1498
+ * Re-queues a failed push batch for retry, or marks the link
1499
+ * `needsReconcile` if retries are exhausted. Bounded to prevent
1500
+ * infinite retry loops.
1501
+ */
1502
+ requeueOrReconcile(targetKey, pending) {
1503
+ var _a;
1504
+ const maxRetries = SyncEngineLevel.PUSH_RETRY_BACKOFF_MS.length;
1505
+ const pushRuntime = this.getOrCreatePushRuntime(targetKey, pending);
1506
+ if (pending.retryCount >= maxRetries) {
1507
+ // Retry budget exhausted — mark link dirty for reconciliation.
1508
+ if (pushRuntime.timer) {
1509
+ clearTimeout(pushRuntime.timer);
1510
+ }
1511
+ this._pushRuntimes.delete(targetKey);
1512
+ const link = this._activeLinks.get(targetKey);
1513
+ if (link && !link.needsReconcile) {
1514
+ link.needsReconcile = true;
1515
+ void this.ledger.saveLink(link).then(() => {
1516
+ this.emitEvent({ type: 'reconcile:needed', tenantDid: pending.did, remoteEndpoint: pending.dwnUrl, protocol: pending.protocol, reason: 'push-retry-exhausted' });
1517
+ this.scheduleReconcile(targetKey);
1518
+ });
1519
+ }
1520
+ return;
1521
+ }
1522
+ pushRuntime.entries.push(...pending.entries);
1523
+ pushRuntime.retryCount = pending.retryCount;
1524
+ const delayMs = (_a = SyncEngineLevel.PUSH_RETRY_BACKOFF_MS[pending.retryCount]) !== null && _a !== void 0 ? _a : 2000;
1525
+ if (pushRuntime.timer) {
1526
+ clearTimeout(pushRuntime.timer);
1527
+ }
1528
+ pushRuntime.timer = setTimeout(() => {
1529
+ pushRuntime.timer = undefined;
1530
+ void this.flushPendingPushesForLink(targetKey);
1531
+ }, delayMs);
1532
+ }
1533
+ createLinkReconciler(shouldContinue) {
1534
+ return new SyncLinkReconciler({
1535
+ getLocalRoot: (did, delegateDid, protocol) => __awaiter(this, void 0, void 0, function* () { return this.getLocalRoot(did, delegateDid, protocol); }),
1536
+ getRemoteRoot: (did, dwnUrl, delegateDid, protocol) => __awaiter(this, void 0, void 0, function* () { return this.getRemoteRoot(did, dwnUrl, delegateDid, protocol); }),
1537
+ diffWithRemote: (target) => __awaiter(this, void 0, void 0, function* () { return this.diffWithRemote(target); }),
1538
+ pullMessages: (params) => __awaiter(this, void 0, void 0, function* () { return this.pullMessages(params); }),
1539
+ pushMessages: (params) => __awaiter(this, void 0, void 0, function* () { return this.pushMessages(params); }),
1540
+ shouldContinue,
1541
+ });
1542
+ }
1543
+ /**
1544
+ * Schedule a per-link reconciliation after a short debounce. Coalesces
1545
+ * repeated requests for the same link.
1546
+ */
1547
+ scheduleReconcile(linkKey, delayMs = 1500) {
1548
+ if (this._reconcileTimers.has(linkKey)) {
1549
+ return;
1550
+ }
1551
+ if (this._reconcileInFlight.has(linkKey)) {
1552
+ return;
1553
+ }
1554
+ if (this._activeRepairs.has(linkKey)) {
1555
+ return;
1556
+ }
1557
+ const generation = this._engineGeneration;
1558
+ const timer = setTimeout(() => {
1559
+ this._reconcileTimers.delete(linkKey);
1560
+ if (this._engineGeneration !== generation) {
1561
+ return;
1562
+ }
1563
+ void this.reconcileLink(linkKey);
1564
+ }, delayMs);
1565
+ this._reconcileTimers.set(linkKey, timer);
1566
+ }
1567
+ /**
1568
+ * Run SMT reconciliation for a single link. Deduplicates concurrent calls.
1569
+ * On success, clears `needsReconcile`. On failure, schedules retry.
1570
+ */
1571
+ reconcileLink(linkKey) {
1572
+ return __awaiter(this, void 0, void 0, function* () {
1573
+ const existing = this._reconcileInFlight.get(linkKey);
1574
+ if (existing) {
1575
+ return existing;
1576
+ }
1577
+ const promise = this.doReconcileLink(linkKey).finally(() => {
1578
+ this._reconcileInFlight.delete(linkKey);
1579
+ });
1580
+ this._reconcileInFlight.set(linkKey, promise);
1581
+ return promise;
1522
1582
  });
1523
1583
  }
1584
+ /**
1585
+ * Internal reconciliation implementation for a single link. Runs the
1586
+ * same SMT diff + pull/push that `sync()` does, but scoped to one link.
1587
+ */
1588
+ doReconcileLink(linkKey) {
1589
+ return __awaiter(this, void 0, void 0, function* () {
1590
+ const link = this._activeLinks.get(linkKey);
1591
+ if (!link) {
1592
+ return;
1593
+ }
1594
+ // Only reconcile live links — repairing/degraded links have their own
1595
+ // recovery path. Reconciling during repair would race with SMT diff.
1596
+ if (link.status !== 'live') {
1597
+ return;
1598
+ }
1599
+ // Skip if a repair is in progress for this link.
1600
+ if (this._activeRepairs.has(linkKey)) {
1601
+ return;
1602
+ }
1603
+ const generation = this._engineGeneration;
1604
+ const { tenantDid: did, remoteEndpoint: dwnUrl, delegateDid, protocol } = link;
1605
+ try {
1606
+ const reconcileOutcome = yield this.createLinkReconciler(() => this._engineGeneration === generation).reconcile({ did, dwnUrl, delegateDid, protocol }, { verifyConvergence: true });
1607
+ if (reconcileOutcome.aborted) {
1608
+ return;
1609
+ }
1610
+ if (reconcileOutcome.converged) {
1611
+ yield this.ledger.clearNeedsReconcile(link);
1612
+ this.emitEvent({ type: 'reconcile:completed', tenantDid: did, remoteEndpoint: dwnUrl, protocol });
1613
+ }
1614
+ else {
1615
+ // Roots still differ — retry after a delay. This can happen when
1616
+ // pushMessages() had permanent failures, pullMessages() partially
1617
+ // failed, or new writes arrived during reconciliation.
1618
+ this.scheduleReconcile(linkKey, 5000);
1619
+ }
1620
+ }
1621
+ catch (error) {
1622
+ console.error(`SyncEngineLevel: Reconciliation failed for ${did} -> ${dwnUrl}`, error);
1623
+ // Schedule retry with longer delay.
1624
+ this.scheduleReconcile(linkKey, 5000);
1625
+ }
1626
+ });
1627
+ }
1628
+ getOrCreatePushRuntime(linkKey, params) {
1629
+ let pushRuntime = this._pushRuntimes.get(linkKey);
1630
+ if (!pushRuntime) {
1631
+ pushRuntime = Object.assign(Object.assign({}, params), { entries: [], retryCount: 0 });
1632
+ this._pushRuntimes.set(linkKey, pushRuntime);
1633
+ }
1634
+ return pushRuntime;
1635
+ }
1524
1636
  // ---------------------------------------------------------------------------
1525
1637
  // Cursor persistence
1526
1638
  // ---------------------------------------------------------------------------
1527
- buildCursorKey(did, dwnUrl, protocol) {
1528
- const base = `${did}${CURSOR_SEPARATOR}${dwnUrl}`;
1529
- return protocol ? `${base}${CURSOR_SEPARATOR}${protocol}` : base;
1639
+ /**
1640
+ * Build the runtime key for a replication link.
1641
+ *
1642
+ * Live-mode subscription methods (`openLivePullSubscription`,
1643
+ * `openLocalPushSubscription`) receive `linkKey` directly and never
1644
+ * call this. The remaining callers are poll-mode `sync()` and the
1645
+ * live-mode startup/error paths that already have `link.scopeId`.
1646
+ *
1647
+ * The `undefined` fallback (which produces a legacy cursor key) exists
1648
+ * only for the no-protocol full-tenant targets in poll mode.
1649
+ */
1650
+ buildLinkKey(did, dwnUrl, scopeIdOrProtocol) {
1651
+ return scopeIdOrProtocol ? buildLinkId(did, dwnUrl, scopeIdOrProtocol) : buildLegacyCursorKey(did, dwnUrl);
1530
1652
  }
1531
1653
  /**
1532
- * Retrieves a stored progress token. Handles migration from old string cursors:
1654
+ * @deprecated Used by poll-mode sync and one-time migration only. Live mode
1655
+ * uses ReplicationLedger checkpoints. Handles migration from old string cursors:
1533
1656
  * if the stored value is a bare string (pre-ProgressToken format), it is treated
1534
1657
  * as absent — the sync engine will do a full SMT reconciliation on first startup
1535
1658
  * after upgrade, which is correct and safe.
@@ -1550,8 +1673,11 @@ export class SyncEngineLevel {
1550
1673
  }
1551
1674
  }
1552
1675
  catch (_a) {
1553
- // Not valid JSON (old string cursor) — treat as absent.
1676
+ // Not valid JSON (old string cursor) — fall through to delete.
1554
1677
  }
1678
+ // Entry exists but is unparseable or has invalid/empty fields. Delete it
1679
+ // so subsequent startups don't re-check it on every launch.
1680
+ yield this.deleteLegacyCursor(key);
1555
1681
  return undefined;
1556
1682
  }
1557
1683
  catch (error) {
@@ -1563,10 +1689,21 @@ export class SyncEngineLevel {
1563
1689
  }
1564
1690
  });
1565
1691
  }
1566
- setCursor(key, cursor) {
1692
+ /**
1693
+ * Delete a legacy cursor from the old syncCursors sublevel.
1694
+ * Called as part of one-time migration to ReplicationLedger.
1695
+ */
1696
+ deleteLegacyCursor(key) {
1567
1697
  return __awaiter(this, void 0, void 0, function* () {
1568
1698
  const cursors = this._db.sublevel('syncCursors');
1569
- yield cursors.put(key, JSON.stringify(cursor));
1699
+ try {
1700
+ yield cursors.del(key);
1701
+ }
1702
+ catch (_a) {
1703
+ // Best-effort — ignore LEVEL_NOT_FOUND and transient I/O errors alike.
1704
+ // A failed delete leaves the bad entry for one more re-check on the
1705
+ // next startup, which is harmless.
1706
+ }
1570
1707
  });
1571
1708
  }
1572
1709
  // ---------------------------------------------------------------------------
@@ -1582,8 +1719,11 @@ export class SyncEngineLevel {
1582
1719
  return undefined;
1583
1720
  }
1584
1721
  // Check for inline base64url-encoded data (small records from EventLog).
1722
+ // Delete the transport-level field so the DWN schema validator does not
1723
+ // reject the message for having unevaluated properties.
1585
1724
  const encodedData = event.message.encodedData;
1586
1725
  if (encodedData) {
1726
+ delete event.message.encodedData;
1587
1727
  const bytes = Encoder.base64UrlToBytes(encodedData);
1588
1728
  return new ReadableStream({
1589
1729
  start(controller) {
@@ -2053,4 +2193,6 @@ SyncEngineLevel.MAX_BACKOFF_MULTIPLIER = 4;
2053
2193
  SyncEngineLevel.MAX_REPAIR_ATTEMPTS = 3;
2054
2194
  /** Backoff schedule for repair retries (milliseconds). */
2055
2195
  SyncEngineLevel.REPAIR_BACKOFF_MS = [1000, 3000, 10000];
2196
+ /** Push retry backoff schedule: immediate, 250ms, 1s, 2s, then give up. */
2197
+ SyncEngineLevel.PUSH_RETRY_BACKOFF_MS = [0, 250, 1000, 2000];
2056
2198
  //# sourceMappingURL=sync-engine-level.js.map