@enbox/agent 0.5.12 → 0.5.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -24,7 +24,9 @@ import { createClosureContext, invalidateClosureCache } from './sync-closure-typ
24
24
  import { AgentPermissionsApi } from './permissions-api.js';
25
25
  import { DwnInterface } from './types/dwn.js';
26
26
  import { isRecordsWrite } from './utils.js';
27
+ import { SyncLinkReconciler } from './sync-link-reconciler.js';
27
28
  import { topologicalSort } from './sync-topological-sort.js';
29
+ import { buildLegacyCursorKey, buildLinkId } from './sync-link-id.js';
28
30
  import { fetchRemoteMessages, pullMessages, pushMessages } from './sync-messages.js';
29
31
  /**
30
32
  * Maximum bit prefix depth for the per-node tree walk (legacy fallback).
@@ -40,16 +42,12 @@ const MAX_DIFF_DEPTH = 16;
40
42
  */
41
43
  const BATCHED_DIFF_DEPTH = 8;
42
44
  /**
43
- /**
44
- * Key for the subscription cursor sublevel. Cursors are keyed by
45
- * `{did}^{dwnUrl}[^{protocol}]` and store an opaque EventLog cursor string.
46
- */
47
- const CURSOR_SEPARATOR = '^';
48
- /**
49
- * Debounce window for push-on-write. When the local EventLog emits events,
50
- * we batch them and push after this delay to avoid a push per individual write.
45
+ * Debounce window for batching writes that arrive while a push is in flight.
46
+ * The first write in a quiet window triggers an immediate push; subsequent
47
+ * writes arriving during the push are batched and flushed after this delay
48
+ * once the in-flight push completes.
51
49
  */
52
- const PUSH_DEBOUNCE_MS = 250;
50
+ const PUSH_DEBOUNCE_MS = 100;
53
51
  /**
54
52
  * Checks whether a message's protocolPath and contextId match the link's
55
53
  * subset scope prefixes. Returns true if the message is in scope.
@@ -122,7 +120,7 @@ export class SyncEngineLevel {
122
120
  * and bail if it has changed — this prevents stale work from mutating
123
121
  * state after teardown or mode switch.
124
122
  */
125
- this._syncGeneration = 0;
123
+ this._engineGeneration = 0;
126
124
  /** Active live pull subscriptions (remote -> local via MessagesSubscribe). */
127
125
  this._liveSubscriptions = [];
128
126
  /** Active local EventLog subscriptions for push-on-write (local -> remote). */
@@ -131,8 +129,8 @@ export class SyncEngineLevel {
131
129
  this._connectivityState = 'unknown';
132
130
  /** Registered event listeners for observability. */
133
131
  this._eventListeners = new Set();
134
- /** Entry in the pending push queue — a message CID with its local EventLog token. */
135
- this._pendingPushCids = new Map();
132
+ /** Per-link push runtime: queue, debounce timer, retry state. */
133
+ this._pushRuntimes = new Map();
136
134
  /**
137
135
  * CIDs recently received via pull subscription, keyed by `cid|dwnUrl` to
138
136
  * scope suppression per remote endpoint. A message pulled from Provider A
@@ -163,6 +161,13 @@ export class SyncEngineLevel {
163
161
  * a valid boundary instead of starting live-only.
164
162
  */
165
163
  this._repairContext = new Map();
164
+ // ---------------------------------------------------------------------------
165
+ // Per-link reconciliation
166
+ // ---------------------------------------------------------------------------
167
+ /** Active reconcile timers, keyed by link key. */
168
+ this._reconcileTimers = new Map();
169
+ /** Active reconcile operations, keyed by link key (dedup). */
170
+ this._reconcileInFlight = new Map();
166
171
  this._agent = agent;
167
172
  this._permissionsApi = new AgentPermissionsApi({ agent: agent });
168
173
  this._db = (db) ? db : new Level(dataPath !== null && dataPath !== void 0 ? dataPath : 'DATA/AGENT/SYNC_STORE');
@@ -232,12 +237,14 @@ export class SyncEngineLevel {
232
237
  }
233
238
  clear() {
234
239
  return __awaiter(this, void 0, void 0, function* () {
240
+ yield this.teardownLiveSync();
235
241
  yield this._permissionsApi.clear();
236
242
  yield this._db.clear();
237
243
  });
238
244
  }
239
245
  close() {
240
246
  return __awaiter(this, void 0, void 0, function* () {
247
+ yield this.teardownLiveSync();
241
248
  yield this._db.close();
242
249
  });
243
250
  }
@@ -304,91 +311,60 @@ export class SyncEngineLevel {
304
311
  }
305
312
  this._syncLock = true;
306
313
  try {
307
- // Iterate over all registered identities and their DWN endpoints.
314
+ // Group targets by remote endpoint so each URL group can be reconciled
315
+ // concurrently. Within a group, targets are processed sequentially so
316
+ // that a single network failure skips the rest of that group.
308
317
  const syncTargets = yield this.getSyncTargets();
309
- const errored = new Set();
310
- let hadFailure = false;
318
+ const byUrl = new Map();
311
319
  for (const target of syncTargets) {
312
- const { did, delegateDid, dwnUrl, protocol } = target;
313
- if (errored.has(dwnUrl)) {
314
- continue;
320
+ let group = byUrl.get(target.dwnUrl);
321
+ if (!group) {
322
+ group = [];
323
+ byUrl.set(target.dwnUrl, group);
315
324
  }
316
- try {
317
- // Phase 1: Compare SMT roots between local and remote.
318
- const localRoot = yield this.getLocalRoot(did, delegateDid, protocol);
319
- const remoteRoot = yield this.getRemoteRoot(did, dwnUrl, delegateDid, protocol);
320
- if (localRoot === remoteRoot) {
321
- // Trees are identical — nothing to sync for this target.
322
- continue;
323
- }
324
- // Phase 2: Compute the diff in a single round-trip using the
325
- // batched 'diff' action. This replaces the per-node tree walk
326
- // that previously required dozens of HTTP requests.
327
- const diff = yield this.diffWithRemote({
328
- did, dwnUrl, delegateDid, protocol,
329
- });
330
- // Phase 3: Pull missing messages (remote has, local doesn't).
331
- // The diff response may include inline message data — use it
332
- // directly instead of re-fetching via individual MessagesRead calls.
333
- if (!direction || direction === 'pull') {
334
- if (diff.onlyRemote.length > 0) {
335
- // Separate entries into three categories:
336
- // 1. Fully prefetched: have message + inline data (or no data needed)
337
- // 2. Need data fetch: have message but missing data for RecordsWrite
338
- // 3. Need full fetch: no message at all
339
- const prefetched = [];
340
- const needsFetchCids = [];
341
- for (const entry of diff.onlyRemote) {
342
- if (!entry.message) {
343
- // No message at all — need full fetch.
344
- needsFetchCids.push(entry.messageCid);
345
- }
346
- else if (entry.message.descriptor.interface === 'Records' &&
347
- entry.message.descriptor.method === 'Write' &&
348
- entry.message.descriptor.dataCid &&
349
- !entry.encodedData) {
350
- // RecordsWrite with data but data wasn't inlined (too large).
351
- // Need to fetch individually to get the data stream.
352
- needsFetchCids.push(entry.messageCid);
353
- }
354
- else {
355
- // Fully prefetched (message + data or no data needed).
356
- prefetched.push(entry);
357
- }
358
- }
359
- yield this.pullMessages({
360
- did, dwnUrl, delegateDid, protocol,
361
- messageCids: needsFetchCids,
362
- prefetched,
363
- });
364
- }
325
+ group.push(target);
326
+ }
327
+ let groupsSucceeded = 0;
328
+ let groupsFailed = 0;
329
+ const results = yield Promise.allSettled([...byUrl.entries()].map((_a) => __awaiter(this, [_a], void 0, function* ([dwnUrl, targets]) {
330
+ for (const target of targets) {
331
+ const { did, delegateDid, protocol } = target;
332
+ try {
333
+ yield this.createLinkReconciler().reconcile({
334
+ did, dwnUrl, delegateDid, protocol,
335
+ }, { direction });
365
336
  }
366
- // Phase 4: Push missing messages (local has, remote doesn't).
367
- if (!direction || direction === 'push') {
368
- if (diff.onlyLocal.length > 0) {
369
- yield this.pushMessages({ did, dwnUrl, delegateDid, protocol, messageCids: diff.onlyLocal });
370
- }
337
+ catch (error) {
338
+ // Skip remaining targets for this DWN endpoint.
339
+ groupsFailed++;
340
+ console.error(`SyncEngineLevel: Error syncing ${did} with ${dwnUrl}`, error);
341
+ return;
371
342
  }
372
343
  }
373
- catch (error) {
374
- // Skip this DWN endpoint for remaining targets and log the real cause.
375
- errored.add(dwnUrl);
376
- hadFailure = true;
377
- console.error(`SyncEngineLevel: Error syncing ${did} with ${dwnUrl}`, error);
344
+ groupsSucceeded++;
345
+ })));
346
+ // Check for unexpected rejections (should not happen given inner try/catch).
347
+ for (const result of results) {
348
+ if (result.status === 'rejected') {
349
+ groupsFailed++;
378
350
  }
379
351
  }
380
- // Track consecutive failures for backoff in poll mode.
381
- if (hadFailure) {
352
+ // Track connectivity based on per-group outcomes. If at least one
353
+ // group succeeded, stay online — partial reachability is still online.
354
+ if (groupsSucceeded > 0) {
355
+ this._consecutiveFailures = 0;
356
+ this._connectivityState = 'online';
357
+ }
358
+ else if (groupsFailed > 0) {
382
359
  this._consecutiveFailures++;
383
360
  if (this._connectivityState === 'online') {
384
361
  this._connectivityState = 'offline';
385
362
  }
386
363
  }
387
- else {
364
+ else if (syncTargets.length > 0) {
365
+ // All targets had matching roots (no reconciliation needed).
388
366
  this._consecutiveFailures = 0;
389
- if (syncTargets.length > 0) {
390
- this._connectivityState = 'online';
391
- }
367
+ this._connectivityState = 'online';
392
368
  }
393
369
  }
394
370
  finally {
@@ -428,6 +404,7 @@ export class SyncEngineLevel {
428
404
  */
429
405
  stopSync() {
430
406
  return __awaiter(this, arguments, void 0, function* (timeout = 2000) {
407
+ this._engineGeneration++;
431
408
  let elapsedTimeout = 0;
432
409
  while (this._syncLock) {
433
410
  if (elapsedTimeout >= timeout) {
@@ -448,7 +425,11 @@ export class SyncEngineLevel {
448
425
  // ---------------------------------------------------------------------------
449
426
  startPollSync(intervalMilliseconds) {
450
427
  return __awaiter(this, void 0, void 0, function* () {
428
+ const generation = this._engineGeneration;
451
429
  const intervalSync = () => __awaiter(this, void 0, void 0, function* () {
430
+ if (this._engineGeneration !== generation) {
431
+ return;
432
+ }
452
433
  if (this._syncLock) {
453
434
  return;
454
435
  }
@@ -465,6 +446,9 @@ export class SyncEngineLevel {
465
446
  const effectiveInterval = this._consecutiveFailures > 0
466
447
  ? intervalMilliseconds * backoffMultiplier
467
448
  : intervalMilliseconds;
449
+ if (this._engineGeneration !== generation) {
450
+ return;
451
+ }
468
452
  if (!this._syncIntervalId) {
469
453
  this._syncIntervalId = setInterval(intervalSync, effectiveInterval);
470
454
  }
@@ -499,8 +483,9 @@ export class SyncEngineLevel {
499
483
  console.error('SyncEngineLevel: Error during initial live-sync catch-up', error);
500
484
  }
501
485
  // Step 2: Initialize replication links and open live subscriptions.
486
+ // Each target's link initialization is independent — process concurrently.
502
487
  const syncTargets = yield this.getSyncTargets();
503
- for (const target of syncTargets) {
488
+ yield Promise.allSettled(syncTargets.map((target) => __awaiter(this, void 0, void 0, function* () {
504
489
  let link;
505
490
  try {
506
491
  // Get or create the link in the durable ledger.
@@ -516,18 +501,32 @@ export class SyncEngineLevel {
516
501
  protocol: target.protocol,
517
502
  });
518
503
  // Cache the link for fast access by subscription handlers.
519
- const linkKey = this.buildCursorKey(target.did, target.dwnUrl, target.protocol);
504
+ // Use scopeId from the link for consistent runtime identity.
505
+ const linkKey = this.buildLinkKey(target.did, target.dwnUrl, link.scopeId);
506
+ // One-time migration: if the link has no pull checkpoint, check for
507
+ // a legacy cursor in the old syncCursors sublevel. The legacy key
508
+ // used protocol, not scopeId, so we must build it the old way.
509
+ if (!link.pull.contiguousAppliedToken) {
510
+ const legacyKey = buildLegacyCursorKey(target.did, target.dwnUrl, target.protocol);
511
+ const legacyCursor = yield this.getCursor(legacyKey);
512
+ if (legacyCursor) {
513
+ ReplicationLedger.resetCheckpoint(link.pull, legacyCursor);
514
+ yield this.ledger.saveLink(link);
515
+ yield this.deleteLegacyCursor(legacyKey);
516
+ }
517
+ }
520
518
  this._activeLinks.set(linkKey, link);
521
519
  // Open subscriptions — only transition to live if both succeed.
522
520
  // If pull succeeds but push fails, close the pull subscription to
523
521
  // avoid a resource leak with inconsistent state.
524
- yield this.openLivePullSubscription(target);
522
+ const targetWithKey = Object.assign(Object.assign({}, target), { linkKey });
523
+ yield this.openLivePullSubscription(targetWithKey);
525
524
  try {
526
- yield this.openLocalPushSubscription(target);
525
+ yield this.openLocalPushSubscription(targetWithKey);
527
526
  }
528
527
  catch (pushError) {
529
528
  // Close the already-opened pull subscription.
530
- const pullSub = this._liveSubscriptions.find(s => s.did === target.did && s.dwnUrl === target.dwnUrl && s.protocol === target.protocol);
529
+ const pullSub = this._liveSubscriptions.find((s) => s.linkKey === linkKey);
531
530
  if (pullSub) {
532
531
  try {
533
532
  yield pullSub.close();
@@ -539,9 +538,16 @@ export class SyncEngineLevel {
539
538
  }
540
539
  this.emitEvent({ type: 'link:status-change', tenantDid: target.did, remoteEndpoint: target.dwnUrl, protocol: target.protocol, from: 'initializing', to: 'live' });
541
540
  yield this.ledger.setStatus(link, 'live');
541
+ // If the link was marked dirty in a previous session, schedule
542
+ // immediate reconciliation now that subscriptions are open.
543
+ if (link.needsReconcile) {
544
+ this.scheduleReconcile(linkKey, 1000);
545
+ }
542
546
  }
543
547
  catch (error) {
544
- const linkKey = this.buildCursorKey(target.did, target.dwnUrl, target.protocol);
548
+ const linkKey = link
549
+ ? this.buildLinkKey(target.did, target.dwnUrl, link.scopeId)
550
+ : buildLegacyCursorKey(target.did, target.dwnUrl, target.protocol);
545
551
  // Detect ProgressGap (410) — the cursor is stale, link needs SMT repair.
546
552
  if (error.isProgressGap && link) {
547
553
  console.warn(`SyncEngineLevel: ProgressGap detected for ${target.did} -> ${target.dwnUrl}, initiating repair`);
@@ -550,7 +556,7 @@ export class SyncEngineLevel {
550
556
  yield this.transitionToRepairing(linkKey, link, {
551
557
  resumeToken: gapInfo === null || gapInfo === void 0 ? void 0 : gapInfo.latestAvailable,
552
558
  });
553
- continue;
559
+ return;
554
560
  }
555
561
  console.error(`SyncEngineLevel: Failed to open live subscription for ${target.did} -> ${target.dwnUrl}`, error);
556
562
  // Clean up in-memory state for the failed link so it doesn't appear
@@ -562,7 +568,7 @@ export class SyncEngineLevel {
562
568
  this._connectivityState = 'unknown';
563
569
  }
564
570
  }
565
- }
571
+ })));
566
572
  // Step 3: Schedule infrequent SMT integrity check.
567
573
  const integrityCheck = () => __awaiter(this, void 0, void 0, function* () {
568
574
  if (this._syncLock) {
@@ -672,11 +678,11 @@ export class SyncEngineLevel {
672
678
  const attempts = (_a = this._repairAttempts.get(linkKey)) !== null && _a !== void 0 ? _a : 1;
673
679
  const backoff = SyncEngineLevel.REPAIR_BACKOFF_MS;
674
680
  const delayMs = backoff[Math.min(attempts - 1, backoff.length - 1)];
675
- const timerGeneration = this._syncGeneration;
681
+ const timerGeneration = this._engineGeneration;
676
682
  const timer = setTimeout(() => __awaiter(this, void 0, void 0, function* () {
677
683
  this._repairRetryTimers.delete(linkKey);
678
684
  // Bail if teardown occurred since this timer was scheduled.
679
- if (this._syncGeneration !== timerGeneration) {
685
+ if (this._engineGeneration !== timerGeneration) {
680
686
  return;
681
687
  }
682
688
  // Verify link still exists and is still repairing.
@@ -708,6 +714,14 @@ export class SyncEngineLevel {
708
714
  }
709
715
  const promise = this.doRepairLink(linkKey).finally(() => {
710
716
  this._activeRepairs.delete(linkKey);
717
+ // Post-repair reconcile: if doRepairLink() marked needsReconcile
718
+ // (to close the gap between diff snapshot and new push subscription),
719
+ // schedule reconciliation NOW — after _activeRepairs is cleared so
720
+ // scheduleReconcile() won't skip it.
721
+ const link = this._activeLinks.get(linkKey);
722
+ if ((link === null || link === void 0 ? void 0 : link.needsReconcile) && link.status === 'live') {
723
+ this.scheduleReconcile(linkKey, 500);
724
+ }
711
725
  });
712
726
  this._activeRepairs.set(linkKey, promise);
713
727
  return promise;
@@ -728,7 +742,7 @@ export class SyncEngineLevel {
728
742
  // Capture the sync generation at repair start. If teardown occurs during
729
743
  // any await, the generation will have incremented and we bail before
730
744
  // mutating state — preventing the race where repair continues after teardown.
731
- const generation = this._syncGeneration;
745
+ const generation = this._engineGeneration;
732
746
  const { tenantDid: did, remoteEndpoint: dwnUrl, delegateDid, protocol } = link;
733
747
  this.emitEvent({ type: 'repair:started', tenantDid: did, remoteEndpoint: dwnUrl, protocol, attempt: ((_a = this._repairAttempts.get(linkKey)) !== null && _a !== void 0 ? _a : 0) + 1 });
734
748
  const attempts = ((_b = this._repairAttempts.get(linkKey)) !== null && _b !== void 0 ? _b : 0) + 1;
@@ -736,7 +750,7 @@ export class SyncEngineLevel {
736
750
  // Step 1: Close existing subscriptions FIRST to stop old events from
737
751
  // mutating local state while repair runs.
738
752
  yield this.closeLinkSubscriptions(link);
739
- if (this._syncGeneration !== generation) {
753
+ if (this._engineGeneration !== generation) {
740
754
  return;
741
755
  } // Teardown occurred.
742
756
  // Step 2: Clear runtime ordinals immediately — stale state must not
@@ -747,71 +761,60 @@ export class SyncEngineLevel {
747
761
  rt.nextCommitOrdinal = 0;
748
762
  try {
749
763
  // Step 3: Run SMT reconciliation for this link.
750
- const localRoot = yield this.getLocalRoot(did, delegateDid, protocol);
751
- if (this._syncGeneration !== generation) {
752
- return;
753
- }
754
- const remoteRoot = yield this.getRemoteRoot(did, dwnUrl, delegateDid, protocol);
755
- if (this._syncGeneration !== generation) {
764
+ const reconcileOutcome = yield this.createLinkReconciler(() => this._engineGeneration === generation).reconcile({ did, dwnUrl, delegateDid, protocol });
765
+ if (reconcileOutcome.aborted) {
756
766
  return;
757
767
  }
758
- if (localRoot !== remoteRoot) {
759
- const diff = yield this.diffWithRemote({ did, dwnUrl, delegateDid, protocol });
760
- if (this._syncGeneration !== generation) {
761
- return;
762
- }
763
- if (diff.onlyRemote.length > 0) {
764
- const prefetched = [];
765
- const needsFetchCids = [];
766
- for (const entry of diff.onlyRemote) {
767
- if (!entry.message || (entry.message.descriptor.interface === 'Records' &&
768
- entry.message.descriptor.method === 'Write' &&
769
- entry.message.descriptor.dataCid && !entry.encodedData)) {
770
- needsFetchCids.push(entry.messageCid);
771
- }
772
- else {
773
- prefetched.push(entry);
774
- }
775
- }
776
- yield this.pullMessages({ did, dwnUrl, delegateDid, protocol, messageCids: needsFetchCids, prefetched });
777
- if (this._syncGeneration !== generation) {
778
- return;
779
- }
780
- }
781
- if (diff.onlyLocal.length > 0) {
782
- yield this.pushMessages({ did, dwnUrl, delegateDid, protocol, messageCids: diff.onlyLocal });
783
- if (this._syncGeneration !== generation) {
784
- return;
785
- }
786
- }
787
- }
788
- // Step 4: Determine the post-repair resume token.
768
+ // Step 4: Determine the post-repair pull resume token.
789
769
  // - If repair was triggered by ProgressGap, use the stored resumeToken
790
770
  // (from gapInfo.latestAvailable) so the reopened subscription replays
791
771
  // from a valid boundary, closing the race window between SMT and resubscribe.
792
772
  // - Otherwise, use the existing contiguousAppliedToken if still valid.
793
- // - Push checkpoint is NOT reset during repair: push frontier tracks what
794
- // the local EventLog has delivered to the remote. SMT repair handles
795
- // pull-side convergence; push-side convergence is handled by the diff's
796
- // onlyLocal push. The push checkpoint remains the local authority.
773
+ // Push is opportunistic — no push checkpoint to reset.
797
774
  const repairCtx = this._repairContext.get(linkKey);
798
775
  const resumeToken = (_c = repairCtx === null || repairCtx === void 0 ? void 0 : repairCtx.resumeToken) !== null && _c !== void 0 ? _c : link.pull.contiguousAppliedToken;
799
776
  ReplicationLedger.resetCheckpoint(link.pull, resumeToken);
800
777
  yield this.ledger.saveLink(link);
801
- if (this._syncGeneration !== generation) {
778
+ if (this._engineGeneration !== generation) {
779
+ return;
780
+ }
781
+ // Step 5: Reopen subscriptions.
782
+ // Mark needsReconcile BEFORE reopening — local push starts from "now",
783
+ // so any writes between the diff snapshot (step 3) and the new push
784
+ // subscription are invisible to both mechanisms. A short post-reopen
785
+ // reconcile will close this gap (cheap: SMT root comparison short-circuits
786
+ // if roots already match).
787
+ link.needsReconcile = true;
788
+ yield this.ledger.saveLink(link);
789
+ if (this._engineGeneration !== generation) {
802
790
  return;
803
791
  }
804
- // Step 5: Reopen subscriptions with the repaired checkpoints.
805
- const target = { did, dwnUrl, delegateDid, protocol };
806
- yield this.openLivePullSubscription(target);
807
- if (this._syncGeneration !== generation) {
792
+ const target = { did, dwnUrl, delegateDid, protocol, linkKey };
793
+ try {
794
+ yield this.openLivePullSubscription(target);
795
+ }
796
+ catch (pullErr) {
797
+ if (pullErr.isProgressGap) {
798
+ console.warn(`SyncEngineLevel: Stale pull resume token for ${did} -> ${dwnUrl}, resetting to start fresh`);
799
+ ReplicationLedger.resetCheckpoint(link.pull);
800
+ yield this.ledger.saveLink(link);
801
+ if (this._engineGeneration !== generation) {
802
+ return;
803
+ }
804
+ yield this.openLivePullSubscription(target);
805
+ }
806
+ else {
807
+ throw pullErr;
808
+ }
809
+ }
810
+ if (this._engineGeneration !== generation) {
808
811
  return;
809
812
  }
810
813
  try {
811
- yield this.openLocalPushSubscription(Object.assign(Object.assign({}, target), { pushCursor: link.push.contiguousAppliedToken }));
814
+ yield this.openLocalPushSubscription(target);
812
815
  }
813
816
  catch (pushError) {
814
- const pullSub = this._liveSubscriptions.find(s => s.did === did && s.dwnUrl === dwnUrl && s.protocol === protocol);
817
+ const pullSub = this._liveSubscriptions.find((s) => s.linkKey === linkKey);
815
818
  if (pullSub) {
816
819
  try {
817
820
  yield pullSub.close();
@@ -821,9 +824,13 @@ export class SyncEngineLevel {
821
824
  }
822
825
  throw pushError;
823
826
  }
824
- if (this._syncGeneration !== generation) {
827
+ if (this._engineGeneration !== generation) {
825
828
  return;
826
829
  }
830
+ // Note: post-repair reconcile to close the repair-window gap is
831
+ // scheduled by repairLink() AFTER _activeRepairs is cleared — not
832
+ // here, because scheduleReconcile() would skip it while _activeRepairs
833
+ // still contains this link.
827
834
  // Step 6: Clean up repair context and transition to live.
828
835
  this._repairContext.delete(linkKey);
829
836
  this._repairAttempts.delete(linkKey);
@@ -843,7 +850,7 @@ export class SyncEngineLevel {
843
850
  }
844
851
  catch (error) {
845
852
  // If teardown occurred during repair, don't retry or enter degraded_poll.
846
- if (this._syncGeneration !== generation) {
853
+ if (this._engineGeneration !== generation) {
847
854
  return;
848
855
  }
849
856
  console.error(`SyncEngineLevel: Repair failed for ${did} -> ${dwnUrl} (attempt ${attempts})`, error);
@@ -863,9 +870,10 @@ export class SyncEngineLevel {
863
870
  */
864
871
  closeLinkSubscriptions(link) {
865
872
  return __awaiter(this, void 0, void 0, function* () {
866
- const { tenantDid: did, remoteEndpoint: dwnUrl, protocol } = link;
873
+ const { tenantDid: did, remoteEndpoint: dwnUrl } = link;
874
+ const linkKey = this.buildLinkKey(did, dwnUrl, link.scopeId);
867
875
  // Close pull subscription.
868
- const pullSub = this._liveSubscriptions.find(s => s.did === did && s.dwnUrl === dwnUrl && s.protocol === protocol);
876
+ const pullSub = this._liveSubscriptions.find((s) => s.linkKey === linkKey);
869
877
  if (pullSub) {
870
878
  try {
871
879
  yield pullSub.close();
@@ -874,7 +882,7 @@ export class SyncEngineLevel {
874
882
  this._liveSubscriptions = this._liveSubscriptions.filter(s => s !== pullSub);
875
883
  }
876
884
  // Close local push subscription.
877
- const pushSub = this._localSubscriptions.find(s => s.did === did && s.dwnUrl === dwnUrl && s.protocol === protocol);
885
+ const pushSub = this._localSubscriptions.find((s) => s.linkKey === linkKey);
878
886
  if (pushSub) {
879
887
  try {
880
888
  yield pushSub.close();
@@ -910,10 +918,10 @@ export class SyncEngineLevel {
910
918
  const baseInterval = 15000;
911
919
  const jitter = Math.floor(Math.random() * 15000);
912
920
  const interval = baseInterval + jitter;
913
- const pollGeneration = this._syncGeneration;
921
+ const pollGeneration = this._engineGeneration;
914
922
  const timer = setInterval(() => __awaiter(this, void 0, void 0, function* () {
915
923
  // Bail if teardown occurred since this timer was created.
916
- if (this._syncGeneration !== pollGeneration) {
924
+ if (this._engineGeneration !== pollGeneration) {
917
925
  clearInterval(timer);
918
926
  this._degradedPollTimers.delete(linkKey);
919
927
  return;
@@ -956,14 +964,14 @@ export class SyncEngineLevel {
956
964
  // Increment generation to invalidate all in-flight async operations
957
965
  // (repairs, retry timers, degraded-poll ticks). Any async work that
958
966
  // captured the previous generation will bail on its next checkpoint.
959
- this._syncGeneration++;
960
- // Clear the push debounce timer.
961
- if (this._pushDebounceTimer) {
962
- clearTimeout(this._pushDebounceTimer);
963
- this._pushDebounceTimer = undefined;
964
- }
965
- // Flush any pending push CIDs.
966
- this._pendingPushCids.clear();
967
+ this._engineGeneration++;
968
+ // Clear per-link push runtime state.
969
+ for (const pushRuntime of this._pushRuntimes.values()) {
970
+ if (pushRuntime.timer) {
971
+ clearTimeout(pushRuntime.timer);
972
+ }
973
+ }
974
+ this._pushRuntimes.clear();
967
975
  // Close all live pull subscriptions.
968
976
  for (const sub of this._liveSubscriptions) {
969
977
  try {
@@ -996,8 +1004,15 @@ export class SyncEngineLevel {
996
1004
  }
997
1005
  this._repairRetryTimers.clear();
998
1006
  this._repairContext.clear();
1007
+ // Clear reconcile timers and in-flight operations.
1008
+ for (const timer of this._reconcileTimers.values()) {
1009
+ clearTimeout(timer);
1010
+ }
1011
+ this._reconcileTimers.clear();
1012
+ this._reconcileInFlight.clear();
999
1013
  // Clear closure evaluation contexts.
1000
1014
  this._closureContexts.clear();
1015
+ this._recentlyPulledCids.clear();
1001
1016
  // Clear the in-memory link and runtime state.
1002
1017
  this._activeLinks.clear();
1003
1018
  this._linkRuntimes.clear();
@@ -1012,12 +1027,24 @@ export class SyncEngineLevel {
1012
1027
  */
1013
1028
  openLivePullSubscription(target) {
1014
1029
  return __awaiter(this, void 0, void 0, function* () {
1015
- var _a, _b;
1030
+ var _a;
1016
1031
  const { did, delegateDid, dwnUrl, protocol } = target;
1017
- // Resolve the cursor from the link's pull checkpoint (preferred) or legacy storage.
1018
- const cursorKey = this.buildCursorKey(did, dwnUrl, protocol);
1032
+ // Resolve the cursor from the link's durable pull checkpoint.
1033
+ // Legacy syncCursors migration happens at link load time in startLiveSync().
1034
+ const cursorKey = target.linkKey;
1019
1035
  const link = this._activeLinks.get(cursorKey);
1020
- const cursor = (_a = link === null || link === void 0 ? void 0 : link.pull.contiguousAppliedToken) !== null && _a !== void 0 ? _a : yield this.getCursor(cursorKey);
1036
+ let cursor = link === null || link === void 0 ? void 0 : link.pull.contiguousAppliedToken;
1037
+ // Guard against corrupted tokens with empty fields — these would fail
1038
+ // MessagesSubscribe JSON schema validation (minLength: 1). Discard and
1039
+ // start from the beginning rather than crash the subscription.
1040
+ if (cursor && (!cursor.streamId || !cursor.messageCid || !cursor.epoch || !cursor.position)) {
1041
+ console.warn(`SyncEngineLevel: Discarding stored cursor with empty field(s) for ${did} -> ${dwnUrl}`);
1042
+ cursor = undefined;
1043
+ if (link) {
1044
+ ReplicationLedger.resetCheckpoint(link.pull);
1045
+ yield this.ledger.saveLink(link);
1046
+ }
1047
+ }
1021
1048
  // Build the MessagesSubscribe filters.
1022
1049
  // When the link has protocolPathPrefixes, include them in the filter so the
1023
1050
  // EventLog delivers only matching events (server-side filtering). This replaces
@@ -1026,7 +1053,7 @@ export class SyncEngineLevel {
1026
1053
  // MessagesFilter.protocolPathPrefix is a single string. Multiple prefixes
1027
1054
  // would need multiple filters (OR semantics) — for now we use the first one.
1028
1055
  const protocolPathPrefix = (link === null || link === void 0 ? void 0 : link.scope.kind) === 'protocol'
1029
- ? (_b = link.scope.protocolPathPrefixes) === null || _b === void 0 ? void 0 : _b[0]
1056
+ ? (_a = link.scope.protocolPathPrefixes) === null || _a === void 0 ? void 0 : _a[0]
1030
1057
  : undefined;
1031
1058
  const filters = protocol
1032
1059
  ? [Object.assign({ protocol }, (protocolPathPrefix ? { protocolPathPrefix } : {}))]
@@ -1046,11 +1073,15 @@ export class SyncEngineLevel {
1046
1073
  });
1047
1074
  permissionGrantId = grant.grant.id;
1048
1075
  }
1076
+ const handlerGeneration = this._engineGeneration;
1049
1077
  // Define the subscription handler that processes incoming events.
1050
1078
  // NOTE: The WebSocket client fires handlers without awaiting (fire-and-forget),
1051
1079
  // so multiple handlers can be in-flight concurrently. The ordinal tracker
1052
1080
  // ensures the checkpoint advances only when all earlier deliveries are committed.
1053
1081
  const subscriptionHandler = (subMessage) => __awaiter(this, void 0, void 0, function* () {
1082
+ if (this._engineGeneration !== handlerGeneration) {
1083
+ return;
1084
+ }
1054
1085
  if (subMessage.type === 'eose') {
1055
1086
  // End-of-stored-events — catch-up complete.
1056
1087
  if (link) {
@@ -1072,9 +1103,6 @@ export class SyncEngineLevel {
1072
1103
  this.drainCommittedPull(cursorKey);
1073
1104
  yield this.ledger.saveLink(link);
1074
1105
  }
1075
- else {
1076
- yield this.setCursor(cursorKey, subMessage.cursor);
1077
- }
1078
1106
  // Transport is reachable — set connectivity to online.
1079
1107
  if (link) {
1080
1108
  const prevEoseConnectivity = link.connectivity;
@@ -1082,6 +1110,10 @@ export class SyncEngineLevel {
1082
1110
  if (prevEoseConnectivity !== 'online') {
1083
1111
  this.emitEvent({ type: 'link:connectivity-change', tenantDid: did, remoteEndpoint: dwnUrl, protocol, from: prevEoseConnectivity, to: 'online' });
1084
1112
  }
1113
+ // If the link was marked dirty, schedule reconciliation now that it's healthy.
1114
+ if (link.needsReconcile) {
1115
+ this.scheduleReconcile(cursorKey, 500);
1116
+ }
1085
1117
  }
1086
1118
  else {
1087
1119
  this._connectivityState = 'online';
@@ -1211,10 +1243,6 @@ export class SyncEngineLevel {
1211
1243
  yield this.transitionToRepairing(cursorKey, link);
1212
1244
  }
1213
1245
  }
1214
- else if (!link) {
1215
- // Legacy path: no link available, use simple cursor persistence.
1216
- yield this.setCursor(cursorKey, subMessage.cursor);
1217
- }
1218
1246
  }
1219
1247
  catch (error) {
1220
1248
  console.error(`SyncEngineLevel: Error processing live-pull event for ${did}`, error);
@@ -1249,7 +1277,11 @@ export class SyncEngineLevel {
1249
1277
  const resubscribeFactory = (resumeCursor) => __awaiter(this, void 0, void 0, function* () {
1250
1278
  var _a;
1251
1279
  // On reconnect, use the latest durable checkpoint position if available.
1252
- const effectiveCursor = (_a = resumeCursor !== null && resumeCursor !== void 0 ? resumeCursor : link === null || link === void 0 ? void 0 : link.pull.contiguousAppliedToken) !== null && _a !== void 0 ? _a : cursor;
1280
+ // Discard tokens with empty fields to avoid schema validation failures.
1281
+ let effectiveCursor = (_a = resumeCursor !== null && resumeCursor !== void 0 ? resumeCursor : link === null || link === void 0 ? void 0 : link.pull.contiguousAppliedToken) !== null && _a !== void 0 ? _a : cursor;
1282
+ if (effectiveCursor && (!effectiveCursor.streamId || !effectiveCursor.messageCid || !effectiveCursor.epoch || !effectiveCursor.position)) {
1283
+ effectiveCursor = undefined;
1284
+ }
1253
1285
  const resumeRequest = Object.assign(Object.assign({}, subscribeRequest), { messageParams: Object.assign(Object.assign({}, subscribeRequest.messageParams), { cursor: effectiveCursor }) });
1254
1286
  const { message: resumeMsg } = yield this.agent.dwn.processRequest(resumeRequest);
1255
1287
  if (!resumeMsg) {
@@ -1281,6 +1313,7 @@ export class SyncEngineLevel {
1281
1313
  throw new Error(`SyncEngineLevel: MessagesSubscribe failed for ${did} -> ${dwnUrl}: ${reply.status.code} ${reply.status.detail}`);
1282
1314
  }
1283
1315
  this._liveSubscriptions.push({
1316
+ linkKey: cursorKey,
1284
1317
  did,
1285
1318
  dwnUrl,
1286
1319
  delegateDid,
@@ -1288,7 +1321,7 @@ export class SyncEngineLevel {
1288
1321
  close: () => __awaiter(this, void 0, void 0, function* () { yield reply.subscription.close(); }),
1289
1322
  });
1290
1323
  // Set per-link connectivity to online after successful subscription setup.
1291
- const pullLink = this._activeLinks.get(this.buildCursorKey(did, dwnUrl, protocol));
1324
+ const pullLink = this._activeLinks.get(cursorKey);
1292
1325
  if (pullLink) {
1293
1326
  const prevPullConnectivity = pullLink.connectivity;
1294
1327
  pullLink.connectivity = 'online';
@@ -1307,6 +1340,7 @@ export class SyncEngineLevel {
1307
1340
  */
1308
1341
  openLocalPushSubscription(target) {
1309
1342
  return __awaiter(this, void 0, void 0, function* () {
1343
+ var _a;
1310
1344
  const { did, delegateDid, dwnUrl, protocol } = target;
1311
1345
  // Build filters scoped to the protocol (if any).
1312
1346
  const filters = protocol ? [{ protocol }] : [];
@@ -1322,35 +1356,24 @@ export class SyncEngineLevel {
1322
1356
  });
1323
1357
  permissionGrantId = grant.grant.id;
1324
1358
  }
1359
+ const handlerGeneration = this._engineGeneration;
1325
1360
  // Subscribe to the local DWN's EventLog.
1326
1361
  const subscriptionHandler = (subMessage) => __awaiter(this, void 0, void 0, function* () {
1362
+ if (this._engineGeneration !== handlerGeneration) {
1363
+ return;
1364
+ }
1327
1365
  if (subMessage.type !== 'event') {
1328
1366
  return;
1329
1367
  }
1330
- // Subset scope filtering for push: only push events that match the
1331
- // link's scope prefixes. Events outside the scope are not our responsibility.
1332
- // Skipped events MUST advance the push checkpoint to prevent infinite
1333
- // replay after repair/reconnect (same reason as the pull side).
1334
- const pushLink = this._activeLinks.get(this.buildCursorKey(did, dwnUrl, protocol));
1368
+ // Subset scope filtering: only push events that match the link's
1369
+ // scope prefixes. Events outside the scope are not our responsibility.
1370
+ const pushLinkKey = target.linkKey;
1371
+ const pushLink = this._activeLinks.get(pushLinkKey);
1335
1372
  if (pushLink && !isEventInScope(subMessage.event.message, pushLink.scope)) {
1336
- // Guard: only mutate durable state when the link is live/initializing.
1337
- // During repair/degraded_poll, orchestration owns checkpoint progression.
1338
- if (pushLink.status !== 'live' && pushLink.status !== 'initializing') {
1339
- return;
1340
- }
1341
- // Validate token domain before committing — a stream/epoch mismatch
1342
- // on the local EventLog should trigger repair, not silently overwrite.
1343
- if (!ReplicationLedger.validateTokenDomain(pushLink.push, subMessage.cursor)) {
1344
- yield this.transitionToRepairing(this.buildCursorKey(did, dwnUrl, protocol), pushLink);
1345
- return;
1346
- }
1347
- ReplicationLedger.setReceivedToken(pushLink.push, subMessage.cursor);
1348
- ReplicationLedger.commitContiguousToken(pushLink.push, subMessage.cursor);
1349
- yield this.ledger.saveLink(pushLink);
1350
1373
  return;
1351
1374
  }
1352
1375
  // Accumulate the message CID for a debounced push.
1353
- const targetKey = this.buildCursorKey(did, dwnUrl, protocol);
1376
+ const targetKey = pushLinkKey;
1354
1377
  const cid = yield Message.getCid(subMessage.event.message);
1355
1378
  if (cid === undefined) {
1356
1379
  return;
@@ -1361,30 +1384,26 @@ export class SyncEngineLevel {
1361
1384
  if (this.isRecentlyPulled(cid, dwnUrl)) {
1362
1385
  return;
1363
1386
  }
1364
- let pending = this._pendingPushCids.get(targetKey);
1365
- if (!pending) {
1366
- pending = { did, dwnUrl, delegateDid, protocol, entries: [] };
1367
- this._pendingPushCids.set(targetKey, pending);
1368
- }
1369
- pending.entries.push({ cid, localToken: subMessage.cursor });
1370
- // Debounce the push.
1371
- if (this._pushDebounceTimer) {
1372
- clearTimeout(this._pushDebounceTimer);
1387
+ const pushRuntime = this.getOrCreatePushRuntime(targetKey, {
1388
+ did, dwnUrl, delegateDid, protocol,
1389
+ });
1390
+ pushRuntime.entries.push({ cid });
1391
+ // Immediate-first: if no push is in flight and no batch timer is
1392
+ // pending, push immediately. Otherwise, the pending batch timer
1393
+ // or the post-flush drain will pick up the new entry.
1394
+ if (!pushRuntime.flushing && !pushRuntime.timer) {
1395
+ void this.flushPendingPushesForLink(targetKey);
1373
1396
  }
1374
- this._pushDebounceTimer = setTimeout(() => {
1375
- void this.flushPendingPushes();
1376
- }, PUSH_DEBOUNCE_MS);
1377
1397
  });
1378
- // Process the local subscription request.
1379
- // When a push cursor is provided (e.g., after repair), the local subscription
1380
- // replays events from that position, closing the race window where local
1381
- // writes during repair would otherwise be missed by push-on-write.
1398
+ // Subscribe to the local DWN EventLog from "now" — opportunistic push
1399
+ // does not replay from a stored cursor. Any writes missed during outages
1400
+ // are recovered by the post-repair reconciliation path.
1382
1401
  const response = yield this.agent.dwn.processRequest({
1383
1402
  author: did,
1384
1403
  target: did,
1385
1404
  messageType: DwnInterface.MessagesSubscribe,
1386
1405
  granteeDid: delegateDid,
1387
- messageParams: { filters, permissionGrantId, cursor: target.pushCursor },
1406
+ messageParams: { filters, permissionGrantId },
1388
1407
  subscriptionHandler: subscriptionHandler,
1389
1408
  });
1390
1409
  const reply = response.reply;
@@ -1392,6 +1411,7 @@ export class SyncEngineLevel {
1392
1411
  throw new Error(`SyncEngineLevel: Local MessagesSubscribe failed for ${did}: ${reply.status.code} ${reply.status.detail}`);
1393
1412
  }
1394
1413
  this._localSubscriptions.push({
1414
+ linkKey: (_a = target.linkKey) !== null && _a !== void 0 ? _a : buildLegacyCursorKey(did, dwnUrl, protocol),
1395
1415
  did,
1396
1416
  dwnUrl,
1397
1417
  delegateDid,
@@ -1405,104 +1425,234 @@ export class SyncEngineLevel {
1405
1425
  */
1406
1426
  flushPendingPushes() {
1407
1427
  return __awaiter(this, void 0, void 0, function* () {
1408
- this._pushDebounceTimer = undefined;
1409
- const batches = [...this._pendingPushCids.entries()];
1410
- this._pendingPushCids.clear();
1411
- // Push to all endpoints in parallel — each target is independent.
1412
- yield Promise.all(batches.map((_a) => __awaiter(this, [_a], void 0, function* ([targetKey, pending]) {
1413
- const { did, dwnUrl, delegateDid, protocol, entries: pushEntries } = pending;
1414
- if (pushEntries.length === 0) {
1415
- return;
1428
+ yield Promise.all([...this._pushRuntimes.keys()].map((linkKey) => __awaiter(this, void 0, void 0, function* () {
1429
+ yield this.flushPendingPushesForLink(linkKey);
1430
+ })));
1431
+ });
1432
+ }
1433
+ flushPendingPushesForLink(linkKey) {
1434
+ return __awaiter(this, void 0, void 0, function* () {
1435
+ const pushRuntime = this._pushRuntimes.get(linkKey);
1436
+ if (!pushRuntime) {
1437
+ return;
1438
+ }
1439
+ const { did, dwnUrl, delegateDid, protocol, entries: pushEntries, retryCount } = pushRuntime;
1440
+ pushRuntime.entries = [];
1441
+ if (pushEntries.length === 0) {
1442
+ if (!pushRuntime.timer && !pushRuntime.flushing && retryCount === 0) {
1443
+ this._pushRuntimes.delete(linkKey);
1416
1444
  }
1417
- const cids = pushEntries.map(e => e.cid);
1418
- try {
1419
- const result = yield pushMessages({
1445
+ return;
1446
+ }
1447
+ const cids = pushEntries.map((entry) => entry.cid);
1448
+ pushRuntime.flushing = true;
1449
+ try {
1450
+ const result = yield pushMessages({
1451
+ did, dwnUrl, delegateDid, protocol,
1452
+ messageCids: cids,
1453
+ agent: this.agent,
1454
+ permissionsApi: this._permissionsApi,
1455
+ });
1456
+ if (result.failed.length > 0) {
1457
+ const failedSet = new Set(result.failed);
1458
+ const failedEntries = pushEntries.filter((entry) => failedSet.has(entry.cid));
1459
+ this.requeueOrReconcile(linkKey, {
1420
1460
  did, dwnUrl, delegateDid, protocol,
1421
- messageCids: cids,
1422
- agent: this.agent,
1423
- permissionsApi: this._permissionsApi,
1461
+ entries: failedEntries,
1462
+ retryCount: retryCount + 1,
1424
1463
  });
1425
- // Advance the push checkpoint for successfully pushed entries.
1426
- // Push is sequential (single batch, in-order processing) so we can
1427
- // commit directly without ordinal tracking — there's no concurrent
1428
- // completion to reorder.
1429
- const link = this._activeLinks.get(targetKey);
1430
- if (link) {
1431
- const succeededSet = new Set(result.succeeded);
1432
- // Track highest contiguous success: if a CID fails, we stop advancing.
1433
- let hitFailure = false;
1434
- for (const entry of pushEntries) {
1435
- if (hitFailure) {
1436
- break;
1437
- }
1438
- if (succeededSet.has(entry.cid) && entry.localToken) {
1439
- if (!ReplicationLedger.validateTokenDomain(link.push, entry.localToken)) {
1440
- console.warn(`SyncEngineLevel: Push checkpoint domain mismatch for ${did} -> ${dwnUrl}, transitioning to repairing`);
1441
- yield this.transitionToRepairing(targetKey, link);
1442
- break;
1443
- }
1444
- ReplicationLedger.setReceivedToken(link.push, entry.localToken);
1445
- ReplicationLedger.commitContiguousToken(link.push, entry.localToken);
1446
- }
1447
- else {
1448
- // This CID failed or had no token — stop advancing.
1449
- hitFailure = true;
1450
- }
1451
- }
1452
- yield this.ledger.saveLink(link);
1453
- }
1454
- // Re-queue only TRANSIENT failures for retry. Permanent failures (400/401/403)
1455
- // are dropped — they will never succeed regardless of retry.
1456
- if (result.failed.length > 0) {
1457
- console.error(`SyncEngineLevel: Push-on-write failed for ${did} -> ${dwnUrl}: ` +
1458
- `${result.failed.length} transient failures of ${cids.length} messages`);
1459
- const failedSet = new Set(result.failed);
1460
- const failedEntries = pushEntries.filter(e => failedSet.has(e.cid));
1461
- let requeued = this._pendingPushCids.get(targetKey);
1462
- if (!requeued) {
1463
- requeued = { did, dwnUrl, delegateDid, protocol, entries: [] };
1464
- this._pendingPushCids.set(targetKey, requeued);
1465
- }
1466
- requeued.entries.push(...failedEntries);
1467
- // Schedule a retry after a short delay.
1468
- if (!this._pushDebounceTimer) {
1469
- this._pushDebounceTimer = setTimeout(() => {
1470
- void this.flushPendingPushes();
1471
- }, PUSH_DEBOUNCE_MS * 4);
1472
- }
1473
- }
1474
- // Permanent failures are logged by pushMessages but NOT re-queued.
1475
- // They will be rediscovered by the next SMT integrity check if the
1476
- // local/remote state has changed, but won't spin in a retry loop.
1477
1464
  }
1478
- catch (error) {
1479
- // Truly unexpected error (not per-message failure). Re-queue entire
1480
- // batch so entries aren't silently dropped from the debounce queue.
1481
- console.error(`SyncEngineLevel: Push-on-write failed for ${did} -> ${dwnUrl}`, error);
1482
- let requeued = this._pendingPushCids.get(targetKey);
1483
- if (!requeued) {
1484
- requeued = { did, dwnUrl, delegateDid, protocol, entries: [] };
1485
- this._pendingPushCids.set(targetKey, requeued);
1486
- }
1487
- requeued.entries.push(...pushEntries);
1488
- if (!this._pushDebounceTimer) {
1489
- this._pushDebounceTimer = setTimeout(() => {
1490
- void this.flushPendingPushes();
1491
- }, PUSH_DEBOUNCE_MS * 4);
1465
+ else {
1466
+ // Successful push reset retry count so subsequent unrelated
1467
+ // batches on this link start with a fresh budget.
1468
+ pushRuntime.retryCount = 0;
1469
+ if (!pushRuntime.timer && pushRuntime.entries.length === 0) {
1470
+ this._pushRuntimes.delete(linkKey);
1492
1471
  }
1493
1472
  }
1494
- })));
1473
+ }
1474
+ catch (error) {
1475
+ console.error(`SyncEngineLevel: Push batch failed for ${did} -> ${dwnUrl}`, error);
1476
+ this.requeueOrReconcile(linkKey, {
1477
+ did, dwnUrl, delegateDid, protocol,
1478
+ entries: pushEntries,
1479
+ retryCount: retryCount + 1,
1480
+ });
1481
+ }
1482
+ finally {
1483
+ pushRuntime.flushing = false;
1484
+ // If new entries accumulated while this push was in flight, schedule
1485
+ // a short drain to flush them. This gives a brief batching window
1486
+ // for burst writes while keeping single-write latency low.
1487
+ const rt = this._pushRuntimes.get(linkKey);
1488
+ if (rt && rt.entries.length > 0 && !rt.timer) {
1489
+ rt.timer = setTimeout(() => {
1490
+ rt.timer = undefined;
1491
+ void this.flushPendingPushesForLink(linkKey);
1492
+ }, PUSH_DEBOUNCE_MS);
1493
+ }
1494
+ }
1495
+ });
1496
+ }
1497
+ /**
1498
+ * Re-queues a failed push batch for retry, or marks the link
1499
+ * `needsReconcile` if retries are exhausted. Bounded to prevent
1500
+ * infinite retry loops.
1501
+ */
1502
+ requeueOrReconcile(targetKey, pending) {
1503
+ var _a;
1504
+ const maxRetries = SyncEngineLevel.PUSH_RETRY_BACKOFF_MS.length;
1505
+ const pushRuntime = this.getOrCreatePushRuntime(targetKey, pending);
1506
+ if (pending.retryCount >= maxRetries) {
1507
+ // Retry budget exhausted — mark link dirty for reconciliation.
1508
+ if (pushRuntime.timer) {
1509
+ clearTimeout(pushRuntime.timer);
1510
+ }
1511
+ this._pushRuntimes.delete(targetKey);
1512
+ const link = this._activeLinks.get(targetKey);
1513
+ if (link && !link.needsReconcile) {
1514
+ link.needsReconcile = true;
1515
+ void this.ledger.saveLink(link).then(() => {
1516
+ this.emitEvent({ type: 'reconcile:needed', tenantDid: pending.did, remoteEndpoint: pending.dwnUrl, protocol: pending.protocol, reason: 'push-retry-exhausted' });
1517
+ this.scheduleReconcile(targetKey);
1518
+ });
1519
+ }
1520
+ return;
1521
+ }
1522
+ pushRuntime.entries.push(...pending.entries);
1523
+ pushRuntime.retryCount = pending.retryCount;
1524
+ const delayMs = (_a = SyncEngineLevel.PUSH_RETRY_BACKOFF_MS[pending.retryCount]) !== null && _a !== void 0 ? _a : 2000;
1525
+ if (pushRuntime.timer) {
1526
+ clearTimeout(pushRuntime.timer);
1527
+ }
1528
+ pushRuntime.timer = setTimeout(() => {
1529
+ pushRuntime.timer = undefined;
1530
+ void this.flushPendingPushesForLink(targetKey);
1531
+ }, delayMs);
1532
+ }
1533
+ createLinkReconciler(shouldContinue) {
1534
+ return new SyncLinkReconciler({
1535
+ getLocalRoot: (did, delegateDid, protocol) => __awaiter(this, void 0, void 0, function* () { return this.getLocalRoot(did, delegateDid, protocol); }),
1536
+ getRemoteRoot: (did, dwnUrl, delegateDid, protocol) => __awaiter(this, void 0, void 0, function* () { return this.getRemoteRoot(did, dwnUrl, delegateDid, protocol); }),
1537
+ diffWithRemote: (target) => __awaiter(this, void 0, void 0, function* () { return this.diffWithRemote(target); }),
1538
+ pullMessages: (params) => __awaiter(this, void 0, void 0, function* () { return this.pullMessages(params); }),
1539
+ pushMessages: (params) => __awaiter(this, void 0, void 0, function* () { return this.pushMessages(params); }),
1540
+ shouldContinue,
1541
+ });
1542
+ }
1543
+ /**
1544
+ * Schedule a per-link reconciliation after a short debounce. Coalesces
1545
+ * repeated requests for the same link.
1546
+ */
1547
+ scheduleReconcile(linkKey, delayMs = 1500) {
1548
+ if (this._reconcileTimers.has(linkKey)) {
1549
+ return;
1550
+ }
1551
+ if (this._reconcileInFlight.has(linkKey)) {
1552
+ return;
1553
+ }
1554
+ if (this._activeRepairs.has(linkKey)) {
1555
+ return;
1556
+ }
1557
+ const generation = this._engineGeneration;
1558
+ const timer = setTimeout(() => {
1559
+ this._reconcileTimers.delete(linkKey);
1560
+ if (this._engineGeneration !== generation) {
1561
+ return;
1562
+ }
1563
+ void this.reconcileLink(linkKey);
1564
+ }, delayMs);
1565
+ this._reconcileTimers.set(linkKey, timer);
1566
+ }
1567
+ /**
1568
+ * Run SMT reconciliation for a single link. Deduplicates concurrent calls.
1569
+ * On success, clears `needsReconcile`. On failure, schedules retry.
1570
+ */
1571
+ reconcileLink(linkKey) {
1572
+ return __awaiter(this, void 0, void 0, function* () {
1573
+ const existing = this._reconcileInFlight.get(linkKey);
1574
+ if (existing) {
1575
+ return existing;
1576
+ }
1577
+ const promise = this.doReconcileLink(linkKey).finally(() => {
1578
+ this._reconcileInFlight.delete(linkKey);
1579
+ });
1580
+ this._reconcileInFlight.set(linkKey, promise);
1581
+ return promise;
1495
1582
  });
1496
1583
  }
1584
+ /**
1585
+ * Internal reconciliation implementation for a single link. Runs the
1586
+ * same SMT diff + pull/push that `sync()` does, but scoped to one link.
1587
+ */
1588
+ doReconcileLink(linkKey) {
1589
+ return __awaiter(this, void 0, void 0, function* () {
1590
+ const link = this._activeLinks.get(linkKey);
1591
+ if (!link) {
1592
+ return;
1593
+ }
1594
+ // Only reconcile live links — repairing/degraded links have their own
1595
+ // recovery path. Reconciling during repair would race with SMT diff.
1596
+ if (link.status !== 'live') {
1597
+ return;
1598
+ }
1599
+ // Skip if a repair is in progress for this link.
1600
+ if (this._activeRepairs.has(linkKey)) {
1601
+ return;
1602
+ }
1603
+ const generation = this._engineGeneration;
1604
+ const { tenantDid: did, remoteEndpoint: dwnUrl, delegateDid, protocol } = link;
1605
+ try {
1606
+ const reconcileOutcome = yield this.createLinkReconciler(() => this._engineGeneration === generation).reconcile({ did, dwnUrl, delegateDid, protocol }, { verifyConvergence: true });
1607
+ if (reconcileOutcome.aborted) {
1608
+ return;
1609
+ }
1610
+ if (reconcileOutcome.converged) {
1611
+ yield this.ledger.clearNeedsReconcile(link);
1612
+ this.emitEvent({ type: 'reconcile:completed', tenantDid: did, remoteEndpoint: dwnUrl, protocol });
1613
+ }
1614
+ else {
1615
+ // Roots still differ — retry after a delay. This can happen when
1616
+ // pushMessages() had permanent failures, pullMessages() partially
1617
+ // failed, or new writes arrived during reconciliation.
1618
+ this.scheduleReconcile(linkKey, 5000);
1619
+ }
1620
+ }
1621
+ catch (error) {
1622
+ console.error(`SyncEngineLevel: Reconciliation failed for ${did} -> ${dwnUrl}`, error);
1623
+ // Schedule retry with longer delay.
1624
+ this.scheduleReconcile(linkKey, 5000);
1625
+ }
1626
+ });
1627
+ }
1628
+ getOrCreatePushRuntime(linkKey, params) {
1629
+ let pushRuntime = this._pushRuntimes.get(linkKey);
1630
+ if (!pushRuntime) {
1631
+ pushRuntime = Object.assign(Object.assign({}, params), { entries: [], retryCount: 0 });
1632
+ this._pushRuntimes.set(linkKey, pushRuntime);
1633
+ }
1634
+ return pushRuntime;
1635
+ }
1497
1636
  // ---------------------------------------------------------------------------
1498
1637
  // Cursor persistence
1499
1638
  // ---------------------------------------------------------------------------
1500
- buildCursorKey(did, dwnUrl, protocol) {
1501
- const base = `${did}${CURSOR_SEPARATOR}${dwnUrl}`;
1502
- return protocol ? `${base}${CURSOR_SEPARATOR}${protocol}` : base;
1639
+ /**
1640
+ * Build the runtime key for a replication link.
1641
+ *
1642
+ * Live-mode subscription methods (`openLivePullSubscription`,
1643
+ * `openLocalPushSubscription`) receive `linkKey` directly and never
1644
+ * call this. The remaining callers are poll-mode `sync()` and the
1645
+ * live-mode startup/error paths that already have `link.scopeId`.
1646
+ *
1647
+ * The `undefined` fallback (which produces a legacy cursor key) exists
1648
+ * only for the no-protocol full-tenant targets in poll mode.
1649
+ */
1650
+ buildLinkKey(did, dwnUrl, scopeIdOrProtocol) {
1651
+ return scopeIdOrProtocol ? buildLinkId(did, dwnUrl, scopeIdOrProtocol) : buildLegacyCursorKey(did, dwnUrl);
1503
1652
  }
1504
1653
  /**
1505
- * Retrieves a stored progress token. Handles migration from old string cursors:
1654
+ * @deprecated Used by poll-mode sync and one-time migration only. Live mode
1655
+ * uses ReplicationLedger checkpoints. Handles migration from old string cursors:
1506
1656
  * if the stored value is a bare string (pre-ProgressToken format), it is treated
1507
1657
  * as absent — the sync engine will do a full SMT reconciliation on first startup
1508
1658
  * after upgrade, which is correct and safe.
@@ -1515,16 +1665,19 @@ export class SyncEngineLevel {
1515
1665
  try {
1516
1666
  const parsed = JSON.parse(raw);
1517
1667
  if (parsed && typeof parsed === 'object' &&
1518
- typeof parsed.streamId === 'string' &&
1519
- typeof parsed.epoch === 'string' &&
1520
- typeof parsed.position === 'string' &&
1521
- typeof parsed.messageCid === 'string') {
1668
+ typeof parsed.streamId === 'string' && parsed.streamId.length > 0 &&
1669
+ typeof parsed.epoch === 'string' && parsed.epoch.length > 0 &&
1670
+ typeof parsed.position === 'string' && parsed.position.length > 0 &&
1671
+ typeof parsed.messageCid === 'string' && parsed.messageCid.length > 0) {
1522
1672
  return parsed;
1523
1673
  }
1524
1674
  }
1525
1675
  catch (_a) {
1526
- // Not valid JSON (old string cursor) — treat as absent.
1676
+ // Not valid JSON (old string cursor) — fall through to delete.
1527
1677
  }
1678
+ // Entry exists but is unparseable or has invalid/empty fields. Delete it
1679
+ // so subsequent startups don't re-check it on every launch.
1680
+ yield this.deleteLegacyCursor(key);
1528
1681
  return undefined;
1529
1682
  }
1530
1683
  catch (error) {
@@ -1536,10 +1689,21 @@ export class SyncEngineLevel {
1536
1689
  }
1537
1690
  });
1538
1691
  }
1539
- setCursor(key, cursor) {
1692
+ /**
1693
+ * Delete a legacy cursor from the old syncCursors sublevel.
1694
+ * Called as part of one-time migration to ReplicationLedger.
1695
+ */
1696
+ deleteLegacyCursor(key) {
1540
1697
  return __awaiter(this, void 0, void 0, function* () {
1541
1698
  const cursors = this._db.sublevel('syncCursors');
1542
- yield cursors.put(key, JSON.stringify(cursor));
1699
+ try {
1700
+ yield cursors.del(key);
1701
+ }
1702
+ catch (_a) {
1703
+ // Best-effort — ignore LEVEL_NOT_FOUND and transient I/O errors alike.
1704
+ // A failed delete leaves the bad entry for one more re-check on the
1705
+ // next startup, which is harmless.
1706
+ }
1543
1707
  });
1544
1708
  }
1545
1709
  // ---------------------------------------------------------------------------
@@ -1555,8 +1719,11 @@ export class SyncEngineLevel {
1555
1719
  return undefined;
1556
1720
  }
1557
1721
  // Check for inline base64url-encoded data (small records from EventLog).
1722
+ // Delete the transport-level field so the DWN schema validator does not
1723
+ // reject the message for having unevaluated properties.
1558
1724
  const encodedData = event.message.encodedData;
1559
1725
  if (encodedData) {
1726
+ delete event.message.encodedData;
1560
1727
  const bytes = Encoder.base64UrlToBytes(encodedData);
1561
1728
  return new ReadableStream({
1562
1729
  start(controller) {
@@ -2026,4 +2193,6 @@ SyncEngineLevel.MAX_BACKOFF_MULTIPLIER = 4;
2026
2193
  SyncEngineLevel.MAX_REPAIR_ATTEMPTS = 3;
2027
2194
  /** Backoff schedule for repair retries (milliseconds). */
2028
2195
  SyncEngineLevel.REPAIR_BACKOFF_MS = [1000, 3000, 10000];
2196
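+ /** Push retry backoff schedule, indexed by retry count. requeueOrReconcile() is first called with retryCount 1, so retries wait 250ms, 1s, 2s; once retryCount reaches the array length it gives up and marks the link for reconciliation. */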
2197
+ SyncEngineLevel.PUSH_RETRY_BACKOFF_MS = [0, 250, 1000, 2000];
2029
2198
  //# sourceMappingURL=sync-engine-level.js.map