@enbox/agent 0.5.9 → 0.5.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/browser.mjs +9 -9
- package/dist/browser.mjs.map +4 -4
- package/dist/esm/dwn-api.js.map +1 -1
- package/dist/esm/dwn-record-upgrade.js +1 -1
- package/dist/esm/dwn-record-upgrade.js.map +1 -1
- package/dist/esm/index.js +4 -0
- package/dist/esm/index.js.map +1 -1
- package/dist/esm/sync-closure-resolver.js +855 -0
- package/dist/esm/sync-closure-resolver.js.map +1 -0
- package/dist/esm/sync-closure-types.js +189 -0
- package/dist/esm/sync-closure-types.js.map +1 -0
- package/dist/esm/sync-engine-level.js +977 -224
- package/dist/esm/sync-engine-level.js.map +1 -1
- package/dist/esm/sync-messages.js +19 -5
- package/dist/esm/sync-messages.js.map +1 -1
- package/dist/esm/sync-replication-ledger.js +220 -0
- package/dist/esm/sync-replication-ledger.js.map +1 -0
- package/dist/esm/types/sync.js +54 -1
- package/dist/esm/types/sync.js.map +1 -1
- package/dist/types/dwn-api.d.ts.map +1 -1
- package/dist/types/index.d.ts +5 -0
- package/dist/types/index.d.ts.map +1 -1
- package/dist/types/sync-closure-resolver.d.ts +19 -0
- package/dist/types/sync-closure-resolver.d.ts.map +1 -0
- package/dist/types/sync-closure-types.d.ts +122 -0
- package/dist/types/sync-closure-types.d.ts.map +1 -0
- package/dist/types/sync-engine-level.d.ts +137 -11
- package/dist/types/sync-engine-level.d.ts.map +1 -1
- package/dist/types/sync-messages.d.ts +6 -1
- package/dist/types/sync-messages.d.ts.map +1 -1
- package/dist/types/sync-replication-ledger.d.ts +72 -0
- package/dist/types/sync-replication-ledger.d.ts.map +1 -0
- package/dist/types/types/sync.d.ts +188 -0
- package/dist/types/types/sync.d.ts.map +1 -1
- package/package.json +3 -3
- package/src/dwn-api.ts +2 -1
- package/src/dwn-record-upgrade.ts +1 -1
- package/src/index.ts +5 -0
- package/src/sync-closure-resolver.ts +919 -0
- package/src/sync-closure-types.ts +270 -0
- package/src/sync-engine-level.ts +1062 -255
- package/src/sync-messages.ts +21 -6
- package/src/sync-replication-ledger.ts +197 -0
- package/src/types/sync.ts +202 -0
package/src/sync-engine-level.ts
CHANGED
|
@@ -1,16 +1,22 @@
|
|
|
1
1
|
import type { AbstractLevel } from 'abstract-level';
|
|
2
2
|
|
|
3
3
|
import type { DwnSubscriptionHandler, ResubscribeFactory } from '@enbox/dwn-clients';
|
|
4
|
-
import type { GenericMessage, MessageEvent, MessagesSubscribeReply, MessagesSyncDiffEntry, MessagesSyncReply, StateIndex, SubscriptionMessage } from '@enbox/dwn-sdk-js';
|
|
4
|
+
import type { GenericMessage, MessageEvent, MessagesSubscribeReply, MessagesSyncDiffEntry, MessagesSyncReply, ProgressToken, StateIndex, SubscriptionMessage } from '@enbox/dwn-sdk-js';
|
|
5
5
|
|
|
6
6
|
import ms from 'ms';
|
|
7
7
|
|
|
8
8
|
import { Level } from 'level';
|
|
9
9
|
import { Encoder, hashToHex, initDefaultHashes, Message } from '@enbox/dwn-sdk-js';
|
|
10
10
|
|
|
11
|
+
import type { ClosureEvaluationContext } from './sync-closure-types.js';
|
|
11
12
|
import type { PermissionsApi } from './types/permissions.js';
|
|
12
13
|
import type { EnboxAgent, EnboxPlatformAgent } from './types/agent.js';
|
|
13
|
-
import type { StartSyncParams, SyncConnectivityState, SyncEngine, SyncIdentityOptions, SyncMode } from './types/sync.js';
|
|
14
|
+
import type { PushResult, ReplicationLinkState, StartSyncParams, SyncConnectivityState, SyncEngine, SyncEvent, SyncEventListener, SyncIdentityOptions, SyncMode, SyncScope } from './types/sync.js';
|
|
15
|
+
|
|
16
|
+
import { evaluateClosure } from './sync-closure-resolver.js';
|
|
17
|
+
import { MAX_PENDING_TOKENS } from './types/sync.js';
|
|
18
|
+
import { ReplicationLedger } from './sync-replication-ledger.js';
|
|
19
|
+
import { createClosureContext, invalidateClosureCache } from './sync-closure-types.js';
|
|
14
20
|
|
|
15
21
|
import { AgentPermissionsApi } from './permissions-api.js';
|
|
16
22
|
import { DwnInterface } from './types/dwn.js';
|
|
@@ -40,56 +46,6 @@ const MAX_DIFF_DEPTH = 16;
|
|
|
40
46
|
const BATCHED_DIFF_DEPTH = 8;
|
|
41
47
|
|
|
42
48
|
/**
|
|
43
|
-
* Maximum number of concurrent remote HTTP requests during a tree diff.
|
|
44
|
-
* The binary tree walk fans out in parallel — without a limit, depth N
|
|
45
|
-
* produces 2^N concurrent requests, which can exhaust server rate limits.
|
|
46
|
-
*/
|
|
47
|
-
const REMOTE_CONCURRENCY = 4;
|
|
48
|
-
|
|
49
|
-
/**
 * Counting semaphore that bounds how many async operations run at once.
 * Used by the tree walk to limit in-flight remote HTTP requests.
 *
 * Permits are handed over directly: when a permit is released while callers
 * are queued, the oldest queued caller is resumed and the free-permit count
 * is left untouched.
 */
class Semaphore {
  /** Resolvers of callers blocked in `acquire()`, oldest first. */
  private readonly _queue: (() => void)[] = [];

  /** Number of permits that can be taken without waiting. */
  private _available: number;

  /**
   * @param permits - Initial number of permits (maximum concurrency).
   */
  constructor(permits: number) {
    this._available = permits;
  }

  /** Consume a permit, waiting in FIFO order when none is free. */
  async acquire(): Promise<void> {
    if (this._available <= 0) {
      // No free permit: park until `release()` hands one over.
      await new Promise<void>((wake) => {
        this._queue.push(wake);
      });
      return;
    }
    this._available -= 1;
  }

  /** Give a permit back, resuming the oldest waiter if there is one. */
  release(): void {
    const wake = this._queue.shift();
    if (wake === undefined) {
      this._available += 1;
      return;
    }
    wake();
  }

  /**
   * Run `fn` while holding a permit. The permit is returned whether `fn`
   * resolves, rejects, or throws synchronously.
   *
   * @param fn - The async task to run under the concurrency limit.
   * @returns Whatever `fn` resolves to.
   */
  async run<T>(fn: () => Promise<T>): Promise<T> {
    await this.acquire();
    try {
      const outcome = await fn();
      return outcome;
    } finally {
      this.release();
    }
  }
}
|
|
92
|
-
|
|
93
49
|
/**
|
|
94
50
|
* Key for the subscription cursor sublevel. Cursors are keyed by
|
|
95
51
|
* `{did}^{dwnUrl}[^{protocol}]` and store an opaque EventLog cursor string.
|
|
@@ -120,6 +76,80 @@ type LocalSubscription = {
|
|
|
120
76
|
close: () => Promise<void>;
|
|
121
77
|
};
|
|
122
78
|
|
|
79
|
+
// ---------------------------------------------------------------------------
|
|
80
|
+
// Per-link in-memory delivery-order tracking (not persisted to ledger)
|
|
81
|
+
// ---------------------------------------------------------------------------
|
|
82
|
+
|
|
83
|
+
/**
|
|
84
|
+
* Tracks an in-flight delivery that has been started but may not yet be
|
|
85
|
+
* durably committed. Used by the pull path to handle async completion
|
|
86
|
+
* reordering — subscription callbacks are fire-and-forget, so event B
|
|
87
|
+
* can complete before event A even though A was delivered first.
|
|
88
|
+
*/
|
|
89
|
+
type InFlightCommit = {
|
|
90
|
+
/** Monotonic delivery ordinal for this link. */
|
|
91
|
+
ordinal: number;
|
|
92
|
+
/** The token associated with this delivery. */
|
|
93
|
+
token: ProgressToken;
|
|
94
|
+
/** Whether processRawMessage has completed successfully. */
|
|
95
|
+
committed: boolean;
|
|
96
|
+
};
|
|
97
|
+
|
|
98
|
+
/**
 * Decides whether a message falls inside a replication link's sync scope.
 *
 * - `kind: 'full'` scopes, and scopes that define neither
 *   `protocolPathPrefixes` nor `contextIdPrefixes`, accept every message.
 * - An empty prefix array imposes no constraint (same as an absent one).
 * - When a prefix set is non-empty, the message must match at least one of
 *   its entries; when both sets are non-empty, both must match.
 * - A prefix matches on exact equality or at a `/` segment boundary, so
 *   `a/b` matches `a/b` and `a/b/c` but not `a/bc`.
 *
 * `protocolPath` is read from `message.descriptor`; `contextId` is read from
 * the top level of the message. A missing, empty, or non-string value never
 * matches a non-empty prefix set (it is rejected rather than throwing).
 *
 * This is agent-side filtering for subset scopes. The underlying
 * MessagesSubscribe filter only supports protocol-level scoping today —
 * protocolPath/contextId prefix filtering at the EventLog level is a
 * follow-up (requires dwn-sdk-js MessagesFilter extension).
 *
 * @param message - The message to test.
 * @param scope - The link's sync scope.
 * @returns `true` when the message is in scope.
 */
function isEventInScope(message: GenericMessage, scope: SyncScope): boolean {
  if (scope.kind === 'full') { return true; }

  const pathPrefixes = scope.protocolPathPrefixes;
  const contextPrefixes = scope.contextIdPrefixes;
  if (!pathPrefixes && !contextPrefixes) { return true; }

  if (pathPrefixes && pathPrefixes.length > 0) {
    const protocolPath = (message.descriptor as Record<string, unknown>).protocolPath;
    if (!matchesAnySegmentPrefix(protocolPath, pathPrefixes)) { return false; }
  }

  if (contextPrefixes && contextPrefixes.length > 0) {
    // contextId is not part of GenericMessage's declared shape, so read it
    // through a widened view instead of `any`.
    const contextId = (message as GenericMessage & { contextId?: unknown }).contextId;
    if (!matchesAnySegmentPrefix(contextId, contextPrefixes)) { return false; }
  }

  return true;
}

/**
 * Returns true when `value` is a non-empty string that equals one of
 * `prefixes` or extends one of them at a `/` segment boundary.
 */
function matchesAnySegmentPrefix(value: unknown, prefixes: readonly string[]): boolean {
  if (typeof value !== 'string' || value.length === 0) { return false; }
  return prefixes.some(prefix => value === prefix || value.startsWith(`${prefix}/`));
}
|
|
139
|
+
|
|
140
|
+
/**
|
|
141
|
+
* Per-link runtime state held in memory. Not persisted — on crash,
|
|
142
|
+
* replay restarts from `contiguousAppliedToken` (idempotent apply).
|
|
143
|
+
*/
|
|
144
|
+
type LinkRuntimeState = {
|
|
145
|
+
/** Next ordinal to assign when a pull event is delivered. */
|
|
146
|
+
nextDeliveryOrdinal: number;
|
|
147
|
+
/** Next ordinal to check when draining committed entries. */
|
|
148
|
+
nextCommitOrdinal: number;
|
|
149
|
+
/** In-flight deliveries keyed by ordinal. */
|
|
150
|
+
inflight: Map<number, InFlightCommit>;
|
|
151
|
+
};
|
|
152
|
+
|
|
123
153
|
export class SyncEngineLevel implements SyncEngine {
|
|
124
154
|
/**
|
|
125
155
|
* Holds the instance of a `EnboxPlatformAgent` that represents the current execution context for
|
|
@@ -138,6 +168,29 @@ export class SyncEngineLevel implements SyncEngine {
|
|
|
138
168
|
private _syncIntervalId?: ReturnType<typeof setInterval>;
|
|
139
169
|
private _syncLock = false;
|
|
140
170
|
|
|
171
|
+
/**
|
|
172
|
+
* Durable replication ledger — persists per-link checkpoint state.
|
|
173
|
+
* Used by live sync to track pull/push progression independently per link.
|
|
174
|
+
* Poll-mode sync still uses the legacy `getCursor`/`setCursor` path.
|
|
175
|
+
* Lazily initialized on first use to avoid sublevel() calls on mock dbs.
|
|
176
|
+
*/
|
|
177
|
+
private _ledger?: ReplicationLedger;
|
|
178
|
+
|
|
179
|
+
/**
|
|
180
|
+
* In-memory cache of active links, keyed by `{did}^{dwnUrl}^{protocol}`.
|
|
181
|
+
* Populated from the ledger on `startLiveSync`, used by subscription handlers
|
|
182
|
+
* to avoid async ledger lookups on every event.
|
|
183
|
+
*/
|
|
184
|
+
private _activeLinks: Map<string, ReplicationLinkState> = new Map();
|
|
185
|
+
|
|
186
|
+
/**
|
|
187
|
+
* Per-link in-memory delivery-order tracking for the pull path. Keyed by
|
|
188
|
+
* the same link key as `_activeLinks`. Not persisted — on crash, replay
|
|
189
|
+
* restarts from `contiguousAppliedToken` and idempotent apply handles
|
|
190
|
+
* re-delivered events.
|
|
191
|
+
*/
|
|
192
|
+
private _linkRuntimes: Map<string, LinkRuntimeState> = new Map();
|
|
193
|
+
|
|
141
194
|
/**
|
|
142
195
|
* Hex-encoded default hashes for empty subtrees at each depth, keyed by depth.
|
|
143
196
|
* Lazily initialized on first use. Used by `walkTreeDiff` to detect empty subtrees
|
|
@@ -152,6 +205,14 @@ export class SyncEngineLevel implements SyncEngine {
|
|
|
152
205
|
/** Current sync mode, set by `startSync`. */
|
|
153
206
|
private _syncMode: SyncMode = 'poll';
|
|
154
207
|
|
|
208
|
+
/**
|
|
209
|
+
* Monotonic session generation counter. Incremented on every teardown.
|
|
210
|
+
* Async operations (repair, retry timers) capture the generation at start
|
|
211
|
+
* and bail if it has changed — this prevents stale work from mutating
|
|
212
|
+
* state after teardown or mode switch.
|
|
213
|
+
*/
|
|
214
|
+
private _syncGeneration = 0;
|
|
215
|
+
|
|
155
216
|
/** Active live pull subscriptions (remote -> local via MessagesSubscribe). */
|
|
156
217
|
private _liveSubscriptions: LiveSubscription[] = [];
|
|
157
218
|
|
|
@@ -164,8 +225,35 @@ export class SyncEngineLevel implements SyncEngine {
|
|
|
164
225
|
/** Debounce timer for batched push-on-write. */
|
|
165
226
|
private _pushDebounceTimer?: ReturnType<typeof setTimeout>;
|
|
166
227
|
|
|
167
|
-
/**
|
|
168
|
-
private
|
|
228
|
+
/** Registered event listeners for observability. */
|
|
229
|
+
private _eventListeners: Set<SyncEventListener> = new Set();
|
|
230
|
+
|
|
231
|
+
/** Entry in the pending push queue — a message CID with its local EventLog token. */
|
|
232
|
+
private _pendingPushCids: Map<string, {
|
|
233
|
+
did: string; dwnUrl: string; delegateDid?: string; protocol?: string;
|
|
234
|
+
entries: { cid: string; localToken?: ProgressToken }[];
|
|
235
|
+
}> = new Map();
|
|
236
|
+
|
|
237
|
+
/**
|
|
238
|
+
* CIDs recently received via pull subscription, keyed by `cid|dwnUrl` to
|
|
239
|
+
* scope suppression per remote endpoint. A message pulled from Provider A
|
|
240
|
+
* is only suppressed for push back to Provider A — it still fans out to
|
|
241
|
+
* Provider B and C. TTL: 60 seconds. Cap: 10,000 entries.
|
|
242
|
+
*/
|
|
243
|
+
private _recentlyPulledCids: Map<string, number> = new Map();
|
|
244
|
+
|
|
245
|
+
/** TTL for echo-loop suppression entries (60 seconds). */
|
|
246
|
+
private static readonly ECHO_SUPPRESS_TTL_MS = 60_000;
|
|
247
|
+
|
|
248
|
+
/**
|
|
249
|
+
* Per-tenant closure evaluation contexts for the current live sync session.
|
|
250
|
+
* Caches ProtocolsConfigure and grant lookups across events for the same
|
|
251
|
+
* tenant. Keyed by tenantDid to prevent cross-tenant cache pollution.
|
|
252
|
+
*/
|
|
253
|
+
private _closureContexts: Map<string, ClosureEvaluationContext> = new Map();
|
|
254
|
+
|
|
255
|
+
/** Maximum entries in the echo-loop suppression cache. */
|
|
256
|
+
private static readonly ECHO_SUPPRESS_MAX_ENTRIES = 10_000;
|
|
169
257
|
|
|
170
258
|
/** Count of consecutive SMT sync failures (for backoff in poll mode). */
|
|
171
259
|
private _consecutiveFailures = 0;
|
|
@@ -182,6 +270,14 @@ export class SyncEngineLevel implements SyncEngine {
|
|
|
182
270
|
this._db = (db) ? db : new Level<string, string>(dataPath ?? 'DATA/AGENT/SYNC_STORE');
|
|
183
271
|
}
|
|
184
272
|
|
|
273
|
+
/**
 * Returns the replication ledger, constructing it over `_db` on first
 * access and reusing that instance afterwards.
 */
private get ledger(): ReplicationLedger {
  const cached = this._ledger;
  if (cached) {
    return cached;
  }

  const created = new ReplicationLedger(this._db);
  this._ledger = created;
  return created;
}
|
|
280
|
+
|
|
185
281
|
/**
|
|
186
282
|
* Retrieves the `EnboxPlatformAgent` execution context.
|
|
187
283
|
*
|
|
@@ -202,7 +298,39 @@ export class SyncEngineLevel implements SyncEngine {
|
|
|
202
298
|
}
|
|
203
299
|
|
|
204
300
|
/**
 * Connectivity aggregated over the active links:
 * - no active links (poll mode): the engine-wide `_connectivityState`;
 * - any link `online`: `online`;
 * - otherwise any link `offline`: `offline`;
 * - otherwise: `unknown`.
 */
get connectivityState(): SyncConnectivityState {
  if (this._activeLinks.size === 0) {
    return this._connectivityState;
  }

  let sawOffline = false;
  for (const { connectivity } of this._activeLinks.values()) {
    // A single online link decides the result, so stop scanning.
    if (connectivity === 'online') {
      return 'online';
    }
    if (connectivity === 'offline') {
      sawOffline = true;
    }
  }

  return sawOffline ? 'offline' : 'unknown';
}
|
|
319
|
+
|
|
320
|
+
/**
 * Registers a sync event listener.
 *
 * @param listener - Callback to invoke for each emitted sync event.
 * @returns A function that removes `listener` again when called.
 */
public on(listener: SyncEventListener): () => void {
  this._eventListeners.add(listener);

  const unsubscribe = (): void => {
    this._eventListeners.delete(listener);
  };
  return unsubscribe;
}
|
|
324
|
+
|
|
325
|
+
/**
 * Delivers `event` to every registered listener. An exception thrown by a
 * listener is discarded so that it neither reaches the sync engine nor
 * prevents the remaining listeners from being called.
 */
private emitEvent(event: SyncEvent): void {
  this._eventListeners.forEach((notify) => {
    try {
      notify(event);
    } catch {
      // Don't let listener errors propagate into sync engine logic.
    }
  });
}
|
|
207
335
|
|
|
208
336
|
public async clear(): Promise<void> {
|
|
@@ -490,14 +618,73 @@ export class SyncEngineLevel implements SyncEngine {
|
|
|
490
618
|
console.error('SyncEngineLevel: Error during initial live-sync catch-up', error);
|
|
491
619
|
}
|
|
492
620
|
|
|
493
|
-
// Step 2:
|
|
621
|
+
// Step 2: Initialize replication links and open live subscriptions.
|
|
494
622
|
const syncTargets = await this.getSyncTargets();
|
|
495
623
|
for (const target of syncTargets) {
|
|
624
|
+
let link: ReplicationLinkState | undefined;
|
|
496
625
|
try {
|
|
626
|
+
// Get or create the link in the durable ledger.
|
|
627
|
+
// Use protocol-scoped scope when a protocol is specified, otherwise full-tenant.
|
|
628
|
+
const linkScope: SyncScope = target.protocol
|
|
629
|
+
? { kind: 'protocol', protocol: target.protocol }
|
|
630
|
+
: { kind: 'full' };
|
|
631
|
+
link = await this.ledger.getOrCreateLink({
|
|
632
|
+
tenantDid : target.did,
|
|
633
|
+
remoteEndpoint : target.dwnUrl,
|
|
634
|
+
scope : linkScope,
|
|
635
|
+
delegateDid : target.delegateDid,
|
|
636
|
+
protocol : target.protocol,
|
|
637
|
+
});
|
|
638
|
+
|
|
639
|
+
// Cache the link for fast access by subscription handlers.
|
|
640
|
+
const linkKey = this.buildCursorKey(target.did, target.dwnUrl, target.protocol);
|
|
641
|
+
this._activeLinks.set(linkKey, link);
|
|
642
|
+
|
|
643
|
+
// Open subscriptions — only transition to live if both succeed.
|
|
644
|
+
// If pull succeeds but push fails, close the pull subscription to
|
|
645
|
+
// avoid a resource leak with inconsistent state.
|
|
497
646
|
await this.openLivePullSubscription(target);
|
|
498
|
-
|
|
647
|
+
try {
|
|
648
|
+
await this.openLocalPushSubscription(target);
|
|
649
|
+
} catch (pushError) {
|
|
650
|
+
// Close the already-opened pull subscription.
|
|
651
|
+
const pullSub = this._liveSubscriptions.find(
|
|
652
|
+
s => s.did === target.did && s.dwnUrl === target.dwnUrl && s.protocol === target.protocol
|
|
653
|
+
);
|
|
654
|
+
if (pullSub) {
|
|
655
|
+
try { await pullSub.close(); } catch { /* best effort */ }
|
|
656
|
+
this._liveSubscriptions = this._liveSubscriptions.filter(s => s !== pullSub);
|
|
657
|
+
}
|
|
658
|
+
throw pushError;
|
|
659
|
+
}
|
|
660
|
+
|
|
661
|
+
this.emitEvent({ type: 'link:status-change', tenantDid: target.did, remoteEndpoint: target.dwnUrl, protocol: target.protocol, from: 'initializing', to: 'live' });
|
|
662
|
+
await this.ledger.setStatus(link!, 'live');
|
|
499
663
|
} catch (error: any) {
|
|
664
|
+
const linkKey = this.buildCursorKey(target.did, target.dwnUrl, target.protocol);
|
|
665
|
+
|
|
666
|
+
// Detect ProgressGap (410) — the cursor is stale, link needs SMT repair.
|
|
667
|
+
if ((error as any).isProgressGap && link) {
|
|
668
|
+
console.warn(`SyncEngineLevel: ProgressGap detected for ${target.did} -> ${target.dwnUrl}, initiating repair`);
|
|
669
|
+
this.emitEvent({ type: 'gap:detected', tenantDid: target.did, remoteEndpoint: target.dwnUrl, protocol: target.protocol, reason: 'ProgressGap' });
|
|
670
|
+
const gapInfo = (error as any).gapInfo;
|
|
671
|
+
await this.transitionToRepairing(linkKey, link, {
|
|
672
|
+
resumeToken: gapInfo?.latestAvailable,
|
|
673
|
+
});
|
|
674
|
+
continue;
|
|
675
|
+
}
|
|
676
|
+
|
|
500
677
|
console.error(`SyncEngineLevel: Failed to open live subscription for ${target.did} -> ${target.dwnUrl}`, error);
|
|
678
|
+
|
|
679
|
+
// Clean up in-memory state for the failed link so it doesn't appear
|
|
680
|
+
// active to later code. The durable link remains at 'initializing'.
|
|
681
|
+
this._activeLinks.delete(linkKey);
|
|
682
|
+
this._linkRuntimes.delete(linkKey);
|
|
683
|
+
|
|
684
|
+
// Recompute connectivity — if no live subscriptions remain, reset to unknown.
|
|
685
|
+
if (this._liveSubscriptions.length === 0) {
|
|
686
|
+
this._connectivityState = 'unknown';
|
|
687
|
+
}
|
|
501
688
|
}
|
|
502
689
|
}
|
|
503
690
|
|
|
@@ -517,10 +704,409 @@ export class SyncEngineLevel implements SyncEngine {
|
|
|
517
704
|
this._syncIntervalId = setInterval(integrityCheck, intervalMilliseconds);
|
|
518
705
|
}
|
|
519
706
|
|
|
707
|
+
/**
 * Returns the in-memory runtime state for `linkKey`. When none is registered
 * yet, a fresh one (both ordinals at 0, empty in-flight map) is stored in
 * `_linkRuntimes` and returned.
 */
private getOrCreateRuntime(linkKey: string): LinkRuntimeState {
  const existing = this._linkRuntimes.get(linkKey);
  if (existing) {
    return existing;
  }

  const fresh: LinkRuntimeState = {
    nextDeliveryOrdinal : 0,
    nextCommitOrdinal   : 0,
    inflight            : new Map(),
  };
  this._linkRuntimes.set(linkKey, fresh);
  return fresh;
}
|
|
718
|
+
|
|
719
|
+
/**
 * Removes the run of committed in-flight entries that starts at the
 * runtime's `nextCommitOrdinal`, applying each entry's token to the link's
 * pull checkpoint via `ReplicationLedger.commitContiguousToken` and
 * `ReplicationLedger.setReceivedToken`. Stops at the first ordinal that is
 * missing or not yet committed.
 *
 * Nothing is persisted or emitted here: the caller saves the link and emits
 * `checkpoint:pull-advance` only after that save succeeds.
 *
 * @returns How many entries were drained; 0 when the link or its runtime is
 * unknown, or when the next ordinal is not yet committed.
 */
private drainCommittedPull(linkKey: string): number {
  const runtime = this._linkRuntimes.get(linkKey);
  const link = this._activeLinks.get(linkKey);
  if (runtime === undefined || link === undefined) {
    return 0;
  }

  let count = 0;
  for (
    let head = runtime.inflight.get(runtime.nextCommitOrdinal);
    head !== undefined && head.committed;
    head = runtime.inflight.get(runtime.nextCommitOrdinal)
  ) {
    ReplicationLedger.commitContiguousToken(link.pull, head.token);
    ReplicationLedger.setReceivedToken(link.pull, head.token);
    runtime.inflight.delete(runtime.nextCommitOrdinal);
    runtime.nextCommitOrdinal += 1;
    count += 1;
  }

  return count;
}
|
|
746
|
+
|
|
747
|
+
// ---------------------------------------------------------------------------
|
|
748
|
+
// Per-link repair and degraded-poll orchestration (Phase 2)
|
|
749
|
+
// ---------------------------------------------------------------------------
|
|
750
|
+
|
|
751
|
+
/** Maximum consecutive repair attempts before falling back to degraded_poll. */
|
|
752
|
+
private static readonly MAX_REPAIR_ATTEMPTS = 3;
|
|
753
|
+
|
|
754
|
+
/** Per-link degraded-poll interval timers. */
|
|
755
|
+
private _degradedPollTimers: Map<string, ReturnType<typeof setInterval>> = new Map();
|
|
756
|
+
|
|
757
|
+
/** Per-link repair attempt counters. */
|
|
758
|
+
private _repairAttempts: Map<string, number> = new Map();
|
|
759
|
+
|
|
760
|
+
/** Per-link active repair promises — prevents concurrent repair for the same link. */
|
|
761
|
+
private _activeRepairs: Map<string, Promise<void>> = new Map();
|
|
762
|
+
|
|
763
|
+
/** Per-link retry timers for failed repairs below max attempts. */
|
|
764
|
+
private _repairRetryTimers: Map<string, ReturnType<typeof setTimeout>> = new Map();
|
|
765
|
+
|
|
766
|
+
/** Backoff schedule for repair retries (milliseconds). */
|
|
767
|
+
private static readonly REPAIR_BACKOFF_MS = [1_000, 3_000, 10_000];
|
|
768
|
+
|
|
769
|
+
/**
|
|
770
|
+
* Per-link repair context — stores ProgressGap metadata for use during
|
|
771
|
+
* repair. The `resumeToken` (from `gapInfo.latestAvailable`) is used as
|
|
772
|
+
* the post-repair checkpoint so the reopened subscription replays from
|
|
773
|
+
* a valid boundary instead of starting live-only.
|
|
774
|
+
*/
|
|
775
|
+
private _repairContext: Map<string, { resumeToken?: ProgressToken }> = new Map();
|
|
776
|
+
|
|
777
|
+
/**
 * Single entry point for moving a link into `repairing`. In order, it:
 * 1. marks the link `offline` and sets its status to `repairing` through
 *    the ledger,
 * 2. emits `link:status-change`, plus `link:connectivity-change` when the
 *    link was not already offline,
 * 3. stores `options.resumeToken` (if given) as the link's repair context,
 * 4. drops in-flight deliveries and moves the commit ordinal up to the
 *    delivery ordinal so stale runtime state does not linger,
 * 5. starts `repairLink` without awaiting it; if that rejects, a retry is
 *    scheduled.
 *
 * Every code path that sets `repairing` should go through this helper so a
 * future retry path is always in place.
 *
 * @param linkKey - Key of the link in `_activeLinks` / `_linkRuntimes`.
 * @param link - The link state to transition (mutated).
 * @param options - Optional gap metadata; `resumeToken` is kept for the repair.
 */
private async transitionToRepairing(
  linkKey: string,
  link: ReplicationLinkState,
  options?: { resumeToken?: ProgressToken },
): Promise<void> {
  // Snapshot the old values first so the events report an accurate `from`.
  const { status: statusBefore, connectivity: connectivityBefore } = link;

  link.connectivity = 'offline';
  await this.ledger.setStatus(link, 'repairing');

  const { tenantDid, remoteEndpoint, protocol } = link;
  this.emitEvent({ type: 'link:status-change', tenantDid, remoteEndpoint, protocol, from: statusBefore, to: 'repairing' });
  if (connectivityBefore !== 'offline') {
    this.emitEvent({ type: 'link:connectivity-change', tenantDid, remoteEndpoint, protocol, from: connectivityBefore, to: 'offline' });
  }

  const resumeToken = options?.resumeToken;
  if (resumeToken) {
    this._repairContext.set(linkKey, { resumeToken });
  }

  // Stale ordinals must not survive into the repair attempt.
  const runtime = this._linkRuntimes.get(linkKey);
  if (runtime) {
    runtime.inflight.clear();
    runtime.nextCommitOrdinal = runtime.nextDeliveryOrdinal;
  }

  // Fire-and-forget: a rejected repair is turned into a scheduled retry.
  void this.repairLink(linkKey).catch(() => {
    this.scheduleRepairRetry(linkKey);
  });
}
|
|
818
|
+
|
|
819
|
+
/**
 * Schedules one delayed retry of a failed repair for `linkKey`.
 *
 * No-op when the link is unknown, when it is already in `degraded_poll`
 * (the timer loop owns retries there), or when a retry is already pending.
 *
 * The delay is taken from `REPAIR_BACKOFF_MS`, indexed by the link's current
 * attempt count minus one and clamped to the table bounds, so the first
 * failure waits `REPAIR_BACKOFF_MS[0]` and later failures reuse the last
 * entry.
 *
 * The timer captures the sync generation when it is scheduled. It re-checks
 * that generation both when it fires and again after a failed repair attempt,
 * so work scheduled before a teardown never reschedules itself afterwards.
 */
private scheduleRepairRetry(linkKey: string): void {
  // Don't schedule if already in degraded_poll or retry pending.
  const link = this._activeLinks.get(linkKey);
  if (!link || link.status === 'degraded_poll') { return; }
  if (this._repairRetryTimers.has(linkKey)) { return; }

  // attempts is already post-increment from doRepairLink, so subtract 1
  // for the backoff index: first failure (attempts=1) → backoff[0].
  // Clamp at both ends so a zero/negative counter can never yield an
  // undefined delay.
  const attempts = this._repairAttempts.get(linkKey) ?? 1;
  const backoff = SyncEngineLevel.REPAIR_BACKOFF_MS;
  const backoffIndex = Math.min(Math.max(attempts - 1, 0), backoff.length - 1);
  const delayMs = backoff[backoffIndex];

  const timerGeneration = this._syncGeneration;
  const timer = setTimeout(async (): Promise<void> => {
    // Only clear the registry entry if it still refers to this timer, so a
    // stale callback cannot unregister a newer timer for the same key.
    if (this._repairRetryTimers.get(linkKey) === timer) {
      this._repairRetryTimers.delete(linkKey);
    }

    // Bail if teardown occurred since this timer was scheduled.
    if (this._syncGeneration !== timerGeneration) { return; }

    // Verify link still exists and is still repairing.
    const currentLink = this._activeLinks.get(linkKey);
    if (!currentLink || currentLink.status !== 'repairing') { return; }

    try {
      await this.repairLink(linkKey);
    } catch {
      // The repair awaited; teardown may have happened in the meantime.
      if (this._syncGeneration !== timerGeneration) { return; }

      // repairLink handles max attempts → degraded_poll internally.
      // Re-read the link so the decision uses its current state; if it is
      // still repairing, schedule another retry.
      if (this._activeLinks.get(linkKey)?.status === 'repairing') {
        this.scheduleRepairRetry(linkKey);
      }
    }
  }, delayMs);

  this._repairRetryTimers.set(linkKey, timer);
}
|
|
860
|
+
|
|
861
|
+
/**
 * Runs a repair for `linkKey`, sharing one promise between concurrent
 * callers: while a repair for the link is registered in `_activeRepairs`,
 * that same promise is returned. The registration is removed once the repair
 * settles, whether it succeeded or failed.
 */
private repairLink(linkKey: string): Promise<void> {
  const inFlight = this._activeRepairs.get(linkKey);
  if (inFlight !== undefined) {
    return inFlight;
  }

  const tracked = this.doRepairLink(linkKey).finally(() => {
    this._activeRepairs.delete(linkKey);
  });
  this._activeRepairs.set(linkKey, tracked);
  return tracked;
}
|
|
875
|
+
|
|
876
|
+
/**
|
|
877
|
+
* Internal repair implementation. Runs SMT set reconciliation for a single
|
|
878
|
+
* link, then attempts to re-establish live subscriptions. If repair succeeds,
|
|
879
|
+
* transitions to `live`. If it fails, throws so callers (degraded_poll timer,
|
|
880
|
+
* startup) can handle retry scheduling.
|
|
881
|
+
*/
|
|
882
|
+
private async doRepairLink(linkKey: string): Promise<void> {
|
|
883
|
+
const link = this._activeLinks.get(linkKey);
|
|
884
|
+
if (!link) { return; }
|
|
885
|
+
|
|
886
|
+
// Capture the sync generation at repair start. If teardown occurs during
|
|
887
|
+
// any await, the generation will have incremented and we bail before
|
|
888
|
+
// mutating state — preventing the race where repair continues after teardown.
|
|
889
|
+
const generation = this._syncGeneration;
|
|
890
|
+
|
|
891
|
+
const { tenantDid: did, remoteEndpoint: dwnUrl, delegateDid, protocol } = link;
|
|
892
|
+
|
|
893
|
+
this.emitEvent({ type: 'repair:started', tenantDid: did, remoteEndpoint: dwnUrl, protocol, attempt: (this._repairAttempts.get(linkKey) ?? 0) + 1 });
|
|
894
|
+
const attempts = (this._repairAttempts.get(linkKey) ?? 0) + 1;
|
|
895
|
+
this._repairAttempts.set(linkKey, attempts);
|
|
896
|
+
|
|
897
|
+
// Step 1: Close existing subscriptions FIRST to stop old events from
|
|
898
|
+
// mutating local state while repair runs.
|
|
899
|
+
await this.closeLinkSubscriptions(link);
|
|
900
|
+
if (this._syncGeneration !== generation) { return; } // Teardown occurred.
|
|
901
|
+
|
|
902
|
+
// Step 2: Clear runtime ordinals immediately — stale state must not
|
|
903
|
+
// persist across repair attempts (successful or failed).
|
|
904
|
+
const rt = this.getOrCreateRuntime(linkKey);
|
|
905
|
+
rt.inflight.clear();
|
|
906
|
+
rt.nextDeliveryOrdinal = 0;
|
|
907
|
+
rt.nextCommitOrdinal = 0;
|
|
908
|
+
|
|
909
|
+
try {
|
|
910
|
+
// Step 3: Run SMT reconciliation for this link.
|
|
911
|
+
const localRoot = await this.getLocalRoot(did, delegateDid, protocol);
|
|
912
|
+
if (this._syncGeneration !== generation) { return; }
|
|
913
|
+
const remoteRoot = await this.getRemoteRoot(did, dwnUrl, delegateDid, protocol);
|
|
914
|
+
if (this._syncGeneration !== generation) { return; }
|
|
915
|
+
|
|
916
|
+
if (localRoot !== remoteRoot) {
|
|
917
|
+
const diff = await this.diffWithRemote({ did, dwnUrl, delegateDid, protocol });
|
|
918
|
+
if (this._syncGeneration !== generation) { return; }
|
|
919
|
+
|
|
920
|
+
if (diff.onlyRemote.length > 0) {
|
|
921
|
+
const prefetched: (MessagesSyncDiffEntry & { message: GenericMessage })[] = [];
|
|
922
|
+
const needsFetchCids: string[] = [];
|
|
923
|
+
for (const entry of diff.onlyRemote) {
|
|
924
|
+
if (!entry.message || (entry.message.descriptor.interface === 'Records' &&
|
|
925
|
+
entry.message.descriptor.method === 'Write' &&
|
|
926
|
+
(entry.message.descriptor as any).dataCid && !entry.encodedData)) {
|
|
927
|
+
needsFetchCids.push(entry.messageCid);
|
|
928
|
+
} else {
|
|
929
|
+
prefetched.push(entry as MessagesSyncDiffEntry & { message: GenericMessage });
|
|
930
|
+
}
|
|
931
|
+
}
|
|
932
|
+
await this.pullMessages({ did, dwnUrl, delegateDid, protocol, messageCids: needsFetchCids, prefetched });
|
|
933
|
+
if (this._syncGeneration !== generation) { return; }
|
|
934
|
+
}
|
|
935
|
+
|
|
936
|
+
if (diff.onlyLocal.length > 0) {
|
|
937
|
+
await this.pushMessages({ did, dwnUrl, delegateDid, protocol, messageCids: diff.onlyLocal });
|
|
938
|
+
if (this._syncGeneration !== generation) { return; }
|
|
939
|
+
}
|
|
940
|
+
}
|
|
941
|
+
|
|
942
|
+
// Step 4: Determine the post-repair resume token.
|
|
943
|
+
// - If repair was triggered by ProgressGap, use the stored resumeToken
|
|
944
|
+
// (from gapInfo.latestAvailable) so the reopened subscription replays
|
|
945
|
+
// from a valid boundary, closing the race window between SMT and resubscribe.
|
|
946
|
+
// - Otherwise, use the existing contiguousAppliedToken if still valid.
|
|
947
|
+
// - Push checkpoint is NOT reset during repair: push frontier tracks what
|
|
948
|
+
// the local EventLog has delivered to the remote. SMT repair handles
|
|
949
|
+
// pull-side convergence; push-side convergence is handled by the diff's
|
|
950
|
+
// onlyLocal push. The push checkpoint remains the local authority.
|
|
951
|
+
const repairCtx = this._repairContext.get(linkKey);
|
|
952
|
+
const resumeToken = repairCtx?.resumeToken ?? link.pull.contiguousAppliedToken;
|
|
953
|
+
ReplicationLedger.resetCheckpoint(link.pull, resumeToken);
|
|
954
|
+
await this.ledger.saveLink(link);
|
|
955
|
+
if (this._syncGeneration !== generation) { return; }
|
|
956
|
+
|
|
957
|
+
// Step 5: Reopen subscriptions with the repaired checkpoints.
|
|
958
|
+
const target = { did, dwnUrl, delegateDid, protocol };
|
|
959
|
+
await this.openLivePullSubscription(target);
|
|
960
|
+
if (this._syncGeneration !== generation) { return; }
|
|
961
|
+
try {
|
|
962
|
+
await this.openLocalPushSubscription({
|
|
963
|
+
...target,
|
|
964
|
+
pushCursor: link.push.contiguousAppliedToken,
|
|
965
|
+
});
|
|
966
|
+
} catch (pushError) {
|
|
967
|
+
const pullSub = this._liveSubscriptions.find(
|
|
968
|
+
s => s.did === did && s.dwnUrl === dwnUrl && s.protocol === protocol
|
|
969
|
+
);
|
|
970
|
+
if (pullSub) {
|
|
971
|
+
try { await pullSub.close(); } catch { /* best effort */ }
|
|
972
|
+
this._liveSubscriptions = this._liveSubscriptions.filter(s => s !== pullSub);
|
|
973
|
+
}
|
|
974
|
+
throw pushError;
|
|
975
|
+
}
|
|
976
|
+
if (this._syncGeneration !== generation) { return; }
|
|
977
|
+
|
|
978
|
+
// Step 6: Clean up repair context and transition to live.
|
|
979
|
+
this._repairContext.delete(linkKey);
|
|
980
|
+
this._repairAttempts.delete(linkKey);
|
|
981
|
+
const retryTimer = this._repairRetryTimers.get(linkKey);
|
|
982
|
+
if (retryTimer) { clearTimeout(retryTimer); this._repairRetryTimers.delete(linkKey); }
|
|
983
|
+
const prevRepairConnectivity = link.connectivity;
|
|
984
|
+
link.connectivity = 'online';
|
|
985
|
+
await this.ledger.setStatus(link, 'live');
|
|
986
|
+
this.emitEvent({ type: 'repair:completed', tenantDid: did, remoteEndpoint: dwnUrl, protocol });
|
|
987
|
+
if (prevRepairConnectivity !== 'online') {
|
|
988
|
+
this.emitEvent({ type: 'link:connectivity-change', tenantDid: did, remoteEndpoint: dwnUrl, protocol, from: prevRepairConnectivity, to: 'online' });
|
|
989
|
+
}
|
|
990
|
+
this.emitEvent({ type: 'link:status-change', tenantDid: did, remoteEndpoint: dwnUrl, protocol, from: 'repairing', to: 'live' });
|
|
991
|
+
|
|
992
|
+
} catch (error: any) {
|
|
993
|
+
// If teardown occurred during repair, don't retry or enter degraded_poll.
|
|
994
|
+
if (this._syncGeneration !== generation) { return; }
|
|
995
|
+
|
|
996
|
+
console.error(`SyncEngineLevel: Repair failed for ${did} -> ${dwnUrl} (attempt ${attempts})`, error);
|
|
997
|
+
this.emitEvent({ type: 'repair:failed', tenantDid: did, remoteEndpoint: dwnUrl, protocol, attempt: attempts, error: String(error.message ?? error) });
|
|
998
|
+
|
|
999
|
+
if (attempts >= SyncEngineLevel.MAX_REPAIR_ATTEMPTS) {
|
|
1000
|
+
console.warn(`SyncEngineLevel: Max repair attempts reached for ${did} -> ${dwnUrl}, entering degraded_poll`);
|
|
1001
|
+
await this.enterDegradedPoll(linkKey);
|
|
1002
|
+
return;
|
|
1003
|
+
}
|
|
1004
|
+
|
|
1005
|
+
// Re-throw so callers (degraded_poll timer) can handle retry scheduling.
|
|
1006
|
+
throw error;
|
|
1007
|
+
}
|
|
1008
|
+
}
|
|
1009
|
+
|
|
1010
|
+
/**
 * Closes the pull subscription and the local push subscription that belong
 * to the given link, and drops each one from its tracking array.
 *
 * A subscription belongs to the link when its `did`, `dwnUrl` and `protocol`
 * equal the link's `tenantDid`, `remoteEndpoint` and `protocol`. At most one
 * entry per array is closed (the first match). Errors thrown by `close()`
 * are intentionally ignored: closing is best effort.
 *
 * @param link - The replication link whose subscriptions should be closed.
 */
private async closeLinkSubscriptions(link: ReplicationLinkState): Promise<void> {
  // Snapshot the identifying fields up front so both lookups use the same values.
  const tenant = link.tenantDid;
  const endpoint = link.remoteEndpoint;
  const linkProtocol = link.protocol;

  // Pull side: entry tracked in `_liveSubscriptions`.
  const livePull = this._liveSubscriptions.find(
    sub => sub.did === tenant && sub.dwnUrl === endpoint && sub.protocol === linkProtocol
  );
  if (livePull) {
    try {
      await livePull.close();
    } catch {
      /* best effort */
    }
    this._liveSubscriptions = this._liveSubscriptions.filter(sub => sub !== livePull);
  }

  // Push side: entry tracked in `_localSubscriptions`.
  const localPush = this._localSubscriptions.find(
    sub => sub.did === tenant && sub.dwnUrl === endpoint && sub.protocol === linkProtocol
  );
  if (localPush) {
    try {
      await localPush.close();
    } catch {
      /* best effort */
    }
    this._localSubscriptions = this._localSubscriptions.filter(sub => sub !== localPush);
  }
}
|
|
1034
|
+
|
|
1035
|
+
/**
 * Transition a link to `degraded_poll` and start a per-link polling timer.
 *
 * The timer fires every 15–30 seconds (15s base plus up to 15s of random
 * jitter, chosen once when the timer is created). Each tick marks the link
 * as `repairing` and calls `repairLink`; when the link ends up `live` the
 * timer stops itself, otherwise the link is put back into `degraded_poll`
 * and polling continues.
 *
 * Safety properties:
 * - No timer is registered if teardown happens while the status is being saved.
 * - Ticks never overlap: a tick that fires while the previous repair is still
 *   running is skipped, so a slow repair cannot cancel the poller.
 * - A tick only removes its own timer from `_degradedPollTimers`.
 *
 * @param linkKey - Key of the link in `_activeLinks`; unknown keys are a no-op.
 */
private async enterDegradedPoll(linkKey: string): Promise<void> {
  const link = this._activeLinks.get(linkKey);
  if (!link) { return; }

  // Capture the generation BEFORE the first await so a teardown that happens
  // while the status is being saved is detected below.
  const pollGeneration = this._syncGeneration;

  // NOTE(review): other call sites emit 'link:connectivity-change' when
  // connectivity changes; this one does not — confirm whether that is intended.
  link.connectivity = 'offline';

  const prevDegradedStatus = link.status;
  await this.ledger.setStatus(link, 'degraded_poll');
  if (this._syncGeneration !== pollGeneration) { return; } // Teardown occurred.

  this._repairAttempts.delete(linkKey);
  this.emitEvent({ type: 'link:status-change', tenantDid: link.tenantDid, remoteEndpoint: link.remoteEndpoint, protocol: link.protocol, from: prevDegradedStatus, to: 'degraded_poll' });
  this.emitEvent({ type: 'degraded-poll:entered', tenantDid: link.tenantDid, remoteEndpoint: link.remoteEndpoint, protocol: link.protocol });

  // Clear any existing timer for this link.
  const existing = this._degradedPollTimers.get(linkKey);
  if (existing) { clearInterval(existing); }

  // Schedule per-link polling with jitter (15-30 seconds).
  const baseInterval = 15_000;
  const jitter = Math.floor(Math.random() * 15_000);
  const interval = baseInterval + jitter;

  // True while a tick is awaiting repair; setInterval does not wait for the
  // async callback, so without this flag ticks could overlap.
  let tickInProgress = false;

  // Stop this timer and remove its map entry — but only if the entry still
  // refers to this timer (a newer enterDegradedPoll call may have replaced it).
  const stopPolling = (): void => {
    clearInterval(timer);
    if (this._degradedPollTimers.get(linkKey) === timer) {
      this._degradedPollTimers.delete(linkKey);
    }
  };

  const timer = setInterval(async (): Promise<void> => {
    // Bail if teardown occurred since this timer was created.
    if (this._syncGeneration !== pollGeneration) {
      stopPolling();
      return;
    }

    // The previous tick is still repairing (status is 'repairing' right now);
    // skip this tick instead of misreading that as an external transition.
    if (tickInProgress) { return; }

    // If the link was transitioned out of degraded_poll externally (e.g.,
    // by teardown or manual intervention), stop polling.
    if (link.status !== 'degraded_poll') {
      stopPolling();
      return;
    }

    tickInProgress = true;
    try {
      // Attempt repair. Reset attempt counter so repairLink doesn't
      // immediately re-enter degraded_poll on failure.
      this._repairAttempts.set(linkKey, 0);
      await this.ledger.setStatus(link, 'repairing');
      await this.repairLink(linkKey);

      // If repairLink succeeded, link is now 'live' — stop polling.
      if ((link.status as string) === 'live') {
        stopPolling();
      }
    } catch {
      // Teardown during the repair attempt: do not touch link state any more.
      if (this._syncGeneration !== pollGeneration) {
        stopPolling();
        return;
      }

      // Repair failed — restore degraded_poll status so the timer continues.
      // Without this the next tick would see status !== 'degraded_poll' and
      // stop the timer permanently.
      try {
        await this.ledger.setStatus(link, 'degraded_poll');
      } catch (restoreError) {
        // Never let a rejection escape the interval callback (it would be an
        // unhandled promise rejection).
        console.error(`SyncEngineLevel: Failed to restore degraded_poll status for ${link.tenantDid} -> ${link.remoteEndpoint}`, restoreError);
      }
    } finally {
      tickInProgress = false;
    }
  }, interval);

  this._degradedPollTimers.set(linkKey, timer);
}
|
|
1100
|
+
|
|
520
1101
|
/**
|
|
521
1102
|
* Tears down all live subscriptions and push listeners.
|
|
522
1103
|
*/
|
|
523
1104
|
private async teardownLiveSync(): Promise<void> {
|
|
1105
|
+
// Increment generation to invalidate all in-flight async operations
|
|
1106
|
+
// (repairs, retry timers, degraded-poll ticks). Any async work that
|
|
1107
|
+
// captured the previous generation will bail on its next checkpoint.
|
|
1108
|
+
this._syncGeneration++;
|
|
1109
|
+
|
|
524
1110
|
// Clear the push debounce timer.
|
|
525
1111
|
if (this._pushDebounceTimer) {
|
|
526
1112
|
clearTimeout(this._pushDebounceTimer);
|
|
@@ -549,6 +1135,26 @@ export class SyncEngineLevel implements SyncEngine {
|
|
|
549
1135
|
}
|
|
550
1136
|
}
|
|
551
1137
|
this._localSubscriptions = [];
|
|
1138
|
+
|
|
1139
|
+
// Clear degraded-poll timers and repair state.
|
|
1140
|
+
for (const timer of this._degradedPollTimers.values()) {
|
|
1141
|
+
clearInterval(timer);
|
|
1142
|
+
}
|
|
1143
|
+
this._degradedPollTimers.clear();
|
|
1144
|
+
this._repairAttempts.clear();
|
|
1145
|
+
this._activeRepairs.clear();
|
|
1146
|
+
for (const timer of this._repairRetryTimers.values()) {
|
|
1147
|
+
clearTimeout(timer);
|
|
1148
|
+
}
|
|
1149
|
+
this._repairRetryTimers.clear();
|
|
1150
|
+
this._repairContext.clear();
|
|
1151
|
+
|
|
1152
|
+
// Clear closure evaluation contexts.
|
|
1153
|
+
this._closureContexts.clear();
|
|
1154
|
+
|
|
1155
|
+
// Clear the in-memory link and runtime state.
|
|
1156
|
+
this._activeLinks.clear();
|
|
1157
|
+
this._linkRuntimes.clear();
|
|
552
1158
|
}
|
|
553
1159
|
|
|
554
1160
|
// ---------------------------------------------------------------------------
|
|
@@ -564,49 +1170,124 @@ export class SyncEngineLevel implements SyncEngine {
|
|
|
564
1170
|
}): Promise<void> {
|
|
565
1171
|
const { did, delegateDid, dwnUrl, protocol } = target;
|
|
566
1172
|
|
|
567
|
-
// Resolve the cursor from the
|
|
1173
|
+
// Resolve the cursor from the link's pull checkpoint (preferred) or legacy storage.
|
|
568
1174
|
const cursorKey = this.buildCursorKey(did, dwnUrl, protocol);
|
|
569
|
-
const
|
|
1175
|
+
const link = this._activeLinks.get(cursorKey);
|
|
1176
|
+
const cursor = link?.pull.contiguousAppliedToken ?? await this.getCursor(cursorKey);
|
|
570
1177
|
|
|
571
1178
|
// Build the MessagesSubscribe filters.
|
|
572
|
-
|
|
1179
|
+
// When the link has protocolPathPrefixes, include them in the filter so the
|
|
1180
|
+
// EventLog delivers only matching events (server-side filtering). This replaces
|
|
1181
|
+
// the less efficient agent-side isEventInScope filtering for the pull path.
|
|
1182
|
+
// Note: only the first prefix is used as the MessagesFilter field because
|
|
1183
|
+
// MessagesFilter.protocolPathPrefix is a single string. Multiple prefixes
|
|
1184
|
+
// would need multiple filters (OR semantics) — for now we use the first one.
|
|
1185
|
+
const protocolPathPrefix = link?.scope.kind === 'protocol'
|
|
1186
|
+
? link.scope.protocolPathPrefixes?.[0]
|
|
1187
|
+
: undefined;
|
|
1188
|
+
const filters = protocol
|
|
1189
|
+
? [{ protocol, ...(protocolPathPrefix ? { protocolPathPrefix } : {}) }]
|
|
1190
|
+
: [];
|
|
573
1191
|
|
|
574
1192
|
// Look up permission grant for MessagesSubscribe if using a delegate.
|
|
1193
|
+
// The unified scope matching in AgentPermissionsApi accepts a
|
|
1194
|
+
// Messages.Read grant for MessagesSubscribe requests, so a single
|
|
1195
|
+
// lookup is sufficient.
|
|
575
1196
|
let permissionGrantId: string | undefined;
|
|
576
1197
|
if (delegateDid) {
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
permissionGrantId = grant.grant.id;
|
|
586
|
-
} catch {
|
|
587
|
-
// Fall back to trying MessagesRead which is a unified scope.
|
|
588
|
-
const grant = await this._permissionsApi.getPermissionForRequest({
|
|
589
|
-
connectedDid : did,
|
|
590
|
-
messageType : DwnInterface.MessagesRead,
|
|
591
|
-
delegateDid,
|
|
592
|
-
protocol,
|
|
593
|
-
cached : true
|
|
594
|
-
});
|
|
595
|
-
permissionGrantId = grant.grant.id;
|
|
596
|
-
}
|
|
1198
|
+
const grant = await this._permissionsApi.getPermissionForRequest({
|
|
1199
|
+
connectedDid : did,
|
|
1200
|
+
messageType : DwnInterface.MessagesSubscribe,
|
|
1201
|
+
delegateDid,
|
|
1202
|
+
protocol,
|
|
1203
|
+
cached : true
|
|
1204
|
+
});
|
|
1205
|
+
permissionGrantId = grant.grant.id;
|
|
597
1206
|
}
|
|
598
1207
|
|
|
599
1208
|
// Define the subscription handler that processes incoming events.
|
|
1209
|
+
// NOTE: The WebSocket client fires handlers without awaiting (fire-and-forget),
|
|
1210
|
+
// so multiple handlers can be in-flight concurrently. The ordinal tracker
|
|
1211
|
+
// ensures the checkpoint advances only when all earlier deliveries are committed.
|
|
600
1212
|
const subscriptionHandler = async (subMessage: SubscriptionMessage): Promise<void> => {
|
|
601
1213
|
if (subMessage.type === 'eose') {
|
|
602
|
-
// End-of-stored-events — catch-up complete
|
|
603
|
-
|
|
604
|
-
|
|
1214
|
+
// End-of-stored-events — catch-up complete.
|
|
1215
|
+
if (link) {
|
|
1216
|
+
// Guard: if the link transitioned to repairing while catch-up events
|
|
1217
|
+
// were being processed, skip all mutations — repair owns the state now.
|
|
1218
|
+
if (link.status !== 'live' && link.status !== 'initializing') {
|
|
1219
|
+
return;
|
|
1220
|
+
}
|
|
1221
|
+
|
|
1222
|
+
if (!ReplicationLedger.validateTokenDomain(link.pull, subMessage.cursor)) {
|
|
1223
|
+
console.warn(`SyncEngineLevel: Token domain mismatch on EOSE for ${did} -> ${dwnUrl}, transitioning to repairing`);
|
|
1224
|
+
await this.transitionToRepairing(cursorKey, link);
|
|
1225
|
+
return;
|
|
1226
|
+
}
|
|
1227
|
+
ReplicationLedger.setReceivedToken(link.pull, subMessage.cursor);
|
|
1228
|
+
// Drain committed entries. Do NOT unconditionally advance to the
|
|
1229
|
+
// EOSE cursor — earlier stored events may still be in-flight
|
|
1230
|
+
// (handlers are fire-and-forget). The checkpoint advances only as
|
|
1231
|
+
// far as the contiguous drain reaches.
|
|
1232
|
+
this.drainCommittedPull(cursorKey);
|
|
1233
|
+
await this.ledger.saveLink(link);
|
|
1234
|
+
} else {
|
|
1235
|
+
await this.setCursor(cursorKey, subMessage.cursor);
|
|
1236
|
+
}
|
|
1237
|
+
// Transport is reachable — set connectivity to online.
|
|
1238
|
+
if (link) {
|
|
1239
|
+
const prevEoseConnectivity = link.connectivity;
|
|
1240
|
+
link.connectivity = 'online';
|
|
1241
|
+
if (prevEoseConnectivity !== 'online') {
|
|
1242
|
+
this.emitEvent({ type: 'link:connectivity-change', tenantDid: did, remoteEndpoint: dwnUrl, protocol, from: prevEoseConnectivity, to: 'online' });
|
|
1243
|
+
}
|
|
1244
|
+
} else {
|
|
1245
|
+
this._connectivityState = 'online';
|
|
1246
|
+
}
|
|
605
1247
|
return;
|
|
606
1248
|
}
|
|
607
1249
|
|
|
608
1250
|
if (subMessage.type === 'event') {
|
|
609
1251
|
const event: MessageEvent = subMessage.event;
|
|
1252
|
+
|
|
1253
|
+
// Guard: if the link is not live (e.g., repairing, degraded_poll, paused),
|
|
1254
|
+
// skip all processing. Old subscription handlers may still fire after the
|
|
1255
|
+
// link transitions — these events should be ignored entirely, not just
|
|
1256
|
+
// skipped at the checkpoint level.
|
|
1257
|
+
if (link && link.status !== 'live' && link.status !== 'initializing') {
|
|
1258
|
+
return;
|
|
1259
|
+
}
|
|
1260
|
+
|
|
1261
|
+
// Domain validation: reject tokens from a different stream/epoch.
|
|
1262
|
+
if (link && !ReplicationLedger.validateTokenDomain(link.pull, subMessage.cursor)) {
|
|
1263
|
+
console.warn(`SyncEngineLevel: Token domain mismatch for ${did} -> ${dwnUrl}, transitioning to repairing`);
|
|
1264
|
+
await this.transitionToRepairing(cursorKey, link);
|
|
1265
|
+
return;
|
|
1266
|
+
}
|
|
1267
|
+
|
|
1268
|
+
// Subset scope filtering: if the link has protocolPath/contextId prefixes,
|
|
1269
|
+
// skip events that don't match. This is agent-side filtering because
|
|
1270
|
+
// MessagesSubscribe only supports protocol-level filtering today.
|
|
1271
|
+
//
|
|
1272
|
+
// Skipped events MUST advance contiguousAppliedToken — otherwise the
|
|
1273
|
+
// link would replay the same filtered-out events indefinitely after
|
|
1274
|
+
// reconnect/repair. This is safe because the event is intentionally
|
|
1275
|
+
// excluded from this scope and doesn't need processing.
|
|
1276
|
+
if (link && !isEventInScope(event.message, link.scope)) {
|
|
1277
|
+
ReplicationLedger.setReceivedToken(link.pull, subMessage.cursor);
|
|
1278
|
+
ReplicationLedger.commitContiguousToken(link.pull, subMessage.cursor);
|
|
1279
|
+
await this.ledger.saveLink(link);
|
|
1280
|
+
return;
|
|
1281
|
+
}
|
|
1282
|
+
|
|
1283
|
+
// Assign a delivery ordinal BEFORE async processing begins.
|
|
1284
|
+
// This captures the delivery order even if processing completes out of order.
|
|
1285
|
+
const rt = link ? this.getOrCreateRuntime(cursorKey) : undefined;
|
|
1286
|
+
const ordinal = rt ? rt.nextDeliveryOrdinal++ : -1;
|
|
1287
|
+
if (rt) {
|
|
1288
|
+
rt.inflight.set(ordinal, { ordinal, token: subMessage.cursor, committed: false });
|
|
1289
|
+
}
|
|
1290
|
+
|
|
610
1291
|
try {
|
|
611
1292
|
// Extract inline data from the event (available for records <= 30 KB).
|
|
612
1293
|
let dataStream = this.extractDataStream(event);
|
|
@@ -627,12 +1308,99 @@ export class SyncEngineLevel implements SyncEngine {
|
|
|
627
1308
|
}
|
|
628
1309
|
|
|
629
1310
|
await this.agent.dwn.processRawMessage(did, event.message, { dataStream });
|
|
1311
|
+
|
|
1312
|
+
// Invalidate closure cache entries that may be affected by this message.
|
|
1313
|
+
// Must run before closure validation so subsequent evaluations in the
|
|
1314
|
+
// same session see the updated local state.
|
|
1315
|
+
const closureCtxForInvalidation = this._closureContexts.get(did);
|
|
1316
|
+
if (closureCtxForInvalidation) {
|
|
1317
|
+
invalidateClosureCache(closureCtxForInvalidation, event.message);
|
|
1318
|
+
}
|
|
1319
|
+
|
|
1320
|
+
// Closure validation for scoped subset sync (Phase 3).
|
|
1321
|
+
// For protocol-scoped links, verify that all hard dependencies for
|
|
1322
|
+
// this operation are locally present before considering it committed.
|
|
1323
|
+
// Full-tenant scope bypasses this entirely (returns complete with 0 queries).
|
|
1324
|
+
if (link && link.scope.kind === 'protocol') {
|
|
1325
|
+
const messageStore = this.agent.dwn.node.storage.messageStore;
|
|
1326
|
+
let closureCtx = this._closureContexts.get(did);
|
|
1327
|
+
if (!closureCtx) {
|
|
1328
|
+
closureCtx = createClosureContext(did);
|
|
1329
|
+
this._closureContexts.set(did, closureCtx);
|
|
1330
|
+
}
|
|
1331
|
+
|
|
1332
|
+
const closureResult = await evaluateClosure(
|
|
1333
|
+
event.message, messageStore, link.scope, closureCtx
|
|
1334
|
+
);
|
|
1335
|
+
|
|
1336
|
+
if (!closureResult.complete) {
|
|
1337
|
+
console.warn(
|
|
1338
|
+
`SyncEngineLevel: Closure incomplete for ${did} -> ${dwnUrl}: ` +
|
|
1339
|
+
`${closureResult.failure!.code} — ${closureResult.failure!.detail}`
|
|
1340
|
+
);
|
|
1341
|
+
await this.transitionToRepairing(cursorKey, link);
|
|
1342
|
+
return;
|
|
1343
|
+
}
|
|
1344
|
+
}
|
|
1345
|
+
|
|
1346
|
+
// Squash convergence: processRawMessage triggers the DWN's built-in
|
|
1347
|
+
// squash resumable task (performRecordsSquash) which runs inline and
|
|
1348
|
+
// handles subset consumers correctly:
|
|
1349
|
+
// - If older siblings are locally present → purges them
|
|
1350
|
+
// - If squash arrives before older siblings → backstop rejects them (409)
|
|
1351
|
+
// - If no older siblings are local → no-op (correct)
|
|
1352
|
+
// Both sync orderings (squash-first or siblings-first) converge to
|
|
1353
|
+
// the same final state. No additional sync-engine side-effect is needed.
|
|
1354
|
+
|
|
1355
|
+
// Track this CID for echo-loop suppression, scoped to the source endpoint.
|
|
1356
|
+
const pulledCid = await Message.getCid(event.message);
|
|
1357
|
+
this._recentlyPulledCids.set(`${pulledCid}|${dwnUrl}`, Date.now() + SyncEngineLevel.ECHO_SUPPRESS_TTL_MS);
|
|
1358
|
+
this.evictExpiredEchoEntries();
|
|
1359
|
+
|
|
1360
|
+
// Mark this ordinal as committed and drain the checkpoint.
|
|
1361
|
+
// Guard: if the link transitioned to repairing while this handler was
|
|
1362
|
+
// in-flight (e.g., an earlier ordinal's handler failed concurrently),
|
|
1363
|
+
// skip all state mutations — the repair process owns progression now.
|
|
1364
|
+
if (link && rt && link.status === 'live') {
|
|
1365
|
+
const entry = rt.inflight.get(ordinal);
|
|
1366
|
+
if (entry) { entry.committed = true; }
|
|
1367
|
+
|
|
1368
|
+
ReplicationLedger.setReceivedToken(link.pull, subMessage.cursor);
|
|
1369
|
+
const drained = this.drainCommittedPull(cursorKey);
|
|
1370
|
+
if (drained > 0) {
|
|
1371
|
+
await this.ledger.saveLink(link);
|
|
1372
|
+
// Emit after durable save — "advanced" means persisted.
|
|
1373
|
+
if (link.pull.contiguousAppliedToken) {
|
|
1374
|
+
this.emitEvent({
|
|
1375
|
+
type : 'checkpoint:pull-advance',
|
|
1376
|
+
tenantDid : link.tenantDid,
|
|
1377
|
+
remoteEndpoint : link.remoteEndpoint,
|
|
1378
|
+
protocol : link.protocol,
|
|
1379
|
+
position : link.pull.contiguousAppliedToken.position,
|
|
1380
|
+
messageCid : link.pull.contiguousAppliedToken.messageCid,
|
|
1381
|
+
});
|
|
1382
|
+
}
|
|
1383
|
+
}
|
|
1384
|
+
|
|
1385
|
+
// Overflow: too many in-flight ordinals without draining.
|
|
1386
|
+
if (rt.inflight.size > MAX_PENDING_TOKENS) {
|
|
1387
|
+
console.warn(`SyncEngineLevel: Pull in-flight overflow for ${did} -> ${dwnUrl}, transitioning to repairing`);
|
|
1388
|
+
await this.transitionToRepairing(cursorKey, link);
|
|
1389
|
+
}
|
|
1390
|
+
} else if (!link) {
|
|
1391
|
+
// Legacy path: no link available, use simple cursor persistence.
|
|
1392
|
+
await this.setCursor(cursorKey, subMessage.cursor);
|
|
1393
|
+
}
|
|
630
1394
|
} catch (error: any) {
|
|
631
1395
|
console.error(`SyncEngineLevel: Error processing live-pull event for ${did}`, error);
|
|
1396
|
+
// A failed processRawMessage means local state is incomplete.
|
|
1397
|
+
// Transition to repairing immediately — do NOT advance the checkpoint
|
|
1398
|
+
// past this failure or let later ordinals commit past it. SMT
|
|
1399
|
+
// reconciliation will discover and fill the gap.
|
|
1400
|
+
if (link) {
|
|
1401
|
+
await this.transitionToRepairing(cursorKey, link);
|
|
1402
|
+
}
|
|
632
1403
|
}
|
|
633
|
-
|
|
634
|
-
// Persist cursor for resume on reconnect.
|
|
635
|
-
await this.setCursor(cursorKey, subMessage.cursor);
|
|
636
1404
|
}
|
|
637
1405
|
};
|
|
638
1406
|
|
|
@@ -656,10 +1424,12 @@ export class SyncEngineLevel implements SyncEngine {
|
|
|
656
1424
|
|
|
657
1425
|
// Build a resubscribe factory so the WebSocket client can resume with
|
|
658
1426
|
// a fresh cursor-stamped message after reconnection.
|
|
659
|
-
const resubscribeFactory: ResubscribeFactory = async (resumeCursor?:
|
|
1427
|
+
const resubscribeFactory: ResubscribeFactory = async (resumeCursor?: ProgressToken) => {
|
|
1428
|
+
// On reconnect, use the latest durable checkpoint position if available.
|
|
1429
|
+
const effectiveCursor = resumeCursor ?? link?.pull.contiguousAppliedToken ?? cursor;
|
|
660
1430
|
const resumeRequest = {
|
|
661
1431
|
...subscribeRequest,
|
|
662
|
-
messageParams: { ...subscribeRequest.messageParams, cursor:
|
|
1432
|
+
messageParams: { ...subscribeRequest.messageParams, cursor: effectiveCursor },
|
|
663
1433
|
};
|
|
664
1434
|
const { message: resumeMsg } = await this.agent.dwn.processRequest(resumeRequest);
|
|
665
1435
|
if (!resumeMsg) {
|
|
@@ -682,9 +1452,15 @@ export class SyncEngineLevel implements SyncEngine {
|
|
|
682
1452
|
resubscribeFactory,
|
|
683
1453
|
},
|
|
684
1454
|
}) as MessagesSubscribeReply;
|
|
1455
|
+
if (reply.status.code === 410) {
|
|
1456
|
+
// ProgressGap — the cursor is no longer replayable. The link needs repair.
|
|
1457
|
+
const gapError = new Error(`SyncEngineLevel: ProgressGap for ${did} -> ${dwnUrl}: ${reply.status.detail}`);
|
|
1458
|
+
(gapError as any).isProgressGap = true;
|
|
1459
|
+
(gapError as any).gapInfo = reply.error;
|
|
1460
|
+
throw gapError;
|
|
1461
|
+
}
|
|
685
1462
|
if (reply.status.code !== 200 || !reply.subscription) {
|
|
686
|
-
|
|
687
|
-
return;
|
|
1463
|
+
throw new Error(`SyncEngineLevel: MessagesSubscribe failed for ${did} -> ${dwnUrl}: ${reply.status.code} ${reply.status.detail}`);
|
|
688
1464
|
}
|
|
689
1465
|
|
|
690
1466
|
this._liveSubscriptions.push({
|
|
@@ -695,7 +1471,15 @@ export class SyncEngineLevel implements SyncEngine {
|
|
|
695
1471
|
close: async (): Promise<void> => { await reply.subscription!.close(); },
|
|
696
1472
|
});
|
|
697
1473
|
|
|
698
|
-
|
|
1474
|
+
// Set per-link connectivity to online after successful subscription setup.
|
|
1475
|
+
const pullLink = this._activeLinks.get(this.buildCursorKey(did, dwnUrl, protocol));
|
|
1476
|
+
if (pullLink) {
|
|
1477
|
+
const prevPullConnectivity = pullLink.connectivity;
|
|
1478
|
+
pullLink.connectivity = 'online';
|
|
1479
|
+
if (prevPullConnectivity !== 'online') {
|
|
1480
|
+
this.emitEvent({ type: 'link:connectivity-change', tenantDid: did, remoteEndpoint: dwnUrl, protocol, from: prevPullConnectivity, to: 'online' });
|
|
1481
|
+
}
|
|
1482
|
+
}
|
|
699
1483
|
}
|
|
700
1484
|
|
|
701
1485
|
// ---------------------------------------------------------------------------
|
|
@@ -708,6 +1492,7 @@ export class SyncEngineLevel implements SyncEngine {
|
|
|
708
1492
|
*/
|
|
709
1493
|
private async openLocalPushSubscription(target: {
|
|
710
1494
|
did: string; dwnUrl: string; delegateDid?: string; protocol?: string;
|
|
1495
|
+
pushCursor?: ProgressToken;
|
|
711
1496
|
}): Promise<void> {
|
|
712
1497
|
const { did, delegateDid, dwnUrl, protocol } = target;
|
|
713
1498
|
|
|
@@ -719,7 +1504,7 @@ export class SyncEngineLevel implements SyncEngine {
|
|
|
719
1504
|
if (delegateDid) {
|
|
720
1505
|
const grant = await this._permissionsApi.getPermissionForRequest({
|
|
721
1506
|
connectedDid : did,
|
|
722
|
-
messageType : DwnInterface.
|
|
1507
|
+
messageType : DwnInterface.MessagesSubscribe,
|
|
723
1508
|
delegateDid,
|
|
724
1509
|
protocol,
|
|
725
1510
|
cached : true,
|
|
@@ -733,6 +1518,33 @@ export class SyncEngineLevel implements SyncEngine {
|
|
|
733
1518
|
return;
|
|
734
1519
|
}
|
|
735
1520
|
|
|
1521
|
+
// Subset scope filtering for push: only push events that match the
|
|
1522
|
+
// link's scope prefixes. Events outside the scope are not our responsibility.
|
|
1523
|
+
// Skipped events MUST advance the push checkpoint to prevent infinite
|
|
1524
|
+
// replay after repair/reconnect (same reason as the pull side).
|
|
1525
|
+
const pushLink = this._activeLinks.get(this.buildCursorKey(did, dwnUrl, protocol));
|
|
1526
|
+
if (pushLink && !isEventInScope(subMessage.event.message, pushLink.scope)) {
|
|
1527
|
+
// Guard: only mutate durable state when the link is live/initializing.
|
|
1528
|
+
// During repair/degraded_poll, orchestration owns checkpoint progression.
|
|
1529
|
+
if (pushLink.status !== 'live' && pushLink.status !== 'initializing') {
|
|
1530
|
+
return;
|
|
1531
|
+
}
|
|
1532
|
+
|
|
1533
|
+
// Validate token domain before committing — a stream/epoch mismatch
|
|
1534
|
+
// on the local EventLog should trigger repair, not silently overwrite.
|
|
1535
|
+
if (!ReplicationLedger.validateTokenDomain(pushLink.push, subMessage.cursor)) {
|
|
1536
|
+
await this.transitionToRepairing(
|
|
1537
|
+
this.buildCursorKey(did, dwnUrl, protocol), pushLink
|
|
1538
|
+
);
|
|
1539
|
+
return;
|
|
1540
|
+
}
|
|
1541
|
+
|
|
1542
|
+
ReplicationLedger.setReceivedToken(pushLink.push, subMessage.cursor);
|
|
1543
|
+
ReplicationLedger.commitContiguousToken(pushLink.push, subMessage.cursor);
|
|
1544
|
+
await this.ledger.saveLink(pushLink);
|
|
1545
|
+
return;
|
|
1546
|
+
}
|
|
1547
|
+
|
|
736
1548
|
// Accumulate the message CID for a debounced push.
|
|
737
1549
|
const targetKey = this.buildCursorKey(did, dwnUrl, protocol);
|
|
738
1550
|
const cid = await Message.getCid(subMessage.event.message);
|
|
@@ -740,12 +1552,19 @@ export class SyncEngineLevel implements SyncEngine {
|
|
|
740
1552
|
return;
|
|
741
1553
|
}
|
|
742
1554
|
|
|
1555
|
+
// Echo-loop suppression: skip CIDs that were recently pulled from this
|
|
1556
|
+
// specific remote. A message pulled from Provider A is only suppressed
|
|
1557
|
+
// for push to A — it still fans out to Provider B and C.
|
|
1558
|
+
if (this.isRecentlyPulled(cid, dwnUrl)) {
|
|
1559
|
+
return;
|
|
1560
|
+
}
|
|
1561
|
+
|
|
743
1562
|
let pending = this._pendingPushCids.get(targetKey);
|
|
744
1563
|
if (!pending) {
|
|
745
|
-
pending = { did, dwnUrl, delegateDid, protocol,
|
|
1564
|
+
pending = { did, dwnUrl, delegateDid, protocol, entries: [] };
|
|
746
1565
|
this._pendingPushCids.set(targetKey, pending);
|
|
747
1566
|
}
|
|
748
|
-
pending.
|
|
1567
|
+
pending.entries.push({ cid, localToken: subMessage.cursor });
|
|
749
1568
|
|
|
750
1569
|
// Debounce the push.
|
|
751
1570
|
if (this._pushDebounceTimer) {
|
|
@@ -757,19 +1576,21 @@ export class SyncEngineLevel implements SyncEngine {
|
|
|
757
1576
|
};
|
|
758
1577
|
|
|
759
1578
|
// Process the local subscription request.
|
|
1579
|
+
// When a push cursor is provided (e.g., after repair), the local subscription
|
|
1580
|
+
// replays events from that position, closing the race window where local
|
|
1581
|
+
// writes during repair would otherwise be missed by push-on-write.
|
|
760
1582
|
const response = await this.agent.dwn.processRequest({
|
|
761
1583
|
author : did,
|
|
762
1584
|
target : did,
|
|
763
1585
|
messageType : DwnInterface.MessagesSubscribe,
|
|
764
1586
|
granteeDid : delegateDid,
|
|
765
|
-
messageParams : { filters, permissionGrantId },
|
|
1587
|
+
messageParams : { filters, permissionGrantId, cursor: target.pushCursor },
|
|
766
1588
|
subscriptionHandler : subscriptionHandler as any,
|
|
767
1589
|
});
|
|
768
1590
|
|
|
769
1591
|
const reply = response.reply as MessagesSubscribeReply;
|
|
770
1592
|
if (reply.status.code !== 200 || !reply.subscription) {
|
|
771
|
-
|
|
772
|
-
return;
|
|
1593
|
+
throw new Error(`SyncEngineLevel: Local MessagesSubscribe failed for ${did}: ${reply.status.code} ${reply.status.detail}`);
|
|
773
1594
|
}
|
|
774
1595
|
|
|
775
1596
|
this._localSubscriptions.push({
|
|
@@ -787,26 +1608,91 @@ export class SyncEngineLevel implements SyncEngine {
|
|
|
787
1608
|
private async flushPendingPushes(): Promise<void> {
|
|
788
1609
|
this._pushDebounceTimer = undefined;
|
|
789
1610
|
|
|
790
|
-
const
|
|
1611
|
+
const batches = [...this._pendingPushCids.entries()];
|
|
791
1612
|
this._pendingPushCids.clear();
|
|
792
1613
|
|
|
793
|
-
|
|
794
|
-
|
|
795
|
-
|
|
796
|
-
|
|
1614
|
+
// Push to all endpoints in parallel — each target is independent.
|
|
1615
|
+
await Promise.all(batches.map(async ([targetKey, pending]) => {
|
|
1616
|
+
const { did, dwnUrl, delegateDid, protocol, entries: pushEntries } = pending;
|
|
1617
|
+
if (pushEntries.length === 0) {
|
|
1618
|
+
return;
|
|
797
1619
|
}
|
|
798
1620
|
|
|
1621
|
+
const cids = pushEntries.map(e => e.cid);
|
|
1622
|
+
|
|
799
1623
|
try {
|
|
800
|
-
await pushMessages({
|
|
1624
|
+
const result = await pushMessages({
|
|
801
1625
|
did, dwnUrl, delegateDid, protocol,
|
|
802
1626
|
messageCids : cids,
|
|
803
1627
|
agent : this.agent,
|
|
804
1628
|
permissionsApi : this._permissionsApi,
|
|
805
1629
|
});
|
|
1630
|
+
|
|
1631
|
+
// Advance the push checkpoint for successfully pushed entries.
|
|
1632
|
+
// Push is sequential (single batch, in-order processing) so we can
|
|
1633
|
+
// commit directly without ordinal tracking — there's no concurrent
|
|
1634
|
+
// completion to reorder.
|
|
1635
|
+
const link = this._activeLinks.get(targetKey);
|
|
1636
|
+
if (link) {
|
|
1637
|
+
const succeededSet = new Set(result.succeeded);
|
|
1638
|
+
// Track highest contiguous success: if a CID fails, we stop advancing.
|
|
1639
|
+
let hitFailure = false;
|
|
1640
|
+
for (const entry of pushEntries) {
|
|
1641
|
+
if (hitFailure) { break; }
|
|
1642
|
+
if (succeededSet.has(entry.cid) && entry.localToken) {
|
|
1643
|
+
if (!ReplicationLedger.validateTokenDomain(link.push, entry.localToken)) {
|
|
1644
|
+
console.warn(`SyncEngineLevel: Push checkpoint domain mismatch for ${did} -> ${dwnUrl}, transitioning to repairing`);
|
|
1645
|
+
await this.transitionToRepairing(targetKey, link);
|
|
1646
|
+
break;
|
|
1647
|
+
}
|
|
1648
|
+
ReplicationLedger.setReceivedToken(link.push, entry.localToken);
|
|
1649
|
+
ReplicationLedger.commitContiguousToken(link.push, entry.localToken);
|
|
1650
|
+
} else {
|
|
1651
|
+
// This CID failed or had no token — stop advancing.
|
|
1652
|
+
hitFailure = true;
|
|
1653
|
+
}
|
|
1654
|
+
}
|
|
1655
|
+
await this.ledger.saveLink(link);
|
|
1656
|
+
}
|
|
1657
|
+
|
|
1658
|
+
// Re-queue failed entries so they are retried on the next debounce
|
|
1659
|
+
// cycle (or picked up by the SMT integrity check).
|
|
1660
|
+
if (result.failed.length > 0) {
|
|
1661
|
+
console.error(`SyncEngineLevel: Push-on-write failed for ${did} -> ${dwnUrl}: ${result.failed.length} of ${cids.length} messages failed`);
|
|
1662
|
+
const failedSet = new Set(result.failed);
|
|
1663
|
+
const failedEntries = pushEntries.filter(e => failedSet.has(e.cid));
|
|
1664
|
+
let requeued = this._pendingPushCids.get(targetKey);
|
|
1665
|
+
if (!requeued) {
|
|
1666
|
+
requeued = { did, dwnUrl, delegateDid, protocol, entries: [] };
|
|
1667
|
+
this._pendingPushCids.set(targetKey, requeued);
|
|
1668
|
+
}
|
|
1669
|
+
requeued.entries.push(...failedEntries);
|
|
1670
|
+
|
|
1671
|
+
// Schedule a retry after a short delay.
|
|
1672
|
+
if (!this._pushDebounceTimer) {
|
|
1673
|
+
this._pushDebounceTimer = setTimeout((): void => {
|
|
1674
|
+
void this.flushPendingPushes();
|
|
1675
|
+
}, PUSH_DEBOUNCE_MS * 4); // Back off: 1 second instead of 250ms.
|
|
1676
|
+
}
|
|
1677
|
+
}
|
|
806
1678
|
} catch (error: any) {
|
|
1679
|
+
// Truly unexpected error (not per-message failure). Re-queue entire
|
|
1680
|
+
// batch so entries aren't silently dropped from the debounce queue.
|
|
807
1681
|
console.error(`SyncEngineLevel: Push-on-write failed for ${did} -> ${dwnUrl}`, error);
|
|
1682
|
+
let requeued = this._pendingPushCids.get(targetKey);
|
|
1683
|
+
if (!requeued) {
|
|
1684
|
+
requeued = { did, dwnUrl, delegateDid, protocol, entries: [] };
|
|
1685
|
+
this._pendingPushCids.set(targetKey, requeued);
|
|
1686
|
+
}
|
|
1687
|
+
requeued.entries.push(...pushEntries);
|
|
1688
|
+
|
|
1689
|
+
if (!this._pushDebounceTimer) {
|
|
1690
|
+
this._pushDebounceTimer = setTimeout((): void => {
|
|
1691
|
+
void this.flushPendingPushes();
|
|
1692
|
+
}, PUSH_DEBOUNCE_MS * 4);
|
|
1693
|
+
}
|
|
808
1694
|
}
|
|
809
|
-
}
|
|
1695
|
+
}));
|
|
810
1696
|
}
|
|
811
1697
|
|
|
812
1698
|
// ---------------------------------------------------------------------------
|
|
@@ -818,10 +1704,29 @@ export class SyncEngineLevel implements SyncEngine {
|
|
|
818
1704
|
return protocol ? `${base}${CURSOR_SEPARATOR}${protocol}` : base;
|
|
819
1705
|
}
|
|
820
1706
|
|
|
821
|
-
|
|
1707
|
+
/**
|
|
1708
|
+
* Retrieves a stored progress token. Handles migration from old string cursors:
|
|
1709
|
+
* if the stored value is a bare string (pre-ProgressToken format), it is treated
|
|
1710
|
+
* as absent — the sync engine will do a full SMT reconciliation on first startup
|
|
1711
|
+
* after upgrade, which is correct and safe.
|
|
1712
|
+
*/
|
|
1713
|
+
private async getCursor(key: string): Promise<ProgressToken | undefined> {
|
|
822
1714
|
const cursors = this._db.sublevel('syncCursors');
|
|
823
1715
|
try {
|
|
824
|
-
|
|
1716
|
+
const raw = await cursors.get(key);
|
|
1717
|
+
try {
|
|
1718
|
+
const parsed = JSON.parse(raw);
|
|
1719
|
+
if (parsed && typeof parsed === 'object' &&
|
|
1720
|
+
typeof parsed.streamId === 'string' &&
|
|
1721
|
+
typeof parsed.epoch === 'string' &&
|
|
1722
|
+
typeof parsed.position === 'string' &&
|
|
1723
|
+
typeof parsed.messageCid === 'string') {
|
|
1724
|
+
return parsed as ProgressToken;
|
|
1725
|
+
}
|
|
1726
|
+
} catch {
|
|
1727
|
+
// Not valid JSON (old string cursor) — treat as absent.
|
|
1728
|
+
}
|
|
1729
|
+
return undefined;
|
|
825
1730
|
} catch (error) {
|
|
826
1731
|
const e = error as { code: string };
|
|
827
1732
|
if (e.code === 'LEVEL_NOT_FOUND') {
|
|
@@ -831,9 +1736,9 @@ export class SyncEngineLevel implements SyncEngine {
|
|
|
831
1736
|
}
|
|
832
1737
|
}
|
|
833
1738
|
|
|
834
|
-
private async setCursor(key: string, cursor:
|
|
1739
|
+
private async setCursor(key: string, cursor: ProgressToken): Promise<void> {
|
|
835
1740
|
const cursors = this._db.sublevel('syncCursors');
|
|
836
|
-
await cursors.put(key, cursor);
|
|
1741
|
+
await cursors.put(key, JSON.stringify(cursor));
|
|
837
1742
|
}
|
|
838
1743
|
|
|
839
1744
|
// ---------------------------------------------------------------------------
|
|
@@ -870,8 +1775,6 @@ export class SyncEngineLevel implements SyncEngine {
|
|
|
870
1775
|
return undefined;
|
|
871
1776
|
}
|
|
872
1777
|
|
|
873
|
-
|
|
874
|
-
|
|
875
1778
|
// ---------------------------------------------------------------------------
|
|
876
1779
|
// Default Hash Cache
|
|
877
1780
|
// ---------------------------------------------------------------------------
|
|
@@ -987,97 +1890,6 @@ export class SyncEngineLevel implements SyncEngine {
|
|
|
987
1890
|
}
|
|
988
1891
|
|
|
989
1892
|
// ---------------------------------------------------------------------------
|
|
990
|
-
// Tree Diff — walk the SMT to find divergent leaf sets
|
|
991
|
-
// ---------------------------------------------------------------------------
|
|
992
|
-
|
|
993
|
-
/**
|
|
994
|
-
* Walks the local and remote SMTs in parallel, recursing into subtrees whose
|
|
995
|
-
* hashes differ, until reaching `MAX_DIFF_DEPTH` where leaves are enumerated.
|
|
996
|
-
*
|
|
997
|
-
* Returns the sets of messageCids that exist only locally or only remotely.
|
|
998
|
-
*/
|
|
999
|
-
private async walkTreeDiff({ did, dwnUrl, delegateDid, protocol }: {
|
|
1000
|
-
did: string;
|
|
1001
|
-
dwnUrl: string;
|
|
1002
|
-
delegateDid?: string;
|
|
1003
|
-
protocol?: string;
|
|
1004
|
-
}): Promise<{ onlyLocal: string[]; onlyRemote: string[] }> {
|
|
1005
|
-
const onlyLocal: string[] = [];
|
|
1006
|
-
const onlyRemote: string[] = [];
|
|
1007
|
-
|
|
1008
|
-
// Hoist permission grant lookup — resolved once and reused for all subtree/leaf requests.
|
|
1009
|
-
const permissionGrantId = await this.getSyncPermissionGrantId(did, delegateDid, protocol);
|
|
1010
|
-
|
|
1011
|
-
// Gate remote HTTP requests through a semaphore so the binary tree walk
|
|
1012
|
-
// doesn't produce an exponential burst of concurrent requests. Local
|
|
1013
|
-
// DWN requests (in-process) are not gated.
|
|
1014
|
-
const remoteSemaphore = new Semaphore(REMOTE_CONCURRENCY);
|
|
1015
|
-
|
|
1016
|
-
const walk = async (prefix: string): Promise<void> => {
|
|
1017
|
-
// Get subtree hashes for this prefix from local and remote.
|
|
1018
|
-
// Only the remote request is gated by the semaphore.
|
|
1019
|
-
const [localHash, remoteHash] = await Promise.all([
|
|
1020
|
-
this.getLocalSubtreeHash(did, prefix, delegateDid, protocol, permissionGrantId),
|
|
1021
|
-
remoteSemaphore.run(() => this.getRemoteSubtreeHash(did, dwnUrl, prefix, delegateDid, protocol, permissionGrantId)),
|
|
1022
|
-
]);
|
|
1023
|
-
|
|
1024
|
-
// If hashes match, this subtree is identical — skip.
|
|
1025
|
-
if (localHash === remoteHash) {
|
|
1026
|
-
return;
|
|
1027
|
-
}
|
|
1028
|
-
|
|
1029
|
-
// Short-circuit: if one side is the default (empty-subtree) hash, all entries
|
|
1030
|
-
// on the other side are unique. Enumerate leaves directly instead of recursing
|
|
1031
|
-
// further into the tree — this avoids an exponential walk when one DWN has
|
|
1032
|
-
// entries that the other lacks entirely in this subtree.
|
|
1033
|
-
const emptyHash = await this.getDefaultHashHex(prefix.length);
|
|
1034
|
-
if (remoteHash === emptyHash && localHash !== emptyHash) {
|
|
1035
|
-
const localLeaves = await this.getLocalLeaves(did, prefix, delegateDid, protocol, permissionGrantId);
|
|
1036
|
-
onlyLocal.push(...localLeaves);
|
|
1037
|
-
return;
|
|
1038
|
-
}
|
|
1039
|
-
if (localHash === emptyHash && remoteHash !== emptyHash) {
|
|
1040
|
-
const remoteLeaves = await remoteSemaphore.run(
|
|
1041
|
-
() => this.getRemoteLeaves(did, dwnUrl, prefix, delegateDid, protocol, permissionGrantId),
|
|
1042
|
-
);
|
|
1043
|
-
onlyRemote.push(...remoteLeaves);
|
|
1044
|
-
return;
|
|
1045
|
-
}
|
|
1046
|
-
|
|
1047
|
-
// If we've reached the maximum diff depth, enumerate leaves.
|
|
1048
|
-
if (prefix.length >= MAX_DIFF_DEPTH) {
|
|
1049
|
-
const [localLeaves, remoteLeaves] = await Promise.all([
|
|
1050
|
-
this.getLocalLeaves(did, prefix, delegateDid, protocol, permissionGrantId),
|
|
1051
|
-
remoteSemaphore.run(() => this.getRemoteLeaves(did, dwnUrl, prefix, delegateDid, protocol, permissionGrantId)),
|
|
1052
|
-
]);
|
|
1053
|
-
|
|
1054
|
-
const localSet = new Set(localLeaves);
|
|
1055
|
-
const remoteSet = new Set(remoteLeaves);
|
|
1056
|
-
|
|
1057
|
-
for (const cid of localLeaves) {
|
|
1058
|
-
if (!remoteSet.has(cid)) {
|
|
1059
|
-
onlyLocal.push(cid);
|
|
1060
|
-
}
|
|
1061
|
-
}
|
|
1062
|
-
for (const cid of remoteLeaves) {
|
|
1063
|
-
if (!localSet.has(cid)) {
|
|
1064
|
-
onlyRemote.push(cid);
|
|
1065
|
-
}
|
|
1066
|
-
}
|
|
1067
|
-
return;
|
|
1068
|
-
}
|
|
1069
|
-
|
|
1070
|
-
// Recurse into left (0) and right (1) children in parallel.
|
|
1071
|
-
await Promise.all([
|
|
1072
|
-
walk(prefix + '0'),
|
|
1073
|
-
walk(prefix + '1'),
|
|
1074
|
-
]);
|
|
1075
|
-
};
|
|
1076
|
-
|
|
1077
|
-
await walk('');
|
|
1078
|
-
return { onlyLocal, onlyRemote };
|
|
1079
|
-
}
|
|
1080
|
-
|
|
1081
1893
|
// ---------------------------------------------------------------------------
|
|
1082
1894
|
// Batched Diff — single round-trip set reconciliation
|
|
1083
1895
|
// ---------------------------------------------------------------------------
|
|
@@ -1133,7 +1945,8 @@ export class SyncEngineLevel implements SyncEngine {
|
|
|
1133
1945
|
}
|
|
1134
1946
|
|
|
1135
1947
|
// Step 3: Enumerate local leaves for prefixes the remote reported as onlyLocal.
|
|
1136
|
-
|
|
1948
|
+
// Reuse the same grant ID from step 2 (avoids redundant lookup).
|
|
1949
|
+
const permissionGrantIdForLeaves = permissionGrantId;
|
|
1137
1950
|
const onlyLocalCids: string[] = [];
|
|
1138
1951
|
for (const prefix of reply.onlyLocal ?? []) {
|
|
1139
1952
|
const leaves = await this.getLocalLeaves(did, prefix, delegateDid, protocol, permissionGrantIdForLeaves);
|
|
@@ -1234,32 +2047,6 @@ export class SyncEngineLevel implements SyncEngine {
|
|
|
1234
2047
|
return reply.hash ?? '';
|
|
1235
2048
|
}
|
|
1236
2049
|
|
|
1237
|
-
private async getRemoteSubtreeHash(
|
|
1238
|
-
did: string, dwnUrl: string, prefix: string, delegateDid?: string, protocol?: string, permissionGrantId?: string
|
|
1239
|
-
): Promise<string> {
|
|
1240
|
-
const syncMessage = await this.agent.dwn.processRequest({
|
|
1241
|
-
store : false,
|
|
1242
|
-
author : did,
|
|
1243
|
-
target : did,
|
|
1244
|
-
messageType : DwnInterface.MessagesSync,
|
|
1245
|
-
granteeDid : delegateDid,
|
|
1246
|
-
messageParams : {
|
|
1247
|
-
action: 'subtree',
|
|
1248
|
-
prefix,
|
|
1249
|
-
protocol,
|
|
1250
|
-
permissionGrantId
|
|
1251
|
-
}
|
|
1252
|
-
});
|
|
1253
|
-
|
|
1254
|
-
const reply = await this.agent.rpc.sendDwnRequest({
|
|
1255
|
-
dwnUrl,
|
|
1256
|
-
targetDid : did,
|
|
1257
|
-
message : syncMessage.message,
|
|
1258
|
-
}) as MessagesSyncReply;
|
|
1259
|
-
|
|
1260
|
-
return reply.hash ?? '';
|
|
1261
|
-
}
|
|
1262
|
-
|
|
1263
2050
|
/**
|
|
1264
2051
|
* Get all leaf messageCids under a given prefix from the local DWN.
|
|
1265
2052
|
*
|
|
@@ -1294,32 +2081,6 @@ export class SyncEngineLevel implements SyncEngine {
|
|
|
1294
2081
|
return reply.entries ?? [];
|
|
1295
2082
|
}
|
|
1296
2083
|
|
|
1297
|
-
private async getRemoteLeaves(
|
|
1298
|
-
did: string, dwnUrl: string, prefix: string, delegateDid?: string, protocol?: string, permissionGrantId?: string
|
|
1299
|
-
): Promise<string[]> {
|
|
1300
|
-
const syncMessage = await this.agent.dwn.processRequest({
|
|
1301
|
-
store : false,
|
|
1302
|
-
author : did,
|
|
1303
|
-
target : did,
|
|
1304
|
-
messageType : DwnInterface.MessagesSync,
|
|
1305
|
-
granteeDid : delegateDid,
|
|
1306
|
-
messageParams : {
|
|
1307
|
-
action: 'leaves',
|
|
1308
|
-
prefix,
|
|
1309
|
-
protocol,
|
|
1310
|
-
permissionGrantId
|
|
1311
|
-
}
|
|
1312
|
-
});
|
|
1313
|
-
|
|
1314
|
-
const reply = await this.agent.rpc.sendDwnRequest({
|
|
1315
|
-
dwnUrl,
|
|
1316
|
-
targetDid : did,
|
|
1317
|
-
message : syncMessage.message,
|
|
1318
|
-
}) as MessagesSyncReply;
|
|
1319
|
-
|
|
1320
|
-
return reply.entries ?? [];
|
|
1321
|
-
}
|
|
1322
|
-
|
|
1323
2084
|
// ---------------------------------------------------------------------------
|
|
1324
2085
|
// Pull / Push — delegates to standalone functions in sync-messages.ts
|
|
1325
2086
|
// ---------------------------------------------------------------------------
|
|
@@ -1347,6 +2108,52 @@ export class SyncEngineLevel implements SyncEngine {
|
|
|
1347
2108
|
});
|
|
1348
2109
|
}
|
|
1349
2110
|
|
|
2111
|
+
// ---------------------------------------------------------------------------
|
|
2112
|
+
// Echo-loop suppression
|
|
2113
|
+
// ---------------------------------------------------------------------------
|
|
2114
|
+
|
|
2115
|
+
/**
|
|
2116
|
+
* Evicts expired entries from the echo-loop suppression cache.
|
|
2117
|
+
* Also enforces the size cap by evicting oldest entries first.
|
|
2118
|
+
*/
|
|
2119
|
+
private evictExpiredEchoEntries(): void {
|
|
2120
|
+
const now = Date.now();
|
|
2121
|
+
|
|
2122
|
+
// Evict expired entries.
|
|
2123
|
+
for (const [cid, expiry] of this._recentlyPulledCids) {
|
|
2124
|
+
if (now >= expiry) {
|
|
2125
|
+
this._recentlyPulledCids.delete(cid);
|
|
2126
|
+
}
|
|
2127
|
+
}
|
|
2128
|
+
|
|
2129
|
+
// Enforce size cap by evicting oldest entries.
|
|
2130
|
+
if (this._recentlyPulledCids.size > SyncEngineLevel.ECHO_SUPPRESS_MAX_ENTRIES) {
|
|
2131
|
+
const excess = this._recentlyPulledCids.size - SyncEngineLevel.ECHO_SUPPRESS_MAX_ENTRIES;
|
|
2132
|
+
let evicted = 0;
|
|
2133
|
+
for (const key of this._recentlyPulledCids.keys()) {
|
|
2134
|
+
if (evicted >= excess) { break; }
|
|
2135
|
+
this._recentlyPulledCids.delete(key);
|
|
2136
|
+
evicted++;
|
|
2137
|
+
}
|
|
2138
|
+
}
|
|
2139
|
+
}
|
|
2140
|
+
|
|
2141
|
+
/**
|
|
2142
|
+
* Checks whether a CID was recently pulled from a specific remote endpoint
|
|
2143
|
+
* and should not be pushed back to that same endpoint (echo-loop suppression).
|
|
2144
|
+
* Does not suppress pushes to other endpoints — multi-provider fan-out works.
|
|
2145
|
+
*/
|
|
2146
|
+
private isRecentlyPulled(cid: string, dwnUrl: string): boolean {
|
|
2147
|
+
const key = `${cid}|${dwnUrl}`;
|
|
2148
|
+
const expiry = this._recentlyPulledCids.get(key);
|
|
2149
|
+
if (expiry === undefined) { return false; }
|
|
2150
|
+
if (Date.now() >= expiry) {
|
|
2151
|
+
this._recentlyPulledCids.delete(key);
|
|
2152
|
+
return false;
|
|
2153
|
+
}
|
|
2154
|
+
return true;
|
|
2155
|
+
}
|
|
2156
|
+
|
|
1350
2157
|
/**
|
|
1351
2158
|
* Reads missing messages from the local DWN and pushes them to the remote DWN
|
|
1352
2159
|
* in dependency order (topological sort).
|
|
@@ -1357,7 +2164,7 @@ export class SyncEngineLevel implements SyncEngine {
|
|
|
1357
2164
|
delegateDid?: string;
|
|
1358
2165
|
protocol?: string;
|
|
1359
2166
|
messageCids: string[];
|
|
1360
|
-
}): Promise<
|
|
2167
|
+
}): Promise<PushResult> {
|
|
1361
2168
|
return pushMessages({
|
|
1362
2169
|
did, dwnUrl, delegateDid, protocol, messageCids,
|
|
1363
2170
|
agent : this.agent,
|