@enbox/agent 0.5.9 → 0.5.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/browser.mjs +9 -9
- package/dist/browser.mjs.map +4 -4
- package/dist/esm/dwn-api.js.map +1 -1
- package/dist/esm/dwn-record-upgrade.js +1 -1
- package/dist/esm/dwn-record-upgrade.js.map +1 -1
- package/dist/esm/index.js +4 -0
- package/dist/esm/index.js.map +1 -1
- package/dist/esm/sync-closure-resolver.js +855 -0
- package/dist/esm/sync-closure-resolver.js.map +1 -0
- package/dist/esm/sync-closure-types.js +189 -0
- package/dist/esm/sync-closure-types.js.map +1 -0
- package/dist/esm/sync-engine-level.js +977 -224
- package/dist/esm/sync-engine-level.js.map +1 -1
- package/dist/esm/sync-messages.js +19 -5
- package/dist/esm/sync-messages.js.map +1 -1
- package/dist/esm/sync-replication-ledger.js +220 -0
- package/dist/esm/sync-replication-ledger.js.map +1 -0
- package/dist/esm/types/sync.js +54 -1
- package/dist/esm/types/sync.js.map +1 -1
- package/dist/types/dwn-api.d.ts.map +1 -1
- package/dist/types/index.d.ts +5 -0
- package/dist/types/index.d.ts.map +1 -1
- package/dist/types/sync-closure-resolver.d.ts +19 -0
- package/dist/types/sync-closure-resolver.d.ts.map +1 -0
- package/dist/types/sync-closure-types.d.ts +122 -0
- package/dist/types/sync-closure-types.d.ts.map +1 -0
- package/dist/types/sync-engine-level.d.ts +137 -11
- package/dist/types/sync-engine-level.d.ts.map +1 -1
- package/dist/types/sync-messages.d.ts +6 -1
- package/dist/types/sync-messages.d.ts.map +1 -1
- package/dist/types/sync-replication-ledger.d.ts +72 -0
- package/dist/types/sync-replication-ledger.d.ts.map +1 -0
- package/dist/types/types/sync.d.ts +188 -0
- package/dist/types/types/sync.d.ts.map +1 -1
- package/package.json +3 -3
- package/src/dwn-api.ts +2 -1
- package/src/dwn-record-upgrade.ts +1 -1
- package/src/index.ts +5 -0
- package/src/sync-closure-resolver.ts +919 -0
- package/src/sync-closure-types.ts +270 -0
- package/src/sync-engine-level.ts +1062 -255
- package/src/sync-messages.ts +21 -6
- package/src/sync-replication-ledger.ts +197 -0
- package/src/types/sync.ts +202 -0
|
@@ -17,6 +17,10 @@ var __asyncValues = (this && this.__asyncValues) || function (o) {
|
|
|
17
17
|
import ms from 'ms';
|
|
18
18
|
import { Level } from 'level';
|
|
19
19
|
import { Encoder, hashToHex, initDefaultHashes, Message } from '@enbox/dwn-sdk-js';
|
|
20
|
+
import { evaluateClosure } from './sync-closure-resolver.js';
|
|
21
|
+
import { MAX_PENDING_TOKENS } from './types/sync.js';
|
|
22
|
+
import { ReplicationLedger } from './sync-replication-ledger.js';
|
|
23
|
+
import { createClosureContext, invalidateClosureCache } from './sync-closure-types.js';
|
|
20
24
|
import { AgentPermissionsApi } from './permissions-api.js';
|
|
21
25
|
import { DwnInterface } from './types/dwn.js';
|
|
22
26
|
import { isRecordsWrite } from './utils.js';
|
|
@@ -36,55 +40,6 @@ const MAX_DIFF_DEPTH = 16;
|
|
|
36
40
|
*/
|
|
37
41
|
const BATCHED_DIFF_DEPTH = 8;
|
|
38
42
|
/**
|
|
39
|
-
* Maximum number of concurrent remote HTTP requests during a tree diff.
|
|
40
|
-
* The binary tree walk fans out in parallel — without a limit, depth N
|
|
41
|
-
* produces 2^N concurrent requests, which can exhaust server rate limits.
|
|
42
|
-
*/
|
|
43
|
-
const REMOTE_CONCURRENCY = 4;
|
|
44
|
-
/**
|
|
45
|
-
* Counting semaphore for bounding concurrent async operations.
|
|
46
|
-
* Used by the tree walk to limit in-flight remote HTTP requests.
|
|
47
|
-
*/
|
|
48
|
-
class Semaphore {
|
|
49
|
-
constructor(permits) {
|
|
50
|
-
this._waiting = [];
|
|
51
|
-
this._permits = permits;
|
|
52
|
-
}
|
|
53
|
-
/** Wait until a permit is available, then consume one. */
|
|
54
|
-
acquire() {
|
|
55
|
-
return __awaiter(this, void 0, void 0, function* () {
|
|
56
|
-
if (this._permits > 0) {
|
|
57
|
-
this._permits--;
|
|
58
|
-
return;
|
|
59
|
-
}
|
|
60
|
-
return new Promise((resolve) => {
|
|
61
|
-
this._waiting.push(resolve);
|
|
62
|
-
});
|
|
63
|
-
});
|
|
64
|
-
}
|
|
65
|
-
/** Release a permit, waking the next waiter if any. */
|
|
66
|
-
release() {
|
|
67
|
-
const next = this._waiting.shift();
|
|
68
|
-
if (next) {
|
|
69
|
-
next();
|
|
70
|
-
}
|
|
71
|
-
else {
|
|
72
|
-
this._permits++;
|
|
73
|
-
}
|
|
74
|
-
}
|
|
75
|
-
/** Acquire a permit, run the task, then release regardless of outcome. */
|
|
76
|
-
run(fn) {
|
|
77
|
-
return __awaiter(this, void 0, void 0, function* () {
|
|
78
|
-
yield this.acquire();
|
|
79
|
-
try {
|
|
80
|
-
return yield fn();
|
|
81
|
-
}
|
|
82
|
-
finally {
|
|
83
|
-
this.release();
|
|
84
|
-
}
|
|
85
|
-
});
|
|
86
|
-
}
|
|
87
|
-
}
|
|
88
43
|
/**
|
|
89
44
|
* Key for the subscription cursor sublevel. Cursors are keyed by
|
|
90
45
|
* `{did}^{dwnUrl}[^{protocol}]` and store an opaque EventLog cursor string.
|
|
@@ -95,28 +50,130 @@ const CURSOR_SEPARATOR = '^';
|
|
|
95
50
|
* we batch them and push after this delay to avoid a push per individual write.
|
|
96
51
|
*/
|
|
97
52
|
const PUSH_DEBOUNCE_MS = 250;
|
|
53
|
+
/**
|
|
54
|
+
* Checks whether a message's protocolPath and contextId match the link's
|
|
55
|
+
* subset scope prefixes. Returns true if the message is in scope.
|
|
56
|
+
*
|
|
57
|
+
* When the scope has no prefixes (or is kind:'full'), all messages match.
|
|
58
|
+
* When protocolPathPrefixes or contextIdPrefixes are specified, the message
|
|
59
|
+
* must match at least one prefix in each specified set.
|
|
60
|
+
*
|
|
61
|
+
* This is agent-side filtering for subset scopes. The underlying
|
|
62
|
+
* MessagesSubscribe filter only supports protocol-level scoping today —
|
|
63
|
+
* protocolPath/contextId prefix filtering at the EventLog level is a
|
|
64
|
+
* follow-up (requires dwn-sdk-js MessagesFilter extension).
|
|
65
|
+
*/
|
|
66
|
+
function isEventInScope(message, scope) {
|
|
67
|
+
if (scope.kind === 'full') {
|
|
68
|
+
return true;
|
|
69
|
+
}
|
|
70
|
+
if (!scope.protocolPathPrefixes && !scope.contextIdPrefixes) {
|
|
71
|
+
return true;
|
|
72
|
+
}
|
|
73
|
+
const desc = message.descriptor;
|
|
74
|
+
// Check protocolPath prefix.
|
|
75
|
+
if (scope.protocolPathPrefixes && scope.protocolPathPrefixes.length > 0) {
|
|
76
|
+
const protocolPath = desc.protocolPath;
|
|
77
|
+
if (!protocolPath) {
|
|
78
|
+
return false;
|
|
79
|
+
}
|
|
80
|
+
const matches = scope.protocolPathPrefixes.some(prefix => protocolPath === prefix || protocolPath.startsWith(prefix + '/'));
|
|
81
|
+
if (!matches) {
|
|
82
|
+
return false;
|
|
83
|
+
}
|
|
84
|
+
}
|
|
85
|
+
// Check contextId prefix.
|
|
86
|
+
if (scope.contextIdPrefixes && scope.contextIdPrefixes.length > 0) {
|
|
87
|
+
const contextId = message.contextId;
|
|
88
|
+
if (!contextId) {
|
|
89
|
+
return false;
|
|
90
|
+
}
|
|
91
|
+
const matches = scope.contextIdPrefixes.some(prefix => contextId === prefix || contextId.startsWith(prefix + '/'));
|
|
92
|
+
if (!matches) {
|
|
93
|
+
return false;
|
|
94
|
+
}
|
|
95
|
+
}
|
|
96
|
+
return true;
|
|
97
|
+
}
|
|
98
98
|
export class SyncEngineLevel {
|
|
99
99
|
constructor({ agent, dataPath, db }) {
|
|
100
100
|
this._syncLock = false;
|
|
101
|
+
/**
|
|
102
|
+
* In-memory cache of active links, keyed by `{did}^{dwnUrl}^{protocol}`.
|
|
103
|
+
* Populated from the ledger on `startLiveSync`, used by subscription handlers
|
|
104
|
+
* to avoid async ledger lookups on every event.
|
|
105
|
+
*/
|
|
106
|
+
this._activeLinks = new Map();
|
|
107
|
+
/**
|
|
108
|
+
* Per-link in-memory delivery-order tracking for the pull path. Keyed by
|
|
109
|
+
* the same link key as `_activeLinks`. Not persisted — on crash, replay
|
|
110
|
+
* restarts from `contiguousAppliedToken` and idempotent apply handles
|
|
111
|
+
* re-delivered events.
|
|
112
|
+
*/
|
|
113
|
+
this._linkRuntimes = new Map();
|
|
101
114
|
// ---------------------------------------------------------------------------
|
|
102
115
|
// Live sync state
|
|
103
116
|
// ---------------------------------------------------------------------------
|
|
104
117
|
/** Current sync mode, set by `startSync`. */
|
|
105
118
|
this._syncMode = 'poll';
|
|
119
|
+
/**
|
|
120
|
+
* Monotonic session generation counter. Incremented on every teardown.
|
|
121
|
+
* Async operations (repair, retry timers) capture the generation at start
|
|
122
|
+
* and bail if it has changed — this prevents stale work from mutating
|
|
123
|
+
* state after teardown or mode switch.
|
|
124
|
+
*/
|
|
125
|
+
this._syncGeneration = 0;
|
|
106
126
|
/** Active live pull subscriptions (remote -> local via MessagesSubscribe). */
|
|
107
127
|
this._liveSubscriptions = [];
|
|
108
128
|
/** Active local EventLog subscriptions for push-on-write (local -> remote). */
|
|
109
129
|
this._localSubscriptions = [];
|
|
110
130
|
/** Connectivity state derived from subscription health. */
|
|
111
131
|
this._connectivityState = 'unknown';
|
|
112
|
-
/**
|
|
132
|
+
/** Registered event listeners for observability. */
|
|
133
|
+
this._eventListeners = new Set();
|
|
134
|
+
/** Entry in the pending push queue — a message CID with its local EventLog token. */
|
|
113
135
|
this._pendingPushCids = new Map();
|
|
136
|
+
/**
|
|
137
|
+
* CIDs recently received via pull subscription, keyed by `cid|dwnUrl` to
|
|
138
|
+
* scope suppression per remote endpoint. A message pulled from Provider A
|
|
139
|
+
* is only suppressed for push back to Provider A — it still fans out to
|
|
140
|
+
* Provider B and C. TTL: 60 seconds. Cap: 10,000 entries.
|
|
141
|
+
*/
|
|
142
|
+
this._recentlyPulledCids = new Map();
|
|
143
|
+
/**
|
|
144
|
+
* Per-tenant closure evaluation contexts for the current live sync session.
|
|
145
|
+
* Caches ProtocolsConfigure and grant lookups across events for the same
|
|
146
|
+
* tenant. Keyed by tenantDid to prevent cross-tenant cache pollution.
|
|
147
|
+
*/
|
|
148
|
+
this._closureContexts = new Map();
|
|
114
149
|
/** Count of consecutive SMT sync failures (for backoff in poll mode). */
|
|
115
150
|
this._consecutiveFailures = 0;
|
|
151
|
+
/** Per-link degraded-poll interval timers. */
|
|
152
|
+
this._degradedPollTimers = new Map();
|
|
153
|
+
/** Per-link repair attempt counters. */
|
|
154
|
+
this._repairAttempts = new Map();
|
|
155
|
+
/** Per-link active repair promises — prevents concurrent repair for the same link. */
|
|
156
|
+
this._activeRepairs = new Map();
|
|
157
|
+
/** Per-link retry timers for failed repairs below max attempts. */
|
|
158
|
+
this._repairRetryTimers = new Map();
|
|
159
|
+
/**
|
|
160
|
+
* Per-link repair context — stores ProgressGap metadata for use during
|
|
161
|
+
* repair. The `resumeToken` (from `gapInfo.latestAvailable`) is used as
|
|
162
|
+
* the post-repair checkpoint so the reopened subscription replays from
|
|
163
|
+
* a valid boundary instead of starting live-only.
|
|
164
|
+
*/
|
|
165
|
+
this._repairContext = new Map();
|
|
116
166
|
this._agent = agent;
|
|
117
167
|
this._permissionsApi = new AgentPermissionsApi({ agent: agent });
|
|
118
168
|
this._db = (db) ? db : new Level(dataPath !== null && dataPath !== void 0 ? dataPath : 'DATA/AGENT/SYNC_STORE');
|
|
119
169
|
}
|
|
170
|
+
/** Lazy accessor for the replication ledger. */
|
|
171
|
+
get ledger() {
|
|
172
|
+
if (!this._ledger) {
|
|
173
|
+
this._ledger = new ReplicationLedger(this._db);
|
|
174
|
+
}
|
|
175
|
+
return this._ledger;
|
|
176
|
+
}
|
|
120
177
|
/**
|
|
121
178
|
* Retrieves the `EnboxPlatformAgent` execution context.
|
|
122
179
|
*
|
|
@@ -134,7 +191,44 @@ export class SyncEngineLevel {
|
|
|
134
191
|
this._permissionsApi = new AgentPermissionsApi({ agent: agent });
|
|
135
192
|
}
|
|
136
193
|
get connectivityState() {
|
|
137
|
-
|
|
194
|
+
// Aggregate per-link connectivity: if any link is online, report online.
|
|
195
|
+
// If all are offline, report offline. If all unknown, report unknown.
|
|
196
|
+
// Falls back to the global _connectivityState for poll-mode (no active links).
|
|
197
|
+
if (this._activeLinks.size === 0) {
|
|
198
|
+
return this._connectivityState;
|
|
199
|
+
}
|
|
200
|
+
let hasOnline = false;
|
|
201
|
+
let hasOffline = false;
|
|
202
|
+
for (const link of this._activeLinks.values()) {
|
|
203
|
+
if (link.connectivity === 'online') {
|
|
204
|
+
hasOnline = true;
|
|
205
|
+
}
|
|
206
|
+
if (link.connectivity === 'offline') {
|
|
207
|
+
hasOffline = true;
|
|
208
|
+
}
|
|
209
|
+
}
|
|
210
|
+
if (hasOnline) {
|
|
211
|
+
return 'online';
|
|
212
|
+
}
|
|
213
|
+
if (hasOffline) {
|
|
214
|
+
return 'offline';
|
|
215
|
+
}
|
|
216
|
+
return 'unknown';
|
|
217
|
+
}
|
|
218
|
+
on(listener) {
|
|
219
|
+
this._eventListeners.add(listener);
|
|
220
|
+
return () => { this._eventListeners.delete(listener); };
|
|
221
|
+
}
|
|
222
|
+
/** Emit a sync event to all registered listeners. */
|
|
223
|
+
emitEvent(event) {
|
|
224
|
+
for (const listener of this._eventListeners) {
|
|
225
|
+
try {
|
|
226
|
+
listener(event);
|
|
227
|
+
}
|
|
228
|
+
catch (_a) {
|
|
229
|
+
// Don't let listener errors propagate into sync engine logic.
|
|
230
|
+
}
|
|
231
|
+
}
|
|
138
232
|
}
|
|
139
233
|
clear() {
|
|
140
234
|
return __awaiter(this, void 0, void 0, function* () {
|
|
@@ -404,15 +498,69 @@ export class SyncEngineLevel {
|
|
|
404
498
|
catch (error) {
|
|
405
499
|
console.error('SyncEngineLevel: Error during initial live-sync catch-up', error);
|
|
406
500
|
}
|
|
407
|
-
// Step 2:
|
|
501
|
+
// Step 2: Initialize replication links and open live subscriptions.
|
|
408
502
|
const syncTargets = yield this.getSyncTargets();
|
|
409
503
|
for (const target of syncTargets) {
|
|
504
|
+
let link;
|
|
410
505
|
try {
|
|
506
|
+
// Get or create the link in the durable ledger.
|
|
507
|
+
// Use protocol-scoped scope when a protocol is specified, otherwise full-tenant.
|
|
508
|
+
const linkScope = target.protocol
|
|
509
|
+
? { kind: 'protocol', protocol: target.protocol }
|
|
510
|
+
: { kind: 'full' };
|
|
511
|
+
link = yield this.ledger.getOrCreateLink({
|
|
512
|
+
tenantDid: target.did,
|
|
513
|
+
remoteEndpoint: target.dwnUrl,
|
|
514
|
+
scope: linkScope,
|
|
515
|
+
delegateDid: target.delegateDid,
|
|
516
|
+
protocol: target.protocol,
|
|
517
|
+
});
|
|
518
|
+
// Cache the link for fast access by subscription handlers.
|
|
519
|
+
const linkKey = this.buildCursorKey(target.did, target.dwnUrl, target.protocol);
|
|
520
|
+
this._activeLinks.set(linkKey, link);
|
|
521
|
+
// Open subscriptions — only transition to live if both succeed.
|
|
522
|
+
// If pull succeeds but push fails, close the pull subscription to
|
|
523
|
+
// avoid a resource leak with inconsistent state.
|
|
411
524
|
yield this.openLivePullSubscription(target);
|
|
412
|
-
|
|
525
|
+
try {
|
|
526
|
+
yield this.openLocalPushSubscription(target);
|
|
527
|
+
}
|
|
528
|
+
catch (pushError) {
|
|
529
|
+
// Close the already-opened pull subscription.
|
|
530
|
+
const pullSub = this._liveSubscriptions.find(s => s.did === target.did && s.dwnUrl === target.dwnUrl && s.protocol === target.protocol);
|
|
531
|
+
if (pullSub) {
|
|
532
|
+
try {
|
|
533
|
+
yield pullSub.close();
|
|
534
|
+
}
|
|
535
|
+
catch ( /* best effort */_a) { /* best effort */ }
|
|
536
|
+
this._liveSubscriptions = this._liveSubscriptions.filter(s => s !== pullSub);
|
|
537
|
+
}
|
|
538
|
+
throw pushError;
|
|
539
|
+
}
|
|
540
|
+
this.emitEvent({ type: 'link:status-change', tenantDid: target.did, remoteEndpoint: target.dwnUrl, protocol: target.protocol, from: 'initializing', to: 'live' });
|
|
541
|
+
yield this.ledger.setStatus(link, 'live');
|
|
413
542
|
}
|
|
414
543
|
catch (error) {
|
|
544
|
+
const linkKey = this.buildCursorKey(target.did, target.dwnUrl, target.protocol);
|
|
545
|
+
// Detect ProgressGap (410) — the cursor is stale, link needs SMT repair.
|
|
546
|
+
if (error.isProgressGap && link) {
|
|
547
|
+
console.warn(`SyncEngineLevel: ProgressGap detected for ${target.did} -> ${target.dwnUrl}, initiating repair`);
|
|
548
|
+
this.emitEvent({ type: 'gap:detected', tenantDid: target.did, remoteEndpoint: target.dwnUrl, protocol: target.protocol, reason: 'ProgressGap' });
|
|
549
|
+
const gapInfo = error.gapInfo;
|
|
550
|
+
yield this.transitionToRepairing(linkKey, link, {
|
|
551
|
+
resumeToken: gapInfo === null || gapInfo === void 0 ? void 0 : gapInfo.latestAvailable,
|
|
552
|
+
});
|
|
553
|
+
continue;
|
|
554
|
+
}
|
|
415
555
|
console.error(`SyncEngineLevel: Failed to open live subscription for ${target.did} -> ${target.dwnUrl}`, error);
|
|
556
|
+
// Clean up in-memory state for the failed link so it doesn't appear
|
|
557
|
+
// active to later code. The durable link remains at 'initializing'.
|
|
558
|
+
this._activeLinks.delete(linkKey);
|
|
559
|
+
this._linkRuntimes.delete(linkKey);
|
|
560
|
+
// Recompute connectivity — if no live subscriptions remain, reset to unknown.
|
|
561
|
+
if (this._liveSubscriptions.length === 0) {
|
|
562
|
+
this._connectivityState = 'unknown';
|
|
563
|
+
}
|
|
416
564
|
}
|
|
417
565
|
}
|
|
418
566
|
// Step 3: Schedule infrequent SMT integrity check.
|
|
@@ -430,11 +578,385 @@ export class SyncEngineLevel {
|
|
|
430
578
|
this._syncIntervalId = setInterval(integrityCheck, intervalMilliseconds);
|
|
431
579
|
});
|
|
432
580
|
}
|
|
581
|
+
/**
|
|
582
|
+
* Get or create the runtime state for a link.
|
|
583
|
+
*/
|
|
584
|
+
getOrCreateRuntime(linkKey) {
|
|
585
|
+
let rt = this._linkRuntimes.get(linkKey);
|
|
586
|
+
if (!rt) {
|
|
587
|
+
rt = { nextDeliveryOrdinal: 0, nextCommitOrdinal: 0, inflight: new Map() };
|
|
588
|
+
this._linkRuntimes.set(linkKey, rt);
|
|
589
|
+
}
|
|
590
|
+
return rt;
|
|
591
|
+
}
|
|
592
|
+
/**
|
|
593
|
+
* Drain contiguously committed ordinals from the runtime state, advancing
|
|
594
|
+
* the link's pull checkpoint for each drained entry. Returns the number of
|
|
595
|
+
* entries drained (0 if the next ordinal is not yet committed).
|
|
596
|
+
*/
|
|
597
|
+
drainCommittedPull(linkKey) {
|
|
598
|
+
const rt = this._linkRuntimes.get(linkKey);
|
|
599
|
+
const link = this._activeLinks.get(linkKey);
|
|
600
|
+
if (!rt || !link) {
|
|
601
|
+
return 0;
|
|
602
|
+
}
|
|
603
|
+
let drained = 0;
|
|
604
|
+
while (true) {
|
|
605
|
+
const entry = rt.inflight.get(rt.nextCommitOrdinal);
|
|
606
|
+
if (!entry || !entry.committed) {
|
|
607
|
+
break;
|
|
608
|
+
}
|
|
609
|
+
// This ordinal is committed — advance the durable checkpoint.
|
|
610
|
+
ReplicationLedger.commitContiguousToken(link.pull, entry.token);
|
|
611
|
+
ReplicationLedger.setReceivedToken(link.pull, entry.token);
|
|
612
|
+
rt.inflight.delete(rt.nextCommitOrdinal);
|
|
613
|
+
rt.nextCommitOrdinal++;
|
|
614
|
+
drained++;
|
|
615
|
+
// Note: checkpoint:pull-advance event is emitted AFTER saveLink succeeds
|
|
616
|
+
// in the caller, not here. "Advanced" means durably persisted.
|
|
617
|
+
}
|
|
618
|
+
return drained;
|
|
619
|
+
}
|
|
620
|
+
/**
|
|
621
|
+
* Central helper for transitioning a link to `repairing`. Encapsulates:
|
|
622
|
+
* - status change
|
|
623
|
+
* - optional gap context storage
|
|
624
|
+
* - repair kick-off with retry scheduling on failure
|
|
625
|
+
*
|
|
626
|
+
* All code paths that set `repairing` should go through this helper to
|
|
627
|
+
* guarantee a future retry path.
|
|
628
|
+
*/
|
|
629
|
+
transitionToRepairing(linkKey, link, options) {
|
|
630
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
631
|
+
const prevStatus = link.status;
|
|
632
|
+
const prevConnectivity = link.connectivity;
|
|
633
|
+
link.connectivity = 'offline';
|
|
634
|
+
yield this.ledger.setStatus(link, 'repairing');
|
|
635
|
+
this.emitEvent({ type: 'link:status-change', tenantDid: link.tenantDid, remoteEndpoint: link.remoteEndpoint, protocol: link.protocol, from: prevStatus, to: 'repairing' });
|
|
636
|
+
if (prevConnectivity !== 'offline') {
|
|
637
|
+
this.emitEvent({ type: 'link:connectivity-change', tenantDid: link.tenantDid, remoteEndpoint: link.remoteEndpoint, protocol: link.protocol, from: prevConnectivity, to: 'offline' });
|
|
638
|
+
}
|
|
639
|
+
if (options === null || options === void 0 ? void 0 : options.resumeToken) {
|
|
640
|
+
this._repairContext.set(linkKey, { resumeToken: options.resumeToken });
|
|
641
|
+
}
|
|
642
|
+
// Clear runtime ordinals immediately — stale state must not linger
|
|
643
|
+
// across repair attempts.
|
|
644
|
+
const rt = this._linkRuntimes.get(linkKey);
|
|
645
|
+
if (rt) {
|
|
646
|
+
rt.inflight.clear();
|
|
647
|
+
rt.nextCommitOrdinal = rt.nextDeliveryOrdinal;
|
|
648
|
+
}
|
|
649
|
+
// Kick off repair with retry scheduling on failure.
|
|
650
|
+
void this.repairLink(linkKey).catch(() => {
|
|
651
|
+
this.scheduleRepairRetry(linkKey);
|
|
652
|
+
});
|
|
653
|
+
});
|
|
654
|
+
}
|
|
655
|
+
/**
|
|
656
|
+
* Schedule a retry for a failed repair. Uses exponential backoff.
|
|
657
|
+
* No-op if the link is already in `degraded_poll` (timer loop owns retries)
|
|
658
|
+
* or if a retry is already scheduled.
|
|
659
|
+
*/
|
|
660
|
+
scheduleRepairRetry(linkKey) {
|
|
661
|
+
var _a;
|
|
662
|
+
// Don't schedule if already in degraded_poll or retry pending.
|
|
663
|
+
const link = this._activeLinks.get(linkKey);
|
|
664
|
+
if (!link || link.status === 'degraded_poll') {
|
|
665
|
+
return;
|
|
666
|
+
}
|
|
667
|
+
if (this._repairRetryTimers.has(linkKey)) {
|
|
668
|
+
return;
|
|
669
|
+
}
|
|
670
|
+
// attempts is already post-increment from doRepairLink, so subtract 1
|
|
671
|
+
// for the backoff index: first failure (attempts=1) → backoff[0]=1s.
|
|
672
|
+
const attempts = (_a = this._repairAttempts.get(linkKey)) !== null && _a !== void 0 ? _a : 1;
|
|
673
|
+
const backoff = SyncEngineLevel.REPAIR_BACKOFF_MS;
|
|
674
|
+
const delayMs = backoff[Math.min(attempts - 1, backoff.length - 1)];
|
|
675
|
+
const timerGeneration = this._syncGeneration;
|
|
676
|
+
const timer = setTimeout(() => __awaiter(this, void 0, void 0, function* () {
|
|
677
|
+
this._repairRetryTimers.delete(linkKey);
|
|
678
|
+
// Bail if teardown occurred since this timer was scheduled.
|
|
679
|
+
if (this._syncGeneration !== timerGeneration) {
|
|
680
|
+
return;
|
|
681
|
+
}
|
|
682
|
+
// Verify link still exists and is still repairing.
|
|
683
|
+
const currentLink = this._activeLinks.get(linkKey);
|
|
684
|
+
if (!currentLink || currentLink.status !== 'repairing') {
|
|
685
|
+
return;
|
|
686
|
+
}
|
|
687
|
+
try {
|
|
688
|
+
yield this.repairLink(linkKey);
|
|
689
|
+
}
|
|
690
|
+
catch (_a) {
|
|
691
|
+
// repairLink handles max attempts → degraded_poll internally.
|
|
692
|
+
// If still below max, schedule another retry.
|
|
693
|
+
if (currentLink.status === 'repairing') {
|
|
694
|
+
this.scheduleRepairRetry(linkKey);
|
|
695
|
+
}
|
|
696
|
+
}
|
|
697
|
+
}), delayMs);
|
|
698
|
+
this._repairRetryTimers.set(linkKey, timer);
|
|
699
|
+
}
|
|
700
|
+
/**
|
|
701
|
+
* Repair a single link. Deduplicates concurrent calls via `_activeRepairs`.
|
|
702
|
+
* If repair is already running for this link, returns the existing promise.
|
|
703
|
+
*/
|
|
704
|
+
repairLink(linkKey) {
|
|
705
|
+
const existing = this._activeRepairs.get(linkKey);
|
|
706
|
+
if (existing) {
|
|
707
|
+
return existing;
|
|
708
|
+
}
|
|
709
|
+
const promise = this.doRepairLink(linkKey).finally(() => {
|
|
710
|
+
this._activeRepairs.delete(linkKey);
|
|
711
|
+
});
|
|
712
|
+
this._activeRepairs.set(linkKey, promise);
|
|
713
|
+
return promise;
|
|
714
|
+
}
|
|
715
|
+
/**
|
|
716
|
+
* Internal repair implementation. Runs SMT set reconciliation for a single
|
|
717
|
+
* link, then attempts to re-establish live subscriptions. If repair succeeds,
|
|
718
|
+
* transitions to `live`. If it fails, throws so callers (degraded_poll timer,
|
|
719
|
+
* startup) can handle retry scheduling.
|
|
720
|
+
*/
|
|
721
|
+
doRepairLink(linkKey) {
|
|
722
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
723
|
+
var _a, _b, _c, _d;
|
|
724
|
+
const link = this._activeLinks.get(linkKey);
|
|
725
|
+
if (!link) {
|
|
726
|
+
return;
|
|
727
|
+
}
|
|
728
|
+
// Capture the sync generation at repair start. If teardown occurs during
|
|
729
|
+
// any await, the generation will have incremented and we bail before
|
|
730
|
+
// mutating state — preventing the race where repair continues after teardown.
|
|
731
|
+
const generation = this._syncGeneration;
|
|
732
|
+
const { tenantDid: did, remoteEndpoint: dwnUrl, delegateDid, protocol } = link;
|
|
733
|
+
this.emitEvent({ type: 'repair:started', tenantDid: did, remoteEndpoint: dwnUrl, protocol, attempt: ((_a = this._repairAttempts.get(linkKey)) !== null && _a !== void 0 ? _a : 0) + 1 });
|
|
734
|
+
const attempts = ((_b = this._repairAttempts.get(linkKey)) !== null && _b !== void 0 ? _b : 0) + 1;
|
|
735
|
+
this._repairAttempts.set(linkKey, attempts);
|
|
736
|
+
// Step 1: Close existing subscriptions FIRST to stop old events from
|
|
737
|
+
// mutating local state while repair runs.
|
|
738
|
+
yield this.closeLinkSubscriptions(link);
|
|
739
|
+
if (this._syncGeneration !== generation) {
|
|
740
|
+
return;
|
|
741
|
+
} // Teardown occurred.
|
|
742
|
+
// Step 2: Clear runtime ordinals immediately — stale state must not
|
|
743
|
+
// persist across repair attempts (successful or failed).
|
|
744
|
+
const rt = this.getOrCreateRuntime(linkKey);
|
|
745
|
+
rt.inflight.clear();
|
|
746
|
+
rt.nextDeliveryOrdinal = 0;
|
|
747
|
+
rt.nextCommitOrdinal = 0;
|
|
748
|
+
try {
|
|
749
|
+
// Step 3: Run SMT reconciliation for this link.
|
|
750
|
+
const localRoot = yield this.getLocalRoot(did, delegateDid, protocol);
|
|
751
|
+
if (this._syncGeneration !== generation) {
|
|
752
|
+
return;
|
|
753
|
+
}
|
|
754
|
+
const remoteRoot = yield this.getRemoteRoot(did, dwnUrl, delegateDid, protocol);
|
|
755
|
+
if (this._syncGeneration !== generation) {
|
|
756
|
+
return;
|
|
757
|
+
}
|
|
758
|
+
if (localRoot !== remoteRoot) {
|
|
759
|
+
const diff = yield this.diffWithRemote({ did, dwnUrl, delegateDid, protocol });
|
|
760
|
+
if (this._syncGeneration !== generation) {
|
|
761
|
+
return;
|
|
762
|
+
}
|
|
763
|
+
if (diff.onlyRemote.length > 0) {
|
|
764
|
+
const prefetched = [];
|
|
765
|
+
const needsFetchCids = [];
|
|
766
|
+
for (const entry of diff.onlyRemote) {
|
|
767
|
+
if (!entry.message || (entry.message.descriptor.interface === 'Records' &&
|
|
768
|
+
entry.message.descriptor.method === 'Write' &&
|
|
769
|
+
entry.message.descriptor.dataCid && !entry.encodedData)) {
|
|
770
|
+
needsFetchCids.push(entry.messageCid);
|
|
771
|
+
}
|
|
772
|
+
else {
|
|
773
|
+
prefetched.push(entry);
|
|
774
|
+
}
|
|
775
|
+
}
|
|
776
|
+
yield this.pullMessages({ did, dwnUrl, delegateDid, protocol, messageCids: needsFetchCids, prefetched });
|
|
777
|
+
if (this._syncGeneration !== generation) {
|
|
778
|
+
return;
|
|
779
|
+
}
|
|
780
|
+
}
|
|
781
|
+
if (diff.onlyLocal.length > 0) {
|
|
782
|
+
yield this.pushMessages({ did, dwnUrl, delegateDid, protocol, messageCids: diff.onlyLocal });
|
|
783
|
+
if (this._syncGeneration !== generation) {
|
|
784
|
+
return;
|
|
785
|
+
}
|
|
786
|
+
}
|
|
787
|
+
}
|
|
788
|
+
// Step 4: Determine the post-repair resume token.
|
|
789
|
+
// - If repair was triggered by ProgressGap, use the stored resumeToken
|
|
790
|
+
// (from gapInfo.latestAvailable) so the reopened subscription replays
|
|
791
|
+
// from a valid boundary, closing the race window between SMT and resubscribe.
|
|
792
|
+
// - Otherwise, use the existing contiguousAppliedToken if still valid.
|
|
793
|
+
// - Push checkpoint is NOT reset during repair: push frontier tracks what
|
|
794
|
+
// the local EventLog has delivered to the remote. SMT repair handles
|
|
795
|
+
// pull-side convergence; push-side convergence is handled by the diff's
|
|
796
|
+
// onlyLocal push. The push checkpoint remains the local authority.
|
|
797
|
+
const repairCtx = this._repairContext.get(linkKey);
|
|
798
|
+
const resumeToken = (_c = repairCtx === null || repairCtx === void 0 ? void 0 : repairCtx.resumeToken) !== null && _c !== void 0 ? _c : link.pull.contiguousAppliedToken;
|
|
799
|
+
ReplicationLedger.resetCheckpoint(link.pull, resumeToken);
|
|
800
|
+
yield this.ledger.saveLink(link);
|
|
801
|
+
if (this._syncGeneration !== generation) {
|
|
802
|
+
return;
|
|
803
|
+
}
|
|
804
|
+
// Step 5: Reopen subscriptions with the repaired checkpoints.
|
|
805
|
+
const target = { did, dwnUrl, delegateDid, protocol };
|
|
806
|
+
yield this.openLivePullSubscription(target);
|
|
807
|
+
if (this._syncGeneration !== generation) {
|
|
808
|
+
return;
|
|
809
|
+
}
|
|
810
|
+
try {
|
|
811
|
+
yield this.openLocalPushSubscription(Object.assign(Object.assign({}, target), { pushCursor: link.push.contiguousAppliedToken }));
|
|
812
|
+
}
|
|
813
|
+
catch (pushError) {
|
|
814
|
+
const pullSub = this._liveSubscriptions.find(s => s.did === did && s.dwnUrl === dwnUrl && s.protocol === protocol);
|
|
815
|
+
if (pullSub) {
|
|
816
|
+
try {
|
|
817
|
+
yield pullSub.close();
|
|
818
|
+
}
|
|
819
|
+
catch ( /* best effort */_e) { /* best effort */ }
|
|
820
|
+
this._liveSubscriptions = this._liveSubscriptions.filter(s => s !== pullSub);
|
|
821
|
+
}
|
|
822
|
+
throw pushError;
|
|
823
|
+
}
|
|
824
|
+
if (this._syncGeneration !== generation) {
|
|
825
|
+
return;
|
|
826
|
+
}
|
|
827
|
+
// Step 6: Clean up repair context and transition to live.
|
|
828
|
+
this._repairContext.delete(linkKey);
|
|
829
|
+
this._repairAttempts.delete(linkKey);
|
|
830
|
+
const retryTimer = this._repairRetryTimers.get(linkKey);
|
|
831
|
+
if (retryTimer) {
|
|
832
|
+
clearTimeout(retryTimer);
|
|
833
|
+
this._repairRetryTimers.delete(linkKey);
|
|
834
|
+
}
|
|
835
|
+
const prevRepairConnectivity = link.connectivity;
|
|
836
|
+
link.connectivity = 'online';
|
|
837
|
+
yield this.ledger.setStatus(link, 'live');
|
|
838
|
+
this.emitEvent({ type: 'repair:completed', tenantDid: did, remoteEndpoint: dwnUrl, protocol });
|
|
839
|
+
if (prevRepairConnectivity !== 'online') {
|
|
840
|
+
this.emitEvent({ type: 'link:connectivity-change', tenantDid: did, remoteEndpoint: dwnUrl, protocol, from: prevRepairConnectivity, to: 'online' });
|
|
841
|
+
}
|
|
842
|
+
this.emitEvent({ type: 'link:status-change', tenantDid: did, remoteEndpoint: dwnUrl, protocol, from: 'repairing', to: 'live' });
|
|
843
|
+
}
|
|
844
|
+
catch (error) {
|
|
845
|
+
// If teardown occurred during repair, don't retry or enter degraded_poll.
|
|
846
|
+
if (this._syncGeneration !== generation) {
|
|
847
|
+
return;
|
|
848
|
+
}
|
|
849
|
+
console.error(`SyncEngineLevel: Repair failed for ${did} -> ${dwnUrl} (attempt ${attempts})`, error);
|
|
850
|
+
this.emitEvent({ type: 'repair:failed', tenantDid: did, remoteEndpoint: dwnUrl, protocol, attempt: attempts, error: String((_d = error.message) !== null && _d !== void 0 ? _d : error) });
|
|
851
|
+
if (attempts >= SyncEngineLevel.MAX_REPAIR_ATTEMPTS) {
|
|
852
|
+
console.warn(`SyncEngineLevel: Max repair attempts reached for ${did} -> ${dwnUrl}, entering degraded_poll`);
|
|
853
|
+
yield this.enterDegradedPoll(linkKey);
|
|
854
|
+
return;
|
|
855
|
+
}
|
|
856
|
+
// Re-throw so callers (degraded_poll timer) can handle retry scheduling.
|
|
857
|
+
throw error;
|
|
858
|
+
}
|
|
859
|
+
});
|
|
860
|
+
}
|
|
861
|
+
/**
|
|
862
|
+
* Close pull and push subscriptions for a specific link.
|
|
863
|
+
*/
|
|
864
|
+
closeLinkSubscriptions(link) {
|
|
865
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
866
|
+
const { tenantDid: did, remoteEndpoint: dwnUrl, protocol } = link;
|
|
867
|
+
// Close pull subscription.
|
|
868
|
+
const pullSub = this._liveSubscriptions.find(s => s.did === did && s.dwnUrl === dwnUrl && s.protocol === protocol);
|
|
869
|
+
if (pullSub) {
|
|
870
|
+
try {
|
|
871
|
+
yield pullSub.close();
|
|
872
|
+
}
|
|
873
|
+
catch ( /* best effort */_a) { /* best effort */ }
|
|
874
|
+
this._liveSubscriptions = this._liveSubscriptions.filter(s => s !== pullSub);
|
|
875
|
+
}
|
|
876
|
+
// Close local push subscription.
|
|
877
|
+
const pushSub = this._localSubscriptions.find(s => s.did === did && s.dwnUrl === dwnUrl && s.protocol === protocol);
|
|
878
|
+
if (pushSub) {
|
|
879
|
+
try {
|
|
880
|
+
yield pushSub.close();
|
|
881
|
+
}
|
|
882
|
+
catch ( /* best effort */_b) { /* best effort */ }
|
|
883
|
+
this._localSubscriptions = this._localSubscriptions.filter(s => s !== pushSub);
|
|
884
|
+
}
|
|
885
|
+
});
|
|
886
|
+
}
|
|
887
|
+
/**
|
|
888
|
+
* Transition a link to `degraded_poll` and start a per-link polling timer.
|
|
889
|
+
* The timer runs SMT reconciliation at a reduced frequency (30s with jitter)
|
|
890
|
+
* and attempts to re-establish live subscriptions after each successful repair.
|
|
891
|
+
*/
|
|
892
|
+
enterDegradedPoll(linkKey) {
|
|
893
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
894
|
+
const link = this._activeLinks.get(linkKey);
|
|
895
|
+
if (!link) {
|
|
896
|
+
return;
|
|
897
|
+
}
|
|
898
|
+
link.connectivity = 'offline';
|
|
899
|
+
const prevDegradedStatus = link.status;
|
|
900
|
+
yield this.ledger.setStatus(link, 'degraded_poll');
|
|
901
|
+
this._repairAttempts.delete(linkKey);
|
|
902
|
+
this.emitEvent({ type: 'link:status-change', tenantDid: link.tenantDid, remoteEndpoint: link.remoteEndpoint, protocol: link.protocol, from: prevDegradedStatus, to: 'degraded_poll' });
|
|
903
|
+
this.emitEvent({ type: 'degraded-poll:entered', tenantDid: link.tenantDid, remoteEndpoint: link.remoteEndpoint, protocol: link.protocol });
|
|
904
|
+
// Clear any existing timer for this link.
|
|
905
|
+
const existing = this._degradedPollTimers.get(linkKey);
|
|
906
|
+
if (existing) {
|
|
907
|
+
clearInterval(existing);
|
|
908
|
+
}
|
|
909
|
+
// Schedule per-link polling with jitter (15-30 seconds).
|
|
910
|
+
const baseInterval = 15000;
|
|
911
|
+
const jitter = Math.floor(Math.random() * 15000);
|
|
912
|
+
const interval = baseInterval + jitter;
|
|
913
|
+
const pollGeneration = this._syncGeneration;
|
|
914
|
+
const timer = setInterval(() => __awaiter(this, void 0, void 0, function* () {
|
|
915
|
+
// Bail if teardown occurred since this timer was created.
|
|
916
|
+
if (this._syncGeneration !== pollGeneration) {
|
|
917
|
+
clearInterval(timer);
|
|
918
|
+
this._degradedPollTimers.delete(linkKey);
|
|
919
|
+
return;
|
|
920
|
+
}
|
|
921
|
+
// If the link was transitioned out of degraded_poll externally (e.g.,
|
|
922
|
+
// by teardown or manual intervention), stop polling.
|
|
923
|
+
if (link.status !== 'degraded_poll') {
|
|
924
|
+
clearInterval(timer);
|
|
925
|
+
this._degradedPollTimers.delete(linkKey);
|
|
926
|
+
return;
|
|
927
|
+
}
|
|
928
|
+
try {
|
|
929
|
+
// Attempt repair. Reset attempt counter so repairLink doesn't
|
|
930
|
+
// immediately re-enter degraded_poll on failure.
|
|
931
|
+
this._repairAttempts.set(linkKey, 0);
|
|
932
|
+
yield this.ledger.setStatus(link, 'repairing');
|
|
933
|
+
yield this.repairLink(linkKey);
|
|
934
|
+
// If repairLink succeeded, link is now 'live' — stop polling.
|
|
935
|
+
if (link.status === 'live') {
|
|
936
|
+
clearInterval(timer);
|
|
937
|
+
this._degradedPollTimers.delete(linkKey);
|
|
938
|
+
}
|
|
939
|
+
}
|
|
940
|
+
catch (_a) {
|
|
941
|
+
// Repair failed — restore degraded_poll status so the timer continues.
|
|
942
|
+
// This is critical: repairLink sets status to 'repairing' internally,
|
|
943
|
+
// and if we don't restore degraded_poll, the next tick would see
|
|
944
|
+
// status !== 'degraded_poll' and stop the timer permanently.
|
|
945
|
+
yield this.ledger.setStatus(link, 'degraded_poll');
|
|
946
|
+
}
|
|
947
|
+
}), interval);
|
|
948
|
+
this._degradedPollTimers.set(linkKey, timer);
|
|
949
|
+
});
|
|
950
|
+
}
|
|
433
951
|
/**
|
|
434
952
|
* Tears down all live subscriptions and push listeners.
|
|
435
953
|
*/
|
|
436
954
|
teardownLiveSync() {
|
|
437
955
|
return __awaiter(this, void 0, void 0, function* () {
|
|
956
|
+
// Increment generation to invalidate all in-flight async operations
|
|
957
|
+
// (repairs, retry timers, degraded-poll ticks). Any async work that
|
|
958
|
+
// captured the previous generation will bail on its next checkpoint.
|
|
959
|
+
this._syncGeneration++;
|
|
438
960
|
// Clear the push debounce timer.
|
|
439
961
|
if (this._pushDebounceTimer) {
|
|
440
962
|
clearTimeout(this._pushDebounceTimer);
|
|
@@ -462,6 +984,23 @@ export class SyncEngineLevel {
|
|
|
462
984
|
}
|
|
463
985
|
}
|
|
464
986
|
this._localSubscriptions = [];
|
|
987
|
+
// Clear degraded-poll timers and repair state.
|
|
988
|
+
for (const timer of this._degradedPollTimers.values()) {
|
|
989
|
+
clearInterval(timer);
|
|
990
|
+
}
|
|
991
|
+
this._degradedPollTimers.clear();
|
|
992
|
+
this._repairAttempts.clear();
|
|
993
|
+
this._activeRepairs.clear();
|
|
994
|
+
for (const timer of this._repairRetryTimers.values()) {
|
|
995
|
+
clearTimeout(timer);
|
|
996
|
+
}
|
|
997
|
+
this._repairRetryTimers.clear();
|
|
998
|
+
this._repairContext.clear();
|
|
999
|
+
// Clear closure evaluation contexts.
|
|
1000
|
+
this._closureContexts.clear();
|
|
1001
|
+
// Clear the in-memory link and runtime state.
|
|
1002
|
+
this._activeLinks.clear();
|
|
1003
|
+
this._linkRuntimes.clear();
|
|
465
1004
|
});
|
|
466
1005
|
}
|
|
467
1006
|
// ---------------------------------------------------------------------------
|
|
@@ -473,47 +1012,118 @@ export class SyncEngineLevel {
|
|
|
473
1012
|
*/
|
|
474
1013
|
openLivePullSubscription(target) {
|
|
475
1014
|
return __awaiter(this, void 0, void 0, function* () {
|
|
1015
|
+
var _a, _b;
|
|
476
1016
|
const { did, delegateDid, dwnUrl, protocol } = target;
|
|
477
|
-
// Resolve the cursor from the
|
|
1017
|
+
// Resolve the cursor from the link's pull checkpoint (preferred) or legacy storage.
|
|
478
1018
|
const cursorKey = this.buildCursorKey(did, dwnUrl, protocol);
|
|
479
|
-
const
|
|
1019
|
+
const link = this._activeLinks.get(cursorKey);
|
|
1020
|
+
const cursor = (_a = link === null || link === void 0 ? void 0 : link.pull.contiguousAppliedToken) !== null && _a !== void 0 ? _a : yield this.getCursor(cursorKey);
|
|
480
1021
|
// Build the MessagesSubscribe filters.
|
|
481
|
-
|
|
1022
|
+
// When the link has protocolPathPrefixes, include them in the filter so the
|
|
1023
|
+
// EventLog delivers only matching events (server-side filtering). This replaces
|
|
1024
|
+
// the less efficient agent-side isEventInScope filtering for the pull path.
|
|
1025
|
+
// Note: only the first prefix is used as the MessagesFilter field because
|
|
1026
|
+
// MessagesFilter.protocolPathPrefix is a single string. Multiple prefixes
|
|
1027
|
+
// would need multiple filters (OR semantics) — for now we use the first one.
|
|
1028
|
+
const protocolPathPrefix = (link === null || link === void 0 ? void 0 : link.scope.kind) === 'protocol'
|
|
1029
|
+
? (_b = link.scope.protocolPathPrefixes) === null || _b === void 0 ? void 0 : _b[0]
|
|
1030
|
+
: undefined;
|
|
1031
|
+
const filters = protocol
|
|
1032
|
+
? [Object.assign({ protocol }, (protocolPathPrefix ? { protocolPathPrefix } : {}))]
|
|
1033
|
+
: [];
|
|
482
1034
|
// Look up permission grant for MessagesSubscribe if using a delegate.
|
|
1035
|
+
// The unified scope matching in AgentPermissionsApi accepts a
|
|
1036
|
+
// Messages.Read grant for MessagesSubscribe requests, so a single
|
|
1037
|
+
// lookup is sufficient.
|
|
483
1038
|
let permissionGrantId;
|
|
484
1039
|
if (delegateDid) {
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
permissionGrantId = grant.grant.id;
|
|
494
|
-
}
|
|
495
|
-
catch (_a) {
|
|
496
|
-
// Fall back to trying MessagesRead which is a unified scope.
|
|
497
|
-
const grant = yield this._permissionsApi.getPermissionForRequest({
|
|
498
|
-
connectedDid: did,
|
|
499
|
-
messageType: DwnInterface.MessagesRead,
|
|
500
|
-
delegateDid,
|
|
501
|
-
protocol,
|
|
502
|
-
cached: true
|
|
503
|
-
});
|
|
504
|
-
permissionGrantId = grant.grant.id;
|
|
505
|
-
}
|
|
1040
|
+
const grant = yield this._permissionsApi.getPermissionForRequest({
|
|
1041
|
+
connectedDid: did,
|
|
1042
|
+
messageType: DwnInterface.MessagesSubscribe,
|
|
1043
|
+
delegateDid,
|
|
1044
|
+
protocol,
|
|
1045
|
+
cached: true
|
|
1046
|
+
});
|
|
1047
|
+
permissionGrantId = grant.grant.id;
|
|
506
1048
|
}
|
|
507
1049
|
// Define the subscription handler that processes incoming events.
|
|
1050
|
+
// NOTE: The WebSocket client fires handlers without awaiting (fire-and-forget),
|
|
1051
|
+
// so multiple handlers can be in-flight concurrently. The ordinal tracker
|
|
1052
|
+
// ensures the checkpoint advances only when all earlier deliveries are committed.
|
|
508
1053
|
const subscriptionHandler = (subMessage) => __awaiter(this, void 0, void 0, function* () {
|
|
509
1054
|
if (subMessage.type === 'eose') {
|
|
510
|
-
// End-of-stored-events — catch-up complete
|
|
511
|
-
|
|
512
|
-
|
|
1055
|
+
// End-of-stored-events — catch-up complete.
|
|
1056
|
+
if (link) {
|
|
1057
|
+
// Guard: if the link transitioned to repairing while catch-up events
|
|
1058
|
+
// were being processed, skip all mutations — repair owns the state now.
|
|
1059
|
+
if (link.status !== 'live' && link.status !== 'initializing') {
|
|
1060
|
+
return;
|
|
1061
|
+
}
|
|
1062
|
+
if (!ReplicationLedger.validateTokenDomain(link.pull, subMessage.cursor)) {
|
|
1063
|
+
console.warn(`SyncEngineLevel: Token domain mismatch on EOSE for ${did} -> ${dwnUrl}, transitioning to repairing`);
|
|
1064
|
+
yield this.transitionToRepairing(cursorKey, link);
|
|
1065
|
+
return;
|
|
1066
|
+
}
|
|
1067
|
+
ReplicationLedger.setReceivedToken(link.pull, subMessage.cursor);
|
|
1068
|
+
// Drain committed entries. Do NOT unconditionally advance to the
|
|
1069
|
+
// EOSE cursor — earlier stored events may still be in-flight
|
|
1070
|
+
// (handlers are fire-and-forget). The checkpoint advances only as
|
|
1071
|
+
// far as the contiguous drain reaches.
|
|
1072
|
+
this.drainCommittedPull(cursorKey);
|
|
1073
|
+
yield this.ledger.saveLink(link);
|
|
1074
|
+
}
|
|
1075
|
+
else {
|
|
1076
|
+
yield this.setCursor(cursorKey, subMessage.cursor);
|
|
1077
|
+
}
|
|
1078
|
+
// Transport is reachable — set connectivity to online.
|
|
1079
|
+
if (link) {
|
|
1080
|
+
const prevEoseConnectivity = link.connectivity;
|
|
1081
|
+
link.connectivity = 'online';
|
|
1082
|
+
if (prevEoseConnectivity !== 'online') {
|
|
1083
|
+
this.emitEvent({ type: 'link:connectivity-change', tenantDid: did, remoteEndpoint: dwnUrl, protocol, from: prevEoseConnectivity, to: 'online' });
|
|
1084
|
+
}
|
|
1085
|
+
}
|
|
1086
|
+
else {
|
|
1087
|
+
this._connectivityState = 'online';
|
|
1088
|
+
}
|
|
513
1089
|
return;
|
|
514
1090
|
}
|
|
515
1091
|
if (subMessage.type === 'event') {
|
|
516
1092
|
const event = subMessage.event;
|
|
1093
|
+
// Guard: if the link is not live (e.g., repairing, degraded_poll, paused),
|
|
1094
|
+
// skip all processing. Old subscription handlers may still fire after the
|
|
1095
|
+
// link transitions — these events should be ignored entirely, not just
|
|
1096
|
+
// skipped at the checkpoint level.
|
|
1097
|
+
if (link && link.status !== 'live' && link.status !== 'initializing') {
|
|
1098
|
+
return;
|
|
1099
|
+
}
|
|
1100
|
+
// Domain validation: reject tokens from a different stream/epoch.
|
|
1101
|
+
if (link && !ReplicationLedger.validateTokenDomain(link.pull, subMessage.cursor)) {
|
|
1102
|
+
console.warn(`SyncEngineLevel: Token domain mismatch for ${did} -> ${dwnUrl}, transitioning to repairing`);
|
|
1103
|
+
yield this.transitionToRepairing(cursorKey, link);
|
|
1104
|
+
return;
|
|
1105
|
+
}
|
|
1106
|
+
// Subset scope filtering: if the link has protocolPath/contextId prefixes,
|
|
1107
|
+
// skip events that don't match. This is agent-side filtering because
|
|
1108
|
+
// MessagesSubscribe only supports protocol-level filtering today.
|
|
1109
|
+
//
|
|
1110
|
+
// Skipped events MUST advance contiguousAppliedToken — otherwise the
|
|
1111
|
+
// link would replay the same filtered-out events indefinitely after
|
|
1112
|
+
// reconnect/repair. This is safe because the event is intentionally
|
|
1113
|
+
// excluded from this scope and doesn't need processing.
|
|
1114
|
+
if (link && !isEventInScope(event.message, link.scope)) {
|
|
1115
|
+
ReplicationLedger.setReceivedToken(link.pull, subMessage.cursor);
|
|
1116
|
+
ReplicationLedger.commitContiguousToken(link.pull, subMessage.cursor);
|
|
1117
|
+
yield this.ledger.saveLink(link);
|
|
1118
|
+
return;
|
|
1119
|
+
}
|
|
1120
|
+
// Assign a delivery ordinal BEFORE async processing begins.
|
|
1121
|
+
// This captures the delivery order even if processing completes out of order.
|
|
1122
|
+
const rt = link ? this.getOrCreateRuntime(cursorKey) : undefined;
|
|
1123
|
+
const ordinal = rt ? rt.nextDeliveryOrdinal++ : -1;
|
|
1124
|
+
if (rt) {
|
|
1125
|
+
rt.inflight.set(ordinal, { ordinal, token: subMessage.cursor, committed: false });
|
|
1126
|
+
}
|
|
517
1127
|
try {
|
|
518
1128
|
// Extract inline data from the event (available for records <= 30 KB).
|
|
519
1129
|
let dataStream = this.extractDataStream(event);
|
|
@@ -532,12 +1142,90 @@ export class SyncEngineLevel {
|
|
|
532
1142
|
}
|
|
533
1143
|
}
|
|
534
1144
|
yield this.agent.dwn.processRawMessage(did, event.message, { dataStream });
|
|
1145
|
+
// Invalidate closure cache entries that may be affected by this message.
|
|
1146
|
+
// Must run before closure validation so subsequent evaluations in the
|
|
1147
|
+
// same session see the updated local state.
|
|
1148
|
+
const closureCtxForInvalidation = this._closureContexts.get(did);
|
|
1149
|
+
if (closureCtxForInvalidation) {
|
|
1150
|
+
invalidateClosureCache(closureCtxForInvalidation, event.message);
|
|
1151
|
+
}
|
|
1152
|
+
// Closure validation for scoped subset sync (Phase 3).
|
|
1153
|
+
// For protocol-scoped links, verify that all hard dependencies for
|
|
1154
|
+
// this operation are locally present before considering it committed.
|
|
1155
|
+
// Full-tenant scope bypasses this entirely (returns complete with 0 queries).
|
|
1156
|
+
if (link && link.scope.kind === 'protocol') {
|
|
1157
|
+
const messageStore = this.agent.dwn.node.storage.messageStore;
|
|
1158
|
+
let closureCtx = this._closureContexts.get(did);
|
|
1159
|
+
if (!closureCtx) {
|
|
1160
|
+
closureCtx = createClosureContext(did);
|
|
1161
|
+
this._closureContexts.set(did, closureCtx);
|
|
1162
|
+
}
|
|
1163
|
+
const closureResult = yield evaluateClosure(event.message, messageStore, link.scope, closureCtx);
|
|
1164
|
+
if (!closureResult.complete) {
|
|
1165
|
+
console.warn(`SyncEngineLevel: Closure incomplete for ${did} -> ${dwnUrl}: ` +
|
|
1166
|
+
`${closureResult.failure.code} — ${closureResult.failure.detail}`);
|
|
1167
|
+
yield this.transitionToRepairing(cursorKey, link);
|
|
1168
|
+
return;
|
|
1169
|
+
}
|
|
1170
|
+
}
|
|
1171
|
+
// Squash convergence: processRawMessage triggers the DWN's built-in
|
|
1172
|
+
// squash resumable task (performRecordsSquash) which runs inline and
|
|
1173
|
+
// handles subset consumers correctly:
|
|
1174
|
+
// - If older siblings are locally present → purges them
|
|
1175
|
+
// - If squash arrives before older siblings → backstop rejects them (409)
|
|
1176
|
+
// - If no older siblings are local → no-op (correct)
|
|
1177
|
+
// Both sync orderings (squash-first or siblings-first) converge to
|
|
1178
|
+
// the same final state. No additional sync-engine side-effect is needed.
|
|
1179
|
+
// Track this CID for echo-loop suppression, scoped to the source endpoint.
|
|
1180
|
+
const pulledCid = yield Message.getCid(event.message);
|
|
1181
|
+
this._recentlyPulledCids.set(`${pulledCid}|${dwnUrl}`, Date.now() + SyncEngineLevel.ECHO_SUPPRESS_TTL_MS);
|
|
1182
|
+
this.evictExpiredEchoEntries();
|
|
1183
|
+
// Mark this ordinal as committed and drain the checkpoint.
|
|
1184
|
+
// Guard: if the link transitioned to repairing while this handler was
|
|
1185
|
+
// in-flight (e.g., an earlier ordinal's handler failed concurrently),
|
|
1186
|
+
// skip all state mutations — the repair process owns progression now.
|
|
1187
|
+
if (link && rt && link.status === 'live') {
|
|
1188
|
+
const entry = rt.inflight.get(ordinal);
|
|
1189
|
+
if (entry) {
|
|
1190
|
+
entry.committed = true;
|
|
1191
|
+
}
|
|
1192
|
+
ReplicationLedger.setReceivedToken(link.pull, subMessage.cursor);
|
|
1193
|
+
const drained = this.drainCommittedPull(cursorKey);
|
|
1194
|
+
if (drained > 0) {
|
|
1195
|
+
yield this.ledger.saveLink(link);
|
|
1196
|
+
// Emit after durable save — "advanced" means persisted.
|
|
1197
|
+
if (link.pull.contiguousAppliedToken) {
|
|
1198
|
+
this.emitEvent({
|
|
1199
|
+
type: 'checkpoint:pull-advance',
|
|
1200
|
+
tenantDid: link.tenantDid,
|
|
1201
|
+
remoteEndpoint: link.remoteEndpoint,
|
|
1202
|
+
protocol: link.protocol,
|
|
1203
|
+
position: link.pull.contiguousAppliedToken.position,
|
|
1204
|
+
messageCid: link.pull.contiguousAppliedToken.messageCid,
|
|
1205
|
+
});
|
|
1206
|
+
}
|
|
1207
|
+
}
|
|
1208
|
+
// Overflow: too many in-flight ordinals without draining.
|
|
1209
|
+
if (rt.inflight.size > MAX_PENDING_TOKENS) {
|
|
1210
|
+
console.warn(`SyncEngineLevel: Pull in-flight overflow for ${did} -> ${dwnUrl}, transitioning to repairing`);
|
|
1211
|
+
yield this.transitionToRepairing(cursorKey, link);
|
|
1212
|
+
}
|
|
1213
|
+
}
|
|
1214
|
+
else if (!link) {
|
|
1215
|
+
// Legacy path: no link available, use simple cursor persistence.
|
|
1216
|
+
yield this.setCursor(cursorKey, subMessage.cursor);
|
|
1217
|
+
}
|
|
535
1218
|
}
|
|
536
1219
|
catch (error) {
|
|
537
1220
|
console.error(`SyncEngineLevel: Error processing live-pull event for ${did}`, error);
|
|
1221
|
+
// A failed processRawMessage means local state is incomplete.
|
|
1222
|
+
// Transition to repairing immediately — do NOT advance the checkpoint
|
|
1223
|
+
// past this failure or let later ordinals commit past it. SMT
|
|
1224
|
+
// reconciliation will discover and fill the gap.
|
|
1225
|
+
if (link) {
|
|
1226
|
+
yield this.transitionToRepairing(cursorKey, link);
|
|
1227
|
+
}
|
|
538
1228
|
}
|
|
539
|
-
// Persist cursor for resume on reconnect.
|
|
540
|
-
yield this.setCursor(cursorKey, subMessage.cursor);
|
|
541
1229
|
}
|
|
542
1230
|
});
|
|
543
1231
|
// Construct the subscribe message and send it directly to the specific
|
|
@@ -559,7 +1247,10 @@ export class SyncEngineLevel {
|
|
|
559
1247
|
// Build a resubscribe factory so the WebSocket client can resume with
|
|
560
1248
|
// a fresh cursor-stamped message after reconnection.
|
|
561
1249
|
const resubscribeFactory = (resumeCursor) => __awaiter(this, void 0, void 0, function* () {
|
|
562
|
-
|
|
1250
|
+
var _a;
|
|
1251
|
+
// On reconnect, use the latest durable checkpoint position if available.
|
|
1252
|
+
const effectiveCursor = (_a = resumeCursor !== null && resumeCursor !== void 0 ? resumeCursor : link === null || link === void 0 ? void 0 : link.pull.contiguousAppliedToken) !== null && _a !== void 0 ? _a : cursor;
|
|
1253
|
+
const resumeRequest = Object.assign(Object.assign({}, subscribeRequest), { messageParams: Object.assign(Object.assign({}, subscribeRequest.messageParams), { cursor: effectiveCursor }) });
|
|
563
1254
|
const { message: resumeMsg } = yield this.agent.dwn.processRequest(resumeRequest);
|
|
564
1255
|
if (!resumeMsg) {
|
|
565
1256
|
throw new Error(`SyncEngineLevel: Failed to construct resume MessagesSubscribe for ${dwnUrl}`);
|
|
@@ -579,9 +1270,15 @@ export class SyncEngineLevel {
|
|
|
579
1270
|
resubscribeFactory,
|
|
580
1271
|
},
|
|
581
1272
|
});
|
|
1273
|
+
if (reply.status.code === 410) {
|
|
1274
|
+
// ProgressGap — the cursor is no longer replayable. The link needs repair.
|
|
1275
|
+
const gapError = new Error(`SyncEngineLevel: ProgressGap for ${did} -> ${dwnUrl}: ${reply.status.detail}`);
|
|
1276
|
+
gapError.isProgressGap = true;
|
|
1277
|
+
gapError.gapInfo = reply.error;
|
|
1278
|
+
throw gapError;
|
|
1279
|
+
}
|
|
582
1280
|
if (reply.status.code !== 200 || !reply.subscription) {
|
|
583
|
-
|
|
584
|
-
return;
|
|
1281
|
+
throw new Error(`SyncEngineLevel: MessagesSubscribe failed for ${did} -> ${dwnUrl}: ${reply.status.code} ${reply.status.detail}`);
|
|
585
1282
|
}
|
|
586
1283
|
this._liveSubscriptions.push({
|
|
587
1284
|
did,
|
|
@@ -590,7 +1287,15 @@ export class SyncEngineLevel {
|
|
|
590
1287
|
protocol,
|
|
591
1288
|
close: () => __awaiter(this, void 0, void 0, function* () { yield reply.subscription.close(); }),
|
|
592
1289
|
});
|
|
593
|
-
|
|
1290
|
+
// Set per-link connectivity to online after successful subscription setup.
|
|
1291
|
+
const pullLink = this._activeLinks.get(this.buildCursorKey(did, dwnUrl, protocol));
|
|
1292
|
+
if (pullLink) {
|
|
1293
|
+
const prevPullConnectivity = pullLink.connectivity;
|
|
1294
|
+
pullLink.connectivity = 'online';
|
|
1295
|
+
if (prevPullConnectivity !== 'online') {
|
|
1296
|
+
this.emitEvent({ type: 'link:connectivity-change', tenantDid: did, remoteEndpoint: dwnUrl, protocol, from: prevPullConnectivity, to: 'online' });
|
|
1297
|
+
}
|
|
1298
|
+
}
|
|
594
1299
|
});
|
|
595
1300
|
}
|
|
596
1301
|
// ---------------------------------------------------------------------------
|
|
@@ -610,7 +1315,7 @@ export class SyncEngineLevel {
|
|
|
610
1315
|
if (delegateDid) {
|
|
611
1316
|
const grant = yield this._permissionsApi.getPermissionForRequest({
|
|
612
1317
|
connectedDid: did,
|
|
613
|
-
messageType: DwnInterface.
|
|
1318
|
+
messageType: DwnInterface.MessagesSubscribe,
|
|
614
1319
|
delegateDid,
|
|
615
1320
|
protocol,
|
|
616
1321
|
cached: true,
|
|
@@ -622,18 +1327,46 @@ export class SyncEngineLevel {
|
|
|
622
1327
|
if (subMessage.type !== 'event') {
|
|
623
1328
|
return;
|
|
624
1329
|
}
|
|
1330
|
+
// Subset scope filtering for push: only push events that match the
|
|
1331
|
+
// link's scope prefixes. Events outside the scope are not our responsibility.
|
|
1332
|
+
// Skipped events MUST advance the push checkpoint to prevent infinite
|
|
1333
|
+
// replay after repair/reconnect (same reason as the pull side).
|
|
1334
|
+
const pushLink = this._activeLinks.get(this.buildCursorKey(did, dwnUrl, protocol));
|
|
1335
|
+
if (pushLink && !isEventInScope(subMessage.event.message, pushLink.scope)) {
|
|
1336
|
+
// Guard: only mutate durable state when the link is live/initializing.
|
|
1337
|
+
// During repair/degraded_poll, orchestration owns checkpoint progression.
|
|
1338
|
+
if (pushLink.status !== 'live' && pushLink.status !== 'initializing') {
|
|
1339
|
+
return;
|
|
1340
|
+
}
|
|
1341
|
+
// Validate token domain before committing — a stream/epoch mismatch
|
|
1342
|
+
// on the local EventLog should trigger repair, not silently overwrite.
|
|
1343
|
+
if (!ReplicationLedger.validateTokenDomain(pushLink.push, subMessage.cursor)) {
|
|
1344
|
+
yield this.transitionToRepairing(this.buildCursorKey(did, dwnUrl, protocol), pushLink);
|
|
1345
|
+
return;
|
|
1346
|
+
}
|
|
1347
|
+
ReplicationLedger.setReceivedToken(pushLink.push, subMessage.cursor);
|
|
1348
|
+
ReplicationLedger.commitContiguousToken(pushLink.push, subMessage.cursor);
|
|
1349
|
+
yield this.ledger.saveLink(pushLink);
|
|
1350
|
+
return;
|
|
1351
|
+
}
|
|
625
1352
|
// Accumulate the message CID for a debounced push.
|
|
626
1353
|
const targetKey = this.buildCursorKey(did, dwnUrl, protocol);
|
|
627
1354
|
const cid = yield Message.getCid(subMessage.event.message);
|
|
628
1355
|
if (cid === undefined) {
|
|
629
1356
|
return;
|
|
630
1357
|
}
|
|
1358
|
+
// Echo-loop suppression: skip CIDs that were recently pulled from this
|
|
1359
|
+
// specific remote. A message pulled from Provider A is only suppressed
|
|
1360
|
+
// for push to A — it still fans out to Provider B and C.
|
|
1361
|
+
if (this.isRecentlyPulled(cid, dwnUrl)) {
|
|
1362
|
+
return;
|
|
1363
|
+
}
|
|
631
1364
|
let pending = this._pendingPushCids.get(targetKey);
|
|
632
1365
|
if (!pending) {
|
|
633
|
-
pending = { did, dwnUrl, delegateDid, protocol,
|
|
1366
|
+
pending = { did, dwnUrl, delegateDid, protocol, entries: [] };
|
|
634
1367
|
this._pendingPushCids.set(targetKey, pending);
|
|
635
1368
|
}
|
|
636
|
-
pending.
|
|
1369
|
+
pending.entries.push({ cid, localToken: subMessage.cursor });
|
|
637
1370
|
// Debounce the push.
|
|
638
1371
|
if (this._pushDebounceTimer) {
|
|
639
1372
|
clearTimeout(this._pushDebounceTimer);
|
|
@@ -643,18 +1376,20 @@ export class SyncEngineLevel {
|
|
|
643
1376
|
}, PUSH_DEBOUNCE_MS);
|
|
644
1377
|
});
|
|
645
1378
|
// Process the local subscription request.
|
|
1379
|
+
// When a push cursor is provided (e.g., after repair), the local subscription
|
|
1380
|
+
// replays events from that position, closing the race window where local
|
|
1381
|
+
// writes during repair would otherwise be missed by push-on-write.
|
|
646
1382
|
const response = yield this.agent.dwn.processRequest({
|
|
647
1383
|
author: did,
|
|
648
1384
|
target: did,
|
|
649
1385
|
messageType: DwnInterface.MessagesSubscribe,
|
|
650
1386
|
granteeDid: delegateDid,
|
|
651
|
-
messageParams: { filters, permissionGrantId },
|
|
1387
|
+
messageParams: { filters, permissionGrantId, cursor: target.pushCursor },
|
|
652
1388
|
subscriptionHandler: subscriptionHandler,
|
|
653
1389
|
});
|
|
654
1390
|
const reply = response.reply;
|
|
655
1391
|
if (reply.status.code !== 200 || !reply.subscription) {
|
|
656
|
-
|
|
657
|
-
return;
|
|
1392
|
+
throw new Error(`SyncEngineLevel: Local MessagesSubscribe failed for ${did}: ${reply.status.code} ${reply.status.detail}`);
|
|
658
1393
|
}
|
|
659
1394
|
this._localSubscriptions.push({
|
|
660
1395
|
did,
|
|
@@ -671,25 +1406,88 @@ export class SyncEngineLevel {
|
|
|
671
1406
|
flushPendingPushes() {
|
|
672
1407
|
return __awaiter(this, void 0, void 0, function* () {
|
|
673
1408
|
this._pushDebounceTimer = undefined;
|
|
674
|
-
const
|
|
1409
|
+
const batches = [...this._pendingPushCids.entries()];
|
|
675
1410
|
this._pendingPushCids.clear();
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
|
|
1411
|
+
// Push to all endpoints in parallel — each target is independent.
|
|
1412
|
+
yield Promise.all(batches.map((_a) => __awaiter(this, [_a], void 0, function* ([targetKey, pending]) {
|
|
1413
|
+
const { did, dwnUrl, delegateDid, protocol, entries: pushEntries } = pending;
|
|
1414
|
+
if (pushEntries.length === 0) {
|
|
1415
|
+
return;
|
|
680
1416
|
}
|
|
1417
|
+
const cids = pushEntries.map(e => e.cid);
|
|
681
1418
|
try {
|
|
682
|
-
yield pushMessages({
|
|
1419
|
+
const result = yield pushMessages({
|
|
683
1420
|
did, dwnUrl, delegateDid, protocol,
|
|
684
1421
|
messageCids: cids,
|
|
685
1422
|
agent: this.agent,
|
|
686
1423
|
permissionsApi: this._permissionsApi,
|
|
687
1424
|
});
|
|
1425
|
+
// Advance the push checkpoint for successfully pushed entries.
|
|
1426
|
+
// Push is sequential (single batch, in-order processing) so we can
|
|
1427
|
+
// commit directly without ordinal tracking — there's no concurrent
|
|
1428
|
+
// completion to reorder.
|
|
1429
|
+
const link = this._activeLinks.get(targetKey);
|
|
1430
|
+
if (link) {
|
|
1431
|
+
const succeededSet = new Set(result.succeeded);
|
|
1432
|
+
// Track highest contiguous success: if a CID fails, we stop advancing.
|
|
1433
|
+
let hitFailure = false;
|
|
1434
|
+
for (const entry of pushEntries) {
|
|
1435
|
+
if (hitFailure) {
|
|
1436
|
+
break;
|
|
1437
|
+
}
|
|
1438
|
+
if (succeededSet.has(entry.cid) && entry.localToken) {
|
|
1439
|
+
if (!ReplicationLedger.validateTokenDomain(link.push, entry.localToken)) {
|
|
1440
|
+
console.warn(`SyncEngineLevel: Push checkpoint domain mismatch for ${did} -> ${dwnUrl}, transitioning to repairing`);
|
|
1441
|
+
yield this.transitionToRepairing(targetKey, link);
|
|
1442
|
+
break;
|
|
1443
|
+
}
|
|
1444
|
+
ReplicationLedger.setReceivedToken(link.push, entry.localToken);
|
|
1445
|
+
ReplicationLedger.commitContiguousToken(link.push, entry.localToken);
|
|
1446
|
+
}
|
|
1447
|
+
else {
|
|
1448
|
+
// This CID failed or had no token — stop advancing.
|
|
1449
|
+
hitFailure = true;
|
|
1450
|
+
}
|
|
1451
|
+
}
|
|
1452
|
+
yield this.ledger.saveLink(link);
|
|
1453
|
+
}
|
|
1454
|
+
// Re-queue failed entries so they are retried on the next debounce
|
|
1455
|
+
// cycle (or picked up by the SMT integrity check).
|
|
1456
|
+
if (result.failed.length > 0) {
|
|
1457
|
+
console.error(`SyncEngineLevel: Push-on-write failed for ${did} -> ${dwnUrl}: ${result.failed.length} of ${cids.length} messages failed`);
|
|
1458
|
+
const failedSet = new Set(result.failed);
|
|
1459
|
+
const failedEntries = pushEntries.filter(e => failedSet.has(e.cid));
|
|
1460
|
+
let requeued = this._pendingPushCids.get(targetKey);
|
|
1461
|
+
if (!requeued) {
|
|
1462
|
+
requeued = { did, dwnUrl, delegateDid, protocol, entries: [] };
|
|
1463
|
+
this._pendingPushCids.set(targetKey, requeued);
|
|
1464
|
+
}
|
|
1465
|
+
requeued.entries.push(...failedEntries);
|
|
1466
|
+
// Schedule a retry after a short delay.
|
|
1467
|
+
if (!this._pushDebounceTimer) {
|
|
1468
|
+
this._pushDebounceTimer = setTimeout(() => {
|
|
1469
|
+
void this.flushPendingPushes();
|
|
1470
|
+
}, PUSH_DEBOUNCE_MS * 4); // Back off: 1 second instead of 250ms.
|
|
1471
|
+
}
|
|
1472
|
+
}
|
|
688
1473
|
}
|
|
689
1474
|
catch (error) {
|
|
1475
|
+
// Truly unexpected error (not per-message failure). Re-queue entire
|
|
1476
|
+
// batch so entries aren't silently dropped from the debounce queue.
|
|
690
1477
|
console.error(`SyncEngineLevel: Push-on-write failed for ${did} -> ${dwnUrl}`, error);
|
|
1478
|
+
let requeued = this._pendingPushCids.get(targetKey);
|
|
1479
|
+
if (!requeued) {
|
|
1480
|
+
requeued = { did, dwnUrl, delegateDid, protocol, entries: [] };
|
|
1481
|
+
this._pendingPushCids.set(targetKey, requeued);
|
|
1482
|
+
}
|
|
1483
|
+
requeued.entries.push(...pushEntries);
|
|
1484
|
+
if (!this._pushDebounceTimer) {
|
|
1485
|
+
this._pushDebounceTimer = setTimeout(() => {
|
|
1486
|
+
void this.flushPendingPushes();
|
|
1487
|
+
}, PUSH_DEBOUNCE_MS * 4);
|
|
1488
|
+
}
|
|
691
1489
|
}
|
|
692
|
-
}
|
|
1490
|
+
})));
|
|
693
1491
|
});
|
|
694
1492
|
}
|
|
695
1493
|
// ---------------------------------------------------------------------------
|
|
@@ -699,11 +1497,31 @@ export class SyncEngineLevel {
|
|
|
699
1497
|
const base = `${did}${CURSOR_SEPARATOR}${dwnUrl}`;
|
|
700
1498
|
return protocol ? `${base}${CURSOR_SEPARATOR}${protocol}` : base;
|
|
701
1499
|
}
|
|
1500
|
+
/**
|
|
1501
|
+
* Retrieves a stored progress token. Handles migration from old string cursors:
|
|
1502
|
+
* if the stored value is a bare string (pre-ProgressToken format), it is treated
|
|
1503
|
+
* as absent — the sync engine will do a full SMT reconciliation on first startup
|
|
1504
|
+
* after upgrade, which is correct and safe.
|
|
1505
|
+
*/
|
|
702
1506
|
getCursor(key) {
|
|
703
1507
|
return __awaiter(this, void 0, void 0, function* () {
|
|
704
1508
|
const cursors = this._db.sublevel('syncCursors');
|
|
705
1509
|
try {
|
|
706
|
-
|
|
1510
|
+
const raw = yield cursors.get(key);
|
|
1511
|
+
try {
|
|
1512
|
+
const parsed = JSON.parse(raw);
|
|
1513
|
+
if (parsed && typeof parsed === 'object' &&
|
|
1514
|
+
typeof parsed.streamId === 'string' &&
|
|
1515
|
+
typeof parsed.epoch === 'string' &&
|
|
1516
|
+
typeof parsed.position === 'string' &&
|
|
1517
|
+
typeof parsed.messageCid === 'string') {
|
|
1518
|
+
return parsed;
|
|
1519
|
+
}
|
|
1520
|
+
}
|
|
1521
|
+
catch (_a) {
|
|
1522
|
+
// Not valid JSON (old string cursor) — treat as absent.
|
|
1523
|
+
}
|
|
1524
|
+
return undefined;
|
|
707
1525
|
}
|
|
708
1526
|
catch (error) {
|
|
709
1527
|
const e = error;
|
|
@@ -717,7 +1535,7 @@ export class SyncEngineLevel {
|
|
|
717
1535
|
setCursor(key, cursor) {
|
|
718
1536
|
return __awaiter(this, void 0, void 0, function* () {
|
|
719
1537
|
const cursors = this._db.sublevel('syncCursors');
|
|
720
|
-
yield cursors.put(key, cursor);
|
|
1538
|
+
yield cursors.put(key, JSON.stringify(cursor));
|
|
721
1539
|
});
|
|
722
1540
|
}
|
|
723
1541
|
// ---------------------------------------------------------------------------
|
|
@@ -862,80 +1680,6 @@ export class SyncEngineLevel {
|
|
|
862
1680
|
});
|
|
863
1681
|
}
|
|
864
1682
|
// ---------------------------------------------------------------------------
|
|
865
|
-
// Tree Diff — walk the SMT to find divergent leaf sets
|
|
866
|
-
// ---------------------------------------------------------------------------
|
|
867
|
-
/**
|
|
868
|
-
* Walks the local and remote SMTs in parallel, recursing into subtrees whose
|
|
869
|
-
* hashes differ, until reaching `MAX_DIFF_DEPTH` where leaves are enumerated.
|
|
870
|
-
*
|
|
871
|
-
* Returns the sets of messageCids that exist only locally or only remotely.
|
|
872
|
-
*/
|
|
873
|
-
walkTreeDiff(_a) {
|
|
874
|
-
return __awaiter(this, arguments, void 0, function* ({ did, dwnUrl, delegateDid, protocol }) {
|
|
875
|
-
const onlyLocal = [];
|
|
876
|
-
const onlyRemote = [];
|
|
877
|
-
// Hoist permission grant lookup — resolved once and reused for all subtree/leaf requests.
|
|
878
|
-
const permissionGrantId = yield this.getSyncPermissionGrantId(did, delegateDid, protocol);
|
|
879
|
-
// Gate remote HTTP requests through a semaphore so the binary tree walk
|
|
880
|
-
// doesn't produce an exponential burst of concurrent requests. Local
|
|
881
|
-
// DWN requests (in-process) are not gated.
|
|
882
|
-
const remoteSemaphore = new Semaphore(REMOTE_CONCURRENCY);
|
|
883
|
-
const walk = (prefix) => __awaiter(this, void 0, void 0, function* () {
|
|
884
|
-
// Get subtree hashes for this prefix from local and remote.
|
|
885
|
-
// Only the remote request is gated by the semaphore.
|
|
886
|
-
const [localHash, remoteHash] = yield Promise.all([
|
|
887
|
-
this.getLocalSubtreeHash(did, prefix, delegateDid, protocol, permissionGrantId),
|
|
888
|
-
remoteSemaphore.run(() => this.getRemoteSubtreeHash(did, dwnUrl, prefix, delegateDid, protocol, permissionGrantId)),
|
|
889
|
-
]);
|
|
890
|
-
// If hashes match, this subtree is identical — skip.
|
|
891
|
-
if (localHash === remoteHash) {
|
|
892
|
-
return;
|
|
893
|
-
}
|
|
894
|
-
// Short-circuit: if one side is the default (empty-subtree) hash, all entries
|
|
895
|
-
// on the other side are unique. Enumerate leaves directly instead of recursing
|
|
896
|
-
// further into the tree — this avoids an exponential walk when one DWN has
|
|
897
|
-
// entries that the other lacks entirely in this subtree.
|
|
898
|
-
const emptyHash = yield this.getDefaultHashHex(prefix.length);
|
|
899
|
-
if (remoteHash === emptyHash && localHash !== emptyHash) {
|
|
900
|
-
const localLeaves = yield this.getLocalLeaves(did, prefix, delegateDid, protocol, permissionGrantId);
|
|
901
|
-
onlyLocal.push(...localLeaves);
|
|
902
|
-
return;
|
|
903
|
-
}
|
|
904
|
-
if (localHash === emptyHash && remoteHash !== emptyHash) {
|
|
905
|
-
const remoteLeaves = yield remoteSemaphore.run(() => this.getRemoteLeaves(did, dwnUrl, prefix, delegateDid, protocol, permissionGrantId));
|
|
906
|
-
onlyRemote.push(...remoteLeaves);
|
|
907
|
-
return;
|
|
908
|
-
}
|
|
909
|
-
// If we've reached the maximum diff depth, enumerate leaves.
|
|
910
|
-
if (prefix.length >= MAX_DIFF_DEPTH) {
|
|
911
|
-
const [localLeaves, remoteLeaves] = yield Promise.all([
|
|
912
|
-
this.getLocalLeaves(did, prefix, delegateDid, protocol, permissionGrantId),
|
|
913
|
-
remoteSemaphore.run(() => this.getRemoteLeaves(did, dwnUrl, prefix, delegateDid, protocol, permissionGrantId)),
|
|
914
|
-
]);
|
|
915
|
-
const localSet = new Set(localLeaves);
|
|
916
|
-
const remoteSet = new Set(remoteLeaves);
|
|
917
|
-
for (const cid of localLeaves) {
|
|
918
|
-
if (!remoteSet.has(cid)) {
|
|
919
|
-
onlyLocal.push(cid);
|
|
920
|
-
}
|
|
921
|
-
}
|
|
922
|
-
for (const cid of remoteLeaves) {
|
|
923
|
-
if (!localSet.has(cid)) {
|
|
924
|
-
onlyRemote.push(cid);
|
|
925
|
-
}
|
|
926
|
-
}
|
|
927
|
-
return;
|
|
928
|
-
}
|
|
929
|
-
// Recurse into left (0) and right (1) children in parallel.
|
|
930
|
-
yield Promise.all([
|
|
931
|
-
walk(prefix + '0'),
|
|
932
|
-
walk(prefix + '1'),
|
|
933
|
-
]);
|
|
934
|
-
});
|
|
935
|
-
yield walk('');
|
|
936
|
-
return { onlyLocal, onlyRemote };
|
|
937
|
-
});
|
|
938
|
-
}
|
|
939
1683
|
// ---------------------------------------------------------------------------
|
|
940
1684
|
// Batched Diff — single round-trip set reconciliation
|
|
941
1685
|
// ---------------------------------------------------------------------------
|
|
@@ -982,7 +1726,8 @@ export class SyncEngineLevel {
|
|
|
982
1726
|
throw new Error(`SyncEngineLevel: diff failed with ${reply.status.code}: ${reply.status.detail}`);
|
|
983
1727
|
}
|
|
984
1728
|
// Step 3: Enumerate local leaves for prefixes the remote reported as onlyLocal.
|
|
985
|
-
|
|
1729
|
+
// Reuse the same grant ID from step 2 (avoids redundant lookup).
|
|
1730
|
+
const permissionGrantIdForLeaves = permissionGrantId;
|
|
986
1731
|
const onlyLocalCids = [];
|
|
987
1732
|
for (const prefix of (_b = reply.onlyLocal) !== null && _b !== void 0 ? _b : []) {
|
|
988
1733
|
const leaves = yield this.getLocalLeaves(did, prefix, delegateDid, protocol, permissionGrantIdForLeaves);
|
|
@@ -1073,30 +1818,6 @@ export class SyncEngineLevel {
|
|
|
1073
1818
|
return (_a = reply.hash) !== null && _a !== void 0 ? _a : '';
|
|
1074
1819
|
});
|
|
1075
1820
|
}
|
|
1076
|
-
getRemoteSubtreeHash(did, dwnUrl, prefix, delegateDid, protocol, permissionGrantId) {
|
|
1077
|
-
return __awaiter(this, void 0, void 0, function* () {
|
|
1078
|
-
var _a;
|
|
1079
|
-
const syncMessage = yield this.agent.dwn.processRequest({
|
|
1080
|
-
store: false,
|
|
1081
|
-
author: did,
|
|
1082
|
-
target: did,
|
|
1083
|
-
messageType: DwnInterface.MessagesSync,
|
|
1084
|
-
granteeDid: delegateDid,
|
|
1085
|
-
messageParams: {
|
|
1086
|
-
action: 'subtree',
|
|
1087
|
-
prefix,
|
|
1088
|
-
protocol,
|
|
1089
|
-
permissionGrantId
|
|
1090
|
-
}
|
|
1091
|
-
});
|
|
1092
|
-
const reply = yield this.agent.rpc.sendDwnRequest({
|
|
1093
|
-
dwnUrl,
|
|
1094
|
-
targetDid: did,
|
|
1095
|
-
message: syncMessage.message,
|
|
1096
|
-
});
|
|
1097
|
-
return (_a = reply.hash) !== null && _a !== void 0 ? _a : '';
|
|
1098
|
-
});
|
|
1099
|
-
}
|
|
1100
1821
|
/**
|
|
1101
1822
|
* Get all leaf messageCids under a given prefix from the local DWN.
|
|
1102
1823
|
*
|
|
@@ -1130,30 +1851,6 @@ export class SyncEngineLevel {
|
|
|
1130
1851
|
return (_a = reply.entries) !== null && _a !== void 0 ? _a : [];
|
|
1131
1852
|
});
|
|
1132
1853
|
}
|
|
1133
|
-
getRemoteLeaves(did, dwnUrl, prefix, delegateDid, protocol, permissionGrantId) {
|
|
1134
|
-
return __awaiter(this, void 0, void 0, function* () {
|
|
1135
|
-
var _a;
|
|
1136
|
-
const syncMessage = yield this.agent.dwn.processRequest({
|
|
1137
|
-
store: false,
|
|
1138
|
-
author: did,
|
|
1139
|
-
target: did,
|
|
1140
|
-
messageType: DwnInterface.MessagesSync,
|
|
1141
|
-
granteeDid: delegateDid,
|
|
1142
|
-
messageParams: {
|
|
1143
|
-
action: 'leaves',
|
|
1144
|
-
prefix,
|
|
1145
|
-
protocol,
|
|
1146
|
-
permissionGrantId
|
|
1147
|
-
}
|
|
1148
|
-
});
|
|
1149
|
-
const reply = yield this.agent.rpc.sendDwnRequest({
|
|
1150
|
-
dwnUrl,
|
|
1151
|
-
targetDid: did,
|
|
1152
|
-
message: syncMessage.message,
|
|
1153
|
-
});
|
|
1154
|
-
return (_a = reply.entries) !== null && _a !== void 0 ? _a : [];
|
|
1155
|
-
});
|
|
1156
|
-
}
|
|
1157
1854
|
// ---------------------------------------------------------------------------
|
|
1158
1855
|
// Pull / Push — delegates to standalone functions in sync-messages.ts
|
|
1159
1856
|
// ---------------------------------------------------------------------------
|
|
@@ -1174,6 +1871,51 @@ export class SyncEngineLevel {
|
|
|
1174
1871
|
});
|
|
1175
1872
|
});
|
|
1176
1873
|
}
|
|
1874
|
+
// ---------------------------------------------------------------------------
|
|
1875
|
+
// Echo-loop suppression
|
|
1876
|
+
// ---------------------------------------------------------------------------
|
|
1877
|
+
/**
|
|
1878
|
+
* Evicts expired entries from the echo-loop suppression cache.
|
|
1879
|
+
* Also enforces the size cap by evicting oldest entries first.
|
|
1880
|
+
*/
|
|
1881
|
+
evictExpiredEchoEntries() {
|
|
1882
|
+
const now = Date.now();
|
|
1883
|
+
// Evict expired entries.
|
|
1884
|
+
for (const [cid, expiry] of this._recentlyPulledCids) {
|
|
1885
|
+
if (now >= expiry) {
|
|
1886
|
+
this._recentlyPulledCids.delete(cid);
|
|
1887
|
+
}
|
|
1888
|
+
}
|
|
1889
|
+
// Enforce size cap by evicting oldest entries.
|
|
1890
|
+
if (this._recentlyPulledCids.size > SyncEngineLevel.ECHO_SUPPRESS_MAX_ENTRIES) {
|
|
1891
|
+
const excess = this._recentlyPulledCids.size - SyncEngineLevel.ECHO_SUPPRESS_MAX_ENTRIES;
|
|
1892
|
+
let evicted = 0;
|
|
1893
|
+
for (const key of this._recentlyPulledCids.keys()) {
|
|
1894
|
+
if (evicted >= excess) {
|
|
1895
|
+
break;
|
|
1896
|
+
}
|
|
1897
|
+
this._recentlyPulledCids.delete(key);
|
|
1898
|
+
evicted++;
|
|
1899
|
+
}
|
|
1900
|
+
}
|
|
1901
|
+
}
|
|
1902
|
+
/**
|
|
1903
|
+
* Checks whether a CID was recently pulled from a specific remote endpoint
|
|
1904
|
+
* and should not be pushed back to that same endpoint (echo-loop suppression).
|
|
1905
|
+
* Does not suppress pushes to other endpoints — multi-provider fan-out works.
|
|
1906
|
+
*/
|
|
1907
|
+
isRecentlyPulled(cid, dwnUrl) {
|
|
1908
|
+
const key = `${cid}|${dwnUrl}`;
|
|
1909
|
+
const expiry = this._recentlyPulledCids.get(key);
|
|
1910
|
+
if (expiry === undefined) {
|
|
1911
|
+
return false;
|
|
1912
|
+
}
|
|
1913
|
+
if (Date.now() >= expiry) {
|
|
1914
|
+
this._recentlyPulledCids.delete(key);
|
|
1915
|
+
return false;
|
|
1916
|
+
}
|
|
1917
|
+
return true;
|
|
1918
|
+
}
|
|
1177
1919
|
/**
|
|
1178
1920
|
* Reads missing messages from the local DWN and pushes them to the remote DWN
|
|
1179
1921
|
* in dependency order (topological sort).
|
|
@@ -1265,8 +2007,19 @@ export class SyncEngineLevel {
|
|
|
1265
2007
|
});
|
|
1266
2008
|
}
|
|
1267
2009
|
}
|
|
2010
|
+
/** TTL for echo-loop suppression cache entries: 60 seconds. */
SyncEngineLevel.ECHO_SUPPRESS_TTL_MS = 60 * 1000;
/** Hard cap on the number of entries kept in the echo-loop suppression cache. */
SyncEngineLevel.ECHO_SUPPRESS_MAX_ENTRIES = 10000;
/** Maximum consecutive failures before entering backoff. */
SyncEngineLevel.MAX_CONSECUTIVE_FAILURES = 5;
/** Backoff multiplier for consecutive failures (caps at 4x the configured interval). */
SyncEngineLevel.MAX_BACKOFF_MULTIPLIER = 4;
// ---------------------------------------------------------------------------
// Per-link repair and degraded-poll orchestration (Phase 2)
// ---------------------------------------------------------------------------
/** Maximum consecutive repair attempts before falling back to degraded_poll. */
SyncEngineLevel.MAX_REPAIR_ATTEMPTS = 3;
/** Backoff schedule for repair retries, in milliseconds. */
SyncEngineLevel.REPAIR_BACKOFF_MS = [1000, 3000, 10000];
|
|
1272
2025
|
//# sourceMappingURL=sync-engine-level.js.map
|