@pylonsync/sync 0.3.228 → 0.3.230
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/idb-warm-load.test.ts +144 -0
- package/src/index.ts +261 -64
- package/src/local-store.ts +57 -5
- package/src/multi-tab-orchestrator.ts +31 -5
- package/src/mutation-queue.ts +32 -3
- package/src/persistence.ts +69 -30
- package/src/round6-codex.test.ts +157 -0
- package/src/scenarios.test.ts +146 -0
- package/src/test-harness/server.ts +36 -0
- package/src/test-harness/transport.ts +16 -0
package/package.json
CHANGED
|
@@ -423,4 +423,148 @@ describe("IDB warm-load hydration", () => {
|
|
|
423
423
|
// Fast path — no warning expected on a trivial load.
|
|
424
424
|
expect(warned).toBe(false);
|
|
425
425
|
});
|
|
426
|
+
|
|
427
|
+
// IDB WRITE HANG (pins persistence.commit). A write tx that ABORTS
|
|
428
|
+
// (quota-exceeded, or any storage error) must let the persist promise
|
|
429
|
+
// SETTLE — not hang. The engine awaits the persist before advancing
|
|
430
|
+
// the cursor in enqueueApply, so a hung write would wedge the whole
|
|
431
|
+
// apply queue and silently kill live sync. Pre-fix saveRow/deleteRow/
|
|
432
|
+
// saveCursor registered only `oncomplete`, so an abort never resolved.
|
|
433
|
+
test("a write tx that aborts resolves (degrades) instead of hanging", async () => {
|
|
434
|
+
const origWarn = console.warn;
|
|
435
|
+
console.warn = () => {};
|
|
436
|
+
try {
|
|
437
|
+
const p = new IndexedDBPersistence("idb-abort-degrade");
|
|
438
|
+
await p.open();
|
|
439
|
+
const db = p.connection!;
|
|
440
|
+
const realTx = db.transaction.bind(db);
|
|
441
|
+
// Abort the NEXT readwrite tx right after handing it back.
|
|
442
|
+
let armed = true;
|
|
443
|
+
(db as unknown as { transaction: typeof db.transaction }).transaction = ((
|
|
444
|
+
...args: Parameters<typeof db.transaction>
|
|
445
|
+
) => {
|
|
446
|
+
const tx = realTx(...args);
|
|
447
|
+
if (armed && String(args[1]) === "readwrite") {
|
|
448
|
+
armed = false;
|
|
449
|
+
queueMicrotask(() => {
|
|
450
|
+
try {
|
|
451
|
+
tx.abort();
|
|
452
|
+
} catch {
|
|
453
|
+
/* already settled */
|
|
454
|
+
}
|
|
455
|
+
});
|
|
456
|
+
}
|
|
457
|
+
return tx;
|
|
458
|
+
}) as typeof db.transaction;
|
|
459
|
+
|
|
460
|
+
// Pre-fix: this promise never settles → the race rejects.
|
|
461
|
+
await Promise.race([
|
|
462
|
+
p.saveRow("Note", "n1", { id: "n1", title: "x" } as Row),
|
|
463
|
+
new Promise((_, reject) =>
|
|
464
|
+
setTimeout(() => reject(new Error("saveRow hung on abort")), 1000),
|
|
465
|
+
),
|
|
466
|
+
]);
|
|
467
|
+
|
|
468
|
+
// A subsequent (un-armed) write still commits — the engine degrades,
|
|
469
|
+
// it isn't permanently broken.
|
|
470
|
+
await p.saveRow("Note", "n2", { id: "n2", title: "y" } as Row);
|
|
471
|
+
const rows = await p.loadAll("Note");
|
|
472
|
+
expect(rows.some((r) => (r as { id?: string }).id === "n2")).toBe(true);
|
|
473
|
+
} finally {
|
|
474
|
+
console.warn = origWarn;
|
|
475
|
+
}
|
|
476
|
+
});
|
|
477
|
+
|
|
478
|
+
// CURSOR-AHEAD-OF-DISK (pins the persistDegraded gate). When a row write
|
|
479
|
+
// ABORTS, the row never reaches disk — so the engine MUST NOT persist a
|
|
480
|
+
// cursor past it, or the next cold start's warm-load skips that row
|
|
481
|
+
// forever (cursor ahead of replica). The in-memory cursor still advances
|
|
482
|
+
// (live session stays correct); only the ON-DISK cursor is held back so
|
|
483
|
+
// a restart re-pulls the gap. This is the regression test for the IDB-hang
|
|
484
|
+
// fix that (before this gate) traded a hang for silent data loss.
|
|
485
|
+
test("a row write that aborts holds the on-disk cursor back", async () => {
|
|
486
|
+
const origWarn = console.warn;
|
|
487
|
+
console.warn = () => {};
|
|
488
|
+
try {
|
|
489
|
+
const appName = "idb-cursor-drift";
|
|
490
|
+
const engine = makeEngine(appName);
|
|
491
|
+
await engine.start();
|
|
492
|
+
|
|
493
|
+
const internal = engine as unknown as {
|
|
494
|
+
persistence: IndexedDBPersistence;
|
|
495
|
+
cursor: { last_seq: number };
|
|
496
|
+
persistDegraded: boolean;
|
|
497
|
+
enqueueApply(
|
|
498
|
+
changes: unknown[],
|
|
499
|
+
targetCursor?: { last_seq: number },
|
|
500
|
+
): Promise<void>;
|
|
501
|
+
};
|
|
502
|
+
const persistence = internal.persistence;
|
|
503
|
+
// Cursor on disk after start() (server.serverSeq seed) — capture it
|
|
504
|
+
// so we assert it does NOT advance to 50 below.
|
|
505
|
+
const onDiskBefore = (await persistence.loadCursor())?.last_seq ?? 0;
|
|
506
|
+
|
|
507
|
+
// Abort the next ENTITIES (row) readwrite, leaving the separate
|
|
508
|
+
// CURSOR-store tx alone — mirrors a quota abort on a row write.
|
|
509
|
+
const db = persistence.connection!;
|
|
510
|
+
const realTx = db.transaction.bind(db);
|
|
511
|
+
let armed = true;
|
|
512
|
+
(db as unknown as { transaction: typeof db.transaction }).transaction = ((
|
|
513
|
+
...args: Parameters<typeof db.transaction>
|
|
514
|
+
) => {
|
|
515
|
+
const tx = realTx(...args);
|
|
516
|
+
const stores = args[0];
|
|
517
|
+
const touchesEntities = Array.isArray(stores)
|
|
518
|
+
? stores.includes("entities")
|
|
519
|
+
: stores === "entities";
|
|
520
|
+
const touchesCursors = Array.isArray(stores)
|
|
521
|
+
? stores.includes("cursors")
|
|
522
|
+
: stores === "cursors";
|
|
523
|
+
if (
|
|
524
|
+
armed &&
|
|
525
|
+
String(args[1]) === "readwrite" &&
|
|
526
|
+
touchesEntities &&
|
|
527
|
+
!touchesCursors
|
|
528
|
+
) {
|
|
529
|
+
armed = false;
|
|
530
|
+
queueMicrotask(() => {
|
|
531
|
+
try {
|
|
532
|
+
tx.abort();
|
|
533
|
+
} catch {
|
|
534
|
+
/* already settled */
|
|
535
|
+
}
|
|
536
|
+
});
|
|
537
|
+
}
|
|
538
|
+
return tx;
|
|
539
|
+
}) as typeof db.transaction;
|
|
540
|
+
|
|
541
|
+
// Apply a change with a target cursor far ahead. The row write
|
|
542
|
+
// aborts; the on-disk cursor must stay where it was.
|
|
543
|
+
await internal.enqueueApply(
|
|
544
|
+
[
|
|
545
|
+
{
|
|
546
|
+
seq: 50,
|
|
547
|
+
entity: "Note",
|
|
548
|
+
row_id: "n1",
|
|
549
|
+
kind: "insert",
|
|
550
|
+
data: { id: "n1", title: "x" },
|
|
551
|
+
timestamp: "",
|
|
552
|
+
},
|
|
553
|
+
],
|
|
554
|
+
{ last_seq: 50 },
|
|
555
|
+
);
|
|
556
|
+
|
|
557
|
+
// In-memory cursor advanced (live sync correct); degrade flag latched.
|
|
558
|
+
expect(internal.cursor.last_seq).toBe(50);
|
|
559
|
+
expect(internal.persistDegraded).toBe(true);
|
|
560
|
+
// The on-disk cursor did NOT advance past the un-persisted row.
|
|
561
|
+
const onDiskAfter = (await persistence.loadCursor())?.last_seq ?? 0;
|
|
562
|
+
expect(onDiskAfter).toBe(onDiskBefore);
|
|
563
|
+
expect(onDiskAfter).toBeLessThan(50);
|
|
564
|
+
|
|
565
|
+
engine.stop();
|
|
566
|
+
} finally {
|
|
567
|
+
console.warn = origWarn;
|
|
568
|
+
}
|
|
569
|
+
});
|
|
426
570
|
});
|
package/src/index.ts
CHANGED
|
@@ -233,6 +233,20 @@ export class SyncEngine {
|
|
|
233
233
|
*/
|
|
234
234
|
private _hadCachedReplica = false;
|
|
235
235
|
|
|
236
|
+
/**
|
|
237
|
+
* Sticky flag: a persisted row/cursor write degraded (IDB quota /
|
|
238
|
+
* abort), so the on-disk replica is known to be behind the in-memory
|
|
239
|
+
* cursor. Once set, `enqueueApply` STOPS advancing the persisted
|
|
240
|
+
* cursor — persisting a cursor ahead of the durable rows would make
|
|
241
|
+
* the next cold start skip them forever (cursor-ahead-of-replica). The
|
|
242
|
+
* in-memory replica stays authoritative for the live session; on
|
|
243
|
+
* restart the lagging on-disk cursor simply re-pulls the gap. Resets to
|
|
244
|
+
* false only on `resetReplicaInner` (full wipe + resync, disk is clean
|
|
245
|
+
* again). A storage-pressured tab thus degrades to "re-pull on restart"
|
|
246
|
+
* — like a memory-only client — instead of silently losing rows.
|
|
247
|
+
*/
|
|
248
|
+
private persistDegraded = false;
|
|
249
|
+
|
|
236
250
|
readonly store: LocalStore;
|
|
237
251
|
readonly mutations: MutationQueue;
|
|
238
252
|
|
|
@@ -556,13 +570,16 @@ export class SyncEngine {
|
|
|
556
570
|
this.cursor = cachedCursor;
|
|
557
571
|
}
|
|
558
572
|
|
|
559
|
-
// Auto-save changes to IndexedDB. Returns a Promise
|
|
560
|
-
//
|
|
561
|
-
//
|
|
573
|
+
// Auto-save changes to IndexedDB. Returns a Promise<boolean>
|
|
574
|
+
// (true = durable) so the async apply path (applyChangesAsync)
|
|
575
|
+
// can both await the write before the cursor advances AND hold
|
|
576
|
+
// the persisted cursor back when a write degraded — the fix for
|
|
577
|
+
// "cursor ahead of replica" on crash AND on quota/abort.
|
|
562
578
|
const persistence = this.persistence;
|
|
563
579
|
this.store._persistFn = async (change: ChangeEvent) => {
|
|
564
580
|
const { persistChange } = await import("./persistence");
|
|
565
|
-
if (persistence)
|
|
581
|
+
if (!persistence) return true;
|
|
582
|
+
return persistChange(persistence, change);
|
|
566
583
|
};
|
|
567
584
|
|
|
568
585
|
// Hydrate the mutation queue from disk. Any offline writes
|
|
@@ -663,7 +680,7 @@ export class SyncEngine {
|
|
|
663
680
|
// runs; the apply path's idempotent op_id-keyed merge handles the
|
|
664
681
|
// worst case (one re-applied batch on next cold pull if the tab
|
|
665
682
|
// crashes between this line and the saveCursor task completing).
|
|
666
|
-
if (this.persistence) {
|
|
683
|
+
if (this.persistence && !this.persistDegraded) {
|
|
667
684
|
void this.persistence.saveCursor(this.cursor);
|
|
668
685
|
}
|
|
669
686
|
|
|
@@ -756,8 +773,8 @@ export class SyncEngine {
|
|
|
756
773
|
fromBroadcast: true,
|
|
757
774
|
});
|
|
758
775
|
},
|
|
759
|
-
onResetReceived: () => {
|
|
760
|
-
void this.resetReplicaInner();
|
|
776
|
+
onResetReceived: (wipeMutations: boolean) => {
|
|
777
|
+
void this.resetReplicaInner({ wipeMutations });
|
|
761
778
|
},
|
|
762
779
|
onSessionReceived: (resolved: ResolvedSession) => {
|
|
763
780
|
// Funnel through the shared session chain so concurrent triggers
|
|
@@ -768,7 +785,15 @@ export class SyncEngine {
|
|
|
768
785
|
},
|
|
769
786
|
onMutationsForwarded: (ops: PendingMutation[]) => {
|
|
770
787
|
for (const op of ops) {
|
|
771
|
-
|
|
788
|
+
// Thread the follower's captured `prevRow` so a server
|
|
789
|
+
// rejection of this forwarded update/delete restores the
|
|
790
|
+
// canonical value rather than deleting it. Without it the
|
|
791
|
+
// leader's queue entry has prevRow === undefined, and
|
|
792
|
+
// failPushedMutation's restoreRow(undefined ?? null) would
|
|
793
|
+
// DELETE the leader's still-valid row. The follower's prevRow
|
|
794
|
+
// (its pre-edit value) equals the leader's canonical row, so
|
|
795
|
+
// restoring it is correct on both tabs.
|
|
796
|
+
this.mutations.add(op.change, op.prevRow);
|
|
772
797
|
}
|
|
773
798
|
void this.push();
|
|
774
799
|
},
|
|
@@ -777,8 +802,19 @@ export class SyncEngine {
|
|
|
777
802
|
this.mutations.clear();
|
|
778
803
|
},
|
|
779
804
|
onMutationsFailed: (ops: { opId: string; error: string }[]) => {
|
|
805
|
+
// The leader pushed this follower's forwarded mutation and the
|
|
806
|
+
// server rejected it. Roll back the follower's OWN optimistic
|
|
807
|
+
// ghost (the leader already rolled back its copy) — calling
|
|
808
|
+
// markFailed alone left the ghost row stuck in the very tab the
|
|
809
|
+
// user is looking at. failPushedMutation restores prevRow for
|
|
810
|
+
// update/delete and removes the insert ghost, then marks failed.
|
|
780
811
|
for (const op of ops) {
|
|
781
|
-
this.mutations.
|
|
812
|
+
const m = this.mutations.get(op.opId);
|
|
813
|
+
if (m) {
|
|
814
|
+
this.failPushedMutation(m, op.error);
|
|
815
|
+
} else {
|
|
816
|
+
this.mutations.markFailed(op.opId, op.error);
|
|
817
|
+
}
|
|
782
818
|
}
|
|
783
819
|
},
|
|
784
820
|
onBinaryReceived: (bytes: Uint8Array) => {
|
|
@@ -882,6 +918,28 @@ export class SyncEngine {
|
|
|
882
918
|
});
|
|
883
919
|
}
|
|
884
920
|
},
|
|
921
|
+
onEntityObserve: (entity: string) => {
|
|
922
|
+
// Leader path: a follower's useQuery observed this entity. Add
|
|
923
|
+
// it to our reconcile sweep and fetch it now if we have no local
|
|
924
|
+
// rows — the resulting `reconciled` batch is broadcast to every
|
|
925
|
+
// tab, so the follower's view populates. Same shape as the
|
|
926
|
+
// leader half of observeEntity; the `has` guard dedupes against
|
|
927
|
+
// our own interest.
|
|
928
|
+
if (!this.isMultiTabLeader) return;
|
|
929
|
+
if (this.observedEntities.has(entity)) return;
|
|
930
|
+
this.observedEntities.add(entity);
|
|
931
|
+
if (this.isHydrated() && this.store.list(entity).length === 0) {
|
|
932
|
+
void this.reconcile([entity]);
|
|
933
|
+
}
|
|
934
|
+
},
|
|
935
|
+
onReplayObservedEntities: () => {
|
|
936
|
+
// Follower path: re-declare every observed entity to the new
|
|
937
|
+
// leader so its reconcile sweep covers them after a leader flip.
|
|
938
|
+
if (this.isMultiTabLeader) return;
|
|
939
|
+
for (const entity of this.observedEntities) {
|
|
940
|
+
this.broadcastToTabs({ type: "entity-observe", entity });
|
|
941
|
+
}
|
|
942
|
+
},
|
|
885
943
|
};
|
|
886
944
|
}
|
|
887
945
|
|
|
@@ -1008,7 +1066,11 @@ export class SyncEngine {
|
|
|
1008
1066
|
(c) => typeof c.seq === "number" && c.seq > this.cursor.last_seq,
|
|
1009
1067
|
);
|
|
1010
1068
|
if (filtered.length > 0) {
|
|
1011
|
-
await this.store.applyChangesAsync(filtered);
|
|
1069
|
+
const durable = await this.store.applyChangesAsync(filtered);
|
|
1070
|
+
// A row in this batch didn't reach disk (quota / abort). Latch
|
|
1071
|
+
// the degraded flag so we never persist a cursor ahead of the
|
|
1072
|
+
// durable replica — the next cold start must re-pull this gap.
|
|
1073
|
+
if (!durable) this.persistDegraded = true;
|
|
1012
1074
|
}
|
|
1013
1075
|
// Pick the cursor target. Explicit `targetCursor` (from pull) wins
|
|
1014
1076
|
// — pull's response carries the server's authoritative current_seq
|
|
@@ -1020,8 +1082,12 @@ export class SyncEngine {
|
|
|
1020
1082
|
? { last_seq: filtered[filtered.length - 1].seq }
|
|
1021
1083
|
: null);
|
|
1022
1084
|
if (candidate && candidate.last_seq > this.cursor.last_seq) {
|
|
1085
|
+
// In-memory cursor ALWAYS advances — live sync stays correct.
|
|
1023
1086
|
this.cursor = candidate;
|
|
1024
|
-
|
|
1087
|
+
// The on-disk cursor only advances while persistence is healthy.
|
|
1088
|
+
// Once degraded, freezing it keeps disk self-consistent (cursor
|
|
1089
|
+
// never exceeds the rows actually written) so restart re-pulls.
|
|
1090
|
+
if (this.persistence && !this.persistDegraded) {
|
|
1025
1091
|
await this.persistence.saveCursor(this.cursor);
|
|
1026
1092
|
}
|
|
1027
1093
|
}
|
|
@@ -1126,16 +1192,41 @@ export class SyncEngine {
|
|
|
1126
1192
|
* rehydrated on the next page load — phantom rows that no purge of
|
|
1127
1193
|
* in-memory state could fix.
|
|
1128
1194
|
*/
|
|
1129
|
-
async resetReplica(): Promise<void> {
|
|
1195
|
+
async resetReplica(opts: { wipeMutations?: boolean } = {}): Promise<void> {
|
|
1130
1196
|
// Public callers go through the queue so a reset can't race with
|
|
1131
1197
|
// an in-flight pull / push / reconcile. Internal callers that
|
|
1132
1198
|
// already hold the queue slot use `resetReplicaInner` directly.
|
|
1133
|
-
return this.opQueue.enqueue("reset", () => this.resetReplicaInner());
|
|
1199
|
+
return this.opQueue.enqueue("reset", () => this.resetReplicaInner(opts));
|
|
1134
1200
|
}
|
|
1135
1201
|
|
|
1136
|
-
|
|
1202
|
+
/**
|
|
1203
|
+
* Drop the local replica and pull fresh. `wipeMutations` decides the
|
|
1204
|
+
* fate of the durable offline write queue:
|
|
1205
|
+
* - `false` (default, 410 RESYNC, SAME user): KEEP pending writes —
|
|
1206
|
+
* they survive the snapshot refresh and re-push under the same
|
|
1207
|
+
* session.
|
|
1208
|
+
* - `true` (token/tenant flip, DIFFERENT identity): DROP them — the
|
|
1209
|
+
* queued writes belong to the outgoing identity and must never be
|
|
1210
|
+
* replayed as the incoming one (cross-identity write leak).
|
|
1211
|
+
*/
|
|
1212
|
+
private async resetReplicaInner(
|
|
1213
|
+
opts: { wipeMutations?: boolean } = {},
|
|
1214
|
+
): Promise<void> {
|
|
1215
|
+
const wipeMutations = opts.wipeMutations === true;
|
|
1137
1216
|
this.cursor = { last_seq: 0 };
|
|
1138
1217
|
this.store.clearAll();
|
|
1218
|
+
// Disk is about to be wiped + re-pulled from 0, so any prior
|
|
1219
|
+
// persist degradation is moot — start the durability invariant
|
|
1220
|
+
// fresh. (If the fresh snapshot also fails to persist, enqueueApply
|
|
1221
|
+
// re-latches the flag.)
|
|
1222
|
+
this.persistDegraded = false;
|
|
1223
|
+
if (wipeMutations) {
|
|
1224
|
+
// Identity flip: discard the outgoing identity's pending offline
|
|
1225
|
+
// writes (and persist the empty queue to disk via the mutation
|
|
1226
|
+
// backend). persistence.clear() deliberately leaves MUTATIONS_STORE
|
|
1227
|
+
// alone for the 410 path, so this is the only site that drops them.
|
|
1228
|
+
this.mutations.clearAll();
|
|
1229
|
+
}
|
|
1139
1230
|
// The cache is now empty. The next pull will start from 0 and
|
|
1140
1231
|
// return a full snapshot — that's a true cold start, so the
|
|
1141
1232
|
// onConnected fast-path may skip the post-pull reconcile. Without
|
|
@@ -1154,9 +1245,11 @@ export class SyncEngine {
|
|
|
1154
1245
|
}
|
|
1155
1246
|
// Leader broadcasts the reset so follower replicas wipe their
|
|
1156
1247
|
// own copies in lockstep — otherwise a follower keeps stale
|
|
1157
|
-
// rows under the old identity until its own pull catches up.
|
|
1248
|
+
// rows under the old identity until its own pull catches up. The
|
|
1249
|
+
// `wipeMutations` flag rides along so followers make the same
|
|
1250
|
+
// keep-vs-drop decision for THEIR forwarded offline writes.
|
|
1158
1251
|
if (this.isMultiTabLeader) {
|
|
1159
|
-
this.broadcastToTabs({ type: "reset" });
|
|
1252
|
+
this.broadcastToTabs({ type: "reset", wipeMutations });
|
|
1160
1253
|
}
|
|
1161
1254
|
}
|
|
1162
1255
|
|
|
@@ -1264,8 +1357,9 @@ export class SyncEngine {
|
|
|
1264
1357
|
const { tokenChanged } = this.session.observeToken(this.currentToken());
|
|
1265
1358
|
if (tokenChanged) {
|
|
1266
1359
|
// We're holding the "pull" slot in the op queue — bypass the
|
|
1267
|
-
// queue's reset path to avoid self-deadlock.
|
|
1268
|
-
|
|
1360
|
+
// queue's reset path to avoid self-deadlock. Identity flipped, so
|
|
1361
|
+
// wipe the old identity's pending offline writes.
|
|
1362
|
+
await this.resetReplicaInner({ wipeMutations: true });
|
|
1269
1363
|
// Token flipped → the cached tenant is for the previous user. Pull
|
|
1270
1364
|
// the fresh session in parallel with the cursor catch-up below.
|
|
1271
1365
|
void this.refreshResolvedSession();
|
|
@@ -1301,12 +1395,18 @@ export class SyncEngine {
|
|
|
1301
1395
|
// Continue paginating in the same loop iteration so we don't
|
|
1302
1396
|
// leave a fresh client with a partial replica.
|
|
1303
1397
|
snapshotAfter = resp.snapshot_after ?? undefined;
|
|
1304
|
-
// The change-log tail also paginates via `has_more` —
|
|
1305
|
-
//
|
|
1306
|
-
//
|
|
1307
|
-
//
|
|
1398
|
+
// The change-log tail also paginates via `has_more` — drain it
|
|
1399
|
+
// by recursing into `pullInner` directly. We are INSIDE the
|
|
1400
|
+
// `pull` op-queue slot right now; calling the public `pull()`
|
|
1401
|
+
// would re-enqueue under the same "pull" key, which coalesces
|
|
1402
|
+
// to the promise we're currently running inside (op-queue.ts
|
|
1403
|
+
// deletes the key only after `fn` resolves) and `await` it →
|
|
1404
|
+
// permanent self-deadlock that bricks the entire pull path for
|
|
1405
|
+
// the session. This is the exact hazard the 410 handler avoids;
|
|
1406
|
+
// `pullInner` re-reads `this.cursor.last_seq` (already advanced
|
|
1407
|
+
// by enqueueApply) so the recursion resumes at the right cursor.
|
|
1308
1408
|
if (!snapshotAfter && resp.has_more) {
|
|
1309
|
-
await this.
|
|
1409
|
+
await this.pullInner();
|
|
1310
1410
|
break;
|
|
1311
1411
|
}
|
|
1312
1412
|
}
|
|
@@ -1397,6 +1497,12 @@ export class SyncEngine {
|
|
|
1397
1497
|
* that doesn't throw a 410. */
|
|
1398
1498
|
private consecutive_410s = 0;
|
|
1399
1499
|
|
|
1500
|
+
/** Consecutive TRANSIENT push failures (offline / 5xx / 429 / 401 / 408)
|
|
1501
|
+
* since the last server response. Drives the exponential backoff on
|
|
1502
|
+
* the retry of a transient-failed push so an offline tab doesn't
|
|
1503
|
+
* hot-loop. Reset to 0 as soon as a push request returns without throwing. */
|
|
1504
|
+
private pushFailureCount = 0;
|
|
1505
|
+
|
|
1400
1506
|
/** Set by pullInner whenever the just-completed pull started with
|
|
1401
1507
|
* `cursor.last_seq === 0` (cold load OR post-reset). The WS
|
|
1402
1508
|
* onConnected hook reads this to skip the reconcile() that would
|
|
@@ -1480,9 +1586,17 @@ export class SyncEngine {
|
|
|
1480
1586
|
observeEntity(entity: string): void {
|
|
1481
1587
|
if (this.observedEntities.has(entity)) return;
|
|
1482
1588
|
this.observedEntities.add(entity);
|
|
1483
|
-
|
|
1484
|
-
|
|
1485
|
-
|
|
1589
|
+
if (!this.isMultiTabLeader) {
|
|
1590
|
+
// Follower: only the leader talks to the network. Forward the
|
|
1591
|
+
// interest so the LEADER adds this entity to its reconcile sweep
|
|
1592
|
+
// and fetches any server row we never cached — then converge via
|
|
1593
|
+
// the `reconciled` broadcast. Without the forward, a follower's
|
|
1594
|
+
// useQuery on a never-cached entity renders empty forever (the
|
|
1595
|
+
// leader never sweeps an entity it has no local rows for and was
|
|
1596
|
+
// never told a peer cares about).
|
|
1597
|
+
this.broadcastToTabs({ type: "entity-observe", entity });
|
|
1598
|
+
return;
|
|
1599
|
+
}
|
|
1486
1600
|
if (this.isHydrated() && this.store.list(entity).length === 0) {
|
|
1487
1601
|
// Scoped reconcile bypasses the no-arg debounce and reuses the
|
|
1488
1602
|
// session-flip / cursor-drift guards in reconcileInner.
|
|
@@ -1777,8 +1891,9 @@ export class SyncEngine {
|
|
|
1777
1891
|
// transitions but NOT the apply queue — without queuing
|
|
1778
1892
|
// the reset, a concurrent applyChangesAsync could write
|
|
1779
1893
|
// rows AFTER we clear the store, leaving stale data under
|
|
1780
|
-
// the new identity.
|
|
1781
|
-
|
|
1894
|
+
// the new identity. Identity flipped → wipe the outgoing
|
|
1895
|
+
// identity's pending offline writes too.
|
|
1896
|
+
await this.resetReplica({ wipeMutations: true });
|
|
1782
1897
|
}
|
|
1783
1898
|
if (this.isMultiTabLeader) {
|
|
1784
1899
|
// Only the leader pulls — followers receive subsequent
|
|
@@ -2007,6 +2122,10 @@ export class SyncEngine {
|
|
|
2007
2122
|
changes: pending.map((m) => m.change),
|
|
2008
2123
|
client_id: this.clientId,
|
|
2009
2124
|
});
|
|
2125
|
+
// The request reached the server and returned a response — clear
|
|
2126
|
+
// the transient-failure backoff counter (success or per-op
|
|
2127
|
+
// rejections both mean "we're online and the server answered").
|
|
2128
|
+
this.pushFailureCount = 0;
|
|
2010
2129
|
|
|
2011
2130
|
// Per-op `results` mapping: match by op_id when present, fall
|
|
2012
2131
|
// back to positional. Invariant: a partial-failure batch lands
|
|
@@ -2120,33 +2239,55 @@ export class SyncEngine {
|
|
|
2120
2239
|
}, 250);
|
|
2121
2240
|
}
|
|
2122
2241
|
} catch (err) {
|
|
2123
|
-
//
|
|
2124
|
-
//
|
|
2125
|
-
//
|
|
2126
|
-
//
|
|
2127
|
-
//
|
|
2128
|
-
//
|
|
2242
|
+
// Whole-request failure. CRITICAL distinction:
|
|
2243
|
+
//
|
|
2244
|
+
// - TRANSIENT (offline / network drop / 5xx / 429 / 401 / 408):
|
|
2245
|
+
// the server never durably rejected the write. We MUST keep
|
|
2246
|
+
// the mutations `pending` and the optimistic ghost intact, and
|
|
2247
|
+
// retry with backoff. Marking them failed + rolling back here
|
|
2248
|
+
// is what broke offline support — an offline insert vanished
|
|
2249
|
+
// from the UI and was never re-sent (it became `failed`, and
|
|
2250
|
+
// pushInner only ships `pending`). A network `fetch` throw has
|
|
2251
|
+
// NO `.status`, so it lands here as transient. op_id makes the
|
|
2252
|
+
// eventual retry idempotent even if the server HAD committed.
|
|
2129
2253
|
//
|
|
2130
|
-
//
|
|
2131
|
-
//
|
|
2132
|
-
//
|
|
2133
|
-
// the server if the failure was a transient transport error
|
|
2134
|
-
// — the next push() will re-include the user's intent.
|
|
2254
|
+
// - PERMANENT (400/403/404/409/422): a client error that won't
|
|
2255
|
+
//   change on retry (malformed batch, forbidden, not found). Fail +
|
|
2256
|
+
// roll back the optimistic ghost + surface mutations-failed.
|
|
2135
2257
|
const msg = err instanceof Error ? err.message : String(err);
|
|
2136
|
-
const
|
|
2137
|
-
|
|
2138
|
-
|
|
2139
|
-
const
|
|
2140
|
-
|
|
2141
|
-
|
|
2258
|
+
const status = (err as { status?: number })?.status;
|
|
2259
|
+
if (isPermanentPushError(status)) {
|
|
2260
|
+
const failedOps: { opId: string; error: string }[] = [];
|
|
2261
|
+
for (const m of pending) {
|
|
2262
|
+
this.failPushedMutation(m, msg);
|
|
2263
|
+
const opId = m.change.op_id;
|
|
2264
|
+
if (typeof opId === "string") {
|
|
2265
|
+
failedOps.push({ opId, error: msg });
|
|
2266
|
+
}
|
|
2142
2267
|
}
|
|
2268
|
+
if (failedOps.length > 0) {
|
|
2269
|
+
this.broadcastToTabs({ type: "mutations-failed", ops: failedOps });
|
|
2270
|
+
}
|
|
2271
|
+
this.mutations.clear();
|
|
2272
|
+
// eslint-disable-next-line no-console
|
|
2273
|
+
console.warn(`[sync] /api/sync/push rejected (status ${status}):`, msg);
|
|
2274
|
+
} else {
|
|
2275
|
+
// Transient: leave the queue + ghosts alone, retry with bounded
|
|
2276
|
+
// exponential backoff. Resets on the next response (success or
|
|
2277
|
+
// per-op rejection). A 429 also pushes the WS reconnect out so a
|
|
2278
|
+
// rate-limited push doesn't drive a tight loop.
|
|
2279
|
+
if (status === 429) this.transport?.bumpReconnect(3);
|
|
2280
|
+
const attempt = this.pushFailureCount;
|
|
2281
|
+
this.pushFailureCount += 1;
|
|
2282
|
+
const delayMs = Math.min(30_000, 1000 * 2 ** Math.min(attempt, 5));
|
|
2283
|
+
// eslint-disable-next-line no-console
|
|
2284
|
+
console.warn(
|
|
2285
|
+
`[sync] /api/sync/push transient failure (status ${status ?? "offline"}); keeping ${pending.length} mutation(s) pending, retrying in ${delayMs}ms`,
|
|
2286
|
+
);
|
|
2287
|
+
setTimeout(() => {
|
|
2288
|
+
void this.push();
|
|
2289
|
+
}, delayMs);
|
|
2143
2290
|
}
|
|
2144
|
-
if (failedOps.length > 0) {
|
|
2145
|
-
this.broadcastToTabs({ type: "mutations-failed", ops: failedOps });
|
|
2146
|
-
}
|
|
2147
|
-
this.mutations.clear();
|
|
2148
|
-
// eslint-disable-next-line no-console
|
|
2149
|
-
console.warn("[sync] /api/sync/push failed:", msg);
|
|
2150
2291
|
}
|
|
2151
2292
|
}
|
|
2152
2293
|
|
|
@@ -2169,8 +2310,23 @@ export class SyncEngine {
|
|
|
2169
2310
|
* channel) so insert-only rollback is the right shape to ship now.
|
|
2170
2311
|
*/
|
|
2171
2312
|
private failPushedMutation(m: PendingMutation, error: string): void {
|
|
2172
|
-
|
|
2173
|
-
|
|
2313
|
+
const { entity, row_id, kind } = m.change;
|
|
2314
|
+
if (kind === "insert") {
|
|
2315
|
+
// No tombstone — a future legitimate insert of this id must work.
|
|
2316
|
+
this.store.rollbackOptimisticInsert(entity, row_id);
|
|
2317
|
+
} else if (kind === "update" || kind === "delete") {
|
|
2318
|
+
// Restore the captured pre-mutation row (update: prior field
|
|
2319
|
+
// values; delete: bring it back AND clear the optimistic tombstone
|
|
2320
|
+
// fence). `prevRow === null` means the row didn't exist pre-mutation
|
|
2321
|
+
// → remove + un-fence. `prevRow === undefined` means THIS engine
|
|
2322
|
+
// never captured a snapshot — i.e. the optimistic change wasn't
|
|
2323
|
+
// applied to this store (a forwarded op whose prevRow didn't
|
|
2324
|
+
// thread). Touching the store then would delete a canonical row we
|
|
2325
|
+
// still hold, so leave it untouched and let pull/reconcile
|
|
2326
|
+
// reconverge. The `!== undefined` guard distinguishes the two.
|
|
2327
|
+
if (m.prevRow !== undefined) {
|
|
2328
|
+
this.store.restoreRow(entity, row_id, m.prevRow);
|
|
2329
|
+
}
|
|
2174
2330
|
}
|
|
2175
2331
|
this.mutations.markFailed(m.id, error);
|
|
2176
2332
|
}
|
|
@@ -2198,24 +2354,39 @@ export class SyncEngine {
|
|
|
2198
2354
|
|
|
2199
2355
|
/** Update a row with optimistic local update. */
|
|
2200
2356
|
async update(entity: string, id: string, data: Partial<Row>): Promise<void> {
|
|
2357
|
+
// Snapshot the pre-update row BEFORE applying the optimistic merge so
|
|
2358
|
+
// a rejected push can restore the exact prior value (see
|
|
2359
|
+
// failPushedMutation). Clone — the live row is mutated in place.
|
|
2360
|
+
const before = this.store.get(entity, id);
|
|
2361
|
+
const prev = before ? { ...before } : null;
|
|
2201
2362
|
this.store.optimisticUpdate(entity, id, data);
|
|
2202
|
-
this.mutations.add(
|
|
2203
|
-
|
|
2204
|
-
|
|
2205
|
-
|
|
2206
|
-
|
|
2207
|
-
|
|
2363
|
+
this.mutations.add(
|
|
2364
|
+
{
|
|
2365
|
+
entity,
|
|
2366
|
+
row_id: id,
|
|
2367
|
+
kind: "update",
|
|
2368
|
+
data: data as Row,
|
|
2369
|
+
},
|
|
2370
|
+
prev,
|
|
2371
|
+
);
|
|
2208
2372
|
await this.push();
|
|
2209
2373
|
}
|
|
2210
2374
|
|
|
2211
2375
|
/** Delete a row with optimistic local update. */
|
|
2212
2376
|
async delete(entity: string, id: string): Promise<void> {
|
|
2377
|
+
// Snapshot the row before removing it so a rejected delete can bring
|
|
2378
|
+
// it back (and clear the optimistic tombstone).
|
|
2379
|
+
const before = this.store.get(entity, id);
|
|
2380
|
+
const prev = before ? { ...before } : null;
|
|
2213
2381
|
this.store.optimisticDelete(entity, id);
|
|
2214
|
-
this.mutations.add(
|
|
2215
|
-
|
|
2216
|
-
|
|
2217
|
-
|
|
2218
|
-
|
|
2382
|
+
this.mutations.add(
|
|
2383
|
+
{
|
|
2384
|
+
entity,
|
|
2385
|
+
row_id: id,
|
|
2386
|
+
kind: "delete",
|
|
2387
|
+
},
|
|
2388
|
+
prev,
|
|
2389
|
+
);
|
|
2219
2390
|
await this.push();
|
|
2220
2391
|
}
|
|
2221
2392
|
|
|
@@ -2915,6 +3086,32 @@ function rowsDiffer(a: Row, b: Row): boolean {
|
|
|
2915
3086
|
return stableStringify(a) !== stableStringify(b);
|
|
2916
3087
|
}
|
|
2917
3088
|
|
|
3089
|
+
/**
|
|
3090
|
+
* Is a whole-request push failure PERMANENT (the write was durably
|
|
3091
|
+
* rejected and won't succeed on retry) vs TRANSIENT (offline / server
|
|
3092
|
+
* hiccup / rate limit — retry will eventually land)?
|
|
3093
|
+
*
|
|
3094
|
+
* - `undefined` status = a `fetch` network throw (offline, DNS, CORS,
|
|
3095
|
+
* connection reset) → transient.
|
|
3096
|
+
* - 400/403/404/409/422 = client errors that are stable across retries
|
|
3097
|
+
*   (malformed batch, forbidden, not found, conflict, unprocessable) →
|
|
3098
|
+
* permanent.
|
|
3099
|
+
* - everything else (5xx, 429 rate-limit, 408 timeout, 401 needs
|
|
3100
|
+
* re-auth, 502/503/504) → transient: keep the mutation queued and
|
|
3101
|
+
* retry. Per-op policy rejections do NOT come through here — they
|
|
3102
|
+
* arrive as a 200 with per-op `results`, handled on the success path.
|
|
3103
|
+
*/
|
|
3104
|
+
function isPermanentPushError(status?: number): boolean {
|
|
3105
|
+
if (status === undefined) return false;
|
|
3106
|
+
return (
|
|
3107
|
+
status === 400 ||
|
|
3108
|
+
status === 403 ||
|
|
3109
|
+
status === 404 ||
|
|
3110
|
+
status === 409 ||
|
|
3111
|
+
status === 422
|
|
3112
|
+
);
|
|
3113
|
+
}
|
|
3114
|
+
|
|
2918
3115
|
function stableStringify(value: unknown): string {
|
|
2919
3116
|
if (value === null || typeof value !== "object") return JSON.stringify(value);
|
|
2920
3117
|
if (Array.isArray(value)) {
|