@pylonsync/sync 0.3.227 → 0.3.229
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/idb-warm-load.test.ts +144 -0
- package/src/index.ts +307 -63
- package/src/local-store.ts +57 -5
- package/src/multi-tab-orchestrator.ts +31 -5
- package/src/mutation-queue.ts +32 -3
- package/src/persistence.ts +69 -30
- package/src/round6-codex.test.ts +157 -0
- package/src/scenarios.test.ts +184 -0
- package/src/test-harness/server.ts +36 -0
- package/src/test-harness/transport.ts +16 -0
package/src/index.ts
CHANGED
|
@@ -233,6 +233,20 @@ export class SyncEngine {
|
|
|
233
233
|
*/
|
|
234
234
|
private _hadCachedReplica = false;
|
|
235
235
|
|
|
236
|
+
/**
|
|
237
|
+
* Sticky flag: a persisted row/cursor write degraded (IDB quota /
|
|
238
|
+
* abort), so the on-disk replica is known to be behind the in-memory
|
|
239
|
+
* cursor. Once set, `enqueueApply` STOPS advancing the persisted
|
|
240
|
+
* cursor — persisting a cursor ahead of the durable rows would make
|
|
241
|
+
* the next cold start skip them forever (cursor-ahead-of-replica). The
|
|
242
|
+
* in-memory replica stays authoritative for the live session; on
|
|
243
|
+
* restart the lagging on-disk cursor simply re-pulls the gap. Resets to
|
|
244
|
+
* false only on `resetReplicaInner` (full wipe + resync, disk is clean
|
|
245
|
+
* again). A storage-pressured tab thus degrades to "re-pull on restart"
|
|
246
|
+
* — like a memory-only client — instead of silently losing rows.
|
|
247
|
+
*/
|
|
248
|
+
private persistDegraded = false;
|
|
249
|
+
|
|
236
250
|
readonly store: LocalStore;
|
|
237
251
|
readonly mutations: MutationQueue;
|
|
238
252
|
|
|
@@ -556,13 +570,16 @@ export class SyncEngine {
|
|
|
556
570
|
this.cursor = cachedCursor;
|
|
557
571
|
}
|
|
558
572
|
|
|
559
|
-
// Auto-save changes to IndexedDB. Returns a Promise
|
|
560
|
-
//
|
|
561
|
-
//
|
|
573
|
+
// Auto-save changes to IndexedDB. Returns a Promise<boolean>
|
|
574
|
+
// (true = durable) so the async apply path (applyChangesAsync)
|
|
575
|
+
// can both await the write before the cursor advances AND hold
|
|
576
|
+
// the persisted cursor back when a write degraded — the fix for
|
|
577
|
+
// "cursor ahead of replica" on crash AND on quota/abort.
|
|
562
578
|
const persistence = this.persistence;
|
|
563
579
|
this.store._persistFn = async (change: ChangeEvent) => {
|
|
564
580
|
const { persistChange } = await import("./persistence");
|
|
565
|
-
if (persistence)
|
|
581
|
+
if (!persistence) return true;
|
|
582
|
+
return persistChange(persistence, change);
|
|
566
583
|
};
|
|
567
584
|
|
|
568
585
|
// Hydrate the mutation queue from disk. Any offline writes
|
|
@@ -663,7 +680,7 @@ export class SyncEngine {
|
|
|
663
680
|
// runs; the apply path's idempotent op_id-keyed merge handles the
|
|
664
681
|
// worst case (one re-applied batch on next cold pull if the tab
|
|
665
682
|
// crashes between this line and the saveCursor task completing).
|
|
666
|
-
if (this.persistence) {
|
|
683
|
+
if (this.persistence && !this.persistDegraded) {
|
|
667
684
|
void this.persistence.saveCursor(this.cursor);
|
|
668
685
|
}
|
|
669
686
|
|
|
@@ -756,8 +773,8 @@ export class SyncEngine {
|
|
|
756
773
|
fromBroadcast: true,
|
|
757
774
|
});
|
|
758
775
|
},
|
|
759
|
-
onResetReceived: () => {
|
|
760
|
-
void this.resetReplicaInner();
|
|
776
|
+
onResetReceived: (wipeMutations: boolean) => {
  // A peer tab announced a replica reset. Run the inner reset directly
  // (fire-and-forget) and pass the peer's keep-vs-drop decision for the
  // pending mutation queue through unchanged.
  const resetOpts = { wipeMutations };
  void this.resetReplicaInner(resetOpts);
},
|
|
762
779
|
onSessionReceived: (resolved: ResolvedSession) => {
|
|
763
780
|
// Funnel through the shared session chain so concurrent triggers
|
|
@@ -768,7 +785,15 @@ export class SyncEngine {
|
|
|
768
785
|
},
|
|
769
786
|
onMutationsForwarded: (ops: PendingMutation[]) => {
  // Queue every mutation a peer tab forwarded, carrying that tab's
  // captured `prevRow` along with the change. The queue entry then has a
  // snapshot to restore if the server rejects an update/delete; without
  // it the entry's prevRow would be undefined.
  ops.forEach(({ change, prevRow }) => {
    this.mutations.add(change, prevRow);
  });
  // Kick off a push for the newly queued work; the result is not awaited.
  void this.push();
},
|
|
@@ -777,8 +802,19 @@ export class SyncEngine {
|
|
|
777
802
|
this.mutations.clear();
|
|
778
803
|
},
|
|
779
804
|
onMutationsFailed: (ops: { opId: string; error: string }[]) => {
  // A peer reported that mutations this tab forwarded were rejected by
  // the server, so undo this tab's own optimistic state for each one.
  //
  // Unwind newest-first. `update()` / `delete()` snapshot `prevRow` from
  // the live store, so when two rejected ops touch the same row the later
  // op's snapshot already contains the earlier op's optimistic edit.
  // Rolling back oldest-first would finish by restoring that later
  // snapshot and leave the earlier rejected edit (or a rejected insert
  // ghost) in the store. Newest-first ends on the oldest snapshot, i.e.
  // the row as it was before any of the rejected ops.
  // NOTE(review): assumes `ops` arrives in queue (oldest-first) order, as
  // the whole-batch failure path builds it — confirm for per-op results.
  // The array is copied before reversing so the caller's list is not
  // mutated.
  for (const op of [...ops].reverse()) {
    const m = this.mutations.get(op.opId);
    if (m) {
      // Known locally: roll back the optimistic change (prevRow restore
      // for update/delete, ghost removal for insert), then mark failed.
      this.failPushedMutation(m, op.error);
    } else {
      // No queue entry under this op id: nothing to roll back, only
      // record the failure.
      this.mutations.markFailed(op.opId, op.error);
    }
  }
},
|
|
784
820
|
onBinaryReceived: (bytes: Uint8Array) => {
|
|
@@ -882,6 +918,28 @@ export class SyncEngine {
|
|
|
882
918
|
});
|
|
883
919
|
}
|
|
884
920
|
},
|
|
921
|
+
onEntityObserve: (entity: string) => {
  // Only a leader acts on a peer's declared interest, and only the first
  // time this engine hears about the entity (the set doubles as the
  // dedupe guard against this tab's own interest).
  if (!this.isMultiTabLeader || this.observedEntities.has(entity)) {
    return;
  }
  this.observedEntities.add(entity);

  // Fetch right away only when the replica is hydrated and holds no rows
  // for the entity; otherwise the entity is simply part of later sweeps.
  const nothingCachedYet =
    this.isHydrated() && this.store.list(entity).length === 0;
  if (nothingCachedYet) {
    void this.reconcile([entity]);
  }
},
|
|
935
|
+
onReplayObservedEntities: () => {
  // Leaders have nobody to report to; only followers replay.
  if (this.isMultiTabLeader) {
    return;
  }
  // Re-announce every entity this tab has observed, one message each, so
  // a newly elected leader learns about them.
  this.observedEntities.forEach((entity) => {
    this.broadcastToTabs({ type: "entity-observe", entity });
  });
},
|
|
885
943
|
};
|
|
886
944
|
}
|
|
887
945
|
|
|
@@ -1008,7 +1066,11 @@ export class SyncEngine {
|
|
|
1008
1066
|
(c) => typeof c.seq === "number" && c.seq > this.cursor.last_seq,
|
|
1009
1067
|
);
|
|
1010
1068
|
if (filtered.length > 0) {
|
|
1011
|
-
await this.store.applyChangesAsync(filtered);
|
|
1069
|
+
const durable = await this.store.applyChangesAsync(filtered);
|
|
1070
|
+
// A row in this batch didn't reach disk (quota / abort). Latch
|
|
1071
|
+
// the degraded flag so we never persist a cursor ahead of the
|
|
1072
|
+
// durable replica — the next cold start must re-pull this gap.
|
|
1073
|
+
if (!durable) this.persistDegraded = true;
|
|
1012
1074
|
}
|
|
1013
1075
|
// Pick the cursor target. Explicit `targetCursor` (from pull) wins
|
|
1014
1076
|
// — pull's response carries the server's authoritative current_seq
|
|
@@ -1020,8 +1082,12 @@ export class SyncEngine {
|
|
|
1020
1082
|
? { last_seq: filtered[filtered.length - 1].seq }
|
|
1021
1083
|
: null);
|
|
1022
1084
|
if (candidate && candidate.last_seq > this.cursor.last_seq) {
|
|
1085
|
+
// In-memory cursor ALWAYS advances — live sync stays correct.
|
|
1023
1086
|
this.cursor = candidate;
|
|
1024
|
-
|
|
1087
|
+
// The on-disk cursor only advances while persistence is healthy.
|
|
1088
|
+
// Once degraded, freezing it keeps disk self-consistent (cursor
|
|
1089
|
+
// never exceeds the rows actually written) so restart re-pulls.
|
|
1090
|
+
if (this.persistence && !this.persistDegraded) {
|
|
1025
1091
|
await this.persistence.saveCursor(this.cursor);
|
|
1026
1092
|
}
|
|
1027
1093
|
}
|
|
@@ -1126,16 +1192,41 @@ export class SyncEngine {
|
|
|
1126
1192
|
* rehydrated on the next page load — phantom rows that no purge of
|
|
1127
1193
|
* in-memory state could fix.
|
|
1128
1194
|
*/
|
|
1129
|
-
async resetReplica(): Promise<void> {
|
|
1195
|
+
async resetReplica(opts: { wipeMutations?: boolean } = {}): Promise<void> {
  // Public entry point: the reset is submitted to the op queue under the
  // "reset" key rather than run inline, so it is sequenced with the other
  // queued operations. Code that already owns a queue slot calls
  // `resetReplicaInner` itself instead of going through here.
  const runReset = () => this.resetReplicaInner(opts);
  return this.opQueue.enqueue("reset", runReset);
}
|
|
1135
1201
|
|
|
1136
|
-
|
|
1202
|
+
/**
|
|
1203
|
+
* Drop the local replica and pull fresh. `wipeMutations` decides the
|
|
1204
|
+
* fate of the durable offline write queue:
|
|
1205
|
+
* - `false` (default, 410 RESYNC, SAME user): KEEP pending writes —
|
|
1206
|
+
* they survive the snapshot refresh and re-push under the same
|
|
1207
|
+
* session.
|
|
1208
|
+
* - `true` (token/tenant flip, DIFFERENT identity): DROP them — the
|
|
1209
|
+
* queued writes belong to the outgoing identity and must never be
|
|
1210
|
+
* replayed as the incoming one (cross-identity write leak).
|
|
1211
|
+
*/
|
|
1212
|
+
private async resetReplicaInner(
|
|
1213
|
+
opts: { wipeMutations?: boolean } = {},
|
|
1214
|
+
): Promise<void> {
|
|
1215
|
+
const wipeMutations = opts.wipeMutations === true;
|
|
1137
1216
|
this.cursor = { last_seq: 0 };
|
|
1138
1217
|
this.store.clearAll();
|
|
1218
|
+
// Disk is about to be wiped + re-pulled from 0, so any prior
|
|
1219
|
+
// persist degradation is moot — start the durability invariant
|
|
1220
|
+
// fresh. (If the fresh snapshot also fails to persist, enqueueApply
|
|
1221
|
+
// re-latches the flag.)
|
|
1222
|
+
this.persistDegraded = false;
|
|
1223
|
+
if (wipeMutations) {
|
|
1224
|
+
// Identity flip: discard the outgoing identity's pending offline
|
|
1225
|
+
// writes (and persist the empty queue to disk via the mutation
|
|
1226
|
+
// backend). persistence.clear() deliberately leaves MUTATIONS_STORE
|
|
1227
|
+
// alone for the 410 path, so this is the only site that drops them.
|
|
1228
|
+
this.mutations.clearAll();
|
|
1229
|
+
}
|
|
1139
1230
|
// The cache is now empty. The next pull will start from 0 and
|
|
1140
1231
|
// return a full snapshot — that's a true cold start, so the
|
|
1141
1232
|
// onConnected fast-path may skip the post-pull reconcile. Without
|
|
@@ -1154,9 +1245,11 @@ export class SyncEngine {
|
|
|
1154
1245
|
}
|
|
1155
1246
|
// Leader broadcasts the reset so follower replicas wipe their
|
|
1156
1247
|
// own copies in lockstep — otherwise a follower keeps stale
|
|
1157
|
-
// rows under the old identity until its own pull catches up.
|
|
1248
|
+
// rows under the old identity until its own pull catches up. The
|
|
1249
|
+
// `wipeMutations` flag rides along so followers make the same
|
|
1250
|
+
// keep-vs-drop decision for THEIR forwarded offline writes.
|
|
1158
1251
|
if (this.isMultiTabLeader) {
|
|
1159
|
-
this.broadcastToTabs({ type: "reset" });
|
|
1252
|
+
this.broadcastToTabs({ type: "reset", wipeMutations });
|
|
1160
1253
|
}
|
|
1161
1254
|
}
|
|
1162
1255
|
|
|
@@ -1264,8 +1357,9 @@ export class SyncEngine {
|
|
|
1264
1357
|
const { tokenChanged } = this.session.observeToken(this.currentToken());
|
|
1265
1358
|
if (tokenChanged) {
|
|
1266
1359
|
// We're holding the "pull" slot in the op queue — bypass the
|
|
1267
|
-
// queue's reset path to avoid self-deadlock.
|
|
1268
|
-
|
|
1360
|
+
// queue's reset path to avoid self-deadlock. Identity flipped, so
|
|
1361
|
+
// wipe the old identity's pending offline writes.
|
|
1362
|
+
await this.resetReplicaInner({ wipeMutations: true });
|
|
1269
1363
|
// Token flipped → the cached tenant is for the previous user. Pull
|
|
1270
1364
|
// the fresh session in parallel with the cursor catch-up below.
|
|
1271
1365
|
void this.refreshResolvedSession();
|
|
@@ -1301,12 +1395,18 @@ export class SyncEngine {
|
|
|
1301
1395
|
// Continue paginating in the same loop iteration so we don't
|
|
1302
1396
|
// leave a fresh client with a partial replica.
|
|
1303
1397
|
snapshotAfter = resp.snapshot_after ?? undefined;
|
|
1304
|
-
// The change-log tail also paginates via `has_more` —
|
|
1305
|
-
//
|
|
1306
|
-
//
|
|
1307
|
-
//
|
|
1398
|
+
// The change-log tail also paginates via `has_more` — drain it
|
|
1399
|
+
// by recursing into `pullInner` directly. We are INSIDE the
|
|
1400
|
+
// `pull` op-queue slot right now; calling the public `pull()`
|
|
1401
|
+
// would re-enqueue under the same "pull" key, which coalesces
|
|
1402
|
+
// to the promise we're currently running inside (op-queue.ts
|
|
1403
|
+
// deletes the key only after `fn` resolves) and `await` it →
|
|
1404
|
+
// permanent self-deadlock that bricks the entire pull path for
|
|
1405
|
+
// the session. This is the exact hazard the 410 handler avoids;
|
|
1406
|
+
// `pullInner` re-reads `this.cursor.last_seq` (already advanced
|
|
1407
|
+
// by enqueueApply) so the recursion resumes at the right cursor.
|
|
1308
1408
|
if (!snapshotAfter && resp.has_more) {
|
|
1309
|
-
await this.
|
|
1409
|
+
await this.pullInner();
|
|
1310
1410
|
break;
|
|
1311
1411
|
}
|
|
1312
1412
|
}
|
|
@@ -1397,6 +1497,12 @@ export class SyncEngine {
|
|
|
1397
1497
|
* that doesn't throw a 410. */
|
|
1398
1498
|
private consecutive_410s = 0;
|
|
1399
1499
|
|
|
1500
|
+
/** Consecutive TRANSIENT push failures (offline / 5xx / 429 / 401)
|
|
1501
|
+
* since the last server response. Drives the exponential backoff on
|
|
1502
|
+
* the retry of a transient-failed push so an offline tab doesn't
|
|
1503
|
+
* hot-loop. Reset to 0 the moment the server returns any response. */
|
|
1504
|
+
private pushFailureCount = 0;
|
|
1505
|
+
|
|
1400
1506
|
/** Set by pullInner whenever the just-completed pull started with
|
|
1401
1507
|
* `cursor.last_seq === 0` (cold load OR post-reset). The WS
|
|
1402
1508
|
* onConnected hook reads this to skip the reconcile() that would
|
|
@@ -1414,6 +1520,17 @@ export class SyncEngine {
|
|
|
1414
1520
|
* entity twice within seconds. Configurable via `reconcileMinIntervalMs`. */
|
|
1415
1521
|
private lastReconcileAt = 0;
|
|
1416
1522
|
|
|
1523
|
+
/** Entities the app has subscribed to via `useQuery` / `useQueryOne`,
|
|
1524
|
+
* even ones the local replica has zero rows for. The reconcile
|
|
1525
|
+
* safety net defaults to `store.entityNames()` — entities with at
|
|
1526
|
+
* least one local row — so a server row in a NEVER-cached entity (a
|
|
1527
|
+
* row created on another surface, or a freshly-added entity) stayed
|
|
1528
|
+
* invisible until a full snapshot / cache clear: `useQuery` reads
|
|
1529
|
+
* the local store and a delta `pull()` can't recover a row created
|
|
1530
|
+
* before the cursor. Tracking observed entities lets the no-arg
|
|
1531
|
+
* reconcile sweep them too. See `observeEntity`. */
|
|
1532
|
+
private observedEntities = new Set<string>();
|
|
1533
|
+
|
|
1417
1534
|
/**
|
|
1418
1535
|
* Reconcile the local replica against server truth.
|
|
1419
1536
|
*
|
|
@@ -1447,8 +1564,46 @@ export class SyncEngine {
|
|
|
1447
1564
|
*
|
|
1448
1565
|
* Pass an explicit entity list to scope the reconcile (callers like
|
|
1449
1566
|
* `db.useQueryOne` that know what they care about). When called with
|
|
1450
|
-
* no arg, every entity with local rows
|
|
1567
|
+
* no arg, every entity with local rows OR observed via `useQuery`
|
|
1568
|
+
* (see `observeEntity`) is checked.
|
|
1451
1569
|
*/
|
|
1570
|
+
/**
 * Record that the app is interested in `entity` (the original note says
 * `useQuery` / `useQueryOne` call this on mount).
 *
 * The first call for a given entity adds it to `observedEntities`; any
 * later call for the same entity returns immediately, so everything
 * below happens at most once per entity per engine.
 *
 * - Follower tab: broadcasts an `entity-observe` message so the leader
 *   can track the entity, then stops — a follower does not fetch here.
 * - Leader tab: when the replica is hydrated and holds no rows for the
 *   entity, starts a reconcile scoped to just that entity
 *   (fire-and-forget).
 *
 * @param entity Name of the entity being observed.
 */
observeEntity(entity: string): void {
  const tracked = this.observedEntities;
  if (tracked.has(entity)) {
    return;
  }
  tracked.add(entity);

  if (this.isMultiTabLeader) {
    const locallyEmpty =
      this.isHydrated() && this.store.list(entity).length === 0;
    if (locallyEmpty) {
      // Scoped call: only this entity is reconciled.
      void this.reconcile([entity]);
    }
    return;
  }

  // Follower: hand the interest to the other tabs instead of fetching.
  this.broadcastToTabs({ type: "entity-observe", entity });
}
|
|
1606
|
+
|
|
1452
1607
|
async reconcile(entities?: string[]): Promise<void> {
|
|
1453
1608
|
const minIntervalMs = this.config.reconcileMinIntervalMs ?? 2_000;
|
|
1454
1609
|
const now = Date.now();
|
|
@@ -1472,7 +1627,13 @@ export class SyncEngine {
|
|
|
1472
1627
|
// Same reasoning as pullInner: the leader reconciles, broadcasts
|
|
1473
1628
|
// results, and follower replicas converge via the channel.
|
|
1474
1629
|
if (!this.isMultiTabLeader) return;
|
|
1475
|
-
|
|
1630
|
+
// Sweep entities with local rows PLUS entities the app has observed
|
|
1631
|
+
// via useQuery (even when empty locally). Without the observed set,
|
|
1632
|
+
// a server row in a never-cached entity is never reconciled and
|
|
1633
|
+
// stays invisible until a full snapshot.
|
|
1634
|
+
const names =
|
|
1635
|
+
entities ??
|
|
1636
|
+
[...new Set([...this.store.entityNames(), ...this.observedEntities])];
|
|
1476
1637
|
if (names.length === 0) return;
|
|
1477
1638
|
// Tombstone seq for any local row the server doesn't return. Using
|
|
1478
1639
|
// the current cursor means future inserts (which have higher seqs)
|
|
@@ -1730,8 +1891,9 @@ export class SyncEngine {
|
|
|
1730
1891
|
// transitions but NOT the apply queue — without queuing
|
|
1731
1892
|
// the reset, a concurrent applyChangesAsync could write
|
|
1732
1893
|
// rows AFTER we clear the store, leaving stale data under
|
|
1733
|
-
// the new identity.
|
|
1734
|
-
|
|
1894
|
+
// the new identity. Identity flipped → wipe the outgoing
|
|
1895
|
+
// identity's pending offline writes too.
|
|
1896
|
+
await this.resetReplica({ wipeMutations: true });
|
|
1735
1897
|
}
|
|
1736
1898
|
if (this.isMultiTabLeader) {
|
|
1737
1899
|
// Only the leader pulls — followers receive subsequent
|
|
@@ -1960,6 +2122,10 @@ export class SyncEngine {
|
|
|
1960
2122
|
changes: pending.map((m) => m.change),
|
|
1961
2123
|
client_id: this.clientId,
|
|
1962
2124
|
});
|
|
2125
|
+
// The request reached the server and returned a response — clear
|
|
2126
|
+
// the transient-failure backoff counter (success or per-op
|
|
2127
|
+
// rejections both mean "we're online and the server answered").
|
|
2128
|
+
this.pushFailureCount = 0;
|
|
1963
2129
|
|
|
1964
2130
|
// Per-op `results` mapping: match by op_id when present, fall
|
|
1965
2131
|
// back to positional. Invariant: a partial-failure batch lands
|
|
@@ -2073,33 +2239,55 @@ export class SyncEngine {
|
|
|
2073
2239
|
}, 250);
|
|
2074
2240
|
}
|
|
2075
2241
|
} catch (err) {
|
|
2076
|
-
//
|
|
2077
|
-
//
|
|
2078
|
-
//
|
|
2079
|
-
//
|
|
2080
|
-
//
|
|
2081
|
-
//
|
|
2242
|
+
// Whole-request failure. CRITICAL distinction:
|
|
2243
|
+
//
|
|
2244
|
+
// - TRANSIENT (offline / network drop / 5xx / 429 / 401 / 408):
|
|
2245
|
+
// the server never durably rejected the write. We MUST keep
|
|
2246
|
+
// the mutations `pending` and the optimistic ghost intact, and
|
|
2247
|
+
// retry with backoff. Marking them failed + rolling back here
|
|
2248
|
+
// is what broke offline support — an offline insert vanished
|
|
2249
|
+
// from the UI and was never re-sent (it became `failed`, and
|
|
2250
|
+
// pushInner only ships `pending`). A network `fetch` throw has
|
|
2251
|
+
// NO `.status`, so it lands here as transient. op_id makes the
|
|
2252
|
+
// eventual retry idempotent even if the server HAD committed.
|
|
2082
2253
|
//
|
|
2083
|
-
//
|
|
2084
|
-
//
|
|
2085
|
-
//
|
|
2086
|
-
// the server if the failure was a transient transport error
|
|
2087
|
-
// — the next push() will re-include the user's intent.
|
|
2254
|
+
// - PERMANENT (400/403/404/409/422): a client error that won't
|
|
2255
|
+
// change on retry (malformed batch, forbidden, gone). Fail +
|
|
2256
|
+
// roll back the optimistic ghost + surface mutations-failed.
|
|
2088
2257
|
const msg = err instanceof Error ? err.message : String(err);
|
|
2089
|
-
const
|
|
2090
|
-
|
|
2091
|
-
|
|
2092
|
-
const
|
|
2093
|
-
|
|
2094
|
-
|
|
2258
|
+
const status = (err as { status?: number })?.status;
|
|
2259
|
+
if (isPermanentPushError(status)) {
|
|
2260
|
+
const failedOps: { opId: string; error: string }[] = [];
|
|
2261
|
+
for (const m of pending) {
|
|
2262
|
+
this.failPushedMutation(m, msg);
|
|
2263
|
+
const opId = m.change.op_id;
|
|
2264
|
+
if (typeof opId === "string") {
|
|
2265
|
+
failedOps.push({ opId, error: msg });
|
|
2266
|
+
}
|
|
2095
2267
|
}
|
|
2268
|
+
if (failedOps.length > 0) {
|
|
2269
|
+
this.broadcastToTabs({ type: "mutations-failed", ops: failedOps });
|
|
2270
|
+
}
|
|
2271
|
+
this.mutations.clear();
|
|
2272
|
+
// eslint-disable-next-line no-console
|
|
2273
|
+
console.warn(`[sync] /api/sync/push rejected (status ${status}):`, msg);
|
|
2274
|
+
} else {
|
|
2275
|
+
// Transient: leave the queue + ghosts alone, retry with bounded
|
|
2276
|
+
// exponential backoff. Resets on the next response (success or
|
|
2277
|
+
// per-op rejection). A 429 also pushes the WS reconnect out so a
|
|
2278
|
+
// rate-limited push doesn't drive a tight loop.
|
|
2279
|
+
if (status === 429) this.transport?.bumpReconnect(3);
|
|
2280
|
+
const attempt = this.pushFailureCount;
|
|
2281
|
+
this.pushFailureCount += 1;
|
|
2282
|
+
const delayMs = Math.min(30_000, 1000 * 2 ** Math.min(attempt, 5));
|
|
2283
|
+
// eslint-disable-next-line no-console
|
|
2284
|
+
console.warn(
|
|
2285
|
+
`[sync] /api/sync/push transient failure (status ${status ?? "offline"}); keeping ${pending.length} mutation(s) pending, retrying in ${delayMs}ms`,
|
|
2286
|
+
);
|
|
2287
|
+
setTimeout(() => {
|
|
2288
|
+
void this.push();
|
|
2289
|
+
}, delayMs);
|
|
2096
2290
|
}
|
|
2097
|
-
if (failedOps.length > 0) {
|
|
2098
|
-
this.broadcastToTabs({ type: "mutations-failed", ops: failedOps });
|
|
2099
|
-
}
|
|
2100
|
-
this.mutations.clear();
|
|
2101
|
-
// eslint-disable-next-line no-console
|
|
2102
|
-
console.warn("[sync] /api/sync/push failed:", msg);
|
|
2103
2291
|
}
|
|
2104
2292
|
}
|
|
2105
2293
|
|
|
@@ -2122,8 +2310,23 @@ export class SyncEngine {
|
|
|
2122
2310
|
* channel) so insert-only rollback is the right shape to ship now.
|
|
2123
2311
|
*/
|
|
2124
2312
|
private failPushedMutation(m: PendingMutation, error: string): void {
  const change = m.change;
  switch (change.kind) {
    case "insert":
      // Remove the optimistic insert for this id.
      this.store.rollbackOptimisticInsert(change.entity, change.row_id);
      break;
    case "update":
    case "delete": {
      // `null` is a real snapshot ("row did not exist before") and is
      // passed to restoreRow. `undefined` means no snapshot was ever
      // captured for this entry, so the store is left untouched.
      const snapshot = m.prevRow;
      if (snapshot !== undefined) {
        this.store.restoreRow(change.entity, change.row_id, snapshot);
      }
      break;
    }
    default:
      // Any other kind has no store-side rollback.
      break;
  }
  // Whatever the kind, the queue entry is marked failed.
  this.mutations.markFailed(m.id, error);
}
|
|
@@ -2151,24 +2354,39 @@ export class SyncEngine {
|
|
|
2151
2354
|
|
|
2152
2355
|
/**
 * Optimistically update a row, queue the change, and push.
 *
 * A shallow copy of the row as it is right now (or `null` when the store
 * has no such row) is taken before the optimistic merge and stored with
 * the queued mutation as its pre-mutation snapshot.
 *
 * @param entity Entity name.
 * @param id Row id.
 * @param data Fields to merge into the row.
 */
async update(entity: string, id: string, data: Partial<Row>): Promise<void> {
  const current = this.store.get(entity, id);
  let snapshot: Row | null = null;
  if (current) {
    // Copy first: the optimistic merge below may touch the live object.
    snapshot = { ...current };
  }

  this.store.optimisticUpdate(entity, id, data);
  this.mutations.add(
    { entity, row_id: id, kind: "update", data: data as Row },
    snapshot,
  );

  await this.push();
}
|
|
2163
2374
|
|
|
2164
2375
|
/**
 * Optimistically delete a row, queue the change, and push.
 *
 * A shallow copy of the row (or `null` when the store has no such row) is
 * taken before the optimistic delete and stored with the queued mutation
 * as its pre-mutation snapshot.
 *
 * @param entity Entity name.
 * @param id Row id.
 */
async delete(entity: string, id: string): Promise<void> {
  const current = this.store.get(entity, id);
  let snapshot: Row | null = null;
  if (current) {
    snapshot = { ...current };
  }

  this.store.optimisticDelete(entity, id);
  this.mutations.add({ entity, row_id: id, kind: "delete" }, snapshot);

  await this.push();
}
|
|
2174
2392
|
|
|
@@ -2868,6 +3086,32 @@ function rowsDiffer(a: Row, b: Row): boolean {
|
|
|
2868
3086
|
return stableStringify(a) !== stableStringify(b);
|
|
2869
3087
|
}
|
|
2870
3088
|
|
|
3089
|
+
/**
 * Classify a whole-request push failure by its HTTP status.
 *
 * Returns `true` (permanent) only for this fixed allow-list of client
 * errors: 400 (bad request), 403 (forbidden), 404 (not found),
 * 409 (conflict) and 422 (unprocessable).
 *
 * Returns `false` (transient) for everything else, including:
 *  - `undefined` — the error carried no status;
 *  - 401, 408, 429 and every 5xx.
 *
 * @param status HTTP status taken from the thrown error, if any.
 * @returns Whether the status is on the permanent allow-list.
 */
function isPermanentPushError(status?: number): boolean {
  switch (status) {
    case 400:
    case 403:
    case 404:
    case 409:
    case 422:
      return true;
    default:
      // Covers `undefined` and every status not listed above.
      return false;
  }
}
|
|
3114
|
+
|
|
2871
3115
|
function stableStringify(value: unknown): string {
|
|
2872
3116
|
if (value === null || typeof value !== "object") return JSON.stringify(value);
|
|
2873
3117
|
if (Array.isArray(value)) {
|
package/src/local-store.ts
CHANGED
|
@@ -199,12 +199,20 @@ export class LocalStore {
|
|
|
199
199
|
* between the memory apply and the eventual disk write can persist
|
|
200
200
|
* a cursor that's ahead of the replica, skipping those rows
|
|
201
201
|
* forever on restart.
|
|
202
|
+
*
|
|
203
|
+
* Returns `true` when every persist write reached disk durably,
|
|
204
|
+
* `false` when at least one degraded (quota / abort). The engine
|
|
205
|
+
* uses the result to hold the PERSISTED cursor back: a row that
|
|
206
|
+
* didn't reach disk must not be skipped by an advanced on-disk
|
|
207
|
+
* cursor on the next cold start. The in-memory replica always
|
|
208
|
+
* reflects the change regardless.
|
|
202
209
|
*/
|
|
203
|
-
async applyChangesAsync(changes: ChangeEvent[]): Promise<
|
|
210
|
+
async applyChangesAsync(changes: ChangeEvent[]): Promise<boolean> {
|
|
204
211
|
for (const change of changes) {
|
|
205
212
|
this.applyChange(change);
|
|
206
213
|
}
|
|
207
214
|
this.notify();
|
|
215
|
+
let allDurable = true;
|
|
208
216
|
if (this._persistFn) {
|
|
209
217
|
// Sequential await — concurrent IDB writes can resolve out of
|
|
210
218
|
// order, racing an update behind its own delete on disk. The
|
|
@@ -213,10 +221,12 @@ export class LocalStore {
|
|
|
213
221
|
for (const change of changes) {
|
|
214
222
|
const result = this._persistFn(this.hydrateFromMemory(change));
|
|
215
223
|
if (result instanceof Promise) {
|
|
216
|
-
await result;
|
|
224
|
+
const durable = await result;
|
|
225
|
+
if (durable === false) allDurable = false;
|
|
217
226
|
}
|
|
218
227
|
}
|
|
219
228
|
}
|
|
229
|
+
return allDurable;
|
|
220
230
|
}
|
|
221
231
|
|
|
222
232
|
/**
|
|
@@ -234,9 +244,11 @@ export class LocalStore {
|
|
|
234
244
|
}
|
|
235
245
|
|
|
236
246
|
/** Persistence callback for auto-saving changes. Returns
|
|
237
|
-
* `Promise<
|
|
238
|
-
*
|
|
239
|
-
|
|
247
|
+
* `Promise<boolean>` (true = durable, false = degraded) so
|
|
248
|
+
* `applyChangesAsync` can gate the on-disk cursor on durability.
|
|
249
|
+
* Void-returning callbacks are accepted for backwards compatibility
|
|
250
|
+
* (treated as durable / fire-and-forget). */
|
|
251
|
+
_persistFn: ((change: ChangeEvent) => void | Promise<boolean>) | null = null;
|
|
240
252
|
|
|
241
253
|
/** Subscribe to store changes. Returns unsubscribe function. */
|
|
242
254
|
subscribe(listener: () => void): () => void {
|
|
@@ -307,6 +319,46 @@ export class LocalStore {
|
|
|
307
319
|
}
|
|
308
320
|
}
|
|
309
321
|
|
|
322
|
+
/**
 * Put a row back to a previously captured value after an optimistic
 * update/delete has to be undone.
 *
 * Steps, in order:
 *  1. The optimistic tombstone for `id` (if any) is always cleared.
 *  2. If a regular tombstone exists for `id`, the row is removed and
 *     `prev` is ignored — the recorded deletion takes precedence over
 *     the rollback.
 *  3. Otherwise a non-null `prev` is stored as the row (the entity's
 *     table is created on demand), and a null `prev` removes the row.
 *  4. Listeners are notified in every case.
 *
 * `prev` is stored by reference, not copied.
 *
 * @param entity Entity name.
 * @param id Row id.
 * @param prev Row value to reinstate, or `null` if the row did not exist.
 */
restoreRow(entity: string, id: string, prev: Row | null): void {
  this.optimisticTombstones.get(entity)?.delete(id);

  const tombstoned = this.tombstones.get(entity)?.has(id) === true;
  if (prev && !tombstoned) {
    let table = this.tables.get(entity);
    if (!table) {
      table = new Map();
      this.tables.set(entity, table);
    }
    table.set(id, prev);
  } else {
    // Either the id is tombstoned or there is no prior row to reinstate:
    // both cases end with the row absent.
    this.tables.get(entity)?.delete(id);
  }

  this.notify();
}
|
|
361
|
+
|
|
310
362
|
/** Apply an optimistic delete. Block any incoming insert/update
|
|
311
363
|
* for this id until the server's authoritative delete arrives. */
|
|
312
364
|
optimisticDelete(entity: string, id: string): void {
|