@pylonsync/sync 0.3.227 → 0.3.229

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/index.ts CHANGED
@@ -233,6 +233,20 @@ export class SyncEngine {
233
233
  */
234
234
  private _hadCachedReplica = false;
235
235
 
236
+ /**
237
+ * Sticky flag: a persisted row/cursor write degraded (IDB quota /
238
+ * abort), so the on-disk replica is known to be behind the in-memory
239
+ * cursor. Once set, `enqueueApply` STOPS advancing the persisted
240
+ * cursor — persisting a cursor ahead of the durable rows would make
241
+ * the next cold start skip them forever (cursor-ahead-of-replica). The
242
+ * in-memory replica stays authoritative for the live session; on
243
+ * restart the lagging on-disk cursor simply re-pulls the gap. Resets to
244
+ * false only on `resetReplicaInner` (full wipe + resync, disk is clean
245
+ * again). A storage-pressured tab thus degrades to "re-pull on restart"
246
+ * — like a memory-only client — instead of silently losing rows.
247
+ */
248
+ private persistDegraded = false;
249
+
236
250
  readonly store: LocalStore;
237
251
  readonly mutations: MutationQueue;
238
252
 
@@ -556,13 +570,16 @@ export class SyncEngine {
556
570
  this.cursor = cachedCursor;
557
571
  }
558
572
 
559
- // Auto-save changes to IndexedDB. Returns a Promise so the async
560
- // apply path (applyChangesAsync) can await the write before the
561
- // cursor advances the fix for "cursor ahead of replica" on crash.
573
+ // Auto-save changes to IndexedDB. Returns a Promise<boolean>
574
+ // (true = durable) so the async apply path (applyChangesAsync)
575
+ // can both await the write before the cursor advances AND hold
576
+ // the persisted cursor back when a write degraded — the fix for
577
+ // "cursor ahead of replica" on crash AND on quota/abort.
562
578
  const persistence = this.persistence;
563
579
  this.store._persistFn = async (change: ChangeEvent) => {
564
580
  const { persistChange } = await import("./persistence");
565
- if (persistence) await persistChange(persistence, change);
581
+ if (!persistence) return true;
582
+ return persistChange(persistence, change);
566
583
  };
567
584
 
568
585
  // Hydrate the mutation queue from disk. Any offline writes
@@ -663,7 +680,7 @@ export class SyncEngine {
663
680
  // runs; the apply path's idempotent op_id-keyed merge handles the
664
681
  // worst case (one re-applied batch on next cold pull if the tab
665
682
  // crashes between this line and the saveCursor task completing).
666
- if (this.persistence) {
683
+ if (this.persistence && !this.persistDegraded) {
667
684
  void this.persistence.saveCursor(this.cursor);
668
685
  }
669
686
 
@@ -756,8 +773,8 @@ export class SyncEngine {
756
773
  fromBroadcast: true,
757
774
  });
758
775
  },
759
- onResetReceived: () => {
760
- void this.resetReplicaInner();
776
+ onResetReceived: (wipeMutations: boolean) => {
777
+ void this.resetReplicaInner({ wipeMutations });
761
778
  },
762
779
  onSessionReceived: (resolved: ResolvedSession) => {
763
780
  // Funnel through the shared session chain so concurrent triggers
@@ -768,7 +785,15 @@ export class SyncEngine {
768
785
  },
769
786
  onMutationsForwarded: (ops: PendingMutation[]) => {
770
787
  for (const op of ops) {
771
- this.mutations.add(op.change);
788
+ // Thread the follower's captured `prevRow` so a server
789
+ // rejection of this forwarded update/delete restores the
790
+ // canonical value on the leader as well. Without it the
791
+ // leader's queue entry has prevRow === undefined, and
792
+ // failPushedMutation's `prevRow !== undefined` guard skips the
793
+ // restore, deferring to pull/reconcile. The follower's prevRow
794
+ // (its pre-edit value) equals the leader's canonical row, so
795
+ // restoring it is correct on both tabs.
796
+ this.mutations.add(op.change, op.prevRow);
772
797
  }
773
798
  void this.push();
774
799
  },
@@ -777,8 +802,19 @@ export class SyncEngine {
777
802
  this.mutations.clear();
778
803
  },
779
804
  onMutationsFailed: (ops: { opId: string; error: string }[]) => {
805
+ // The leader pushed this follower's forwarded mutation and the
806
+ // server rejected it. Roll back the follower's OWN optimistic
807
+ // ghost (the leader already rolled back its copy) — calling
808
+ // markFailed alone left the ghost row stuck in the very tab the
809
+ // user is looking at. failPushedMutation restores prevRow for
810
+ // update/delete and removes the insert ghost, then marks failed.
780
811
  for (const op of ops) {
781
- this.mutations.markFailed(op.opId, op.error);
812
+ const m = this.mutations.get(op.opId);
813
+ if (m) {
814
+ this.failPushedMutation(m, op.error);
815
+ } else {
816
+ this.mutations.markFailed(op.opId, op.error);
817
+ }
782
818
  }
783
819
  },
784
820
  onBinaryReceived: (bytes: Uint8Array) => {
@@ -882,6 +918,28 @@ export class SyncEngine {
882
918
  });
883
919
  }
884
920
  },
921
+ onEntityObserve: (entity: string) => {
922
+ // Leader path: a follower's useQuery observed this entity. Add
923
+ // it to our reconcile sweep and fetch it now if we have no local
924
+ // rows — the resulting `reconciled` batch is broadcast to every
925
+ // tab, so the follower's view populates. Same shape as the
926
+ // leader half of observeEntity; the `has` guard dedupes against
927
+ // our own interest.
928
+ if (!this.isMultiTabLeader) return;
929
+ if (this.observedEntities.has(entity)) return;
930
+ this.observedEntities.add(entity);
931
+ if (this.isHydrated() && this.store.list(entity).length === 0) {
932
+ void this.reconcile([entity]);
933
+ }
934
+ },
935
+ onReplayObservedEntities: () => {
936
+ // Follower path: re-declare every observed entity to the new
937
+ // leader so its reconcile sweep covers them after a leader flip.
938
+ if (this.isMultiTabLeader) return;
939
+ for (const entity of this.observedEntities) {
940
+ this.broadcastToTabs({ type: "entity-observe", entity });
941
+ }
942
+ },
885
943
  };
886
944
  }
887
945
 
@@ -1008,7 +1066,11 @@ export class SyncEngine {
1008
1066
  (c) => typeof c.seq === "number" && c.seq > this.cursor.last_seq,
1009
1067
  );
1010
1068
  if (filtered.length > 0) {
1011
- await this.store.applyChangesAsync(filtered);
1069
+ const durable = await this.store.applyChangesAsync(filtered);
1070
+ // A row in this batch didn't reach disk (quota / abort). Latch
1071
+ // the degraded flag so we never persist a cursor ahead of the
1072
+ // durable replica — the next cold start must re-pull this gap.
1073
+ if (!durable) this.persistDegraded = true;
1012
1074
  }
1013
1075
  // Pick the cursor target. Explicit `targetCursor` (from pull) wins
1014
1076
  // — pull's response carries the server's authoritative current_seq
@@ -1020,8 +1082,12 @@ export class SyncEngine {
1020
1082
  ? { last_seq: filtered[filtered.length - 1].seq }
1021
1083
  : null);
1022
1084
  if (candidate && candidate.last_seq > this.cursor.last_seq) {
1085
+ // In-memory cursor ALWAYS advances — live sync stays correct.
1023
1086
  this.cursor = candidate;
1024
- if (this.persistence) {
1087
+ // The on-disk cursor only advances while persistence is healthy.
1088
+ // Once degraded, freezing it keeps disk self-consistent (cursor
1089
+ // never exceeds the rows actually written) so restart re-pulls.
1090
+ if (this.persistence && !this.persistDegraded) {
1025
1091
  await this.persistence.saveCursor(this.cursor);
1026
1092
  }
1027
1093
  }
@@ -1126,16 +1192,41 @@ export class SyncEngine {
1126
1192
  * rehydrated on the next page load — phantom rows that no purge of
1127
1193
  * in-memory state could fix.
1128
1194
  */
1129
- async resetReplica(): Promise<void> {
1195
+ async resetReplica(opts: { wipeMutations?: boolean } = {}): Promise<void> {
1130
1196
  // Public callers go through the queue so a reset can't race with
1131
1197
  // an in-flight pull / push / reconcile. Internal callers that
1132
1198
  // already hold the queue slot use `resetReplicaInner` directly.
1133
- return this.opQueue.enqueue("reset", () => this.resetReplicaInner());
1199
+ return this.opQueue.enqueue("reset", () => this.resetReplicaInner(opts));
1134
1200
  }
1135
1201
 
1136
- private async resetReplicaInner(): Promise<void> {
1202
+ /**
1203
+ * Drop the local replica and pull fresh. `wipeMutations` decides the
1204
+ * fate of the durable offline write queue:
1205
+ * - `false` (default, 410 RESYNC, SAME user): KEEP pending writes —
1206
+ * they survive the snapshot refresh and re-push under the same
1207
+ * session.
1208
+ * - `true` (token/tenant flip, DIFFERENT identity): DROP them — the
1209
+ * queued writes belong to the outgoing identity and must never be
1210
+ * replayed as the incoming one (cross-identity write leak).
1211
+ */
1212
+ private async resetReplicaInner(
1213
+ opts: { wipeMutations?: boolean } = {},
1214
+ ): Promise<void> {
1215
+ const wipeMutations = opts.wipeMutations === true;
1137
1216
  this.cursor = { last_seq: 0 };
1138
1217
  this.store.clearAll();
1218
+ // Disk is about to be wiped + re-pulled from 0, so any prior
1219
+ // persist degradation is moot — start the durability invariant
1220
+ // fresh. (If the fresh snapshot also fails to persist, enqueueApply
1221
+ // re-latches the flag.)
1222
+ this.persistDegraded = false;
1223
+ if (wipeMutations) {
1224
+ // Identity flip: discard the outgoing identity's pending offline
1225
+ // writes (and persist the empty queue to disk via the mutation
1226
+ // backend). persistence.clear() deliberately leaves MUTATIONS_STORE
1227
+ // alone for the 410 path, so this is the only site that drops them.
1228
+ this.mutations.clearAll();
1229
+ }
1139
1230
  // The cache is now empty. The next pull will start from 0 and
1140
1231
  // return a full snapshot — that's a true cold start, so the
1141
1232
  // onConnected fast-path may skip the post-pull reconcile. Without
@@ -1154,9 +1245,11 @@ export class SyncEngine {
1154
1245
  }
1155
1246
  // Leader broadcasts the reset so follower replicas wipe their
1156
1247
  // own copies in lockstep — otherwise a follower keeps stale
1157
- // rows under the old identity until its own pull catches up.
1248
+ // rows under the old identity until its own pull catches up. The
1249
+ // `wipeMutations` flag rides along so followers make the same
1250
+ // keep-vs-drop decision for THEIR forwarded offline writes.
1158
1251
  if (this.isMultiTabLeader) {
1159
- this.broadcastToTabs({ type: "reset" });
1252
+ this.broadcastToTabs({ type: "reset", wipeMutations });
1160
1253
  }
1161
1254
  }
1162
1255
 
@@ -1264,8 +1357,9 @@ export class SyncEngine {
1264
1357
  const { tokenChanged } = this.session.observeToken(this.currentToken());
1265
1358
  if (tokenChanged) {
1266
1359
  // We're holding the "pull" slot in the op queue — bypass the
1267
- // queue's reset path to avoid self-deadlock.
1268
- await this.resetReplicaInner();
1360
+ // queue's reset path to avoid self-deadlock. Identity flipped, so
1361
+ // wipe the old identity's pending offline writes.
1362
+ await this.resetReplicaInner({ wipeMutations: true });
1269
1363
  // Token flipped → the cached tenant is for the previous user. Pull
1270
1364
  // the fresh session in parallel with the cursor catch-up below.
1271
1365
  void this.refreshResolvedSession();
@@ -1301,12 +1395,18 @@ export class SyncEngine {
1301
1395
  // Continue paginating in the same loop iteration so we don't
1302
1396
  // leave a fresh client with a partial replica.
1303
1397
  snapshotAfter = resp.snapshot_after ?? undefined;
1304
- // The change-log tail also paginates via `has_more` — handle
1305
- // that one recursively after the snapshot loop completes so
1306
- // backpressure on the change-log path uses the existing
1307
- // tail-pull semantics.
1398
+ // The change-log tail also paginates via `has_more` — drain it
1399
+ // by recursing into `pullInner` directly. We are INSIDE the
1400
+ // `pull` op-queue slot right now; calling the public `pull()`
1401
+ // would re-enqueue under the same "pull" key, which coalesces
1402
+ // to the promise we're currently running inside (op-queue.ts
1403
+ // deletes the key only after `fn` resolves) and `await` it →
1404
+ // permanent self-deadlock that bricks the entire pull path for
1405
+ // the session. This is the exact hazard the 410 handler avoids;
1406
+ // `pullInner` re-reads `this.cursor.last_seq` (already advanced
1407
+ // by enqueueApply) so the recursion resumes at the right cursor.
1308
1408
  if (!snapshotAfter && resp.has_more) {
1309
- await this.pull();
1409
+ await this.pullInner();
1310
1410
  break;
1311
1411
  }
1312
1412
  }
@@ -1397,6 +1497,12 @@ export class SyncEngine {
1397
1497
  * that doesn't throw a 410. */
1398
1498
  private consecutive_410s = 0;
1399
1499
 
1500
+ /** Consecutive TRANSIENT push failures (offline / 5xx / 429 / 401)
1501
+ * since the last push request that completed without throwing. Drives
1502
+ * the exponential backoff on the retry of a transient-failed push so an
1503
+ * offline tab doesn't hot-loop. Reset to 0 as soon as a push resolves. */
1504
+ private pushFailureCount = 0;
1505
+
1400
1506
  /** Set by pullInner whenever the just-completed pull started with
1401
1507
  * `cursor.last_seq === 0` (cold load OR post-reset). The WS
1402
1508
  * onConnected hook reads this to skip the reconcile() that would
@@ -1414,6 +1520,17 @@ export class SyncEngine {
1414
1520
  * entity twice within seconds. Configurable via `reconcileMinIntervalMs`. */
1415
1521
  private lastReconcileAt = 0;
1416
1522
 
1523
+ /** Entities the app has subscribed to via `useQuery` / `useQueryOne`,
1524
+ * even ones the local replica has zero rows for. The reconcile
1525
+ * safety net defaults to `store.entityNames()` — entities with at
1526
+ * least one local row — so a server row in a NEVER-cached entity (a
1527
+ * row created on another surface, or a freshly-added entity) stayed
1528
+ * invisible until a full snapshot / cache clear: `useQuery` reads
1529
+ * the local store and a delta `pull()` can't recover a row created
1530
+ * before the cursor. Tracking observed entities lets the no-arg
1531
+ * reconcile sweep them too. See `observeEntity`. */
1532
+ private observedEntities = new Set<string>();
1533
+
1417
1534
  /**
1418
1535
  * Reconcile the local replica against server truth.
1419
1536
  *
@@ -1447,8 +1564,46 @@ export class SyncEngine {
1447
1564
  *
1448
1565
  * Pass an explicit entity list to scope the reconcile (callers like
1449
1566
  * `db.useQueryOne` that know what they care about). When called with
1450
- * no arg, every entity with local rows is checked.
1567
+ * no arg, every entity with local rows OR observed via `useQuery`
1568
+ * (see `observeEntity`) is checked.
1451
1569
  */
1570
+ /**
1571
+ * Register interest in an entity — called by `useQuery` /
1572
+ * `useQueryOne` on mount. Two effects:
1573
+ *
1574
+ * 1. Adds the entity to the reconcile sweep so the safety net
1575
+ * covers it even with zero local rows (see `observedEntities`).
1576
+ * 2. The FIRST time an entity is observed while the replica is
1577
+ * hydrated and that entity is locally empty, fires a one-shot
1578
+ * scoped reconcile so a server row this client never cached
1579
+ * appears on page-open — instead of waiting for the next
1580
+ * reconnect / visibility-change trigger. Bounded: at most once
1581
+ * per entity per engine (the `observedEntities` guard).
1582
+ *
1583
+ * Genuinely-empty entities just pay one cheap policy-filtered fetch;
1584
+ * entities where the client missed an insert get the row back.
1585
+ */
1586
+ observeEntity(entity: string): void {
1587
+ if (this.observedEntities.has(entity)) return;
1588
+ this.observedEntities.add(entity);
1589
+ if (!this.isMultiTabLeader) {
1590
+ // Follower: only the leader talks to the network. Forward the
1591
+ // interest so the LEADER adds this entity to its reconcile sweep
1592
+ // and fetches any server row we never cached — then converge via
1593
+ // the `reconciled` broadcast. Without the forward, a follower's
1594
+ // useQuery on a never-cached entity renders empty forever (the
1595
+ // leader never sweeps an entity it has no local rows for and was
1596
+ // never told a peer cares about).
1597
+ this.broadcastToTabs({ type: "entity-observe", entity });
1598
+ return;
1599
+ }
1600
+ if (this.isHydrated() && this.store.list(entity).length === 0) {
1601
+ // Scoped reconcile bypasses the no-arg debounce and reuses the
1602
+ // session-flip / cursor-drift guards in reconcileInner.
1603
+ void this.reconcile([entity]);
1604
+ }
1605
+ }
1606
+
1452
1607
  async reconcile(entities?: string[]): Promise<void> {
1453
1608
  const minIntervalMs = this.config.reconcileMinIntervalMs ?? 2_000;
1454
1609
  const now = Date.now();
@@ -1472,7 +1627,13 @@ export class SyncEngine {
1472
1627
  // Same reasoning as pullInner: the leader reconciles, broadcasts
1473
1628
  // results, and follower replicas converge via the channel.
1474
1629
  if (!this.isMultiTabLeader) return;
1475
- const names = entities ?? this.store.entityNames();
1630
+ // Sweep entities with local rows PLUS entities the app has observed
1631
+ // via useQuery (even when empty locally). Without the observed set,
1632
+ // a server row in a never-cached entity is never reconciled and
1633
+ // stays invisible until a full snapshot.
1634
+ const names =
1635
+ entities ??
1636
+ [...new Set([...this.store.entityNames(), ...this.observedEntities])];
1476
1637
  if (names.length === 0) return;
1477
1638
  // Tombstone seq for any local row the server doesn't return. Using
1478
1639
  // the current cursor means future inserts (which have higher seqs)
@@ -1730,8 +1891,9 @@ export class SyncEngine {
1730
1891
  // transitions but NOT the apply queue — without queuing
1731
1892
  // the reset, a concurrent applyChangesAsync could write
1732
1893
  // rows AFTER we clear the store, leaving stale data under
1733
- // the new identity.
1734
- await this.resetReplica();
1894
+ // the new identity. Identity flipped → wipe the outgoing
1895
+ // identity's pending offline writes too.
1896
+ await this.resetReplica({ wipeMutations: true });
1735
1897
  }
1736
1898
  if (this.isMultiTabLeader) {
1737
1899
  // Only the leader pulls — followers receive subsequent
@@ -1960,6 +2122,10 @@ export class SyncEngine {
1960
2122
  changes: pending.map((m) => m.change),
1961
2123
  client_id: this.clientId,
1962
2124
  });
2125
+ // The request reached the server and returned a response — clear
2126
+ // the transient-failure backoff counter (success or per-op
2127
+ // rejections both mean "we're online and the server answered").
2128
+ this.pushFailureCount = 0;
1963
2129
 
1964
2130
  // Per-op `results` mapping: match by op_id when present, fall
1965
2131
  // back to positional. Invariant: a partial-failure batch lands
@@ -2073,33 +2239,55 @@ export class SyncEngine {
2073
2239
  }, 250);
2074
2240
  }
2075
2241
  } catch (err) {
2076
- // Transport-level failure (network down, CORS, 5xx without a
2077
- // typed body, parse error). Pre-0.3.224 swallowed silently:
2078
- // the mutation stayed `pending` forever and the optimistic
2079
- // ghost survived even though the server never accepted the
2080
- // write. That's the "I sent it, it's there, then it's gone"
2081
- // pattern users see after a reload.
2242
+ // Whole-request failure. CRITICAL distinction:
2243
+ //
2244
+ // - TRANSIENT (offline / network drop / 5xx / 429 / 401 / 408):
2245
+ // the server never durably rejected the write. We MUST keep
2246
+ // the mutations `pending` and the optimistic ghost intact, and
2247
+ // retry with backoff. Marking them failed + rolling back here
2248
+ // is what broke offline support — an offline insert vanished
2249
+ // from the UI and was never re-sent (it became `failed`, and
2250
+ // pushInner only ships `pending`). A network `fetch` throw has
2251
+ // NO `.status`, so it lands here as transient. op_id makes the
2252
+ // eventual retry idempotent even if the server HAD committed.
2082
2253
  //
2083
- // Now: fail every pending mutation in this batch, roll back
2084
- // any optimistic ghost, surface via mutations-failed so the
2085
- // UI can prompt + retry. op_id keeps a retry idempotent on
2086
- // the server if the failure was a transient transport error
2087
- // — the next push() will re-include the user's intent.
2254
+ // - PERMANENT (400/403/404/409/422): a client error that won't
2255
+ // change on retry (malformed batch, forbidden, not found). Fail +
2256
+ // roll back the optimistic ghost + surface mutations-failed.
2088
2257
  const msg = err instanceof Error ? err.message : String(err);
2089
- const failedOps: { opId: string; error: string }[] = [];
2090
- for (const m of pending) {
2091
- this.failPushedMutation(m, msg);
2092
- const opId = m.change.op_id;
2093
- if (typeof opId === "string") {
2094
- failedOps.push({ opId, error: msg });
2258
+ const status = (err as { status?: number })?.status;
2259
+ if (isPermanentPushError(status)) {
2260
+ const failedOps: { opId: string; error: string }[] = [];
2261
+ for (const m of pending) {
2262
+ this.failPushedMutation(m, msg);
2263
+ const opId = m.change.op_id;
2264
+ if (typeof opId === "string") {
2265
+ failedOps.push({ opId, error: msg });
2266
+ }
2095
2267
  }
2268
+ if (failedOps.length > 0) {
2269
+ this.broadcastToTabs({ type: "mutations-failed", ops: failedOps });
2270
+ }
2271
+ this.mutations.clear();
2272
+ // eslint-disable-next-line no-console
2273
+ console.warn(`[sync] /api/sync/push rejected (status ${status}):`, msg);
2274
+ } else {
2275
+ // Transient: leave the queue + ghosts alone, retry with bounded
2276
+ // exponential backoff. Resets on the next response (success or
2277
+ // per-op rejection). A 429 also pushes the WS reconnect out so a
2278
+ // rate-limited push doesn't drive a tight loop.
2279
+ if (status === 429) this.transport?.bumpReconnect(3);
2280
+ const attempt = this.pushFailureCount;
2281
+ this.pushFailureCount += 1;
2282
+ const delayMs = Math.min(30_000, 1000 * 2 ** Math.min(attempt, 5));
2283
+ // eslint-disable-next-line no-console
2284
+ console.warn(
2285
+ `[sync] /api/sync/push transient failure (status ${status ?? "offline"}); keeping ${pending.length} mutation(s) pending, retrying in ${delayMs}ms`,
2286
+ );
2287
+ setTimeout(() => {
2288
+ void this.push();
2289
+ }, delayMs);
2096
2290
  }
2097
- if (failedOps.length > 0) {
2098
- this.broadcastToTabs({ type: "mutations-failed", ops: failedOps });
2099
- }
2100
- this.mutations.clear();
2101
- // eslint-disable-next-line no-console
2102
- console.warn("[sync] /api/sync/push failed:", msg);
2103
2291
  }
2104
2292
  }
2105
2293
 
@@ -2122,8 +2310,23 @@ export class SyncEngine {
2122
2310
  * channel) so insert-only rollback is the right shape to ship now.
2123
2311
  */
2124
2312
  private failPushedMutation(m: PendingMutation, error: string): void {
2125
- if (m.change.kind === "insert") {
2126
- this.store.rollbackOptimisticInsert(m.change.entity, m.change.row_id);
2313
+ const { entity, row_id, kind } = m.change;
2314
+ if (kind === "insert") {
2315
+ // No tombstone — a future legitimate insert of this id must work.
2316
+ this.store.rollbackOptimisticInsert(entity, row_id);
2317
+ } else if (kind === "update" || kind === "delete") {
2318
+ // Restore the captured pre-mutation row (update: prior field
2319
+ // values; delete: bring it back AND clear the optimistic tombstone
2320
+ // fence). `prevRow === null` means the row didn't exist pre-mutation
2321
+ // → remove + un-fence. `prevRow === undefined` means THIS engine
2322
+ // never captured a snapshot — i.e. the optimistic change wasn't
2323
+ // applied to this store (a forwarded op whose prevRow didn't
2324
+ // thread). Touching the store then would delete a canonical row we
2325
+ // still hold, so leave it untouched and let pull/reconcile
2326
+ // reconverge. The `!== undefined` guard distinguishes the two.
2327
+ if (m.prevRow !== undefined) {
2328
+ this.store.restoreRow(entity, row_id, m.prevRow);
2329
+ }
2127
2330
  }
2128
2331
  this.mutations.markFailed(m.id, error);
2129
2332
  }
@@ -2151,24 +2354,39 @@ export class SyncEngine {
2151
2354
 
2152
2355
  /** Update a row with optimistic local update. */
2153
2356
  async update(entity: string, id: string, data: Partial<Row>): Promise<void> {
2357
+ // Snapshot the pre-update row BEFORE applying the optimistic merge so
2358
+ // a rejected push can restore the exact prior value (see
2359
+ // failPushedMutation). Clone — the live row is mutated in place.
2360
+ const before = this.store.get(entity, id);
2361
+ const prev = before ? { ...before } : null;
2154
2362
  this.store.optimisticUpdate(entity, id, data);
2155
- this.mutations.add({
2156
- entity,
2157
- row_id: id,
2158
- kind: "update",
2159
- data: data as Row,
2160
- });
2363
+ this.mutations.add(
2364
+ {
2365
+ entity,
2366
+ row_id: id,
2367
+ kind: "update",
2368
+ data: data as Row,
2369
+ },
2370
+ prev,
2371
+ );
2161
2372
  await this.push();
2162
2373
  }
2163
2374
 
2164
2375
  /** Delete a row with optimistic local update. */
2165
2376
  async delete(entity: string, id: string): Promise<void> {
2377
+ // Snapshot the row before removing it so a rejected delete can bring
2378
+ // it back (and clear the optimistic tombstone).
2379
+ const before = this.store.get(entity, id);
2380
+ const prev = before ? { ...before } : null;
2166
2381
  this.store.optimisticDelete(entity, id);
2167
- this.mutations.add({
2168
- entity,
2169
- row_id: id,
2170
- kind: "delete",
2171
- });
2382
+ this.mutations.add(
2383
+ {
2384
+ entity,
2385
+ row_id: id,
2386
+ kind: "delete",
2387
+ },
2388
+ prev,
2389
+ );
2172
2390
  await this.push();
2173
2391
  }
2174
2392
 
@@ -2868,6 +3086,32 @@ function rowsDiffer(a: Row, b: Row): boolean {
2868
3086
  return stableStringify(a) !== stableStringify(b);
2869
3087
  }
2870
3088
 
3089
+ /**
3090
+ * Is a whole-request push failure PERMANENT (the write was durably
3091
+ * rejected and won't succeed on retry) vs TRANSIENT (offline / server
3092
+ * hiccup / rate limit — retry will eventually land)?
3093
+ *
3094
+ * - `undefined` status = a `fetch` network throw (offline, DNS, CORS,
3095
+ * connection reset) → transient.
3096
+ * - 400/403/404/409/422 = client errors that are stable across retries
3097
+ * (malformed batch, forbidden, not found, conflict, unprocessable) →
3098
+ * permanent.
3099
+ * - everything else (5xx, 429 rate-limit, 408 timeout, 401 needs
3100
+ * re-auth, 502/503/504) → transient: keep the mutation queued and
3101
+ * retry. Per-op policy rejections do NOT come through here — they
3102
+ * arrive as a 200 with per-op `results`, handled on the success path.
3103
+ */
3104
+ function isPermanentPushError(status?: number): boolean {
3105
+ if (status === undefined) return false;
3106
+ return (
3107
+ status === 400 ||
3108
+ status === 403 ||
3109
+ status === 404 ||
3110
+ status === 409 ||
3111
+ status === 422
3112
+ );
3113
+ }
3114
+
2871
3115
  function stableStringify(value: unknown): string {
2872
3116
  if (value === null || typeof value !== "object") return JSON.stringify(value);
2873
3117
  if (Array.isArray(value)) {
@@ -199,12 +199,20 @@ export class LocalStore {
199
199
  * between the memory apply and the eventual disk write can persist
200
200
  * a cursor that's ahead of the replica, skipping those rows
201
201
  * forever on restart.
202
+ *
203
+ * Returns `true` when every persist write reached disk durably,
204
+ * `false` when at least one degraded (quota / abort). The engine
205
+ * uses the result to hold the PERSISTED cursor back: a row that
206
+ * didn't reach disk must not be skipped by an advanced on-disk
207
+ * cursor on the next cold start. The in-memory replica always
208
+ * reflects the change regardless.
202
209
  */
203
- async applyChangesAsync(changes: ChangeEvent[]): Promise<void> {
210
+ async applyChangesAsync(changes: ChangeEvent[]): Promise<boolean> {
204
211
  for (const change of changes) {
205
212
  this.applyChange(change);
206
213
  }
207
214
  this.notify();
215
+ let allDurable = true;
208
216
  if (this._persistFn) {
209
217
  // Sequential await — concurrent IDB writes can resolve out of
210
218
  // order, racing an update behind its own delete on disk. The
@@ -213,10 +221,12 @@ export class LocalStore {
213
221
  for (const change of changes) {
214
222
  const result = this._persistFn(this.hydrateFromMemory(change));
215
223
  if (result instanceof Promise) {
216
- await result;
224
+ const durable = await result;
225
+ if (durable === false) allDurable = false;
217
226
  }
218
227
  }
219
228
  }
229
+ return allDurable;
220
230
  }
221
231
 
222
232
  /**
@@ -234,9 +244,11 @@ export class LocalStore {
234
244
  }
235
245
 
236
246
  /** Persistence callback for auto-saving changes. Returns
237
- * `Promise<void>` so callers can await. Void-returning callbacks
238
- * are accepted for backwards compatibility (just not awaitable). */
239
- _persistFn: ((change: ChangeEvent) => void | Promise<void>) | null = null;
247
+ * `Promise<boolean>` (true = durable, false = degraded) so
248
+ * `applyChangesAsync` can gate the on-disk cursor on durability.
249
+ * Void-returning callbacks are accepted for backwards compatibility
250
+ * (treated as durable / fire-and-forget). */
251
+ _persistFn: ((change: ChangeEvent) => void | Promise<boolean>) | null = null;
240
252
 
241
253
  /** Subscribe to store changes. Returns unsubscribe function. */
242
254
  subscribe(listener: () => void): () => void {
@@ -307,6 +319,46 @@ export class LocalStore {
307
319
  }
308
320
  }
309
321
 
322
+ /**
323
+ * Undo a rejected optimistic update/delete by restoring the row to its
324
+ * captured pre-mutation value and clearing any optimistic tombstone
325
+ * for it. `failPushedMutation` calls this when the server rejects an
326
+ * update (restore the prior field values) or a delete (bring the row
327
+ * back AND un-fence it so the row — and any future server insert of
328
+ * the id — isn't blocked by the lingering optimistic tombstone).
329
+ *
330
+ * `prev === null` means the row didn't exist before the mutation
331
+ * (e.g. an update on a row that was itself an un-acked insert) — in
332
+ * that case we just remove it + clear the fence.
333
+ *
334
+ * A REAL (server-issued) tombstone wins over the restore: if an
335
+ * authoritative delete/revocation for this id landed on the applyQueue
336
+ * while the rejected push was in flight (the opQueue and applyQueue run
337
+ * independently), resurrecting `prev` here would briefly un-delete a row
338
+ * the server says is gone — healed only at the next reconcile. So when a
339
+ * server tombstone is present we drop the row and let the canonical
340
+ * state stand; the failed mutation's own optimistic fence is cleared
341
+ * regardless so a later legitimate re-create of the id isn't blocked.
342
+ */
343
+ restoreRow(entity: string, id: string, prev: Row | null): void {
344
+ // The failed mutation's own optimistic fence always clears.
345
+ this.optimisticTombstones.get(entity)?.delete(id);
346
+ if (this.tombstones.get(entity)?.has(id)) {
347
+ // Server authoritatively removed this row mid-flight — its
348
+ // deletion outranks our local rollback.
349
+ this.tables.get(entity)?.delete(id);
350
+ this.notify();
351
+ return;
352
+ }
353
+ if (prev) {
354
+ if (!this.tables.has(entity)) this.tables.set(entity, new Map());
355
+ this.tables.get(entity)!.set(id, prev);
356
+ } else {
357
+ this.tables.get(entity)?.delete(id);
358
+ }
359
+ this.notify();
360
+ }
361
+
310
362
  /** Apply an optimistic delete. Block any incoming insert/update
311
363
  * for this id until the server's authoritative delete arrives. */
312
364
  optimisticDelete(entity: string, id: string): void {