@pylonsync/sync 0.3.212 → 0.3.215

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/index.ts CHANGED
@@ -202,6 +202,24 @@ export class SyncEngine {
202
202
  return this._hydrated;
203
203
  }
204
204
 
205
+ /**
206
+ * True when the engine drained at least one row OR a saved cursor
207
+ * out of IndexedDB during `start()`. Distinguishes a returning user
208
+ * (cached replica may contain rows the server has since deleted) from
209
+ * a true first-time user (cache empty, pull-from-0 IS canonical
210
+ * truth).
211
+ *
212
+ * Used by the WS `onConnected` fast-path: `lastPullStartedFromZero`
213
+ * only fires the reconcile-skip when this flag is ALSO false. A
214
+ * returning user whose IDB cursor somehow rolled back to 0 (rare:
215
+ * partial wipe, corrupt write) must still get the reconcile pass —
216
+ * otherwise rows deleted on the server while the tab was closed
217
+ * survive forever.
218
+ *
219
+ * Set by start() from the IDB load; reset to false by resetReplicaInner() when the replica is wiped.
220
+ */
221
+ private _hadCachedReplica = false;
222
+
205
223
  readonly store: LocalStore;
206
224
  readonly mutations: MutationQueue;
207
225
 
@@ -433,12 +451,38 @@ export class SyncEngine {
433
451
  const shouldPersist = this.config.persist !== false && typeof indexedDB !== "undefined";
434
452
  if (shouldPersist) {
435
453
  try {
436
- const { IndexedDBPersistence, persistChange } = await import("./persistence");
454
+ const { IndexedDBPersistence } = await import("./persistence");
437
455
  this.persistence = new IndexedDBPersistence(this.config.appName);
438
456
  await this.persistence.open();
439
457
 
440
- // Load cached data into the store.
441
- const cached = await this.persistence.loadAllEntities();
458
+ // Warm-load entities + cursor in ONE readonly transaction so
459
+ // the hydrated rows and the cursor we'll advance from are a
460
+ // consistent snapshot. Separate reads could (in a multi-tab
461
+ // race) interleave a mid-load save and read (rows@C, cursor@C+1)
462
+ // — the pull would then skip seqs we never applied. The
463
+ // post-load timing log surfaces cold-IDB pages so a regression
464
+ // (50MB cache, slow disk) is observable.
465
+ const idbLoadStart =
466
+ typeof performance !== "undefined" ? performance.now() : Date.now();
467
+ const { entities: cached, cursor: cachedCursor, hadCache } =
468
+ await this.persistence.loadSnapshot();
469
+ const idbLoadMs =
470
+ (typeof performance !== "undefined" ? performance.now() : Date.now()) -
471
+ idbLoadStart;
472
+ if (idbLoadMs > 100) {
473
+ console.warn(
474
+ `[persistence] cold IDB load took ${idbLoadMs.toFixed(0)}ms (${
475
+ Object.keys(cached).length
476
+ } entities)`,
477
+ );
478
+ }
479
+ // Record whether IDB had a prior session's state. The cold-load
480
+ // fast-path in onConnected (skip post-pull reconcile when the
481
+ // pull was a full snapshot from cursor=0) is only safe when
482
+ // there was no cached replica to begin with — a returning user
483
+ // whose pull-from-cursor misses an offline server-side delete
484
+ // depends on that reconcile pass to catch the ghost row.
485
+ this._hadCachedReplica = hadCache;
442
486
  let hydrated = false;
443
487
  for (const [entity, rows] of Object.entries(cached)) {
444
488
  for (const row of rows) {
@@ -459,10 +503,13 @@ export class SyncEngine {
459
503
  else this.store.notify(); // notify even on empty cache so useQuery
460
504
  // sees `isHydrated()` flip and can drop its initial loading state.
461
505
 
462
- // Load cursor.
463
- const savedCursor = await this.persistence.loadCursor();
464
- if (savedCursor) {
465
- this.cursor = savedCursor;
506
+ // Apply the cached cursor BEFORE pull so the first pull is a
507
+ // delta against where we left off, not a full re-snapshot.
508
+ // Already part of the single loadSnapshot() tx above — assigning
509
+ // here can't race a concurrent save because pull/push haven't
510
+ // started yet (initMultiTab is still ahead).
511
+ if (cachedCursor) {
512
+ this.cursor = cachedCursor;
466
513
  }
467
514
 
468
515
  // Auto-save changes to IndexedDB. Returns a Promise so the async
@@ -512,12 +559,30 @@ export class SyncEngine {
512
559
  // applied changes broadcast by the leader. The election settles
513
560
  // in ~250ms; if the broker is unavailable (no BroadcastChannel)
514
561
  // every tab is implicitly its own leader.
515
- await this.initMultiTab();
562
+ //
563
+ // Bootstrap parallelization: the election (~250ms) and
564
+ // /api/auth/me (~60ms) are independent — kick both off, then
565
+ // await election first. If we lose the election we discard the
566
+ // session result and let the leader broadcast its session over
567
+ // the multi-tab channel; the "leader-only network writes"
568
+ // invariant is preserved because no peers have observed our
569
+ // pending /api/auth/me request and no apply has happened yet.
570
+ const electionPromise = this.initMultiTab();
571
+ const sessionPromise = this.fetchSessionBootstrap().catch(() => null);
572
+ await electionPromise;
516
573
 
517
574
  if (!this.isMultiTabLeader) {
518
575
  // Follower path: rely on the leader's broadcasts for session +
519
576
  // applied changes. Nothing else to do here — the broker is
520
- // wired to forward inbound messages into the engine.
577
+ // wired to forward inbound messages into the engine. The
578
+ // sessionPromise we kicked off above resolves into the void;
579
+ // the leader's broadcast will deliver the authoritative view.
580
+ // Swallow any pending error so it doesn't surface as an
581
+ // unhandled rejection.
582
+ void sessionPromise.then(
583
+ () => {},
584
+ () => {},
585
+ );
521
586
  return;
522
587
  }
523
588
 
@@ -533,15 +598,29 @@ export class SyncEngine {
533
598
  // Seed the server-resolved session before the first pull so
534
599
  // `useSession` subscribers see the right tenant from frame one,
535
600
  // and the resolver's lastSeenTenant is populated before any
536
- // subsequent flip can race with it.
537
- await this.refreshResolvedSession();
601
+ // subsequent flip can race with it. We pre-fired the HTTP fetch
602
+ // above (in parallel with election); apply its result now.
603
+ // Falls through to a normal refresh on network/parse error so
604
+ // we don't get stuck without a session.
605
+ const bootstrapSession = await sessionPromise;
606
+ if (bootstrapSession !== null) {
607
+ await this.applySessionTransition(bootstrapSession, /* broadcast */ true);
608
+ } else {
609
+ await this.refreshResolvedSession();
610
+ }
538
611
 
539
612
  // Pull from server, then connect real-time transport.
540
613
  await this.pull();
541
614
 
542
- // Save cursor after pull.
615
+ // Save cursor after pull. Fire-and-forget on bootstrap — the
616
+ // enqueueApply path already persists per-batch as pull lands rows,
617
+ // so this final save is belt-and-braces. Awaiting it adds 5-30ms
618
+ // of IDB tail latency to the critical path before transport.start
619
+ // runs; the apply path's idempotent op_id-keyed merge handles the
620
+ // worst case (one re-applied batch on next cold pull if the tab
621
+ // crashes between this line and the saveCursor task completing).
543
622
  if (this.persistence) {
544
- await this.persistence.saveCursor(this.cursor);
623
+ void this.persistence.saveCursor(this.cursor);
545
624
  }
546
625
 
547
626
  // First-load reconciliation pass — closes the "phantom row" gap when
@@ -919,6 +998,14 @@ export class SyncEngine {
919
998
  private async resetReplicaInner(): Promise<void> {
920
999
  this.cursor = { last_seq: 0 };
921
1000
  this.store.clearAll();
1001
+ // The cache is now empty. The next pull will start from 0 and
1002
+ // return a full snapshot — that's a true cold start, so the
1003
+ // onConnected fast-path may skip the post-pull reconcile. Without
1004
+ // this flip, a sign-out → sign-in inside the same tab would
1005
+ // forever re-run reconcile after every pull because
1006
+ // `_hadCachedReplica` was set to true at start() time and never
1007
+ // cleared.
1008
+ this._hadCachedReplica = false;
922
1009
  if (this.persistence) {
923
1010
  try {
924
1011
  await this.persistence.clear();
@@ -1046,6 +1133,12 @@ export class SyncEngine {
1046
1133
  void this.refreshResolvedSession();
1047
1134
  }
1048
1135
 
1136
+ // Capture whether this pull started from cursor=0 BEFORE the
1137
+ // snapshot loop mutates the cursor. On successful exhaustion the
1138
+ // WS onConnected hook reads the flag to skip the redundant
1139
+ // bootstrap reconcile (the snapshot path already returned every
1140
+ // policy-visible row, per-entity refetch right after is waste).
1141
+ const startedFromZero = this.cursor.last_seq === 0;
1049
1142
  try {
1050
1143
  // Snapshot pagination: when the cursor is 0 and the server's
1051
1144
  // table is larger than a single batch, the response carries
@@ -1080,6 +1173,11 @@ export class SyncEngine {
1080
1173
  break;
1081
1174
  }
1082
1175
  }
1176
+ // Snapshot+tail loop exhausted without throwing: if we started
1177
+ // from cursor=0 we just hydrated the full replica from server
1178
+ // truth. Record it so onConnected skips the reconcile that would
1179
+ // otherwise re-fetch every entity via cursor pagination.
1180
+ this.lastPullStartedFromZero = startedFromZero;
1083
1181
  } catch (err) {
1084
1182
  // Swallow network + transient errors so the poll/reconnect loop
1085
1183
  // keeps trying — but on 429 bump the backoff counter so the next
@@ -1137,6 +1235,17 @@ export class SyncEngine {
1137
1235
  * that doesn't throw a 410. */
1138
1236
  private consecutive_410s = 0;
1139
1237
 
1238
+ /** Set by pullInner whenever the just-completed pull started with
1239
+ * `cursor.last_seq === 0` (cold load OR post-reset). The WS
1240
+ * onConnected hook reads this to skip the reconcile() that would
1241
+ * otherwise fire immediately after the bootstrap pull — the
1242
+ * snapshot path of pull already returned every row visible under
1243
+ * current policy, so per-entity reconcile fetches right after are
1244
+ * pure waste (~300ms on the critical path). One-shot: the flag is
1245
+ * cleared on read so a subsequent reconnect-after-disconnect still
1246
+ * runs reconcile normally. */
1247
+ private lastPullStartedFromZero = false;
1248
+
1140
1249
  /** Timestamp of the last `reconcile()` invocation. Used to debounce —
1141
1250
  * reconcile runs on connect, WS reconnect, AND visibility-change, so
1142
1251
  * a quick tab-flick after a normal reconnect shouldn't refetch every
@@ -1207,64 +1316,73 @@ export class SyncEngine {
1207
1316
  // the current cursor means future inserts (which have higher seqs)
1208
1317
  // bypass the tombstone — re-creation server-side still propagates.
1209
1318
  const tombstoneSeq = this.cursor.last_seq;
1210
- for (const entity of names) {
1211
- // Capture cursor + resolved session BEFORE the fetch so we can
1212
- // detect drift mid-reconcile. Two distinct races:
1213
- //
1214
- // 1. Cursor moves: a WS event for this (or another) entity
1215
- // landed while the page-paginated fetch was in flight. Our
1216
- // snapshot is stale; applying it would clobber the fresher
1217
- // WS-delivered row.
1218
- //
1219
- // 2. Session flips: the resolved tenant/user changed while
1220
- // the fetch was in flight (e.g., the app called
1221
- // /api/auth/select-org just after we issued the fetch).
1222
- // The server filtered the response under the OLD tenant
1223
- // context, so applying the result would tombstone rows
1224
- // that ARE visible under the NEW tenant. This is the
1225
- // "dashboard flashes data away on first load" bug — the
1226
- // engine starts before the app calls selectOrg, fetches
1227
- // under tenant=null, returns 0 rows, then the apply pass
1228
- // nukes every locally-cached row. Skip the apply when
1229
- // the session signature changed; the next reconcile
1230
- // (triggered by session-changed envelope) will re-fetch
1231
- // under the new context.
1232
- const cursorBeforeFetch = this.cursor.last_seq;
1233
- const sessionBeforeFetch = this.session.signature();
1234
- let serverRows: Row[];
1235
- try {
1236
- serverRows = await this.fetchEntityRows(entity);
1237
- } catch (err) {
1238
- // Network errors are expected (offline, transient 5xx). Skip
1239
- // this entity; the next reconcile trigger will retry.
1240
- const status = (err as { status?: number })?.status;
1241
- if (status === 403 || status === 404) {
1242
- // Entity is no longer readable (policy revoked) or removed
1243
- // from the manifest. Drop every local row for it — keeping
1244
- // them around just leaks invisible state.
1245
- await this.dropEntity(entity, tombstoneSeq);
1319
+ // Fan out the per-entity fetches in parallel. Bootstrap reconcile
1320
+ // used to serialize 5 entities × ~60ms each = ~300ms of dead time
1321
+ // on the critical path before channels render. The per-entity
1322
+ // drift checks (cursor + session signature) are captured inside
1323
+ // each task's closure, so each entity still bails individually
1324
+ // if its OWN fetch raced a WS event or a session flip — parallel
1325
+ // fan-out doesn't weaken either guard.
1326
+ await Promise.all(
1327
+ names.map(async (entity) => {
1328
+ // Capture cursor + resolved session BEFORE the fetch so we can
1329
+ // detect drift mid-reconcile. Two distinct races:
1330
+ //
1331
+ // 1. Cursor moves: a WS event for this (or another) entity
1332
+ // landed while the page-paginated fetch was in flight. Our
1333
+ // snapshot is stale; applying it would clobber the fresher
1334
+ // WS-delivered row.
1335
+ //
1336
+ // 2. Session flips: the resolved tenant/user changed while
1337
+ // the fetch was in flight (e.g., the app called
1338
+ // /api/auth/select-org just after we issued the fetch).
1339
+ // The server filtered the response under the OLD tenant
1340
+ // context, so applying the result would tombstone rows
1341
+ // that ARE visible under the NEW tenant. This is the
1342
+ // "dashboard flashes data away on first load" bug — the
1343
+ // engine starts before the app calls selectOrg, fetches
1344
+ // under tenant=null, returns 0 rows, then the apply pass
1345
+ // nukes every locally-cached row. Skip the apply when
1346
+ // the session signature changed; the next reconcile
1347
+ // (triggered by session-changed envelope) will re-fetch
1348
+ // under the new context.
1349
+ const cursorBeforeFetch = this.cursor.last_seq;
1350
+ const sessionBeforeFetch = this.session.signature();
1351
+ let serverRows: Row[];
1352
+ try {
1353
+ serverRows = await this.fetchEntityRows(entity);
1354
+ } catch (err) {
1355
+ // Network errors are expected (offline, transient 5xx). Skip
1356
+ // this entity; the next reconcile trigger will retry.
1357
+ const status = (err as { status?: number })?.status;
1358
+ if (status === 403 || status === 404) {
1359
+ // Entity is no longer readable (policy revoked) or removed
1360
+ // from the manifest. Drop every local row for it — keeping
1361
+ // them around just leaks invisible state.
1362
+ await this.dropEntity(entity, tombstoneSeq);
1363
+ }
1364
+ return;
1246
1365
  }
1247
- continue;
1248
- }
1249
- if (this.cursor.last_seq !== cursorBeforeFetch) {
1250
- // Cursor moved during fetch — at least one WS event for this
1251
- // (or another) entity landed and might have a fresher value
1252
- // for a row our snapshot just captured. Bail out for this
1253
- // entity; reconcile() is triggered again on visibility-change
1254
- // and reconnect, and the WS event already carried the latest
1255
- // state for the affected row.
1256
- continue;
1257
- }
1258
- if (this.session.signature() !== sessionBeforeFetch) {
1259
- // Session changed (token flipped, tenant switched, user
1260
- // signed out → in, etc.). The rows we fetched reflect the
1261
- // OLD session's policy view; applying them now would
1262
- // tombstone rows visible under the NEW session. Bail and let
1263
- // the session-changed envelope drive the next reconcile.
1264
- continue;
1265
- }
1266
- await this.applyEntityReconcile(entity, serverRows, tombstoneSeq);
1267
- }
1366
+ if (this.cursor.last_seq !== cursorBeforeFetch) {
1367
+ // Cursor moved during fetch — at least one WS event for this
1368
+ // (or another) entity landed and might have a fresher value
1369
+ // for a row our snapshot just captured. Bail out for this
1370
+ // entity; reconcile() is triggered again on visibility-change
1371
+ // and reconnect, and the WS event already carried the latest
1372
+ // state for the affected row.
1373
+ return;
1374
+ }
1375
+ if (this.session.signature() !== sessionBeforeFetch) {
1376
+ // Session changed (token flipped, tenant switched, user
1377
+ // signed out → in, etc.). The rows we fetched reflect the
1378
+ // OLD session's policy view; applying them now would
1379
+ // tombstone rows visible under the NEW session. Bail and let
1380
+ // the session-changed envelope drive the next reconcile.
1381
+ return;
1382
+ }
1383
+ await this.applyEntityReconcile(entity, serverRows, tombstoneSeq);
1384
+ }),
1385
+ );
1268
1386
  }
1269
1387
 
1270
1388
  /** Fetch every row for an entity. Uses cursor pagination so big tables
@@ -1375,17 +1493,36 @@ export class SyncEngine {
1375
1493
  // broadcasts the result, which `handleMultiTabMessage` routes
1376
1494
  // into the resolver.
1377
1495
  if (!this.isMultiTabLeader) return;
1378
- let next: ResolvedSession;
1496
+ const next = await this.fetchSessionBootstrap();
1497
+ if (next === null) return;
1498
+ await this.applySessionTransition(next, /* broadcast */ true);
1499
+ }
1500
+
1501
+ /**
1502
+ * Pure HTTP fetch of /api/auth/me → ResolvedSession. Unlike
1503
+ * `refreshResolvedSession`, this does NOT gate on `isMultiTabLeader`
1504
+ * — bootstrap callers in `start()` fire this in PARALLEL with the
1505
+ * multi-tab election to overlap two independent latency windows
1506
+ * (election ~250ms || auth/me ~60ms). At that point no other tabs'
1507
+ * messages have been observed yet, so there's no broadcast-policy
1508
+ * violation; the caller is responsible for discarding the result
1509
+ * if it lost the election.
1510
+ *
1511
+ * Returns null on HTTP error / network failure / parse error — the
1512
+ * caller's next pull cycle (or the WS `session-changed` envelope)
1513
+ * will retry. Errors must not abort bootstrap.
1514
+ */
1515
+ private async fetchSessionBootstrap(): Promise<ResolvedSession | null> {
1379
1516
  try {
1380
1517
  const res = await this.rawFetch("/api/auth/me");
1381
- if (!res.ok) return;
1518
+ if (!res.ok) return null;
1382
1519
  const raw = (await res.json()) as {
1383
1520
  user_id?: string | null;
1384
1521
  tenant_id?: string | null;
1385
1522
  is_admin?: boolean;
1386
1523
  roles?: string[];
1387
1524
  };
1388
- next = {
1525
+ return {
1389
1526
  userId: raw.user_id ?? null,
1390
1527
  tenantId: raw.tenant_id ?? null,
1391
1528
  isAdmin: raw.is_admin ?? false,
@@ -1394,9 +1531,8 @@ export class SyncEngine {
1394
1531
  } catch {
1395
1532
  // Swallow — /api/auth/me errors are transient and the next pull
1396
1533
  // will retry. Don't take down the sync loop for this.
1397
- return;
1534
+ return null;
1398
1535
  }
1399
- await this.applySessionTransition(next, /* broadcast */ true);
1400
1536
  }
1401
1537
 
1402
1538
  /**
@@ -2057,7 +2193,31 @@ export class SyncEngine {
2057
2193
  // opening. Reconcile fires after the pull since pull is the
2058
2194
  // cheap incremental path; reconcile is the server-truth
2059
2195
  // backstop for anything pull couldn't replay.
2060
- void this.pull().then(() => this.reconcile());
2196
+ //
2197
+ // Cold-load fast path: if pull just hydrated a full snapshot
2198
+ // from cursor=0, the snapshot already returned every row
2199
+ // visible under current policy. The reconcile pass that would
2200
+ // normally follow is pure waste — same rows, second time,
2201
+ // ~60ms × N entities. Skip it once; visibility-change and
2202
+ // reconnect-after-disconnect paths invoke reconcile() directly
2203
+ // (not gated by this flag) so the safety net still triggers.
2204
+ void this.pull().then(() => {
2205
+ // Cold-load fast-path: skip reconcile only when this WAS a
2206
+ // true cold start (no IDB cache → the pull-from-0 returned
2207
+ // every visible row, reconcile would refetch the same set).
2208
+ // A returning user whose pull happened to start from 0
2209
+ // (cursor rolled back, partial cache wipe) MUST still run
2210
+ // reconcile to catch rows deleted on the server while the
2211
+ // tab was closed — the snapshot path only returns currently-
2212
+ // visible rows, never tombstones, so ghost rows on the
2213
+ // cached side persist without the reconcile pass.
2214
+ if (this.lastPullStartedFromZero && !this._hadCachedReplica) {
2215
+ this.lastPullStartedFromZero = false;
2216
+ return;
2217
+ }
2218
+ this.lastPullStartedFromZero = false;
2219
+ return this.reconcile();
2220
+ });
2061
2221
  },
2062
2222
  onDisconnected: () => {
2063
2223
  /* Engine has no work on disconnect — the transport's own
@@ -162,6 +162,61 @@ export class IndexedDBPersistence {
162
162
  });
163
163
  }
164
164
 
165
+ /**
166
+ * Atomic warm-load: returns entities + cursor in a single IDB
167
+ * read transaction. Used by `SyncEngine.start()` to hydrate the
168
+ * in-memory replica BEFORE the network pull resolves so React
169
+ * hooks see real data on first render (no empty-then-populated
170
+ * flash on returning visits).
171
+ *
172
+ * `hadCache` is true when at least one row OR a saved cursor
173
+ * was found. The engine uses it to distinguish "true cold start
174
+ * — pull-from-0 IS a full snapshot, skip the post-snapshot
175
+ * reconcile" from "returning user with cached state — pull-from-
176
+ * cursor may miss server-side deletes that happened offline, the
177
+ * onConnected reconcile MUST run". Without that distinction, a
178
+ * returning user whose cursor somehow rolled back to 0 (rare:
179
+ * IDB partial corruption, cleared-by-mistake) would end up with
180
+ * ghost rows that survive forever.
181
+ *
182
+ * Single readonly tx is intentional — two separate reads could
183
+ * race a mid-load saveCursor/saveRow from another tab's apply
184
+ * pipeline and read an inconsistent (cursor C', rows for cursor C)
185
+ * pair. One tx guarantees a consistent snapshot.
186
+ */
187
+ async loadSnapshot(): Promise<{
188
+ entities: Record<string, Row[]>;
189
+ cursor: SyncCursor | null;
190
+ hadCache: boolean;
191
+ }> {
192
+ if (!this.db) return { entities: {}, cursor: null, hadCache: false };
193
+ const tx = this.db.transaction([STORE_NAME, CURSOR_STORE], "readonly");
194
+ const entitiesReq = tx.objectStore(STORE_NAME).getAll();
195
+ const cursorReq = tx.objectStore(CURSOR_STORE).get("cursor");
196
+ return new Promise((resolve) => {
197
+ tx.oncomplete = () => {
198
+ const entities: Record<string, Row[]> = {};
199
+ for (const item of (entitiesReq.result ?? []) as {
200
+ entity: string;
201
+ id: string;
202
+ data: Row;
203
+ }[]) {
204
+ if (!entities[item.entity]) entities[item.entity] = [];
205
+ entities[item.entity].push({ id: item.id, ...item.data });
206
+ }
207
+ const cursorRec = cursorReq.result as { last_seq?: number } | undefined;
208
+ const cursor: SyncCursor | null = cursorRec
209
+ ? { last_seq: cursorRec.last_seq ?? 0 }
210
+ : null;
211
+ const hadCache =
212
+ Object.keys(entities).length > 0 || cursor !== null;
213
+ resolve({ entities, cursor, hadCache });
214
+ };
215
+ tx.onerror = () => resolve({ entities: {}, cursor: null, hadCache: false });
216
+ tx.onabort = () => resolve({ entities: {}, cursor: null, hadCache: false });
217
+ });
218
+ }
219
+
165
220
  /** Save the sync cursor. */
166
221
  async saveCursor(cursor: SyncCursor): Promise<void> {
167
222
  if (!this.db) return;