@fairfox/polly 0.64.0 → 0.66.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -164,6 +164,56 @@ export interface CreateMeshClientOptions {
164
164
  * production call site is inside {@link createMeshClient}.
165
165
  */
166
166
  export declare function resolveIceServers(rtc: CreateMeshClientOptions["rtc"]): Promise<RTCIceServer[] | undefined>;
167
+ /** Per-(docId, peerId) view into Automerge's `CollectionSynchronizer`.
168
+ * Built once per snapshot read and consulted from {@link buildHandleEntry}.
169
+ * Structured this way so the docSynchronizer lookup is paid once per
170
+ * docId, not once per (docId × peer). */
171
+ interface PerPeerDocSyncView {
172
+ /** `true` iff Automerge's `CollectionSynchronizer.docSynchronizers`
173
+ * has an entry for this documentId. A handle that exists in
174
+ * `repo.handles` but has no corresponding `docSynchronizer` is the
175
+ * `addDocument`-was-never-called fingerprint — Automerge's
176
+ * NetworkSubsystem can never invoke `send` for it because no
177
+ * synchronizer is wired up. */
178
+ docSynchronizerExists: boolean;
179
+ /** `true` iff the docSynchronizer for this documentId has registered
180
+ * this peer in its internal peer list (`#peers`). `false` with
181
+ * `docSynchronizerExists: true` is the symmetric polly#107 gap: the
182
+ * synchronizer exists (`addDocument` ran) but `addPeer` was never
183
+ * called for this peer on this doc — typically because the handle
184
+ * was created AFTER `peer-candidate` fired and Automerge's
185
+ * `addDocument`-iterates-known-peers path missed it. `undefined`
186
+ * when no docSynchronizer exists. */
187
+ docSynchronizerKnowsPeer: boolean | undefined;
188
+ /** Automerge's `DocSynchronizer.peerStates[peerId]` — one of
189
+ * `"unknown"`, `"has"`, `"unavailable"`, `"wants"`. `"unknown"`
190
+ * with `lastSyncMessageOutAt` set means the local side sent its
191
+ * opening handshake but never advanced past it; this is the
192
+ * wedged-pair fingerprint #112 names. `undefined` when no
193
+ * docSynchronizer exists OR when the synchronizer exists but
194
+ * doesn't track this peer. */
195
+ peerDocumentStatus: string | undefined;
196
+ }
197
+ /** Minimal structural shape we lift off Automerge's hidden
198
+ * `Repo.synchronizer` / `CollectionSynchronizer` types. Kept narrow
199
+ * so a future Automerge bump that renames a private field doesn't
200
+ * topple the snapshot path. */
201
+ interface SynchronizerStructural {
202
+ docSynchronizers?: Record<string, {
203
+ hasPeer?: (peerId: string) => boolean;
204
+ peerStates?: Record<string, string>;
205
+ }>;
206
+ }
207
+ /** Internal — exported for direct unit-test coverage of the #112
208
+ * structural reader and snapshot wiring. Not part of the public API. */
209
+ export declare const __test__: {
210
+ buildSyncView: (synchronizer: SynchronizerStructural | undefined, docId: string, peerId: string) => PerPeerDocSyncView;
211
+ EMPTY_SYNC_VIEW: PerPeerDocSyncView;
212
+ getCollectionSynchronizer: (repo: Repo) => SynchronizerStructural | undefined;
213
+ enrichPeerSlot: (peer: ReturnType<MeshWebRTCAdapter["getPeerStateSnapshot"]>["peers"][number], knownHandleIds: string[], repoHandles: Record<string, {
214
+ state: unknown;
215
+ } | undefined>, synchronizer: SynchronizerStructural | undefined) => MeshClientPeerStateSnapshot["peers"][number];
216
+ };
167
217
  /** Polly#107 post-v0.60 instrumentation. Walks the lazy-wrapper log
168
218
  * and groups records by `docId`; emits one entry per `docId` that
169
219
  * appears in more than one record. Surfaces the "17 wrappers / 16
@@ -221,6 +271,31 @@ export interface MeshClientHandleSnapshot {
221
271
  * `"sync"` after handshake, `"request"` while the local side is
222
272
  * asking. */
223
273
  lastSyncMessageOutType: string | undefined;
274
+ /** #112 diagnostic. `true` iff Automerge's `CollectionSynchronizer`
275
+ * has a `docSynchronizer` entry for this document. A handle in
276
+ * `repo.handles` with no docSynchronizer is the
277
+ * `addDocument`-never-ran fingerprint: Automerge's NetworkSubsystem
278
+ * can never call `send` for it because no synchronizer is wired up. */
279
+ docSynchronizerExists: boolean;
280
+ /** #112 diagnostic. `true` iff the docSynchronizer for this document
281
+ * has this peer in its internal peer list. `false` with
282
+ * `docSynchronizerExists: true` is the symmetric polly#107 gap —
283
+ * the synchronizer exists but `addPeer` was never invoked on it for
284
+ * this peer, typically because the handle was created AFTER
285
+ * `peer-candidate` fired and Automerge's
286
+ * `addDocument`-iterates-known-peers path missed it. `undefined` when no
287
+ * docSynchronizer exists. */
288
+ docSynchronizerKnowsPeer: boolean | undefined;
289
+ /** #112 diagnostic. Automerge's
290
+ * `DocSynchronizer.peerStates[peerId]` — one of `"unknown"`,
291
+ * `"has"`, `"unavailable"`, `"wants"`. `"unknown"` together with a
292
+ * set `lastSyncMessageOutAt` is the wedged-pair fingerprint this
293
+ * ticket names: the opening handshake left the wire but the
294
+ * synchronizer never learned whether the remote has the doc, so
295
+ * `generateSyncMessage` quiesces. `undefined` when no
296
+ * docSynchronizer exists OR when the synchronizer does not track
297
+ * this peer. */
298
+ peerDocumentStatus: string | undefined;
224
299
  }
225
300
  /** Polly#107 H5 diagnostics: surfaces the `mesh-state` module
226
301
  * instance identity so a single snapshot read tells the operator
@@ -414,3 +489,4 @@ export interface MeshClient {
414
489
  * peer connections negotiate asynchronously in the background.
415
490
  */
416
491
  export declare function createMeshClient(options: CreateMeshClientOptions): Promise<MeshClient>;
492
+ export {};
@@ -51,6 +51,30 @@ import type { MeshSignalingClient } from "./mesh-signaling-client";
51
51
  * need TURN fallback for peers behind symmetric NATs should replace this
52
52
  * with their own ICE server list. */
53
53
  export declare const DEFAULT_ICE_SERVERS: RTCIceServer[];
54
+ /** Polly issue #109: how long a slot can sit at `connectionState` in
55
+ * `new` or `connecting` before the watchdog tears it down so the
56
+ * recovery sweep can re-attempt. Healthy ICE completes in single-digit
57
+ * seconds; 30s is well past anything legitimate and short enough that
58
+ * a silent `createOffer`/`setLocalDescription` rejection no longer
59
+ * leaves an unrecoverable wedged slot. */
60
+ export declare const SLOT_NEVER_CONNECTED_TIMEOUT_MS = 30000;
61
+ /** Polly issue #110: how long a slot whose data channel is open and
62
+ * whose `connectionState` is `connected` can have no inbound bytes
63
+ * before the watchdog assumes the remote process is dead and tears
64
+ * the slot down. The ICE keepalive timer can take minutes to notice a
65
+ * remote SIGKILL or network partition; an application-layer
66
+ * liveness check that runs an order of magnitude faster keeps
67
+ * paired devices from sending sync traffic into the void after the
68
+ * remote daemon restarts. 120s is conservative — Automerge's idle
69
+ * cadence is well below this, so a healthy slot never crosses the
70
+ * threshold. */
71
+ export declare const SLOT_IDLE_TIMEOUT_MS = 120000;
72
+ /** Polly issue #109/#110: how often the watchdog evaluates teardown
73
+ * decisions across every slot. 5s is well below either threshold so
74
+ * teardown happens promptly after a deadline lapses without
75
+ * dominating runtime cost (one `connectionState` read per slot per
76
+ * tick). */
77
+ export declare const SLOT_WATCHDOG_INTERVAL_MS = 5000;
54
78
  /** Options for constructing a {@link MeshWebRTCAdapter}. */
55
79
  export interface MeshWebRTCAdapterOptions {
56
80
  /** The signalling client the adapter uses to exchange SDP and ICE
@@ -154,6 +178,22 @@ export interface MeshWebRTCAdapterOptions {
154
178
  * `POLLY_104_DISABLE_FIX=1` falsification path. Production callers
155
179
  * should leave this at the default. */
156
180
  syncFragmentChunkSizeOverride?: number;
181
+ /** How long a slot can sit at `connectionState` in `new` or
182
+ * `connecting` before the watchdog tears it down. Defaults to
183
+ * {@link SLOT_NEVER_CONNECTED_TIMEOUT_MS}. Set to 0 to disable the
184
+ * gate. Polly issue #109. */
185
+ slotNeverConnectedTimeoutMs?: number;
186
+ /** How long a connected slot can have no inbound bytes before the
187
+ * watchdog tears it down as idle. Defaults to
188
+ * {@link SLOT_IDLE_TIMEOUT_MS}. Set to 0 to disable the gate.
189
+ * Polly issue #110. */
190
+ slotIdleTimeoutMs?: number;
191
+ /** How often the slot watchdog evaluates teardown decisions.
192
+ * Defaults to {@link SLOT_WATCHDOG_INTERVAL_MS}; tests override to
193
+ * tens of milliseconds. Set to 0 to disable the watchdog entirely
194
+ * (the pre-#109/#110 behaviour, kept only for migration and the
195
+ * falsification path). */
196
+ slotWatchdogIntervalMs?: number;
157
197
  }
158
198
  /** Payload of the polly-specific `"sync-progress"` event emitted by
159
199
  * {@link MeshWebRTCAdapter}. Consumers can subscribe via the adapter's
@@ -239,7 +279,10 @@ export interface TransportSnapshot {
239
279
  * negotiation state.
240
280
  * - `fatal-error`: an exception was thrown while attempting to build
241
281
  * the slot. The accompanying {@link SlotInitiationDecision.error}
242
- * string carries the message.
282
+ * string carries the message. This is also stamped when the slot
283
+ * watchdog tears a wedged slot down (polly#109's silent throw, or
284
+ * polly#110's idle-but-still-`connected` post-mortem), so the next
285
+ * sweep tick finds no slot and retries.
243
286
  */
244
287
  export type SlotInitiationRejectionReason = "self" | "not-in-keyring" | "not-present" | "tie-break-other-side" | "slot-already-exists" | "fatal-error";
245
288
  /** Most-recent slot-initiation decision for a peer. Computed at
@@ -478,6 +521,14 @@ export declare class MeshWebRTCAdapter extends NetworkAdapter {
478
521
  * {@link sweepRunCount} so a stalled sweep is visible at a glance
479
522
  * via the snapshot's `sweep` block. */
480
523
  private lastSweepAt;
524
+ /** Watchdog interval (polly#109 + polly#110). Tears down slots that
525
+ * never reach `connected`, or that look connected but have not
526
+ * received bytes for {@link slotIdleTimeoutMs}, so the recovery
527
+ * sweep can re-attempt. Cleared in {@link disconnect}. */
528
+ private slotWatchdogTimer;
529
+ private readonly slotNeverConnectedTimeoutMs;
530
+ private readonly slotIdleTimeoutMs;
531
+ private readonly slotWatchdogIntervalMs;
481
532
  /** The peers this adapter will dial. Backward-compatible read accessor
482
533
  * for callers that previously iterated the `knownPeerIds` array. With
483
534
  * a {@link MeshWebRTCAdapterOptions.keyringSource} configured, the
@@ -543,6 +594,8 @@ export declare class MeshWebRTCAdapter extends NetworkAdapter {
543
594
  transport: TransportSnapshot | undefined;
544
595
  lastSyncHandshakeAttempt: SyncHandshakeAttemptSnapshot;
545
596
  handles: Record<string, HandleSyncSnapshot>;
597
+ createdAt: number;
598
+ lastInboundAt: number | undefined;
546
599
  };
547
600
  }>;
548
601
  };
@@ -664,6 +717,41 @@ export declare class MeshWebRTCAdapter extends NetworkAdapter {
664
717
  * #106 item 7. */
665
718
  private startKnownPeersSweep;
666
719
  private stopKnownPeersSweep;
720
+ /** Close a slot's data channel and connection, remove it from the
721
+ * map, and emit `peer-disconnected` upward so Automerge stops
722
+ * routing through the dead pair. Used by both the polly#109
723
+ * `.catch` in {@link createInitiatingSlot} and the polly#109/#110
724
+ * watchdog in {@link sweepWedgedSlots}. */
725
+ private tearDownWedgedSlot;
726
+ /** Start the slot watchdog (polly#109 + polly#110). Walks every
727
+ * active slot every {@link slotWatchdogIntervalMs} and tears down
728
+ * the two named wedge shapes:
729
+ *
730
+ * - `connectionState` in `new`/`connecting` for longer than
731
+ * {@link slotNeverConnectedTimeoutMs} (polly#109: silent
732
+ * `createOffer`/`setLocalDescription` rejection, or a network
733
+ * condition under which ICE never gathers).
734
+ *
735
+ * - `connectionState === "connected"` AND data channel `open` AND
736
+ * no inbound bytes for {@link slotIdleTimeoutMs} (polly#110:
737
+ * remote process killed without OS-layer FIN — ICE keepalives
738
+ * take many minutes to fail, the slot sends sync traffic into
739
+ * the void until then).
740
+ *
741
+ * Each teardown stamps `fatal-error` on the per-peer decision so
742
+ * the named gate is visible on the next snapshot, then emits
743
+ * `peer-disconnected` so Automerge stops routing through the dead
744
+ * slot. The next sweep tick re-evaluates and the recovery path
745
+ * creates a fresh slot. No-op when {@link slotWatchdogIntervalMs} is 0.
746
+ private startSlotWatchdog;
747
+ private stopSlotWatchdog;
748
+ private sweepWedgedSlots;
749
+ /** Decide whether a slot is wedged at the moment of inspection,
750
+ * returning the human-readable diagnosis when it is. Pulled out of
751
+ * {@link sweepWedgedSlots} so the per-slot decision stays under
752
+ * biome's cognitive-complexity ceiling and so a future test can
753
+ * exercise the gates directly. */
754
+ private classifyWedgedSlot;
667
755
  /**
668
756
  * Send a sync message to a specific remote peer. If no RTCPeerConnection
669
757
  * exists yet, the adapter initiates one by producing an SDP offer and
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@fairfox/polly",
3
- "version": "0.64.0",
3
+ "version": "0.66.0",
4
4
  "private": false,
5
5
  "type": "module",
6
6
  "description": "Multi-execution-context framework with reactive state and cross-context messaging for Chrome extensions, PWAs, and worker-based applications",