@stigmer/react 3.0.8-dev.20260612100207 → 3.0.8-dev.20260613041848
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/execution/useExecutionStream.d.ts +49 -5
- package/execution/useExecutionStream.d.ts.map +1 -1
- package/execution/useExecutionStream.js +118 -18
- package/execution/useExecutionStream.js.map +1 -1
- package/internal/backoff.d.ts +61 -0
- package/internal/backoff.d.ts.map +1 -0
- package/internal/backoff.js +79 -0
- package/internal/backoff.js.map +1 -0
- package/internal/store/conversation-store.d.ts +12 -0
- package/internal/store/conversation-store.d.ts.map +1 -1
- package/internal/store/conversation-store.js +7 -0
- package/internal/store/conversation-store.js.map +1 -1
- package/internal/store/workflow-execution-event-store.d.ts +12 -0
- package/internal/store/workflow-execution-event-store.d.ts.map +1 -1
- package/internal/store/workflow-execution-event-store.js +7 -0
- package/internal/store/workflow-execution-event-store.js.map +1 -1
- package/internal/stream-controller.d.ts +11 -19
- package/internal/stream-controller.d.ts.map +1 -1
- package/internal/stream-controller.js +24 -1
- package/internal/stream-controller.js.map +1 -1
- package/package.json +4 -4
- package/session/SessionViewer.js +4 -1
- package/session/SessionViewer.js.map +1 -1
- package/session/useSessionConversation.d.ts +7 -1
- package/session/useSessionConversation.d.ts.map +1 -1
- package/session/useSessionConversation.js +1 -0
- package/session/useSessionConversation.js.map +1 -1
- package/src/execution/__tests__/useExecutionStream.test.tsx +184 -0
- package/src/execution/useExecutionStream.ts +174 -30
- package/src/internal/__tests__/backoff.test.ts +99 -0
- package/src/internal/backoff.ts +100 -0
- package/src/internal/store/conversation-store.ts +22 -0
- package/src/internal/store/workflow-execution-event-store.ts +22 -0
- package/src/internal/stream-controller.ts +30 -25
- package/src/session/SessionViewer.tsx +27 -0
- package/src/session/useSessionConversation.ts +8 -1
- package/src/workflow/WorkflowExecutionHeader.tsx +4 -1
- package/src/workflow/WorkflowExecutionTimeline.tsx +2 -1
- package/src/workflow/__tests__/useWorkflowExecutionEventStream.test.tsx +117 -1
- package/src/workflow/execution/useWaterfallEntries.ts +2 -1
- package/src/workflow/useWorkflowExecutionEventStream.ts +122 -41
- package/src/workflow/waterfall/WaterfallTimeline.tsx +2 -1
- package/styles.css +1 -1
- package/workflow/WorkflowExecutionHeader.d.ts.map +1 -1
- package/workflow/WorkflowExecutionHeader.js +3 -1
- package/workflow/WorkflowExecutionHeader.js.map +1 -1
- package/workflow/WorkflowExecutionTimeline.d.ts.map +1 -1
- package/workflow/WorkflowExecutionTimeline.js +1 -1
- package/workflow/WorkflowExecutionTimeline.js.map +1 -1
- package/workflow/execution/useWaterfallEntries.d.ts.map +1 -1
- package/workflow/execution/useWaterfallEntries.js +1 -1
- package/workflow/execution/useWaterfallEntries.js.map +1 -1
- package/workflow/useWorkflowExecutionEventStream.d.ts +32 -4
- package/workflow/useWorkflowExecutionEventStream.d.ts.map +1 -1
- package/workflow/useWorkflowExecutionEventStream.js +75 -32
- package/workflow/useWorkflowExecutionEventStream.js.map +1 -1
- package/workflow/waterfall/WaterfallTimeline.d.ts.map +1 -1
- package/workflow/waterfall/WaterfallTimeline.js +1 -1
- package/workflow/waterfall/WaterfallTimeline.js.map +1 -1
|
@@ -11,6 +11,7 @@ import {
|
|
|
11
11
|
} from "react";
|
|
12
12
|
import type { AgentExecution } from "@stigmer/protos/ai/stigmer/agentic/agentexecution/v1/api_pb";
|
|
13
13
|
import { ExecutionPhase } from "@stigmer/protos/ai/stigmer/agentic/agentexecution/v1/enum_pb";
|
|
14
|
+
import { isTransientStreamError } from "@stigmer/sdk";
|
|
14
15
|
import { useStigmer } from "../hooks";
|
|
15
16
|
import { toError } from "../internal/toError";
|
|
16
17
|
import { useStreamRate } from "../internal/dev";
|
|
@@ -18,7 +19,13 @@ import {
|
|
|
18
19
|
StreamController,
|
|
19
20
|
type StreamControllerSink,
|
|
20
21
|
} from "../internal/stream-controller";
|
|
21
|
-
import {
|
|
22
|
+
import {
|
|
23
|
+
computeBackoffDelay,
|
|
24
|
+
sleep,
|
|
25
|
+
DEFAULT_RECONNECT_MAX_ATTEMPTS,
|
|
26
|
+
type BackoffOptions,
|
|
27
|
+
} from "../internal/backoff";
|
|
28
|
+
import { ConversationStore } from "../internal/store";
|
|
22
29
|
import { isTerminalPhase } from "./execution-phases";
|
|
23
30
|
|
|
24
31
|
/** Return value of {@link useExecutionStream}. */
|
|
@@ -37,14 +44,31 @@ export interface UseExecutionStreamReturn {
|
|
|
37
44
|
readonly isStreaming: boolean;
|
|
38
45
|
/** `true` after subscription starts but before the first snapshot arrives. */
|
|
39
46
|
readonly isConnecting: boolean;
|
|
40
|
-
/**
|
|
47
|
+
/**
|
|
48
|
+
* `true` while a transient drop is being retried automatically in the
|
|
49
|
+
* background. The last snapshot stays visible and `error` remains `null` —
|
|
50
|
+
* surface a subtle "Reconnecting…" affordance, not an error. Becomes
|
|
51
|
+
* `false` once a snapshot is received (back to `isStreaming`) or retries
|
|
52
|
+
* are exhausted (then `error` is set).
|
|
53
|
+
*/
|
|
54
|
+
readonly isReconnecting: boolean;
|
|
55
|
+
/** 1-based count of the in-flight reconnect attempt; `0` when not reconnecting. */
|
|
56
|
+
readonly reconnectAttempt: number;
|
|
57
|
+
/**
|
|
58
|
+
* Error from the last failed stream attempt, or `null` when healthy.
|
|
59
|
+
*
|
|
60
|
+
* Only set once auto-reconnect has exhausted its attempts (or for a
|
|
61
|
+
* non-transient failure that is not retried). It stays `null` throughout
|
|
62
|
+
* background reconnection so a recoverable hiccup never shows as an error.
|
|
63
|
+
*/
|
|
41
64
|
readonly error: Error | null;
|
|
42
65
|
/**
|
|
43
66
|
* Reset error state and re-establish the stream subscription.
|
|
44
67
|
*
|
|
45
|
-
*
|
|
46
|
-
*
|
|
47
|
-
*
|
|
68
|
+
* The fallback after auto-reconnect exhausts, and a manual escape hatch in
|
|
69
|
+
* any lifecycle state — error, complete, or mid-stream. Resets the retry
|
|
70
|
+
* counter and preserves the last snapshot (no flash to empty). Uses the
|
|
71
|
+
* `connectKey` counter pattern consistent with `refetch()` in other SDK hooks.
|
|
48
72
|
*/
|
|
49
73
|
readonly reconnect: () => void;
|
|
50
74
|
}
|
|
@@ -65,6 +89,21 @@ export interface UseExecutionStreamOptions {
|
|
|
65
89
|
* preserving backward compatibility for standalone usage.
|
|
66
90
|
*/
|
|
67
91
|
readonly store?: ConversationStore;
|
|
92
|
+
/**
|
|
93
|
+
* Automatically re-establish the subscription with exponential backoff
|
|
94
|
+
* when a non-terminal stream drops (transport error, idle timeout, laptop
|
|
95
|
+
* sleep). Defaults to `true`. Set `false` to opt out and surface every
|
|
96
|
+
* drop as an immediate `error` for manual `reconnect()`.
|
|
97
|
+
*/
|
|
98
|
+
readonly autoReconnect?: boolean;
|
|
99
|
+
/**
|
|
100
|
+
* Tune the auto-reconnect backoff schedule and attempt cap. Omitted fields
|
|
101
|
+
* fall back to SDK defaults (base 1s, ×2, max 30s, 10 attempts).
|
|
102
|
+
*/
|
|
103
|
+
readonly reconnectOptions?: BackoffOptions & {
|
|
104
|
+
/** Max attempts before surfacing a terminal `error`. */
|
|
105
|
+
readonly maxAttempts?: number;
|
|
106
|
+
};
|
|
68
107
|
}
|
|
69
108
|
|
|
70
109
|
/**
|
|
@@ -73,7 +112,18 @@ export interface UseExecutionStreamOptions {
|
|
|
73
112
|
*
|
|
74
113
|
* Manages the full subscription lifecycle through a finite state
|
|
75
114
|
* machine: connection establishment, rAF-coalesced snapshot streaming,
|
|
76
|
-
* terminal-phase detection,
|
|
115
|
+
* terminal-phase detection, automatic reconnection with exponential
|
|
116
|
+
* backoff on transient drops, and manual reconnection as the fallback.
|
|
117
|
+
*
|
|
118
|
+
* **Resilience:** a non-terminal stream drop — whether a thrown transport
|
|
119
|
+
* error (WebKit "Load failed", `fetch failed`, `Unavailable`) or a graceful
|
|
120
|
+
* server close mid-run (idle timeout, load-balancer recycle) — is retried
|
|
121
|
+
* automatically with backoff. The last snapshot stays visible
|
|
122
|
+
* (`isReconnecting`), the access token is re-read on each attempt via the
|
|
123
|
+
* per-request interceptor, and `error` is surfaced only once attempts are
|
|
124
|
+
* exhausted. Completion is decided by the terminal phase, never by the
|
|
125
|
+
* stream merely ending (a graceful close of a running execution reconnects
|
|
126
|
+
* rather than falsely reporting "complete"). Opt out via `autoReconnect: false`.
|
|
77
127
|
*
|
|
78
128
|
* **Performance characteristics:**
|
|
79
129
|
* - Non-terminal snapshots are coalesced via `requestAnimationFrame`
|
|
@@ -152,6 +202,17 @@ export function useExecutionStream(
|
|
|
152
202
|
const streamRateRef = useRef(streamRate);
|
|
153
203
|
streamRateRef.current = streamRate;
|
|
154
204
|
|
|
205
|
+
// -- Reconnect config (ref-backed so option identity churn never resubscribes)
|
|
206
|
+
const autoReconnect = options?.autoReconnect ?? true;
|
|
207
|
+
const reconnectOptions = options?.reconnectOptions;
|
|
208
|
+
const configRef = useRef({ autoReconnect, reconnectOptions });
|
|
209
|
+
configRef.current = { autoReconnect, reconnectOptions };
|
|
210
|
+
|
|
211
|
+
// Tracks the execution the store currently holds, so we reset the store on
|
|
212
|
+
// a genuine identity change (A → B) but preserve it across reconnects of the
|
|
213
|
+
// SAME execution. Mirrors useWorkflowExecutionEventStream / useFetch.
|
|
214
|
+
const prevExecutionIdRef = useRef<string | null>(null);
|
|
215
|
+
|
|
155
216
|
// -- Subscription effect --------------------------------------------------
|
|
156
217
|
// Note: controller, store, and streamRate are ref-backed stable objects —
|
|
157
218
|
// they MUST NOT appear in the deps array. Including them would cause
|
|
@@ -160,45 +221,117 @@ export function useExecutionStream(
|
|
|
160
221
|
if (!executionId) {
|
|
161
222
|
controller.reset();
|
|
162
223
|
store.reset();
|
|
224
|
+
prevExecutionIdRef.current = null;
|
|
163
225
|
return;
|
|
164
226
|
}
|
|
165
227
|
|
|
228
|
+
// Reset only when switching to a different execution. Crucially we do NOT
|
|
229
|
+
// reset the store on reconnect (connectKey bump) or on cleanup — that
|
|
230
|
+
// would wipe the conversation to an empty "Connecting…" on every retry.
|
|
231
|
+
// The full-snapshot subscribe re-delivers the entire state on reconnect,
|
|
232
|
+
// so keeping the last-known-good snapshot is both correct and seamless.
|
|
233
|
+
if (
|
|
234
|
+
prevExecutionIdRef.current !== null &&
|
|
235
|
+
prevExecutionIdRef.current !== executionId
|
|
236
|
+
) {
|
|
237
|
+
store.reset();
|
|
238
|
+
}
|
|
239
|
+
prevExecutionIdRef.current = executionId;
|
|
240
|
+
|
|
166
241
|
const abortController = new AbortController();
|
|
242
|
+
const signal = abortController.signal;
|
|
167
243
|
controller.start(executionId);
|
|
168
244
|
|
|
169
245
|
(async () => {
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
246
|
+
const { autoReconnect: auto, reconnectOptions: backoff } =
|
|
247
|
+
configRef.current;
|
|
248
|
+
const maxAttempts = backoff?.maxAttempts ?? DEFAULT_RECONNECT_MAX_ATTEMPTS;
|
|
249
|
+
|
|
250
|
+
// 1-based count of consecutive failed attempts. Reset to 0 by any
|
|
251
|
+
// successful snapshot, so each healthy stretch gets a fresh backoff
|
|
252
|
+
// budget rather than inheriting the previous outage's attempt count.
|
|
253
|
+
let attempt = 0;
|
|
254
|
+
|
|
255
|
+
// Schedule the next retry after `error`, or stop. Returns `true` when
|
|
256
|
+
// the loop should continue (a retry was scheduled), `false` when it
|
|
257
|
+
// should exit (opted out, exhausted, or aborted). Shared by the
|
|
258
|
+
// thrown-error and premature-end paths so both converge on one policy.
|
|
259
|
+
const scheduleRetry = async (error: Error): Promise<boolean> => {
|
|
260
|
+
if (!auto || attempt >= maxAttempts) {
|
|
261
|
+
controller.handleError(error);
|
|
262
|
+
return false;
|
|
186
263
|
}
|
|
264
|
+
attempt += 1;
|
|
265
|
+
controller.handleReconnecting(attempt, error);
|
|
266
|
+
try {
|
|
267
|
+
await sleep(computeBackoffDelay(attempt, backoff), signal);
|
|
268
|
+
} catch {
|
|
269
|
+
return false; // aborted mid-backoff
|
|
270
|
+
}
|
|
271
|
+
return !signal.aborted;
|
|
272
|
+
};
|
|
273
|
+
|
|
274
|
+
while (!signal.aborted) {
|
|
275
|
+
let sawTerminal = false;
|
|
276
|
+
try {
|
|
277
|
+
for await (const snapshot of stigmer.agentExecution.subscribe(
|
|
278
|
+
executionId,
|
|
279
|
+
signal,
|
|
280
|
+
)) {
|
|
281
|
+
if (signal.aborted) return;
|
|
282
|
+
|
|
283
|
+
attempt = 0; // a snapshot proves the connection is healthy
|
|
284
|
+
controller.handleSnapshot(snapshot);
|
|
285
|
+
streamRateRef.current.tick(snapshot.status?.messages?.length ?? 0);
|
|
187
286
|
|
|
188
|
-
|
|
287
|
+
const phase =
|
|
288
|
+
snapshot.status?.phase ??
|
|
289
|
+
ExecutionPhase.EXECUTION_PHASE_UNSPECIFIED;
|
|
290
|
+
if (isTerminalPhase(phase)) {
|
|
291
|
+
sawTerminal = true;
|
|
292
|
+
break;
|
|
293
|
+
}
|
|
294
|
+
}
|
|
295
|
+
} catch (err) {
|
|
296
|
+
if (signal.aborted) return;
|
|
297
|
+
const error = toError(err);
|
|
298
|
+
// Only known-transient transport noise is retried. A non-transient
|
|
299
|
+
// error (not-found, invalid-argument, …) is deterministic — the
|
|
300
|
+
// same request would fail identically, so surface it immediately.
|
|
301
|
+
if (!auto || !isTransientStreamError(error)) {
|
|
302
|
+
controller.handleError(error);
|
|
303
|
+
return;
|
|
304
|
+
}
|
|
305
|
+
if (await scheduleRetry(error)) continue;
|
|
306
|
+
return;
|
|
307
|
+
}
|
|
308
|
+
|
|
309
|
+
if (signal.aborted) return;
|
|
310
|
+
|
|
311
|
+
if (sawTerminal) {
|
|
312
|
+
// handleSnapshot already transitioned to `complete`; flush any
|
|
313
|
+
// buffered frame and finish. Completion is decided by the terminal
|
|
314
|
+
// phase, never by the stream merely ending.
|
|
189
315
|
controller.handleStreamEnd();
|
|
190
316
|
streamRateRef.current.summary();
|
|
317
|
+
return;
|
|
191
318
|
}
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
319
|
+
|
|
320
|
+
// The iterator finished without a terminal phase: the server closed a
|
|
321
|
+
// still-running stream (idle timeout, load-balancer recycle, pod
|
|
322
|
+
// restart). This is transient by definition — reconnect and the next
|
|
323
|
+
// full snapshot reconciles whatever changed (including, if it ended
|
|
324
|
+
// meanwhile, the terminal state we missed).
|
|
325
|
+
if (await scheduleRetry(new Error("The connection was interrupted."))) {
|
|
326
|
+
continue;
|
|
327
|
+
}
|
|
328
|
+
return;
|
|
195
329
|
}
|
|
196
330
|
})();
|
|
197
331
|
|
|
198
332
|
return () => {
|
|
199
333
|
abortController.abort();
|
|
200
334
|
controller.reset();
|
|
201
|
-
store.reset();
|
|
202
335
|
};
|
|
203
336
|
}, [executionId, stigmer, connectKey]);
|
|
204
337
|
|
|
@@ -218,8 +351,19 @@ export function useExecutionStream(
|
|
|
218
351
|
|
|
219
352
|
const isStreaming = streamState.stage === "streaming";
|
|
220
353
|
const isConnecting = streamState.stage === "connecting";
|
|
221
|
-
const
|
|
222
|
-
|
|
354
|
+
const isReconnecting = streamState.stage === "reconnecting";
|
|
355
|
+
const reconnectAttempt =
|
|
356
|
+
streamState.stage === "reconnecting" ? streamState.attempt : 0;
|
|
357
|
+
const error = streamState.stage === "error" ? streamState.error : null;
|
|
223
358
|
|
|
224
|
-
return {
|
|
359
|
+
return {
|
|
360
|
+
execution,
|
|
361
|
+
phase,
|
|
362
|
+
isStreaming,
|
|
363
|
+
isConnecting,
|
|
364
|
+
isReconnecting,
|
|
365
|
+
reconnectAttempt,
|
|
366
|
+
error,
|
|
367
|
+
reconnect,
|
|
368
|
+
};
|
|
225
369
|
}
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
|
|
2
|
+
import {
|
|
3
|
+
computeBackoffDelay,
|
|
4
|
+
sleep,
|
|
5
|
+
AbortError,
|
|
6
|
+
DEFAULT_RECONNECT_BASE_DELAY_MS,
|
|
7
|
+
DEFAULT_RECONNECT_MAX_DELAY_MS,
|
|
8
|
+
} from "../backoff";
|
|
9
|
+
|
|
10
|
+
describe("computeBackoffDelay", () => {
|
|
11
|
+
// random=()=>1 collapses full jitter to its upper bound, exposing the raw
|
|
12
|
+
// exponential schedule for exact assertions.
|
|
13
|
+
const noJitter = () => 1;
|
|
14
|
+
|
|
15
|
+
it("grows exponentially from the base delay", () => {
|
|
16
|
+
expect(computeBackoffDelay(1, undefined, noJitter)).toBe(
|
|
17
|
+
DEFAULT_RECONNECT_BASE_DELAY_MS,
|
|
18
|
+
);
|
|
19
|
+
expect(computeBackoffDelay(2, undefined, noJitter)).toBe(2_000);
|
|
20
|
+
expect(computeBackoffDelay(3, undefined, noJitter)).toBe(4_000);
|
|
21
|
+
expect(computeBackoffDelay(5, undefined, noJitter)).toBe(16_000);
|
|
22
|
+
});
|
|
23
|
+
|
|
24
|
+
it("caps at maxDelayMs", () => {
|
|
25
|
+
// attempt 6 → 32_000 raw, clamped to the 30_000 ceiling.
|
|
26
|
+
expect(computeBackoffDelay(6, undefined, noJitter)).toBe(
|
|
27
|
+
DEFAULT_RECONNECT_MAX_DELAY_MS,
|
|
28
|
+
);
|
|
29
|
+
expect(computeBackoffDelay(50, undefined, noJitter)).toBe(
|
|
30
|
+
DEFAULT_RECONNECT_MAX_DELAY_MS,
|
|
31
|
+
);
|
|
32
|
+
});
|
|
33
|
+
|
|
34
|
+
it("applies full jitter within [0, capped]", () => {
|
|
35
|
+
expect(computeBackoffDelay(3, undefined, () => 0)).toBe(0);
|
|
36
|
+
expect(computeBackoffDelay(3, undefined, () => 0.5)).toBe(2_000);
|
|
37
|
+
for (let i = 0; i < 200; i++) {
|
|
38
|
+
const d = computeBackoffDelay(4); // real Math.random
|
|
39
|
+
expect(d).toBeGreaterThanOrEqual(0);
|
|
40
|
+
expect(d).toBeLessThanOrEqual(8_000);
|
|
41
|
+
}
|
|
42
|
+
});
|
|
43
|
+
|
|
44
|
+
it("honors custom options", () => {
|
|
45
|
+
const opts = { baseDelayMs: 100, factor: 3, maxDelayMs: 1_000 };
|
|
46
|
+
expect(computeBackoffDelay(1, opts, noJitter)).toBe(100);
|
|
47
|
+
expect(computeBackoffDelay(2, opts, noJitter)).toBe(300);
|
|
48
|
+
expect(computeBackoffDelay(3, opts, noJitter)).toBe(900);
|
|
49
|
+
expect(computeBackoffDelay(4, opts, noJitter)).toBe(1_000); // 2700 capped
|
|
50
|
+
});
|
|
51
|
+
|
|
52
|
+
it("treats attempt < 1 as the first attempt", () => {
|
|
53
|
+
expect(computeBackoffDelay(0, undefined, noJitter)).toBe(
|
|
54
|
+
DEFAULT_RECONNECT_BASE_DELAY_MS,
|
|
55
|
+
);
|
|
56
|
+
expect(computeBackoffDelay(-5, undefined, noJitter)).toBe(
|
|
57
|
+
DEFAULT_RECONNECT_BASE_DELAY_MS,
|
|
58
|
+
);
|
|
59
|
+
});
|
|
60
|
+
});
|
|
61
|
+
|
|
62
|
+
describe("sleep", () => {
|
|
63
|
+
beforeEach(() => vi.useFakeTimers());
|
|
64
|
+
afterEach(() => vi.useRealTimers());
|
|
65
|
+
|
|
66
|
+
it("resolves after the delay", async () => {
|
|
67
|
+
const settled = vi.fn();
|
|
68
|
+
const p = sleep(1_000).then(settled);
|
|
69
|
+
await vi.advanceTimersByTimeAsync(999);
|
|
70
|
+
expect(settled).not.toHaveBeenCalled();
|
|
71
|
+
await vi.advanceTimersByTimeAsync(1);
|
|
72
|
+
await p;
|
|
73
|
+
expect(settled).toHaveBeenCalledOnce();
|
|
74
|
+
});
|
|
75
|
+
|
|
76
|
+
it("rejects immediately with AbortError when the signal is already aborted", async () => {
|
|
77
|
+
const ac = new AbortController();
|
|
78
|
+
ac.abort();
|
|
79
|
+
await expect(sleep(1_000, ac.signal)).rejects.toBeInstanceOf(AbortError);
|
|
80
|
+
});
|
|
81
|
+
|
|
82
|
+
it("rejects when aborted mid-wait and leaves no pending timer", async () => {
|
|
83
|
+
const ac = new AbortController();
|
|
84
|
+
const p = sleep(10_000, ac.signal);
|
|
85
|
+
ac.abort();
|
|
86
|
+
await expect(p).rejects.toBeInstanceOf(AbortError);
|
|
87
|
+
// No timer should survive the abort — advancing time settles nothing.
|
|
88
|
+
expect(vi.getTimerCount()).toBe(0);
|
|
89
|
+
});
|
|
90
|
+
|
|
91
|
+
it("does not reject after resolving (listener removed on success)", async () => {
|
|
92
|
+
const ac = new AbortController();
|
|
93
|
+
const p = sleep(500, ac.signal);
|
|
94
|
+
await vi.advanceTimersByTimeAsync(500);
|
|
95
|
+
await expect(p).resolves.toBeUndefined();
|
|
96
|
+
// Aborting after the fact must not produce an unhandled rejection.
|
|
97
|
+
expect(() => ac.abort()).not.toThrow();
|
|
98
|
+
});
|
|
99
|
+
});
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Exponential-backoff scheduling for resilient stream reconnection.
|
|
3
|
+
*
|
|
4
|
+
* Pure and framework-agnostic — the timing math is a plain function and the
|
|
5
|
+
* wait is a cancelable promise, so both are exhaustively unit-testable
|
|
6
|
+
* without React or fake DOM (mirrors the codebase's extract-the-pure-core
|
|
7
|
+
* convention, e.g. `computeFollowCenter` / `isRecoveryTransition`).
|
|
8
|
+
*
|
|
9
|
+
* @internal Not part of the public `@stigmer/react` API.
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
/** Tunable backoff schedule. All fields optional — sensible defaults apply. */
|
|
13
|
+
export interface BackoffOptions {
|
|
14
|
+
/** Delay before the first retry, in milliseconds. */
|
|
15
|
+
readonly baseDelayMs?: number;
|
|
16
|
+
/** Upper bound on any single delay, in milliseconds. */
|
|
17
|
+
readonly maxDelayMs?: number;
|
|
18
|
+
/** Multiplier applied per attempt (`base * factor^(attempt-1)`). */
|
|
19
|
+
readonly factor?: number;
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
/** Delay before the first reconnect attempt. */
|
|
23
|
+
export const DEFAULT_RECONNECT_BASE_DELAY_MS = 1_000;
|
|
24
|
+
/** Ceiling for any single reconnect delay. */
|
|
25
|
+
export const DEFAULT_RECONNECT_MAX_DELAY_MS = 30_000;
|
|
26
|
+
/** Per-attempt growth multiplier. */
|
|
27
|
+
export const DEFAULT_RECONNECT_FACTOR = 2;
|
|
28
|
+
/**
|
|
29
|
+
* Attempts before giving up and surfacing a terminal error. With the
|
|
30
|
+
* defaults above this is ≈ several minutes of outage before the user sees
|
|
31
|
+
* an error banner — long enough to ride out sleep/wake and network blips,
|
|
32
|
+
* bounded enough to avoid an unbounded background loop against a stream
|
|
33
|
+
* that will never recover (e.g. a deleted execution).
|
|
34
|
+
*/
|
|
35
|
+
export const DEFAULT_RECONNECT_MAX_ATTEMPTS = 10;
|
|
36
|
+
|
|
37
|
+
/**
 * Compute the backoff delay (ms) for a 1-based reconnect attempt.
 *
 * The raw delay grows exponentially (`base * factor^(attempt-1)`), is capped
 * at `maxDelayMs`, and is then scaled by **full jitter** — a uniform random
 * point in `[0, capped]`. Full jitter (AWS, "Exponential Backoff And
 * Jitter") de-synchronizes clients that all dropped at the same instant, so
 * they do not reconnect in lock-step against a recovering server.
 *
 * Defensive behavior:
 * - `attempt < 1` and `NaN` are treated as the first attempt; fractional
 *   attempts are floored.
 * - The result is always a finite, non-negative integer. Inputs that would
 *   otherwise yield `NaN` or a negative number (e.g. a negative
 *   `baseDelayMs`, or `0 * Infinity` from an overflowing exponent) yield `0`,
 *   so the value is always safe to hand to a timer.
 *
 * @param attempt 1-based count of the reconnect attempt being scheduled.
 * @param opts Optional schedule overrides; omitted fields use the
 *   `DEFAULT_RECONNECT_*` constants.
 * @param random Source of jitter, expected to return a value in `[0, 1]`.
 *   Injectable purely so tests can assert exact values; callers should omit it.
 * @returns The delay to wait before the attempt, in whole milliseconds.
 */
export function computeBackoffDelay(
  attempt: number,
  opts?: BackoffOptions,
  random: () => number = Math.random,
): number {
  const base = opts?.baseDelayMs ?? DEFAULT_RECONNECT_BASE_DELAY_MS;
  const max = opts?.maxDelayMs ?? DEFAULT_RECONNECT_MAX_DELAY_MS;
  const factor = opts?.factor ?? DEFAULT_RECONNECT_FACTOR;

  // `Math.max(1, NaN)` is NaN, so NaN has to be handled before clamping.
  // Infinity is deliberately left alone: it saturates at `max` below.
  const safeAttempt = Number.isNaN(attempt)
    ? 1
    : Math.max(1, Math.floor(attempt));

  const exponential = base * factor ** (safeAttempt - 1);
  const capped = Math.min(exponential, max);
  const delay = Math.round(random() * capped);

  // Never return NaN or a negative delay, whatever the inputs were.
  return Number.isFinite(delay) && delay > 0 ? delay : 0;
}
|
|
63
|
+
|
|
64
|
+
/**
 * Error used as the rejection reason when a {@link sleep} is aborted.
 *
 * Callers can recognise it either with `instanceof AbortError` or by
 * checking that `name` equals `"AbortError"`.
 */
export class AbortError extends Error {
  constructor() {
    const message = "The operation was aborted.";
    super(message);
    // Override the inherited "Error" name so the failure is identifiable by name.
    this.name = "AbortError";
  }
}
|
|
71
|
+
|
|
72
|
+
/**
 * Wait for `ms` milliseconds, cancelably.
 *
 * Resolves once the timer fires. If `signal` is already aborted the returned
 * promise rejects straight away with {@link AbortError}; if it aborts while
 * waiting, the pending timer is cleared and the promise rejects with
 * {@link AbortError}.
 *
 * Cleanup happens on both exits: the abort path clears the timer (its
 * listener is registered with `once`), and the timer path removes the abort
 * listener before resolving. Nothing is left pending after the promise
 * settles.
 *
 * @param ms Delay in milliseconds.
 * @param signal Optional abort signal that cancels the wait.
 * @returns A promise that resolves with no value after the delay.
 */
export function sleep(ms: number, signal?: AbortSignal): Promise<void> {
  if (signal?.aborted) {
    return Promise.reject(new AbortError());
  }

  return new Promise<void>((resolve, reject) => {
    let handle: ReturnType<typeof setTimeout>;

    const abortListener = (): void => {
      clearTimeout(handle);
      reject(new AbortError());
    };

    handle = setTimeout(() => {
      // The wait completed normally; a later abort must not reach this promise.
      signal?.removeEventListener("abort", abortListener);
      resolve();
    }, ms);

    signal?.addEventListener("abort", abortListener, { once: true });
  });
}
|
|
@@ -9,6 +9,19 @@ export type StreamState =
|
|
|
9
9
|
| { readonly stage: "idle" }
|
|
10
10
|
| { readonly stage: "connecting"; readonly executionId: string }
|
|
11
11
|
| { readonly stage: "streaming"; readonly executionId: string }
|
|
12
|
+
| {
|
|
13
|
+
/**
|
|
14
|
+
* A non-terminal stream drop is being retried in the background. The
|
|
15
|
+
* last-known-good snapshot stays visible and no error is surfaced —
|
|
16
|
+
* the public `error` only appears once retries are exhausted. `attempt`
|
|
17
|
+
* is the 1-based retry count; `error` is the transient cause, retained
|
|
18
|
+
* for diagnostics (it is not shown to the user while reconnecting).
|
|
19
|
+
*/
|
|
20
|
+
readonly stage: "reconnecting";
|
|
21
|
+
readonly executionId: string;
|
|
22
|
+
readonly attempt: number;
|
|
23
|
+
readonly error: Error;
|
|
24
|
+
}
|
|
12
25
|
| { readonly stage: "complete"; readonly executionId: string }
|
|
13
26
|
| {
|
|
14
27
|
readonly stage: "error";
|
|
@@ -122,6 +135,15 @@ function streamStateEqual(a: StreamState, b: StreamState): boolean {
|
|
|
122
135
|
a.error === b.error
|
|
123
136
|
)
|
|
124
137
|
return true;
|
|
138
|
+
// Each retry bumps `attempt`, so two reconnecting states are only equal
|
|
139
|
+
// when the attempt matches — every attempt must re-notify subscribers.
|
|
140
|
+
if (
|
|
141
|
+
a.stage === "reconnecting" &&
|
|
142
|
+
b.stage === "reconnecting" &&
|
|
143
|
+
a.executionId === b.executionId &&
|
|
144
|
+
a.attempt === b.attempt
|
|
145
|
+
)
|
|
146
|
+
return true;
|
|
125
147
|
if ("executionId" in a && "executionId" in b)
|
|
126
148
|
return a.executionId === b.executionId;
|
|
127
149
|
return false;
|
|
@@ -10,6 +10,19 @@ export type WorkflowEventStreamState =
|
|
|
10
10
|
| { readonly stage: "idle" }
|
|
11
11
|
| { readonly stage: "connecting"; readonly executionId: string }
|
|
12
12
|
| { readonly stage: "streaming"; readonly executionId: string }
|
|
13
|
+
| {
|
|
14
|
+
/**
|
|
15
|
+
* A non-terminal event stream drop is being retried in the background.
|
|
16
|
+
* Accumulated events stay visible and no error is surfaced until retries
|
|
17
|
+
* are exhausted. On reconnect the subscription resumes from the last
|
|
18
|
+
* received `sequence_number`, so no events are lost. `attempt` is the
|
|
19
|
+
* 1-based retry count; `error` is the transient cause (diagnostic only).
|
|
20
|
+
*/
|
|
21
|
+
readonly stage: "reconnecting";
|
|
22
|
+
readonly executionId: string;
|
|
23
|
+
readonly attempt: number;
|
|
24
|
+
readonly error: Error;
|
|
25
|
+
}
|
|
13
26
|
| { readonly stage: "complete"; readonly executionId: string }
|
|
14
27
|
| {
|
|
15
28
|
readonly stage: "error";
|
|
@@ -422,6 +435,15 @@ function streamStateEqual(
|
|
|
422
435
|
a.error === b.error
|
|
423
436
|
)
|
|
424
437
|
return true;
|
|
438
|
+
// Each retry bumps `attempt`, so two reconnecting states are only equal
|
|
439
|
+
// when the attempt matches — every attempt must re-notify subscribers.
|
|
440
|
+
if (
|
|
441
|
+
a.stage === "reconnecting" &&
|
|
442
|
+
b.stage === "reconnecting" &&
|
|
443
|
+
a.executionId === b.executionId &&
|
|
444
|
+
a.attempt === b.attempt
|
|
445
|
+
)
|
|
446
|
+
return true;
|
|
425
447
|
if ("executionId" in a && "executionId" in b)
|
|
426
448
|
return a.executionId === b.executionId;
|
|
427
449
|
return false;
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import type { AgentExecution } from "@stigmer/protos/ai/stigmer/agentic/agentexecution/v1/api_pb";
|
|
2
|
-
import type {
|
|
2
|
+
import type { StreamState } from "./store/conversation-store";
|
|
3
3
|
import { isTerminalPhase } from "../execution/execution-phases";
|
|
4
4
|
import { ExecutionPhase } from "@stigmer/protos/ai/stigmer/agentic/agentexecution/v1/enum_pb";
|
|
5
5
|
|
|
@@ -7,25 +7,12 @@ import { ExecutionPhase } from "@stigmer/protos/ai/stigmer/agentic/agentexecutio
|
|
|
7
7
|
// Types
|
|
8
8
|
// ---------------------------------------------------------------------------
|
|
9
9
|
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
export type ControllerState =
|
|
18
|
-
| { readonly stage: "idle" }
|
|
19
|
-
| { readonly stage: "connecting"; readonly executionId: string }
|
|
20
|
-
| { readonly stage: "streaming"; readonly executionId: string }
|
|
21
|
-
| { readonly stage: "complete"; readonly executionId: string }
|
|
22
|
-
| {
|
|
23
|
-
readonly stage: "error";
|
|
24
|
-
readonly executionId: string;
|
|
25
|
-
readonly error: Error;
|
|
26
|
-
};
|
|
27
|
-
|
|
28
|
-
const IDLE: ControllerState = { stage: "idle" };
|
|
10
|
+
// The controller's FSM state is exactly the store's `StreamState` — they
|
|
11
|
+
// were once duplicated unions kept in lock-step by hand. The controller
|
|
12
|
+
// reuses the store's type so the lifecycle (including the `reconnecting`
|
|
13
|
+
// stage) is defined in one place and can never drift.
|
|
14
|
+
|
|
15
|
+
const IDLE: StreamState = { stage: "idle" };
|
|
29
16
|
|
|
30
17
|
/**
|
|
31
18
|
* Callback interface for the stream controller to communicate with
|
|
@@ -36,7 +23,7 @@ export interface StreamControllerSink {
|
|
|
36
23
|
/** Ingest a snapshot into the store (applies structural sharing). */
|
|
37
24
|
ingestSnapshot(snapshot: AgentExecution): void;
|
|
38
25
|
/** Transition the store's stream lifecycle state. */
|
|
39
|
-
setStreamState(state:
|
|
26
|
+
setStreamState(state: StreamState): void;
|
|
40
27
|
}
|
|
41
28
|
|
|
42
29
|
// ---------------------------------------------------------------------------
|
|
@@ -58,7 +45,7 @@ export interface StreamControllerSink {
|
|
|
58
45
|
* (typically `requestAnimationFrame`).
|
|
59
46
|
*/
|
|
60
47
|
export class StreamController {
|
|
61
|
-
private _state:
|
|
48
|
+
private _state: StreamState = IDLE;
|
|
62
49
|
private _bufferedSnapshot: AgentExecution | null = null;
|
|
63
50
|
private _rafId: number | null = null;
|
|
64
51
|
private _sink: StreamControllerSink;
|
|
@@ -80,7 +67,7 @@ export class StreamController {
|
|
|
80
67
|
}
|
|
81
68
|
|
|
82
69
|
/** Current FSM state (read-only). */
|
|
83
|
-
get state():
|
|
70
|
+
get state(): StreamState {
|
|
84
71
|
return this._state;
|
|
85
72
|
}
|
|
86
73
|
|
|
@@ -113,7 +100,12 @@ export class StreamController {
|
|
|
113
100
|
this._sink.ingestSnapshot(snapshot);
|
|
114
101
|
this._transition({ stage: "complete", executionId });
|
|
115
102
|
} else {
|
|
116
|
-
|
|
103
|
+
// A snapshot proves the (re)connection is healthy: advance from either
|
|
104
|
+
// the initial `connecting` or a `reconnecting` retry into `streaming`.
|
|
105
|
+
if (
|
|
106
|
+
this._state.stage === "connecting" ||
|
|
107
|
+
this._state.stage === "reconnecting"
|
|
108
|
+
) {
|
|
117
109
|
this._transition({ stage: "streaming", executionId });
|
|
118
110
|
}
|
|
119
111
|
this._bufferedSnapshot = snapshot;
|
|
@@ -121,6 +113,19 @@ export class StreamController {
|
|
|
121
113
|
}
|
|
122
114
|
}
|
|
123
115
|
|
|
116
|
+
/**
 * Move the controller into the `reconnecting` stage after a transient drop.
 *
 * Only the lifecycle state changes here: the method transitions to
 * `{ stage: "reconnecting", executionId, attempt, error }` for the currently
 * active execution and touches nothing else, so whatever snapshot is
 * buffered is left as it is. When there is no active execution id the call
 * does nothing.
 *
 * @param attempt 1-based count of the retry that is about to be made.
 * @param error The transient failure that triggered the retry.
 */
handleReconnecting(attempt: number, error: Error): void {
  const activeId = this._activeExecutionId();
  if (activeId) {
    this._transition({
      stage: "reconnecting",
      executionId: activeId,
      attempt,
      error,
    });
  }
}
|
|
128
|
+
|
|
124
129
|
/**
|
|
125
130
|
* Handle stream completion (iterator exhausted without error).
|
|
126
131
|
* If we still have a buffered snapshot, flush it first.
|
|
@@ -171,7 +176,7 @@ export class StreamController {
|
|
|
171
176
|
return this._state.executionId;
|
|
172
177
|
}
|
|
173
178
|
|
|
174
|
-
private _transition(next:
|
|
179
|
+
private _transition(next: StreamState): void {
|
|
175
180
|
this._state = next;
|
|
176
181
|
this._sink.setStreamState(next);
|
|
177
182
|
}
|