@stigmer/react 3.0.8-dev.20260612122433 → 3.0.8-dev.20260613051837

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88)
  1. package/execution/ExecutionProgress.d.ts.map +1 -1
  2. package/execution/ExecutionProgress.js +5 -1
  3. package/execution/ExecutionProgress.js.map +1 -1
  4. package/execution/MessageThread.d.ts +32 -3
  5. package/execution/MessageThread.d.ts.map +1 -1
  6. package/execution/MessageThread.js +59 -10
  7. package/execution/MessageThread.js.map +1 -1
  8. package/execution/useExecutionStream.d.ts +76 -5
  9. package/execution/useExecutionStream.d.ts.map +1 -1
  10. package/execution/useExecutionStream.js +166 -23
  11. package/execution/useExecutionStream.js.map +1 -1
  12. package/internal/VirtualizedThread.d.ts +3 -1
  13. package/internal/VirtualizedThread.d.ts.map +1 -1
  14. package/internal/VirtualizedThread.js +4 -2
  15. package/internal/VirtualizedThread.js.map +1 -1
  16. package/internal/backoff.d.ts +61 -0
  17. package/internal/backoff.d.ts.map +1 -0
  18. package/internal/backoff.js +79 -0
  19. package/internal/backoff.js.map +1 -0
  20. package/internal/store/conversation-store.d.ts +34 -0
  21. package/internal/store/conversation-store.d.ts.map +1 -1
  22. package/internal/store/conversation-store.js +50 -2
  23. package/internal/store/conversation-store.js.map +1 -1
  24. package/internal/store/workflow-execution-event-store.d.ts +12 -0
  25. package/internal/store/workflow-execution-event-store.d.ts.map +1 -1
  26. package/internal/store/workflow-execution-event-store.js +7 -0
  27. package/internal/store/workflow-execution-event-store.js.map +1 -1
  28. package/internal/stream-controller.d.ts +57 -21
  29. package/internal/stream-controller.d.ts.map +1 -1
  30. package/internal/stream-controller.js +117 -3
  31. package/internal/stream-controller.js.map +1 -1
  32. package/internal/useFetch.d.ts +7 -0
  33. package/internal/useFetch.d.ts.map +1 -1
  34. package/internal/useFetch.js +21 -0
  35. package/internal/useFetch.js.map +1 -1
  36. package/package.json +4 -4
  37. package/session/SessionViewer.js +26 -1
  38. package/session/SessionViewer.js.map +1 -1
  39. package/session/useSessionConversation.d.ts +41 -4
  40. package/session/useSessionConversation.d.ts.map +1 -1
  41. package/session/useSessionConversation.js +74 -10
  42. package/session/useSessionConversation.js.map +1 -1
  43. package/session/useSessionExecutions.d.ts +17 -1
  44. package/session/useSessionExecutions.d.ts.map +1 -1
  45. package/session/useSessionExecutions.js +6 -2
  46. package/session/useSessionExecutions.js.map +1 -1
  47. package/src/execution/ExecutionProgress.tsx +12 -0
  48. package/src/execution/MessageThread.tsx +174 -5
  49. package/src/execution/__tests__/MessageThread.test.tsx +64 -0
  50. package/src/execution/__tests__/useExecutionStream.test.tsx +279 -0
  51. package/src/execution/useExecutionStream.ts +254 -34
  52. package/src/internal/VirtualizedThread.tsx +7 -1
  53. package/src/internal/__tests__/backoff.test.ts +99 -0
  54. package/src/internal/__tests__/stream-controller.test.ts +165 -10
  55. package/src/internal/__tests__/useFetch.test.tsx +59 -0
  56. package/src/internal/backoff.ts +100 -0
  57. package/src/internal/store/__tests__/conversation-store.test.ts +61 -0
  58. package/src/internal/store/conversation-store.ts +68 -3
  59. package/src/internal/store/workflow-execution-event-store.ts +22 -0
  60. package/src/internal/stream-controller.ts +151 -26
  61. package/src/internal/useFetch.ts +26 -0
  62. package/src/session/SessionViewer.tsx +89 -0
  63. package/src/session/__tests__/useSessionConversation.test.tsx +53 -0
  64. package/src/session/useSessionConversation.ts +121 -15
  65. package/src/session/useSessionExecutions.ts +23 -1
  66. package/src/workflow/WorkflowExecutionHeader.tsx +4 -1
  67. package/src/workflow/WorkflowExecutionTimeline.tsx +2 -1
  68. package/src/workflow/__tests__/useWorkflowExecutionEventStream.test.tsx +117 -1
  69. package/src/workflow/execution/useWaterfallEntries.ts +2 -1
  70. package/src/workflow/useWorkflowExecutionEventStream.ts +122 -41
  71. package/src/workflow/waterfall/WaterfallTimeline.tsx +2 -1
  72. package/styles.css +1 -1
  73. package/workflow/WorkflowExecutionHeader.d.ts.map +1 -1
  74. package/workflow/WorkflowExecutionHeader.js +3 -1
  75. package/workflow/WorkflowExecutionHeader.js.map +1 -1
  76. package/workflow/WorkflowExecutionTimeline.d.ts.map +1 -1
  77. package/workflow/WorkflowExecutionTimeline.js +1 -1
  78. package/workflow/WorkflowExecutionTimeline.js.map +1 -1
  79. package/workflow/execution/useWaterfallEntries.d.ts.map +1 -1
  80. package/workflow/execution/useWaterfallEntries.js +1 -1
  81. package/workflow/execution/useWaterfallEntries.js.map +1 -1
  82. package/workflow/useWorkflowExecutionEventStream.d.ts +32 -4
  83. package/workflow/useWorkflowExecutionEventStream.d.ts.map +1 -1
  84. package/workflow/useWorkflowExecutionEventStream.js +75 -32
  85. package/workflow/useWorkflowExecutionEventStream.js.map +1 -1
  86. package/workflow/waterfall/WaterfallTimeline.d.ts.map +1 -1
  87. package/workflow/waterfall/WaterfallTimeline.js +1 -1
  88. package/workflow/waterfall/WaterfallTimeline.js.map +1 -1
@@ -1,6 +1,6 @@
1
1
  "use client";
2
2
 
3
- import { useCallback, useEffect, useMemo, useState } from "react";
3
+ import { useCallback, useEffect, useMemo, useRef, useState } from "react";
4
4
  import type { AgentExecution } from "@stigmer/protos/ai/stigmer/agentic/agentexecution/v1/api_pb";
5
5
  import type { PendingApproval } from "@stigmer/protos/ai/stigmer/agentic/agentexecution/v1/approval_pb";
6
6
  import { ApprovalAction, ExecutionPhase } from "@stigmer/protos/ai/stigmer/agentic/agentexecution/v1/enum_pb";
@@ -17,6 +17,7 @@ import type {
17
17
  } from "@stigmer/sdk";
18
18
  import { isTerminalPhase } from "../execution/execution-phases";
19
19
  import { useStigmer } from "../hooks";
20
+ import { toError } from "../internal/toError";
20
21
  import { useConversationStoreRef } from "../internal/store";
21
22
  import { useCreateAgentExecution } from "../execution/useCreateAgentExecution";
22
23
  import { useExecutionStream } from "../execution/useExecutionStream";
@@ -31,6 +32,14 @@ import {
31
32
  specSkillRefsToInput,
32
33
  } from "./session-spec-converters";
33
34
 
35
+ /**
36
+ * Cadence for re-discovering the session's executions while the live stream
37
+ * cannot be relied on (a created-but-not-yet-listed execution, a silent
38
+ * connect-timeout, or an exhausted stream error). Disabled the instant the
39
+ * stream is healthy or terminal, so this never competes with the live feed.
40
+ */
41
+ const REDISCOVERY_POLL_INTERVAL_MS = 5_000;
42
+
34
43
  /**
35
44
  * Options for {@link UseSessionConversationReturn.sendFollowUp}.
36
45
  *
@@ -144,12 +153,29 @@ export interface UseSessionConversationReturn {
144
153
  readonly canSendFollowUp: boolean;
145
154
  /** True during the create RPC call (between submit and execution ID). */
146
155
  readonly isSending: boolean;
147
- /** Error from the last sendFollowUp attempt, or null. */
156
+ /**
157
+ * Error from the last `sendFollowUp` attempt, or `null`.
158
+ *
159
+ * Covers **both** failing paths — the optional `session.update()` and the
160
+ * `create()` RPC — so a follow-up never fails silently. When set, the user's
161
+ * message is preserved (see {@link pendingUserMessage}) and can be re-sent
162
+ * via {@link retryLastSend}.
163
+ */
148
164
  readonly sendError: Error | null;
149
- /** Reset `sendError` to `null`. */
165
+ /** Reset `sendError` to `null` (keeps the preserved pending message). */
150
166
  readonly clearSendError: () => void;
167
+ /**
168
+ * Re-send the most recent `sendFollowUp` (same message and options). No-op
169
+ * when nothing has been sent yet. Use as the "Retry" affordance on a failed
170
+ * turn; clears {@link sendError} for the new attempt.
171
+ */
172
+ readonly retryLastSend: () => void;
151
173
 
152
- /** The user's message text, shown in the thread before the stream delivers it. */
174
+ /**
175
+ * The user's message text, shown in the thread before the stream delivers it.
176
+ * Retained when a send fails so the typed message is never lost — pair with
177
+ * {@link sendError} to render the turn as failed with a retry control.
178
+ */
153
179
  readonly pendingUserMessage: string | null;
154
180
 
155
181
  /** Current workspace entries from the session spec. Empty array when session is not loaded. */
@@ -179,7 +205,27 @@ export interface UseSessionConversationReturn {
179
205
  /** Error from session or execution list loading, or `null` when healthy. */
180
206
  readonly loadError: Error | null;
181
207
 
182
- /** Error from the execution stream, or `null` when healthy. */
208
+ /**
209
+ * `true` while the execution stream is auto-reconnecting after a transient
210
+ * drop. The conversation stays visible and `streamError` remains `null` —
211
+ * surface a subtle "Reconnecting…" hint rather than an error banner.
212
+ */
213
+ readonly isReconnecting: boolean;
214
+ /**
215
+ * `true` when the stream opened but never delivered a first snapshot within
216
+ * the watchdog window (even after a silent retry) — the agent hasn't started.
217
+ * Distinct from `streamError`: nothing threw, the stream is simply silent.
218
+ * Surface an actionable "the agent hasn't started — Retry" banner wired to
219
+ * {@link reconnectStream}.
220
+ */
221
+ readonly connectTimedOut: boolean;
222
+ /**
223
+ * `true` when a live, non-terminal stream has been silent past the slow
224
+ * threshold. Purely informational ("still working — taking longer than
225
+ * usual"); cleared by the next update. Never an error.
226
+ */
227
+ readonly isSlow: boolean;
228
+ /** Error from the execution stream, or `null` when healthy or reconnecting. */
183
229
  readonly streamError: Error | null;
184
230
  /** Reset the stream error and re-establish the execution stream subscription. */
185
231
  readonly reconnectStream: () => void;
@@ -244,16 +290,24 @@ export function useSessionConversation(
244
290
  error: sessionError,
245
291
  refetch: refetchSession,
246
292
  } = useSession(sessionId);
293
+ // Bounded re-discovery (see REDISCOVERY_POLL_INTERVAL_MS). The gate depends on
294
+ // the stream below, so the decision is synced into state via an effect and fed
295
+ // back here on the next render — a one-frame lag that is immaterial at 5s.
296
+ const [rediscoveryActive, setRediscoveryActive] = useState(false);
247
297
  const {
248
298
  executions,
249
299
  isLoading: executionsLoading,
250
300
  error: executionsError,
251
301
  refetch,
252
- } = useSessionExecutions(sessionId);
302
+ } = useSessionExecutions(sessionId, {
303
+ refetchInterval: rediscoveryActive ? REDISCOVERY_POLL_INTERVAL_MS : false,
304
+ // Re-list on app-relaunch / tab refocus so an execution that appeared while
305
+ // backgrounded is picked up without the user having to act.
306
+ refetchOnWindowFocus: true,
307
+ });
253
308
  const {
254
309
  create,
255
310
  isCreating,
256
- error: createError,
257
311
  clearError: clearCreateError,
258
312
  } = useCreateAgentExecution();
259
313
  const { update: updateSession } = useUpdateSession();
@@ -274,6 +328,14 @@ export function useSessionConversation(
274
328
  const [pendingUserMessage, setPendingUserMessage] = useState<string | null>(
275
329
  null,
276
330
  );
331
+ // Dedicated send-failure state, distinct from the create hook's internal
332
+ // error so it can also cover the session.update() path. The last send's
333
+ // arguments are captured for an exact retry.
334
+ const [sendError, setSendError] = useState<Error | null>(null);
335
+ const lastSendRef = useRef<{
336
+ message: string;
337
+ options?: SendFollowUpOptions;
338
+ } | null>(null);
277
339
 
278
340
  const listActiveId = useMemo(() => {
279
341
  for (let i = executions.length - 1; i >= 0; i--) {
@@ -296,6 +358,25 @@ export function useSessionConversation(
296
358
  store: conversationStore,
297
359
  });
298
360
 
361
+ // Re-discovery gate. Poll only while the live stream cannot carry us:
362
+ // • a fresh session whose first execution is created but not yet listed
363
+ // (`executions.length === 0`) — the race this fix targets,
364
+ // • a silent connect-timeout, or an exhausted stream error.
365
+ // Never while the stream is healthy (`isStreaming`) or the active execution
366
+ // has reached a terminal phase — the live feed is then the source of truth.
367
+ const streamTerminal =
368
+ activeExecutionId !== null && isTerminalPhase(stream.phase);
369
+ const needsRediscovery =
370
+ !stream.isStreaming &&
371
+ !streamTerminal &&
372
+ ((activeExecutionId === null && executions.length === 0) ||
373
+ stream.connectTimedOut ||
374
+ stream.error !== null);
375
+
376
+ useEffect(() => {
377
+ setRediscoveryActive(needsRediscovery);
378
+ }, [needsRediscovery]);
379
+
299
380
  // Clear pendingExecutionId once the execution appears in the fetched list
300
381
  useEffect(() => {
301
382
  if (
@@ -306,12 +387,17 @@ export function useSessionConversation(
306
387
  }
307
388
  }, [pendingExecutionId, executions]);
308
389
 
309
- // Clear optimistic message once the stream delivers its first snapshot
390
+ // Clear the optimistic message and any stale send error — once the stream
391
+ // delivers a real snapshot. This also handles recovery: if a failed send's
392
+ // execution is later re-discovered and streams, the failed turn resolves into
393
+ // the live one instead of lingering. (At send time the composer is only
394
+ // enabled when no execution is active, so a *fresh* failure cannot be cleared
395
+ // here prematurely — `stream.execution` is null then.)
310
396
  useEffect(() => {
311
- if (pendingUserMessage && stream.execution) {
312
- setPendingUserMessage(null);
313
- }
314
- }, [pendingUserMessage, stream.execution]);
397
+ if (!stream.execution) return;
398
+ if (pendingUserMessage) setPendingUserMessage(null);
399
+ if (sendError) setSendError(null);
400
+ }, [pendingUserMessage, sendError, stream.execution]);
315
401
 
316
402
  // Refetch executions when stream reaches a terminal phase so the
317
403
  // fetched list reflects the completed status and listActiveId clears.
@@ -366,6 +452,9 @@ export function useSessionConversation(
366
452
  async (message: string, options?: SendFollowUpOptions): Promise<void> => {
367
453
  if (!sessionId || !session) return;
368
454
 
455
+ // Capture for retry and clear any prior failure before the new attempt.
456
+ lastSendRef.current = { message, options };
457
+ setSendError(null);
369
458
  setPendingUserMessage(message);
370
459
 
371
460
  try {
@@ -405,7 +494,10 @@ export function useSessionConversation(
405
494
  setPendingExecutionId(result.executionId);
406
495
  refetch();
407
496
  } catch (err) {
408
- setPendingUserMessage(null);
497
+ // Surface the failure and KEEP the user's message visible (do not clear
498
+ // pendingUserMessage) so the turn renders as failed-with-retry instead
499
+ // of vanishing. Covers both the update() and create() paths.
500
+ setSendError(toError(err));
409
501
  if (process.env.NODE_ENV !== "production") {
410
502
  console.error("[useSessionConversation] sendFollowUp failed:", err);
411
503
  }
@@ -414,6 +506,16 @@ export function useSessionConversation(
414
506
  [sessionId, session, org, stigmer, create, updateSession, refetch, refetchSession],
415
507
  );
416
508
 
509
+ const retryLastSend = useCallback(() => {
510
+ const last = lastSendRef.current;
511
+ if (last) void sendFollowUp(last.message, last.options);
512
+ }, [sendFollowUp]);
513
+
514
+ const clearSendError = useCallback(() => {
515
+ setSendError(null);
516
+ clearCreateError();
517
+ }, [clearCreateError]);
518
+
417
519
  const pendingApprovals = useMemo<readonly PendingApproval[]>(
418
520
  () => activeStreamExecution?.status?.pendingApprovals ?? [],
419
521
  [activeStreamExecution],
@@ -445,8 +547,9 @@ export function useSessionConversation(
445
547
  sendFollowUp,
446
548
  canSendFollowUp,
447
549
  isSending: isCreating,
448
- sendError: createError,
449
- clearSendError: clearCreateError,
550
+ sendError,
551
+ clearSendError,
552
+ retryLastSend,
450
553
 
451
554
  pendingUserMessage,
452
555
 
@@ -463,6 +566,9 @@ export function useSessionConversation(
463
566
  isLoading,
464
567
  loadError,
465
568
 
569
+ isReconnecting: stream.isReconnecting,
570
+ connectTimedOut: stream.connectTimedOut,
571
+ isSlow: stream.isSlow,
466
572
  streamError: stream.error,
467
573
  reconnectStream: stream.reconnect,
468
574
  };
@@ -6,6 +6,23 @@ import { ListAgentExecutionsBySessionRequestSchema } from "@stigmer/protos/ai/st
6
6
  import { useStigmer } from "../hooks";
7
7
  import { useFetch } from "../internal/useFetch";
8
8
 
9
+ /** Options for {@link useSessionExecutions}. */
10
+ export interface UseSessionExecutionsOptions {
11
+ /**
12
+ * Poll interval in milliseconds for re-listing the session's executions.
13
+ * Used by the conversation loop to re-discover a created-but-not-yet-listed
14
+ * execution. Pass `false` (the default) to disable polling and rely on the
15
+ * live stream plus imperative {@link UseSessionExecutionsReturn.refetch}.
16
+ */
17
+ readonly refetchInterval?: number | false;
18
+ /**
19
+ * Re-list when the window regains focus / the tab becomes visible — covers
20
+ * the app-relaunch case where an execution may have appeared while
21
+ * backgrounded. Defaults to `false`.
22
+ */
23
+ readonly refetchOnWindowFocus?: boolean;
24
+ }
25
+
9
26
  /** Return value of {@link useSessionExecutions}. */
10
27
  export interface UseSessionExecutionsReturn {
11
28
  /** All executions for the session, empty while loading or on error. */
@@ -56,6 +73,7 @@ export interface UseSessionExecutionsReturn {
56
73
  */
57
74
  export function useSessionExecutions(
58
75
  sessionId: string | null,
76
+ options?: UseSessionExecutionsOptions,
59
77
  ): UseSessionExecutionsReturn {
60
78
  const stigmer = useStigmer();
61
79
 
@@ -73,7 +91,11 @@ export function useSessionExecutions(
73
91
  : null,
74
92
  [sessionId, stigmer],
75
93
  [] as AgentExecution[],
76
- { cacheKey: sessionId ? `session-executions:${sessionId}` : undefined },
94
+ {
95
+ cacheKey: sessionId ? `session-executions:${sessionId}` : undefined,
96
+ refetchInterval: options?.refetchInterval,
97
+ refetchOnWindowFocus: options?.refetchOnWindowFocus,
98
+ },
77
99
  );
78
100
 
79
101
  return { executions, isLoading, isRefetching, error, refetch };
@@ -82,7 +82,10 @@ export const WorkflowExecutionHeader = memo(function WorkflowExecutionHeader({
82
82
  const isRunning = RUNNING_PHASES.has(phase);
83
83
  const isPaused = phase === ExecutionPhase.EXECUTION_PAUSED;
84
84
  const isFailed = phase === ExecutionPhase.EXECUTION_FAILED;
85
- const isLive = streamState.stage === "streaming" || streamState.stage === "connecting";
85
+ const isLive =
86
+ streamState.stage === "streaming" ||
87
+ streamState.stage === "connecting" ||
88
+ streamState.stage === "reconnecting";
86
89
 
87
90
  return (
88
91
  <header className={cn("flex items-center gap-3 border-b border-border px-4 py-3", className)}>
@@ -77,7 +77,8 @@ export const WorkflowExecutionTimeline = memo(function WorkflowExecutionTimeline
77
77
  }
78
78
  }, [events.length]);
79
79
 
80
- const isLive = streamState.stage === "streaming";
80
+ const isLive =
81
+ streamState.stage === "streaming" || streamState.stage === "reconnecting";
81
82
  const isConnecting = streamState.stage === "connecting";
82
83
  const isComplete = streamState.stage === "complete";
83
84
  const isError = streamState.stage === "error";
@@ -1,5 +1,5 @@
1
1
  import { describe, it, expect, vi, beforeEach } from "vitest";
2
- import { renderHook, act } from "@testing-library/react";
2
+ import { renderHook, act, waitFor } from "@testing-library/react";
3
3
  import type { ReactNode } from "react";
4
4
  import { createElement } from "react";
5
5
  import { create } from "@bufbuild/protobuf";
@@ -417,3 +417,119 @@ describe("useWorkflowExecutionEventStream", () => {
417
417
  expect(result.current.events[0]?.taskName).toBe("beta");
418
418
  });
419
419
  });
420
+
421
+ // ---------------------------------------------------------------------------
422
+ // Auto-reconnect (#174)
423
+ // ---------------------------------------------------------------------------
424
+
425
+ describe("useWorkflowExecutionEventStream — auto-reconnect", () => {
426
+ it("auto-reconnects on a transient drop and resumes from the latest sequence", async () => {
427
+ let call = 0;
428
+ const subscribeEvents = vi.fn((_req: any) => {
429
+ call += 1;
430
+ if (call === 1) {
431
+ return (async function* () {
432
+ yield makeTaskStartedEvent(5, "t5");
433
+ throw new TypeError("Load failed");
434
+ })();
435
+ }
436
+ return (async function* () {
437
+ yield makeTaskStartedEvent(6, "t6");
438
+ })();
439
+ });
440
+ const client = makeMockClient({ subscribeEvents });
441
+
442
+ const { result } = renderHook(
443
+ () =>
444
+ useWorkflowExecutionEventStream("wex-001", {
445
+ executionPhase: PHASE.IN_PROGRESS,
446
+ reconnectOptions: { baseDelayMs: 5, maxDelayMs: 5 },
447
+ }),
448
+ { wrapper: createWrapper(client) },
449
+ );
450
+
451
+ await waitFor(() => expect(subscribeEvents).toHaveBeenCalledTimes(2));
452
+ await waitFor(() => expect(result.current.events).toHaveLength(2));
453
+
454
+ // The resumed subscription continues after the last received sequence (5),
455
+ // so no events are lost or duplicated.
456
+ const secondReq = (subscribeEvents.mock.calls as unknown[][])[1]?.[0] as {
457
+ afterSequence: bigint;
458
+ };
459
+ expect(secondReq.afterSequence).toBe(BigInt(5));
460
+ expect(result.current.error).toBeNull();
461
+ });
462
+
463
+ it("surfaces an error after exhausting reconnect attempts", async () => {
464
+ const subscribeEvents = vi.fn(
465
+ () =>
466
+ (async function* () {
467
+ throw new TypeError("Load failed");
468
+ })(),
469
+ );
470
+ const client = makeMockClient({ subscribeEvents });
471
+
472
+ const { result } = renderHook(
473
+ () =>
474
+ useWorkflowExecutionEventStream("wex-001", {
475
+ executionPhase: PHASE.IN_PROGRESS,
476
+ reconnectOptions: { baseDelayMs: 1, maxDelayMs: 1, maxAttempts: 2 },
477
+ }),
478
+ { wrapper: createWrapper(client) },
479
+ );
480
+
481
+ await waitFor(() => expect(result.current.error).not.toBeNull(), {
482
+ timeout: 2000,
483
+ });
484
+ expect(result.current.streamState.stage).toBe("error");
485
+ // 1 initial attempt + 2 retries.
486
+ expect(subscribeEvents).toHaveBeenCalledTimes(3);
487
+ });
488
+
489
+ it("marks unsupported on UNIMPLEMENTED without retrying", async () => {
490
+ const subscribeEvents = vi.fn(
491
+ () =>
492
+ (async function* () {
493
+ throw new Error("UNIMPLEMENTED: event streaming not supported");
494
+ })(),
495
+ );
496
+ const client = makeMockClient({ subscribeEvents });
497
+
498
+ const { result } = renderHook(
499
+ () =>
500
+ useWorkflowExecutionEventStream("wex-001", {
501
+ executionPhase: PHASE.IN_PROGRESS,
502
+ reconnectOptions: { baseDelayMs: 1, maxDelayMs: 1 },
503
+ }),
504
+ { wrapper: createWrapper(client) },
505
+ );
506
+
507
+ await waitFor(() =>
508
+ expect(result.current.streamState.stage).toBe("unsupported"),
509
+ );
510
+ await new Promise((r) => setTimeout(r, 20));
511
+ expect(subscribeEvents).toHaveBeenCalledTimes(1);
512
+ });
513
+
514
+ it("treats a clean stream end as completion (never a reconnect loop)", async () => {
515
+ const subscribeEvents = vi.fn(async function* () {
516
+ /* no events, then clean end */
517
+ });
518
+ const client = makeMockClient({ subscribeEvents });
519
+
520
+ const { result } = renderHook(
521
+ () =>
522
+ useWorkflowExecutionEventStream("wex-001", {
523
+ executionPhase: PHASE.IN_PROGRESS,
524
+ reconnectOptions: { baseDelayMs: 1, maxDelayMs: 1 },
525
+ }),
526
+ { wrapper: createWrapper(client) },
527
+ );
528
+
529
+ await waitFor(() =>
530
+ expect(result.current.streamState.stage).toBe("complete"),
531
+ );
532
+ await new Promise((r) => setTimeout(r, 20));
533
+ expect(subscribeEvents).toHaveBeenCalledTimes(1);
534
+ });
535
+ });
@@ -61,7 +61,8 @@ export function useWaterfallEntries({
61
61
  executionStartIso,
62
62
  executionDurationMs,
63
63
  }: UseWaterfallEntriesOptions): UseWaterfallEntriesReturn {
64
- const isLive = streamState.stage === "streaming";
64
+ const isLive =
65
+ streamState.stage === "streaming" || streamState.stage === "reconnecting";
65
66
  const execStartEpoch = useMemo(
66
67
  () => (executionStartIso ? new Date(executionStartIso).getTime() : 0),
67
68
  [executionStartIso],
@@ -16,8 +16,15 @@ import {
16
16
  SubscribeEventsRequestSchema,
17
17
  } from "@stigmer/protos/ai/stigmer/agentic/workflowexecution/v1/io_pb";
18
18
  import { ExecutionPhase } from "@stigmer/protos/ai/stigmer/agentic/workflowexecution/v1/enum_pb";
19
+ import { isTransientStreamError } from "@stigmer/sdk";
19
20
  import { useStigmer } from "../hooks";
20
21
  import { toError } from "../internal/toError";
22
+ import {
23
+ computeBackoffDelay,
24
+ sleep,
25
+ DEFAULT_RECONNECT_MAX_ATTEMPTS,
26
+ type BackoffOptions,
27
+ } from "../internal/backoff";
21
28
  import {
22
29
  WorkflowExecutionEventStore,
23
30
  type WorkflowEventStreamState,
@@ -41,6 +48,20 @@ export interface UseWorkflowExecutionEventStreamOptions {
41
48
  * (terminal). When omitted, defaults to live streaming.
42
49
  */
43
50
  readonly executionPhase?: ExecutionPhase;
51
+ /**
52
+ * Automatically re-establish the live subscription with exponential
53
+ * backoff when it drops with a transient transport error, resuming from
54
+ * the last received `sequence_number` (no events lost). Defaults to `true`.
55
+ */
56
+ readonly autoReconnect?: boolean;
57
+ /**
58
+ * Tune the auto-reconnect backoff schedule and attempt cap. Omitted fields
59
+ * fall back to SDK defaults (base 1s, ×2, max 30s, 10 attempts).
60
+ */
61
+ readonly reconnectOptions?: BackoffOptions & {
62
+ /** Max attempts before surfacing a terminal `error`. */
63
+ readonly maxAttempts?: number;
64
+ };
44
65
  }
45
66
 
46
67
  /** Return value of {@link useWorkflowExecutionEventStream}. */
@@ -59,9 +80,20 @@ export interface UseWorkflowExecutionEventStreamReturn {
59
80
  readonly isStreaming: boolean;
60
81
  /** `true` while connecting to the event stream. */
61
82
  readonly isConnecting: boolean;
62
- /** Error from the last failed stream attempt, or `null`. */
83
+ /**
84
+ * `true` while a transient drop is being retried automatically. Accumulated
85
+ * events stay visible and `error` remains `null`; on success the
86
+ * subscription resumes from the last sequence number with no events lost.
87
+ */
88
+ readonly isReconnecting: boolean;
89
+ /** 1-based count of the in-flight reconnect attempt; `0` when not reconnecting. */
90
+ readonly reconnectAttempt: number;
91
+ /**
92
+ * Error from the last failed stream attempt, or `null`. Set only once
93
+ * auto-reconnect exhausts its attempts (or for a non-transient failure).
94
+ */
63
95
  readonly error: Error | null;
64
- /** Re-establish the stream subscription. */
96
+ /** Re-establish the stream subscription (manual fallback). */
65
97
  readonly reconnect: () => void;
66
98
  }
67
99
 
@@ -99,8 +131,10 @@ export function isRecoveryTransition(
99
131
  * integration.
100
132
  *
101
133
  * For running executions: subscribes via `subscribeEvents` with
102
- * replay+live-tail. On disconnect, reconnects from the last received
103
- * sequence number.
134
+ * replay+live-tail. A transient drop auto-reconnects with exponential
135
+ * backoff, resuming from the last received sequence number so no events are
136
+ * lost; `error` is surfaced only once retries are exhausted. A clean stream
137
+ * end is the server's completion signal and is never retried.
104
138
  *
105
139
  * For terminal executions: loads the full event log via paginated
106
140
  * `getEventLog` calls.
@@ -136,6 +170,8 @@ export function useWorkflowExecutionEventStream(
136
170
 
137
171
  const eventTypes = options?.eventTypes;
138
172
  const executionPhase = options?.executionPhase;
173
+ const autoReconnect = options?.autoReconnect ?? true;
174
+ const reconnectOptions = options?.reconnectOptions;
139
175
 
140
176
  // Stable ref for values that should not trigger re-subscription
141
177
  const storeRef = useRef(store);
@@ -239,50 +275,90 @@ export function useWorkflowExecutionEventStream(
239
275
  }
240
276
  })();
241
277
  } else {
242
- // Live-stream events for running executions
278
+ // Live-stream events for running executions, with auto-reconnect.
243
279
  currentStore.setStreamState({ stage: "connecting", executionId });
244
280
 
245
281
  (async () => {
246
- try {
247
- const afterSequence = currentStore.getLatestSequence();
248
-
249
- for await (const event of stigmer.workflowExecution.subscribeEvents(
250
- create(SubscribeEventsRequestSchema, {
251
- executionId,
252
- afterSequence,
253
- eventTypes: eventTypes ? [...eventTypes] : [],
254
- }),
255
- abortController.signal,
256
- )) {
257
- if (abortController.signal.aborted) return;
258
-
259
- startTransition(() => {
260
- currentStore.appendEvents([event]);
261
- if (currentStore.getStreamState().stage === "connecting") {
262
- currentStore.setStreamState({ stage: "streaming", executionId });
263
- }
264
- });
265
- }
266
-
267
- if (!abortController.signal.aborted) {
268
- currentStore.setStreamState({ stage: "complete", executionId });
269
- }
270
- } catch (err) {
271
- if (abortController.signal.aborted) return;
272
-
273
- const error = toError(err);
274
- const isUnimplemented =
275
- error.message.includes("UNIMPLEMENTED") ||
276
- error.message.includes("unimplemented");
277
-
278
- if (isUnimplemented) {
279
- currentStore.setStreamState({ stage: "unsupported", executionId });
280
- } else {
282
+ const signal = abortController.signal;
283
+ const maxAttempts =
284
+ reconnectOptions?.maxAttempts ?? DEFAULT_RECONNECT_MAX_ATTEMPTS;
285
+
286
+ // 1-based count of consecutive failed attempts, reset by any event.
287
+ let attempt = 0;
288
+
289
+ while (!signal.aborted) {
290
+ try {
291
+ // Re-read each attempt: after a drop we resume from the last
292
+ // sequence number, so the server replays only what we missed and
293
+ // no events are lost or duplicated.
294
+ const afterSequence = currentStore.getLatestSequence();
295
+
296
+ for await (const event of stigmer.workflowExecution.subscribeEvents(
297
+ create(SubscribeEventsRequestSchema, {
298
+ executionId,
299
+ afterSequence,
300
+ eventTypes: eventTypes ? [...eventTypes] : [],
301
+ }),
302
+ signal,
303
+ )) {
304
+ if (signal.aborted) return;
305
+
306
+ attempt = 0; // an event proves the connection is healthy
307
+ startTransition(() => {
308
+ currentStore.appendEvents([event]);
309
+ const stage = currentStore.getStreamState().stage;
310
+ if (stage === "connecting" || stage === "reconnecting") {
311
+ currentStore.setStreamState({ stage: "streaming", executionId });
312
+ }
313
+ });
314
+ }
315
+
316
+ // A clean end of the event stream is the server's completion
317
+ // signal (the execution finished). Unlike the agent snapshot
318
+ // stream, there is no separate terminal marker to re-check, so we
319
+ // must NOT treat this as a premature drop — doing so would loop
320
+ // forever re-subscribing past the final sequence. Transient drops
321
+ // surface as thrown errors (handled below), not a clean end.
322
+ if (!signal.aborted) {
323
+ currentStore.setStreamState({ stage: "complete", executionId });
324
+ }
325
+ return;
326
+ } catch (err) {
327
+ if (signal.aborted) return;
328
+
329
+ const error = toError(err);
330
+ const isUnimplemented =
331
+ error.message.includes("UNIMPLEMENTED") ||
332
+ error.message.includes("unimplemented");
333
+
334
+ // A server without event-stream support will never recover —
335
+ // surface the unsupported state immediately, never retry.
336
+ if (isUnimplemented) {
337
+ currentStore.setStreamState({ stage: "unsupported", executionId });
338
+ return;
339
+ }
340
+
341
+ if (
342
+ !autoReconnect ||
343
+ !isTransientStreamError(error) ||
344
+ attempt >= maxAttempts
345
+ ) {
346
+ currentStore.setStreamState({ stage: "error", executionId, error });
347
+ return;
348
+ }
349
+
350
+ attempt += 1;
281
351
  currentStore.setStreamState({
282
- stage: "error",
352
+ stage: "reconnecting",
283
353
  executionId,
354
+ attempt,
284
355
  error,
285
356
  });
357
+ try {
358
+ await sleep(computeBackoffDelay(attempt, reconnectOptions), signal);
359
+ } catch {
360
+ return; // aborted mid-backoff
361
+ }
286
362
  }
287
363
  }
288
364
  })();
@@ -302,6 +378,9 @@ export function useWorkflowExecutionEventStream(
302
378
 
303
379
  const isStreaming = streamState.stage === "streaming";
304
380
  const isConnecting = streamState.stage === "connecting";
381
+ const isReconnecting = streamState.stage === "reconnecting";
382
+ const reconnectAttempt =
383
+ streamState.stage === "reconnecting" ? streamState.attempt : 0;
305
384
  const error = streamState.stage === "error" ? streamState.error : null;
306
385
 
307
386
  return {
@@ -312,6 +391,8 @@ export function useWorkflowExecutionEventStream(
312
391
  totalTasks,
313
392
  isStreaming,
314
393
  isConnecting,
394
+ isReconnecting,
395
+ reconnectAttempt,
315
396
  error,
316
397
  reconnect,
317
398
  };