npm - @stigmer/react - Versions diffs - 3.0.8-dev.20260612122433 → 3.0.8-dev.20260613051837 - Mend

@stigmer/react 3.0.8-dev.20260612122433 → 3.0.8-dev.20260613051837

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (88) hide show

package/execution/ExecutionProgress.d.ts.map +1 -1
package/execution/ExecutionProgress.js +5 -1
package/execution/ExecutionProgress.js.map +1 -1
package/execution/MessageThread.d.ts +32 -3
package/execution/MessageThread.d.ts.map +1 -1
package/execution/MessageThread.js +59 -10
package/execution/MessageThread.js.map +1 -1
package/execution/useExecutionStream.d.ts +76 -5
package/execution/useExecutionStream.d.ts.map +1 -1
package/execution/useExecutionStream.js +166 -23
package/execution/useExecutionStream.js.map +1 -1
package/internal/VirtualizedThread.d.ts +3 -1
package/internal/VirtualizedThread.d.ts.map +1 -1
package/internal/VirtualizedThread.js +4 -2
package/internal/VirtualizedThread.js.map +1 -1
package/internal/backoff.d.ts +61 -0
package/internal/backoff.d.ts.map +1 -0
package/internal/backoff.js +79 -0
package/internal/backoff.js.map +1 -0
package/internal/store/conversation-store.d.ts +34 -0
package/internal/store/conversation-store.d.ts.map +1 -1
package/internal/store/conversation-store.js +50 -2
package/internal/store/conversation-store.js.map +1 -1
package/internal/store/workflow-execution-event-store.d.ts +12 -0
package/internal/store/workflow-execution-event-store.d.ts.map +1 -1
package/internal/store/workflow-execution-event-store.js +7 -0
package/internal/store/workflow-execution-event-store.js.map +1 -1
package/internal/stream-controller.d.ts +57 -21
package/internal/stream-controller.d.ts.map +1 -1
package/internal/stream-controller.js +117 -3
package/internal/stream-controller.js.map +1 -1
package/internal/useFetch.d.ts +7 -0
package/internal/useFetch.d.ts.map +1 -1
package/internal/useFetch.js +21 -0
package/internal/useFetch.js.map +1 -1
package/package.json +4 -4
package/session/SessionViewer.js +26 -1
package/session/SessionViewer.js.map +1 -1
package/session/useSessionConversation.d.ts +41 -4
package/session/useSessionConversation.d.ts.map +1 -1
package/session/useSessionConversation.js +74 -10
package/session/useSessionConversation.js.map +1 -1
package/session/useSessionExecutions.d.ts +17 -1
package/session/useSessionExecutions.d.ts.map +1 -1
package/session/useSessionExecutions.js +6 -2
package/session/useSessionExecutions.js.map +1 -1
package/src/execution/ExecutionProgress.tsx +12 -0
package/src/execution/MessageThread.tsx +174 -5
package/src/execution/__tests__/MessageThread.test.tsx +64 -0
package/src/execution/__tests__/useExecutionStream.test.tsx +279 -0
package/src/execution/useExecutionStream.ts +254 -34
package/src/internal/VirtualizedThread.tsx +7 -1
package/src/internal/__tests__/backoff.test.ts +99 -0
package/src/internal/__tests__/stream-controller.test.ts +165 -10
package/src/internal/__tests__/useFetch.test.tsx +59 -0
package/src/internal/backoff.ts +100 -0
package/src/internal/store/__tests__/conversation-store.test.ts +61 -0
package/src/internal/store/conversation-store.ts +68 -3
package/src/internal/store/workflow-execution-event-store.ts +22 -0
package/src/internal/stream-controller.ts +151 -26
package/src/internal/useFetch.ts +26 -0
package/src/session/SessionViewer.tsx +89 -0
package/src/session/__tests__/useSessionConversation.test.tsx +53 -0
package/src/session/useSessionConversation.ts +121 -15
package/src/session/useSessionExecutions.ts +23 -1
package/src/workflow/WorkflowExecutionHeader.tsx +4 -1
package/src/workflow/WorkflowExecutionTimeline.tsx +2 -1
package/src/workflow/__tests__/useWorkflowExecutionEventStream.test.tsx +117 -1
package/src/workflow/execution/useWaterfallEntries.ts +2 -1
package/src/workflow/useWorkflowExecutionEventStream.ts +122 -41
package/src/workflow/waterfall/WaterfallTimeline.tsx +2 -1
package/styles.css +1 -1
package/workflow/WorkflowExecutionHeader.d.ts.map +1 -1
package/workflow/WorkflowExecutionHeader.js +3 -1
package/workflow/WorkflowExecutionHeader.js.map +1 -1
package/workflow/WorkflowExecutionTimeline.d.ts.map +1 -1
package/workflow/WorkflowExecutionTimeline.js +1 -1
package/workflow/WorkflowExecutionTimeline.js.map +1 -1
package/workflow/execution/useWaterfallEntries.d.ts.map +1 -1
package/workflow/execution/useWaterfallEntries.js +1 -1
package/workflow/execution/useWaterfallEntries.js.map +1 -1
package/workflow/useWorkflowExecutionEventStream.d.ts +32 -4
package/workflow/useWorkflowExecutionEventStream.d.ts.map +1 -1
package/workflow/useWorkflowExecutionEventStream.js +75 -32
package/workflow/useWorkflowExecutionEventStream.js.map +1 -1
package/workflow/waterfall/WaterfallTimeline.d.ts.map +1 -1
package/workflow/waterfall/WaterfallTimeline.js +1 -1
package/workflow/waterfall/WaterfallTimeline.js.map +1 -1

package/src/session/useSessionConversation.ts CHANGED Viewed

@@ -1,6 +1,6 @@
 "use client";
-import { useCallback, useEffect, useMemo, useState } from "react";
+import { useCallback, useEffect, useMemo, useRef, useState } from "react";
 import type { AgentExecution } from "@stigmer/protos/ai/stigmer/agentic/agentexecution/v1/api_pb";
 import type { PendingApproval } from "@stigmer/protos/ai/stigmer/agentic/agentexecution/v1/approval_pb";
 import { ApprovalAction, ExecutionPhase } from "@stigmer/protos/ai/stigmer/agentic/agentexecution/v1/enum_pb";
@@ -17,6 +17,7 @@ import type {
 } from "@stigmer/sdk";
 import { isTerminalPhase } from "../execution/execution-phases";
 import { useStigmer } from "../hooks";
+import { toError } from "../internal/toError";
 import { useConversationStoreRef } from "../internal/store";
 import { useCreateAgentExecution } from "../execution/useCreateAgentExecution";
 import { useExecutionStream } from "../execution/useExecutionStream";
@@ -31,6 +32,14 @@ import {
   specSkillRefsToInput,
 } from "./session-spec-converters";
+/**
+ * Poll interval (ms) handed to `useSessionExecutions` as `refetchInterval`
+ * while the live stream cannot be relied on (a created-but-not-yet-listed
+ * execution, a silent connect-timeout, or a stream error). Polling is switched
+ * off on the render after the stream turns healthy or terminal (effect-synced).
+ */
+const REDISCOVERY_POLL_INTERVAL_MS = 5_000;
 /**
  * Options for {@link UseSessionConversationReturn.sendFollowUp}.
  *
@@ -144,12 +153,29 @@ export interface UseSessionConversationReturn {
   readonly canSendFollowUp: boolean;
   /** True during the create RPC call (between submit and execution ID). */
   readonly isSending: boolean;
-  /** Error from the last sendFollowUp attempt, or null. */
+  /**
+   * Error from the last `sendFollowUp` attempt, or `null`.
+   *
+   * Covers **both** failing paths — the optional `session.update()` and the
+   * `create()` RPC — so a follow-up never fails silently. When set, the user's
+   * message is preserved (see {@link pendingUserMessage}) and can be re-sent
+   * via {@link retryLastSend}.
+   */
   readonly sendError: Error | null;
-  /** Reset `sendError` to `null`. */
+  /** Reset `sendError` to `null` (keeps the preserved pending message). */
   readonly clearSendError: () => void;
+  /**
+   * Re-send the most recent `sendFollowUp` (same message and options). No-op
+   * when nothing has been sent yet. Use as the "Retry" affordance on a failed
+   * turn; clears {@link sendError} for the new attempt.
+   */
+  readonly retryLastSend: () => void;
-  /** The user's message text, shown in the thread before the stream delivers it. */
+  /**
+   * The user's message text, shown in the thread before the stream delivers it.
+   * Retained when a send fails so the typed message is never lost — pair with
+   * {@link sendError} to render the turn as failed with a retry control.
+   */
   readonly pendingUserMessage: string | null;
   /** Current workspace entries from the session spec. Empty array when session is not loaded. */
@@ -179,7 +205,27 @@ export interface UseSessionConversationReturn {
   /** Error from session or execution list loading, or `null` when healthy. */
   readonly loadError: Error | null;
-  /** Error from the execution stream, or `null` when healthy. */
+  /**
+   * `true` while the execution stream is auto-reconnecting after a transient
+   * drop. The conversation stays visible and `streamError` remains `null` —
+   * surface a subtle "Reconnecting…" hint rather than an error banner.
+   */
+  readonly isReconnecting: boolean;
+  /**
+   * `true` when the stream opened but never delivered a first snapshot within
+   * the watchdog window (even after a silent retry) — the agent hasn't started.
+   * Distinct from `streamError`: nothing threw, the stream is simply silent.
+   * Surface an actionable "the agent hasn't started — Retry" banner wired to
+   * {@link reconnectStream}.
+   */
+  readonly connectTimedOut: boolean;
+  /**
+   * `true` when a live, non-terminal stream has been silent past the slow
+   * threshold. Purely informational ("still working — taking longer than
+   * usual"); cleared by the next update. Never an error.
+   */
+  readonly isSlow: boolean;
+  /** Error from the execution stream, or `null` when healthy or reconnecting. */
   readonly streamError: Error | null;
   /** Reset the stream error and re-establish the execution stream subscription. */
   readonly reconnectStream: () => void;
@@ -244,16 +290,24 @@ export function useSessionConversation(
     error: sessionError,
     refetch: refetchSession,
   } = useSession(sessionId);
+  // Bounded re-discovery (see REDISCOVERY_POLL_INTERVAL_MS). The gate depends on
+  // the stream below, so the decision is synced into state via an effect and fed
+  // back here on the next render — a one-frame lag that is immaterial at 5s.
+  const [rediscoveryActive, setRediscoveryActive] = useState(false);
   const {
     executions,
     isLoading: executionsLoading,
     error: executionsError,
     refetch,
-  } = useSessionExecutions(sessionId);
+  } = useSessionExecutions(sessionId, {
+    refetchInterval: rediscoveryActive ? REDISCOVERY_POLL_INTERVAL_MS : false,
+    // Re-list on app-relaunch / tab refocus so an execution that appeared while
+    // backgrounded is picked up without the user having to act.
+    refetchOnWindowFocus: true,
+  });
   const {
     create,
     isCreating,
-    error: createError,
     clearError: clearCreateError,
   } = useCreateAgentExecution();
   const { update: updateSession } = useUpdateSession();
@@ -274,6 +328,14 @@ export function useSessionConversation(
   const [pendingUserMessage, setPendingUserMessage] = useState<string | null>(
     null,
   );
+  // Dedicated send-failure state, distinct from the create hook's internal
+  // error so it can also cover the session.update() path. The last send's
+  // arguments are captured for an exact retry.
+  const [sendError, setSendError] = useState<Error | null>(null);
+  const lastSendRef = useRef<{
+    message: string;
+    options?: SendFollowUpOptions;
+  } | null>(null);
   const listActiveId = useMemo(() => {
     for (let i = executions.length - 1; i >= 0; i--) {
@@ -296,6 +358,25 @@ export function useSessionConversation(
     store: conversationStore,
   });
+  // Re-discovery gate. Poll only while the live stream cannot carry us:
+  //  • a fresh session whose first execution is created but not yet listed
+  //    (`executions.length === 0`) — the race this fix targets,
+  //  • a silent connect-timeout, or an exhausted stream error.
+  // Never while the stream is healthy (`isStreaming`) or the active execution
+  // has reached a terminal phase — the live feed is then the source of truth.
+  const streamTerminal =
+    activeExecutionId !== null && isTerminalPhase(stream.phase);
+  const needsRediscovery =
+    !stream.isStreaming &&
+    !streamTerminal &&
+    ((activeExecutionId === null && executions.length === 0) ||
+      stream.connectTimedOut ||
+      stream.error !== null);
+  useEffect(() => {
+    setRediscoveryActive(needsRediscovery);
+  }, [needsRediscovery]);
   // Clear pendingExecutionId once the execution appears in the fetched list
   useEffect(() => {
     if (
@@ -306,12 +387,17 @@ export function useSessionConversation(
     }
   }, [pendingExecutionId, executions]);
-  // Clear optimistic message once the stream delivers its first snapshot
+  // Clear the optimistic message — and any stale send error — once the stream
+  // delivers a real snapshot. This also handles recovery: if a failed send's
+  // execution is later re-discovered and streams, the failed turn resolves into
+  // the live one instead of lingering. (At send time the composer is only
+  // enabled when no execution is active, so a *fresh* failure cannot be cleared
+  // here prematurely — `stream.execution` is null then.)
   useEffect(() => {
-    if (pendingUserMessage && stream.execution) {
-      setPendingUserMessage(null);
-    }
-  }, [pendingUserMessage, stream.execution]);
+    if (!stream.execution) return;
+    if (pendingUserMessage) setPendingUserMessage(null);
+    if (sendError) setSendError(null);
+  }, [pendingUserMessage, sendError, stream.execution]);
   // Refetch executions when stream reaches a terminal phase so the
   // fetched list reflects the completed status and listActiveId clears.
@@ -366,6 +452,9 @@ export function useSessionConversation(
     async (message: string, options?: SendFollowUpOptions): Promise<void> => {
       if (!sessionId || !session) return;
+      // Capture for retry and clear any prior failure before the new attempt.
+      lastSendRef.current = { message, options };
+      setSendError(null);
       setPendingUserMessage(message);
       try {
@@ -405,7 +494,10 @@ export function useSessionConversation(
         setPendingExecutionId(result.executionId);
         refetch();
       } catch (err) {
-        setPendingUserMessage(null);
+        // Surface the failure and KEEP the user's message visible (do not clear
+        // pendingUserMessage) so the turn renders as failed-with-retry instead
+        // of vanishing. Covers both the update() and create() paths.
+        setSendError(toError(err));
         if (process.env.NODE_ENV !== "production") {
           console.error("[useSessionConversation] sendFollowUp failed:", err);
         }
@@ -414,6 +506,16 @@ export function useSessionConversation(
     [sessionId, session, org, stigmer, create, updateSession, refetch, refetchSession],
   );
+  const retryLastSend = useCallback(() => {
+    const last = lastSendRef.current;
+    if (last) void sendFollowUp(last.message, last.options);
+  }, [sendFollowUp]);
+  const clearSendError = useCallback(() => {
+    setSendError(null);
+    clearCreateError();
+  }, [clearCreateError]);
   const pendingApprovals = useMemo<readonly PendingApproval[]>(
     () => activeStreamExecution?.status?.pendingApprovals ?? [],
     [activeStreamExecution],
@@ -445,8 +547,9 @@ export function useSessionConversation(
     sendFollowUp,
     canSendFollowUp,
     isSending: isCreating,
-    sendError: createError,
-    clearSendError: clearCreateError,
+    sendError,
+    clearSendError,
+    retryLastSend,
     pendingUserMessage,
@@ -463,6 +566,9 @@ export function useSessionConversation(
     isLoading,
     loadError,
+    isReconnecting: stream.isReconnecting,
+    connectTimedOut: stream.connectTimedOut,
+    isSlow: stream.isSlow,
     streamError: stream.error,
     reconnectStream: stream.reconnect,
   };

package/src/session/useSessionExecutions.ts CHANGED Viewed

@@ -6,6 +6,23 @@ import { ListAgentExecutionsBySessionRequestSchema } from "@stigmer/protos/ai/st
 import { useStigmer } from "../hooks";
 import { useFetch } from "../internal/useFetch";
+/** Polling / refocus options for {@link useSessionExecutions}; both are forwarded unchanged to `useFetch`. */
+export interface UseSessionExecutionsOptions {
+  /**
+   * Poll interval in milliseconds for re-listing the session's executions.
+   * Used by the conversation loop to re-discover a created-but-not-yet-listed
+   * execution. Pass `false` (the default) to disable polling and rely on the
+   * live stream plus imperative {@link UseSessionExecutionsReturn.refetch}.
+   */
+  readonly refetchInterval?: number | false;
+  /**
+   * Re-list when the window regains focus / the tab becomes visible, so an
+   * execution that appeared while the app was backgrounded (the app-relaunch
+   * case) is picked up without user action. Defaults to `false`.
+   */
+  readonly refetchOnWindowFocus?: boolean;
+}
 /** Return value of {@link useSessionExecutions}. */
 export interface UseSessionExecutionsReturn {
   /** All executions for the session, empty while loading or on error. */
@@ -56,6 +73,7 @@ export interface UseSessionExecutionsReturn {
  */
 export function useSessionExecutions(
   sessionId: string | null,
+  options?: UseSessionExecutionsOptions,
 ): UseSessionExecutionsReturn {
   const stigmer = useStigmer();
@@ -73,7 +91,11 @@ export function useSessionExecutions(
       : null,
     [sessionId, stigmer],
     [] as AgentExecution[],
-    { cacheKey: sessionId ? `session-executions:${sessionId}` : undefined },
+    {
+      cacheKey: sessionId ? `session-executions:${sessionId}` : undefined,
+      refetchInterval: options?.refetchInterval,
+      refetchOnWindowFocus: options?.refetchOnWindowFocus,
+    },
   );
   return { executions, isLoading, isRefetching, error, refetch };

package/src/workflow/WorkflowExecutionHeader.tsx CHANGED Viewed

@@ -82,7 +82,10 @@ export const WorkflowExecutionHeader = memo(function WorkflowExecutionHeader({
   const isRunning = RUNNING_PHASES.has(phase);
   const isPaused = phase === ExecutionPhase.EXECUTION_PAUSED;
   const isFailed = phase === ExecutionPhase.EXECUTION_FAILED;
-  const isLive = streamState.stage === "streaming" || streamState.stage === "connecting";
+  const isLive =
+    streamState.stage === "streaming" ||
+    streamState.stage === "connecting" ||
+    streamState.stage === "reconnecting";
   return (
     <header className={cn("flex items-center gap-3 border-b border-border px-4 py-3", className)}>

package/src/workflow/WorkflowExecutionTimeline.tsx CHANGED Viewed

@@ -77,7 +77,8 @@ export const WorkflowExecutionTimeline = memo(function WorkflowExecutionTimeline
     }
   }, [events.length]);
-  const isLive = streamState.stage === "streaming";
+  const isLive =
+    streamState.stage === "streaming" || streamState.stage === "reconnecting";
   const isConnecting = streamState.stage === "connecting";
   const isComplete = streamState.stage === "complete";
   const isError = streamState.stage === "error";

package/src/workflow/__tests__/useWorkflowExecutionEventStream.test.tsx CHANGED Viewed

@@ -1,5 +1,5 @@
 import { describe, it, expect, vi, beforeEach } from "vitest";
-import { renderHook, act } from "@testing-library/react";
+import { renderHook, act, waitFor } from "@testing-library/react";
 import type { ReactNode } from "react";
 import { createElement } from "react";
 import { create } from "@bufbuild/protobuf";
@@ -417,3 +417,119 @@ describe("useWorkflowExecutionEventStream", () => {
     expect(result.current.events[0]?.taskName).toBe("beta");
   });
 });
+// ---------------------------------------------------------------------------
+// Auto-reconnect (#174): transient drop, exhausted retries, UNIMPLEMENTED, clean end
+// ---------------------------------------------------------------------------
+describe("useWorkflowExecutionEventStream — auto-reconnect", () => {
+  it("auto-reconnects on a transient drop and resumes from the latest sequence", async () => {
+    let call = 0; // number of subscribeEvents invocations so far
+    const subscribeEvents = vi.fn((_req: any) => {
+      call += 1;
+      if (call === 1) {
+        return (async function* () {
+          yield makeTaskStartedEvent(5, "t5"); // first arg is presumably the sequence (asserted below as afterSequence 5)
+          throw new TypeError("Load failed"); // simulated transport drop after one event
+        })();
+      }
+      return (async function* () {
+        yield makeTaskStartedEvent(6, "t6"); // delivered by the resumed subscription, then a clean end
+      })();
+    });
+    const client = makeMockClient({ subscribeEvents });
+    const { result } = renderHook(
+      () =>
+        useWorkflowExecutionEventStream("wex-001", {
+          executionPhase: PHASE.IN_PROGRESS,
+          reconnectOptions: { baseDelayMs: 5, maxDelayMs: 5 }, // tiny backoff keeps the retry fast
+        }),
+      { wrapper: createWrapper(client) },
+    );
+    await waitFor(() => expect(subscribeEvents).toHaveBeenCalledTimes(2));
+    await waitFor(() => expect(result.current.events).toHaveLength(2));
+    // The second subscribe request must carry afterSequence = 5 (the last
+    // sequence received before the drop), so no events are lost or duplicated.
+    const secondReq = (subscribeEvents.mock.calls as unknown[][])[1]?.[0] as {
+      afterSequence: bigint;
+    };
+    expect(secondReq.afterSequence).toBe(BigInt(5));
+    expect(result.current.error).toBeNull(); // a recovered drop never surfaces as an error
+  });
+  it("surfaces an error after exhausting reconnect attempts", async () => {
+    const subscribeEvents = vi.fn(
+      () =>
+        (async function* () {
+          throw new TypeError("Load failed"); // every attempt fails before yielding an event
+        })(),
+    );
+    const client = makeMockClient({ subscribeEvents });
+    const { result } = renderHook(
+      () =>
+        useWorkflowExecutionEventStream("wex-001", {
+          executionPhase: PHASE.IN_PROGRESS,
+          reconnectOptions: { baseDelayMs: 1, maxDelayMs: 1, maxAttempts: 2 },
+        }),
+      { wrapper: createWrapper(client) },
+    );
+    await waitFor(() => expect(result.current.error).not.toBeNull(), {
+      timeout: 2000,
+    });
+    expect(result.current.streamState.stage).toBe("error");
+    // 1 initial attempt + `maxAttempts` (2) retries.
+    expect(subscribeEvents).toHaveBeenCalledTimes(3);
+  });
+  it("marks unsupported on UNIMPLEMENTED without retrying", async () => {
+    const subscribeEvents = vi.fn(
+      () =>
+        (async function* () {
+          throw new Error("UNIMPLEMENTED: event streaming not supported"); // the hook detects this by message substring
+        })(),
+    );
+    const client = makeMockClient({ subscribeEvents });
+    const { result } = renderHook(
+      () =>
+        useWorkflowExecutionEventStream("wex-001", {
+          executionPhase: PHASE.IN_PROGRESS,
+          reconnectOptions: { baseDelayMs: 1, maxDelayMs: 1 },
+        }),
+      { wrapper: createWrapper(client) },
+    );
+    await waitFor(() =>
+      expect(result.current.streamState.stage).toBe("unsupported"),
+    );
+    await new Promise((r) => setTimeout(r, 20)); // 20 ms vs the 1 ms maxDelayMs: a stray retry would presumably have fired by now
+    expect(subscribeEvents).toHaveBeenCalledTimes(1);
+  });
+  it("treats a clean stream end as completion (never a reconnect loop)", async () => {
+    const subscribeEvents = vi.fn(async function* () {
+      /* yields nothing and returns: a clean end with zero events */
+    });
+    const client = makeMockClient({ subscribeEvents });
+    const { result } = renderHook(
+      () =>
+        useWorkflowExecutionEventStream("wex-001", {
+          executionPhase: PHASE.IN_PROGRESS,
+          reconnectOptions: { baseDelayMs: 1, maxDelayMs: 1 },
+        }),
+      { wrapper: createWrapper(client) },
+    );
+    await waitFor(() =>
+      expect(result.current.streamState.stage).toBe("complete"),
+    );
+    await new Promise((r) => setTimeout(r, 20)); // 20 ms vs the 1 ms maxDelayMs: a stray re-subscribe would presumably have fired by now
+    expect(subscribeEvents).toHaveBeenCalledTimes(1);
+  });
+});

package/src/workflow/execution/useWaterfallEntries.ts CHANGED Viewed

@@ -61,7 +61,8 @@ export function useWaterfallEntries({
   executionStartIso,
   executionDurationMs,
 }: UseWaterfallEntriesOptions): UseWaterfallEntriesReturn {
-  const isLive = streamState.stage === "streaming";
+  const isLive =
+    streamState.stage === "streaming" || streamState.stage === "reconnecting";
   const execStartEpoch = useMemo(
     () => (executionStartIso ? new Date(executionStartIso).getTime() : 0),
     [executionStartIso],

package/src/workflow/useWorkflowExecutionEventStream.ts CHANGED Viewed

@@ -16,8 +16,15 @@ import {
   SubscribeEventsRequestSchema,
 } from "@stigmer/protos/ai/stigmer/agentic/workflowexecution/v1/io_pb";
 import { ExecutionPhase } from "@stigmer/protos/ai/stigmer/agentic/workflowexecution/v1/enum_pb";
+import { isTransientStreamError } from "@stigmer/sdk";
 import { useStigmer } from "../hooks";
 import { toError } from "../internal/toError";
+import {
+  computeBackoffDelay,
+  sleep,
+  DEFAULT_RECONNECT_MAX_ATTEMPTS,
+  type BackoffOptions,
+} from "../internal/backoff";
 import {
   WorkflowExecutionEventStore,
   type WorkflowEventStreamState,
@@ -41,6 +48,20 @@ export interface UseWorkflowExecutionEventStreamOptions {
    * (terminal). When omitted, defaults to live streaming.
    */
   readonly executionPhase?: ExecutionPhase;
+  /**
+   * Automatically re-establish the live subscription with exponential
+   * backoff when it drops with a transient transport error, resuming from
+   * the last received `sequence_number` (no events lost). Defaults to `true`.
+   */
+  readonly autoReconnect?: boolean;
+  /**
+   * Tune the auto-reconnect backoff schedule and attempt cap. Omitted fields
+   * fall back to SDK defaults (base 1s, ×2, max 30s, 10 attempts).
+   */
+  readonly reconnectOptions?: BackoffOptions & {
+    /** Max attempts before surfacing a terminal `error`. */
+    readonly maxAttempts?: number;
+  };
 }
 /** Return value of {@link useWorkflowExecutionEventStream}. */
@@ -59,9 +80,20 @@ export interface UseWorkflowExecutionEventStreamReturn {
   readonly isStreaming: boolean;
   /** `true` while connecting to the event stream. */
   readonly isConnecting: boolean;
-  /** Error from the last failed stream attempt, or `null`. */
+  /**
+   * `true` while a transient drop is being retried automatically. Accumulated
+   * events stay visible and `error` remains `null`; on success the
+   * subscription resumes from the last sequence number with no events lost.
+   */
+  readonly isReconnecting: boolean;
+  /** 1-based count of the in-flight reconnect attempt; `0` when not reconnecting. */
+  readonly reconnectAttempt: number;
+  /**
+   * Error from the last failed stream attempt, or `null`. Set only once
+   * auto-reconnect exhausts its attempts (or for a non-transient failure).
+   */
   readonly error: Error | null;
-  /** Re-establish the stream subscription. */
+  /** Re-establish the stream subscription (manual fallback). */
   readonly reconnect: () => void;
 }
@@ -99,8 +131,10 @@ export function isRecoveryTransition(
  * integration.
  *
  * For running executions: subscribes via `subscribeEvents` with
- * replay+live-tail. On disconnect, reconnects from the last received
- * sequence number.
+ * replay+live-tail. A transient drop auto-reconnects with exponential
+ * backoff, resuming from the last received sequence number so no events are
+ * lost; `error` is surfaced only once retries are exhausted. A clean stream
+ * end is the server's completion signal and is never retried.
  *
  * For terminal executions: loads the full event log via paginated
  * `getEventLog` calls.
@@ -136,6 +170,8 @@ export function useWorkflowExecutionEventStream(
   const eventTypes = options?.eventTypes;
   const executionPhase = options?.executionPhase;
+  const autoReconnect = options?.autoReconnect ?? true;
+  const reconnectOptions = options?.reconnectOptions;
   // Stable ref for values that should not trigger re-subscription
   const storeRef = useRef(store);
@@ -239,50 +275,90 @@ export function useWorkflowExecutionEventStream(
         }
       })();
     } else {
-      // Live-stream events for running executions
+      // Live-stream events for running executions, with auto-reconnect.
       currentStore.setStreamState({ stage: "connecting", executionId });
       (async () => {
-        try {
-          const afterSequence = currentStore.getLatestSequence();
-          for await (const event of stigmer.workflowExecution.subscribeEvents(
-            create(SubscribeEventsRequestSchema, {
-              executionId,
-              afterSequence,
-              eventTypes: eventTypes ? [...eventTypes] : [],
-            }),
-            abortController.signal,
-          )) {
-            if (abortController.signal.aborted) return;
-            startTransition(() => {
-              currentStore.appendEvents([event]);
-              if (currentStore.getStreamState().stage === "connecting") {
-                currentStore.setStreamState({ stage: "streaming", executionId });
-              }
-            });
-          }
-          if (!abortController.signal.aborted) {
-            currentStore.setStreamState({ stage: "complete", executionId });
-          }
-        } catch (err) {
-          if (abortController.signal.aborted) return;
-          const error = toError(err);
-          const isUnimplemented =
-            error.message.includes("UNIMPLEMENTED") ||
-            error.message.includes("unimplemented");
-          if (isUnimplemented) {
-            currentStore.setStreamState({ stage: "unsupported", executionId });
-          } else {
+        const signal = abortController.signal;
+        const maxAttempts =
+          reconnectOptions?.maxAttempts ?? DEFAULT_RECONNECT_MAX_ATTEMPTS;
+        // 1-based count of consecutive failed attempts, reset by any event.
+        let attempt = 0;
+        while (!signal.aborted) {
+          try {
+            // Re-read each attempt: after a drop we resume from the last
+            // sequence number, so the server replays only what we missed and
+            // no events are lost or duplicated.
+            const afterSequence = currentStore.getLatestSequence();
+            for await (const event of stigmer.workflowExecution.subscribeEvents(
+              create(SubscribeEventsRequestSchema, {
+                executionId,
+                afterSequence,
+                eventTypes: eventTypes ? [...eventTypes] : [],
+              }),
+              signal,
+            )) {
+              if (signal.aborted) return;
+              attempt = 0; // an event proves the connection is healthy
+              startTransition(() => {
+                currentStore.appendEvents([event]);
+                const stage = currentStore.getStreamState().stage;
+                if (stage === "connecting" || stage === "reconnecting") {
+                  currentStore.setStreamState({ stage: "streaming", executionId });
+                }
+              });
+            }
+            // A clean end of the event stream is the server's completion
+            // signal (the execution finished). Unlike the agent snapshot
+            // stream, there is no separate terminal marker to re-check, so we
+            // must NOT treat this as a premature drop — doing so would loop
+            // forever re-subscribing past the final sequence. Transient drops
+            // surface as thrown errors (handled below), not a clean end.
+            if (!signal.aborted) {
+              currentStore.setStreamState({ stage: "complete", executionId });
+            }
+            return;
+          } catch (err) {
+            if (signal.aborted) return;
+            const error = toError(err);
+            const isUnimplemented =
+              error.message.includes("UNIMPLEMENTED") ||
+              error.message.includes("unimplemented");
+            // A server without event-stream support will never recover —
+            // surface the unsupported state immediately, never retry.
+            if (isUnimplemented) {
+              currentStore.setStreamState({ stage: "unsupported", executionId });
+              return;
+            }
+            if (
+              !autoReconnect ||
+              !isTransientStreamError(error) ||
+              attempt >= maxAttempts
+            ) {
+              currentStore.setStreamState({ stage: "error", executionId, error });
+              return;
+            }
+            attempt += 1;
             currentStore.setStreamState({
-              stage: "error",
+              stage: "reconnecting",
               executionId,
+              attempt,
               error,
             });
+            try {
+              await sleep(computeBackoffDelay(attempt, reconnectOptions), signal);
+            } catch {
+              return; // aborted mid-backoff
+            }
           }
         }
       })();
@@ -302,6 +378,9 @@ export function useWorkflowExecutionEventStream(
   const isStreaming = streamState.stage === "streaming";
   const isConnecting = streamState.stage === "connecting";
+  const isReconnecting = streamState.stage === "reconnecting";
+  const reconnectAttempt =
+    streamState.stage === "reconnecting" ? streamState.attempt : 0;
   const error = streamState.stage === "error" ? streamState.error : null;
   return {
@@ -312,6 +391,8 @@ export function useWorkflowExecutionEventStream(
     totalTasks,
     isStreaming,
     isConnecting,
+    isReconnecting,
+    reconnectAttempt,
     error,
     reconnect,
   };