npm - @ryanfw/prompt-orchestration-pipeline - Version diff - 1.2.2 → 1.2.3 - Mend

@ryanfw/prompt-orchestration-pipeline 1.2.2 → 1.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23)

package/package.json +1 -1
package/src/core/config.ts +1 -1
package/src/llm/index.ts +14 -10
package/src/providers/__tests__/alibaba.test.ts +152 -59
package/src/providers/__tests__/anthropic.test.ts +145 -55
package/src/providers/__tests__/claude-code.test.ts +68 -34
package/src/providers/__tests__/deepseek.test.ts +229 -269
package/src/providers/__tests__/gemini.test.ts +145 -100
package/src/providers/__tests__/moonshot.test.ts +140 -121
package/src/providers/__tests__/openai.test.ts +162 -55
package/src/providers/__tests__/stream-accumulator.test.ts +512 -0
package/src/providers/__tests__/zhipu.test.ts +153 -95
package/src/providers/alibaba.ts +138 -18
package/src/providers/anthropic.ts +75 -69
package/src/providers/base.ts +1 -1
package/src/providers/claude-code.ts +21 -12
package/src/providers/deepseek.ts +22 -156
package/src/providers/gemini.ts +10 -24
package/src/providers/moonshot.ts +19 -18
package/src/providers/openai.ts +180 -5
package/src/providers/stream-accumulator.ts +435 -0
package/src/providers/types.ts +1 -0
package/src/providers/zhipu.ts +19 -17

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@ryanfw/prompt-orchestration-pipeline",
-  "version": "1.2.2",
+  "version": "1.2.3",
   "description": "A Prompt-orchestration pipeline (POP) is a framework for building, running, and experimenting with complex chains of LLM tasks.",
   "type": "module",
   "main": "src/ui/server/index.ts",

package/src/core/config.ts CHANGED Viewed

@@ -94,7 +94,7 @@ export const defaultConfig = {
   taskRunner: {
     maxRefinementAttempts: 3,
     stageTimeout: 300000,
-    llmRequestTimeout: 120000,
+    llmRequestTimeout: 3600000,
   },
   llm: {
     defaultProvider: "openai",

package/src/llm/index.ts CHANGED Viewed

@@ -92,43 +92,43 @@ function inferJsonFormat(options: ChatOptions): ChatOptions {
 async function callAdapter(
   options: ChatOptions,
 ): Promise<AdapterResponse> {
-  const { provider, messages, model, temperature, maxTokens, responseFormat, topP, stop, maxRetries } = options;
+  const { provider, messages, model, temperature, maxTokens, responseFormat, topP, stop, maxRetries, requestTimeoutMs } = options;
   switch (provider) {
     case "alibaba":
       return alibabaChat({
-        messages, model, temperature, maxTokens, responseFormat, topP, stop, maxRetries,
+        messages, model, temperature, maxTokens, responseFormat, topP, stop, maxRetries, requestTimeoutMs,
         frequencyPenalty: options.frequencyPenalty,
         presencePenalty: options.presencePenalty,
       });
     case "anthropic":
-      return anthropicChat({ messages, model, temperature, maxTokens, responseFormat, topP, stop, maxRetries });
+      return anthropicChat({ messages, model, temperature, maxTokens, responseFormat, topP, stop, maxRetries, requestTimeoutMs });
     case "openai":
       return openaiChat({
-        messages, model, temperature, maxTokens, responseFormat, topP, stop, maxRetries,
+        messages, model, temperature, maxTokens, responseFormat, topP, stop, maxRetries, requestTimeoutMs,
         seed: undefined,
         frequencyPenalty: options.frequencyPenalty,
         presencePenalty: options.presencePenalty,
       });
     case "gemini":
       return geminiChat({
-        messages, model, temperature, maxTokens, responseFormat, topP, stop, maxRetries,
+        messages, model, temperature, maxTokens, responseFormat, topP, stop, maxRetries, requestTimeoutMs,
         frequencyPenalty: options.frequencyPenalty,
         presencePenalty: options.presencePenalty,
       });
     case "deepseek":
       return deepseekChat({
-        messages, model, temperature, maxTokens, responseFormat, topP, stop, maxRetries,
+        messages, model, temperature, maxTokens, responseFormat, topP, stop, maxRetries, requestTimeoutMs,
         frequencyPenalty: options.frequencyPenalty,
         presencePenalty: options.presencePenalty,
       });
     case "moonshot":
-      return moonshotChat({ messages, model, maxTokens, responseFormat, maxRetries });
+      return moonshotChat({ messages, model, maxTokens, responseFormat, maxRetries, requestTimeoutMs });
     case "zai":
     case "zhipu":
-      return zaiChat({ messages, model, temperature, maxTokens, responseFormat, topP, stop, maxRetries });
+      return zaiChat({ messages, model, temperature, maxTokens, responseFormat, topP, stop, maxRetries, requestTimeoutMs });
     case "claudecode":
-      return claudeCodeChat({ messages, model, maxTokens, responseFormat, maxRetries });
+      return claudeCodeChat({ messages, model, maxTokens, responseFormat, maxRetries, requestTimeoutMs });
     case "mock": {
       if (!mockProvider) {
         throw new Error("No mock provider registered. Call registerMockProvider() first.");
@@ -180,7 +180,11 @@ async function writeDebugLog(options: ChatOptions, response: ChatResponse): Prom
 export async function chat(options: ChatOptions): Promise<ChatResponse> {
   ensureMessagesPresent(options.messages, options.provider);
-  const opts = inferJsonFormat(options);
+  const configTimeout = getConfig().taskRunner.llmRequestTimeout;
+  const opts = inferJsonFormat({
+    ...options,
+    requestTimeoutMs: options.requestTimeoutMs ?? configTimeout,
+  });
   const id = `llm-${++requestCounter}-${Date.now()}`;
   const model = opts.model ?? "";
   const startTime = Date.now();

package/src/providers/__tests__/alibaba.test.ts CHANGED Viewed

@@ -4,25 +4,66 @@ import { ProviderJsonParseError } from "../types.ts";
 import type { AlibabaOptions } from "../types.ts";
 import type { Mock } from "vitest";
-function makeAlibabaResponse(
-  content: string,
-  promptTokens = 10,
-  completionTokens = 20,
-) {
-  return {
-    choices: [{ message: { content } }],
-    usage: {
-      prompt_tokens: promptTokens,
-      completion_tokens: completionTokens,
-      total_tokens: promptTokens + completionTokens,
+/**
+ * Creates a mock ReadableStream that yields SSE-formatted data.
+ */
+function makeSSEStream(events: string[]): ReadableStream<Uint8Array> {
+  const encoder = new TextEncoder();
+  const chunks = events.map((e) => encoder.encode(e));
+  let index = 0;
+  return new ReadableStream({
+    pull(controller) {
+      if (index < chunks.length) {
+        controller.enqueue(chunks[index]!);
+        index++;
+      } else {
+        controller.close();
+      }
     },
-  };
+  });
+}
+/**
+ * Builds SSE events for an OpenAI-compatible streaming response.
+ */
+function makeOpenAiSseEvents(
+  textChunks: string[],
+  usage?: { prompt_tokens: number; completion_tokens: number; total_tokens: number },
+): string[] {
+  const events: string[] = [];
+  for (const chunk of textChunks) {
+    events.push(
+      `data: ${JSON.stringify({ choices: [{ delta: { content: chunk } }] })}\n\n`,
+    );
+  }
+  if (usage) {
+    events.push(
+      `data: ${JSON.stringify({ choices: [{ delta: {}, finish_reason: "stop" }], usage })}\n\n`,
+    );
+  } else {
+    events.push(
+      `data: ${JSON.stringify({ choices: [{ delta: {}, finish_reason: "stop" }] })}\n\n`,
+    );
+  }
+  events.push("data: [DONE]\n\n");
+  return events;
 }
-function mockFetchResponse(body: unknown, status = 200) {
+function mockStreamingResponse(events: string[], status = 200) {
   return {
     ok: status >= 200 && status < 300,
     status,
+    body: makeSSEStream(events),
+    json: vi.fn(),
+    text: vi.fn(),
+  } as unknown as Response;
+}
+function mockErrorResponse(body: unknown, status: number) {
+  return {
+    ok: false,
+    status,
     json: vi.fn().mockResolvedValue(body),
     text: vi.fn().mockResolvedValue(JSON.stringify(body)),
   } as unknown as Response;
@@ -56,11 +97,11 @@ describe("alibabaChat", () => {
   it("returns parsed JSON content with usage on success", async () => {
     const jsonPayload = { result: "success", count: 42 };
-    fetchMock.mockResolvedValue(
-      mockFetchResponse(
-        makeAlibabaResponse(JSON.stringify(jsonPayload), 15, 25),
-      ),
+    const events = makeOpenAiSseEvents(
+      [JSON.stringify(jsonPayload)],
+      { prompt_tokens: 15, completion_tokens: 25, total_tokens: 40 },
     );
+    fetchMock.mockResolvedValue(mockStreamingResponse(events));
     const result = await alibabaChat(baseOptions);
@@ -70,14 +111,25 @@ describe("alibabaChat", () => {
       completion_tokens: 25,
       total_tokens: 40,
     });
-    expect(result.raw).toBeDefined();
+  });
+  it("sends stream: true and stream_options in request body", async () => {
+    const events = makeOpenAiSseEvents([JSON.stringify({ ok: true })]);
+    fetchMock.mockResolvedValue(mockStreamingResponse(events));
+    await alibabaChat(baseOptions);
+    const body = JSON.parse(
+      (fetchMock.mock.calls[0] as [string, RequestInit])[1].body as string,
+    );
+    expect(body.stream).toBe(true);
+    expect(body.stream_options).toEqual({ include_usage: true });
   });
   it("throws ProviderJsonParseError on invalid JSON when responseFormat is json_object", async () => {
     const nonJsonText = "This is plain text, not JSON at all.";
-    fetchMock.mockResolvedValue(
-      mockFetchResponse(makeAlibabaResponse(nonJsonText)),
-    );
+    const events = makeOpenAiSseEvents([nonJsonText]);
+    fetchMock.mockResolvedValue(mockStreamingResponse(events));
     try {
       await alibabaChat(baseOptions);
@@ -93,18 +145,15 @@ describe("alibabaChat", () => {
   it("retries on HTTP 500 with exponential backoff", async () => {
     const jsonPayload = { retried: true };
+    const events = makeOpenAiSseEvents([JSON.stringify(jsonPayload)]);
     fetchMock
       .mockResolvedValueOnce(
-        mockFetchResponse({ error: { message: "Server error" } }, 500),
+        mockErrorResponse({ error: { message: "Server error" } }, 500),
       )
       .mockResolvedValueOnce(
-        mockFetchResponse({ error: { message: "Server error" } }, 500),
+        mockErrorResponse({ error: { message: "Server error" } }, 500),
       )
-      .mockResolvedValueOnce(
-        mockFetchResponse(
-          makeAlibabaResponse(JSON.stringify(jsonPayload)),
-        ),
-      );
+      .mockResolvedValueOnce(mockStreamingResponse(events));
     const result = await alibabaChat({ ...baseOptions, maxRetries: 3 });
@@ -114,7 +163,7 @@ describe("alibabaChat", () => {
   it("does NOT retry on HTTP 401", async () => {
     fetchMock.mockResolvedValue(
-      mockFetchResponse({ error: { message: "Unauthorized" } }, 401),
+      mockErrorResponse({ error: { message: "Unauthorized" } }, 401),
     );
     await expect(
@@ -126,10 +175,8 @@ describe("alibabaChat", () => {
   it("uses ALIBABA_BASE_URL env var when set", async () => {
     process.env["ALIBABA_BASE_URL"] = "https://custom.api.example.com";
-    const jsonPayload = { ok: true };
-    fetchMock.mockResolvedValue(
-      mockFetchResponse(makeAlibabaResponse(JSON.stringify(jsonPayload))),
-    );
+    const events = makeOpenAiSseEvents([JSON.stringify({ ok: true })]);
+    fetchMock.mockResolvedValue(mockStreamingResponse(events));
     await alibabaChat(baseOptions);
@@ -140,10 +187,8 @@ describe("alibabaChat", () => {
   });
   it("passes frequencyPenalty and presencePenalty in request body", async () => {
-    const jsonPayload = { ok: true };
-    fetchMock.mockResolvedValue(
-      mockFetchResponse(makeAlibabaResponse(JSON.stringify(jsonPayload))),
-    );
+    const events = makeOpenAiSseEvents([JSON.stringify({ ok: true })]);
+    fetchMock.mockResolvedValue(mockStreamingResponse(events));
     await alibabaChat({
       ...baseOptions,
@@ -158,10 +203,9 @@ describe("alibabaChat", () => {
     expect(body.presence_penalty).toBe(0.2);
   });
-  it("passes an AbortSignal to fetch", async () => {
-    fetchMock.mockResolvedValue(
-      mockFetchResponse(makeAlibabaResponse(JSON.stringify({ ok: true }))),
-    );
+  it("passes an AbortSignal to fetch (IdleTimeoutController)", async () => {
+    const events = makeOpenAiSseEvents([JSON.stringify({ ok: true })]);
+    fetchMock.mockResolvedValue(mockStreamingResponse(events));
     await alibabaChat(baseOptions);
@@ -169,22 +213,9 @@ describe("alibabaChat", () => {
     expect(init.signal).toBeInstanceOf(AbortSignal);
   });
-  it("uses custom requestTimeoutMs for the abort signal", async () => {
-    const timeoutSpy = vi.spyOn(AbortSignal, "timeout");
-    fetchMock.mockResolvedValue(
-      mockFetchResponse(makeAlibabaResponse(JSON.stringify({ ok: true }))),
-    );
-    await alibabaChat({ ...baseOptions, requestTimeoutMs: 5000 });
-    expect(timeoutSpy).toHaveBeenCalledWith(5000);
-    timeoutSpy.mockRestore();
-  });
   it("sends enable_thinking true by default", async () => {
-    fetchMock.mockResolvedValue(
-      mockFetchResponse(makeAlibabaResponse(JSON.stringify({ ok: true }))),
-    );
+    const events = makeOpenAiSseEvents([JSON.stringify({ ok: true })]);
+    fetchMock.mockResolvedValue(mockStreamingResponse(events));
     await alibabaChat(baseOptions);
@@ -195,9 +226,8 @@ describe("alibabaChat", () => {
   });
   it("sends enable_thinking false when thinking is disabled", async () => {
-    fetchMock.mockResolvedValue(
-      mockFetchResponse(makeAlibabaResponse(JSON.stringify({ ok: true }))),
-    );
+    const events = makeOpenAiSseEvents([JSON.stringify({ ok: true })]);
+    fetchMock.mockResolvedValue(mockStreamingResponse(events));
     await alibabaChat({ ...baseOptions, thinking: "disabled" });
@@ -206,4 +236,67 @@ describe("alibabaChat", () => {
     );
     expect(body.enable_thinking).toBe(false);
   });
+  describe("streaming accumulation", () => {
+    it("accumulates text across multiple SSE chunks", async () => {
+      const events = [
+        'data: {"choices":[{"delta":{"content":"{\\"he"}}]}\n\n',
+        'data: {"choices":[{"delta":{"content":"llo\\":\\"world\\"}"}}]}\n\n',
+        'data: {"choices":[{"delta":{},"finish_reason":"stop"}]}\n\n',
+        "data: [DONE]\n\n",
+      ];
+      fetchMock.mockResolvedValue(mockStreamingResponse(events));
+      const result = await alibabaChat(baseOptions);
+      expect(result.content).toEqual({ hello: "world" });
+    });
+    it("captures usage from the final streaming chunk", async () => {
+      const events = makeOpenAiSseEvents(
+        [JSON.stringify({ ok: true })],
+        { prompt_tokens: 50, completion_tokens: 30, total_tokens: 80 },
+      );
+      fetchMock.mockResolvedValue(mockStreamingResponse(events));
+      const result = await alibabaChat(baseOptions);
+      expect(result.usage).toEqual({
+        prompt_tokens: 50,
+        completion_tokens: 30,
+        total_tokens: 80,
+      });
+    });
+    it("defaults usage to zeros when stream provides no usage", async () => {
+      const events = makeOpenAiSseEvents([JSON.stringify({ ok: true })]);
+      fetchMock.mockResolvedValue(mockStreamingResponse(events));
+      const result = await alibabaChat(baseOptions);
+      expect(result.usage).toEqual({
+        prompt_tokens: 0,
+        completion_tokens: 0,
+        total_tokens: 0,
+      });
+    });
+    it("retries on timeout then succeeds on second attempt", async () => {
+      const events = makeOpenAiSseEvents([JSON.stringify({ ok: true })]);
+      fetchMock
+        .mockRejectedValueOnce(
+          new DOMException("signal timed out", "TimeoutError"),
+        )
+        .mockResolvedValueOnce(mockStreamingResponse(events));
+      const result = await alibabaChat({
+        ...baseOptions,
+        maxRetries: 1,
+      });
+      expect(fetchMock).toHaveBeenCalledTimes(2);
+      expect(result.content).toEqual({ ok: true });
+    });
+  });
 });

package/src/providers/__tests__/anthropic.test.ts CHANGED Viewed

@@ -4,21 +4,67 @@ import { ProviderJsonModeError, ProviderJsonParseError } from "../types.ts";
 import type { AnthropicOptions } from "../types.ts";
 import type { Mock } from "vitest";
-function makeAnthropicResponse(
+/**
+ * Creates a mock ReadableStream that yields SSE-formatted data.
+ */
+function makeSSEStream(events: string[]): ReadableStream<Uint8Array> {
+  const encoder = new TextEncoder();
+  const chunks = events.map((e) => encoder.encode(e));
+  let index = 0;
+  return new ReadableStream({
+    pull(controller) {
+      if (index < chunks.length) {
+        controller.enqueue(chunks[index]!);
+        index++;
+      } else {
+        controller.close();
+      }
+    },
+  });
+}
+/**
+ * Builds SSE events for an Anthropic streaming response.
+ */
+function makeAnthropicSseEvents(
   text: string,
   inputTokens = 10,
   outputTokens = 20,
-) {
-  return {
-    content: [{ type: "text", text }],
-    usage: { input_tokens: inputTokens, output_tokens: outputTokens },
-  };
+): string[] {
+  return [
+    `event: message_start\ndata: ${JSON.stringify({
+      type: "message_start",
+      message: { usage: { input_tokens: inputTokens } },
+    })}\n\n`,
+    `event: content_block_delta\ndata: ${JSON.stringify({
+      type: "content_block_delta",
+      delta: { text },
+    })}\n\n`,
+    `event: message_delta\ndata: ${JSON.stringify({
+      type: "message_delta",
+      usage: { output_tokens: outputTokens },
+    })}\n\n`,
+    `event: message_stop\ndata: ${JSON.stringify({
+      type: "message_stop",
+    })}\n\n`,
+  ];
 }
-function mockFetchResponse(body: unknown, status = 200) {
+function mockStreamingResponse(events: string[], status = 200) {
   return {
     ok: status >= 200 && status < 300,
     status,
+    body: makeSSEStream(events),
+    json: vi.fn(),
+    text: vi.fn(),
+  } as unknown as Response;
+}
+function mockErrorResponse(body: unknown, status: number) {
+  return {
+    ok: false,
+    status,
     json: vi.fn().mockResolvedValue(body),
     text: vi.fn().mockResolvedValue(JSON.stringify(body)),
   } as unknown as Response;
@@ -51,9 +97,12 @@ describe("anthropicChat", () => {
   it("returns parsed JSON content, correct usage, and text for a valid response", async () => {
     const jsonPayload = { result: "success", count: 42 };
-    fetchMock.mockResolvedValue(
-      mockFetchResponse(makeAnthropicResponse(JSON.stringify(jsonPayload), 15, 25)),
+    const events = makeAnthropicSseEvents(
+      JSON.stringify(jsonPayload),
+      15,
+      25,
     );
+    fetchMock.mockResolvedValue(mockStreamingResponse(events));
     const result = await anthropicChat(baseOptions);
@@ -64,12 +113,11 @@ describe("anthropicChat", () => {
       completion_tokens: 25,
       total_tokens: 40,
     });
-    expect(result.raw).toBeDefined();
   });
   it("throws immediately on 401 without retrying", async () => {
     fetchMock.mockResolvedValue(
-      mockFetchResponse({ error: { message: "Unauthorized" } }, 401),
+      mockErrorResponse({ error: { message: "Unauthorized" } }, 401),
     );
     await expect(
@@ -82,13 +130,12 @@ describe("anthropicChat", () => {
   it("retries on 429 then succeeds on 200", async () => {
     const jsonPayload = { retried: true };
+    const events = makeAnthropicSseEvents(JSON.stringify(jsonPayload));
     fetchMock
       .mockResolvedValueOnce(
-        mockFetchResponse({ error: { message: "Rate limited" } }, 429),
+        mockErrorResponse({ error: { message: "Rate limited" } }, 429),
       )
-      .mockResolvedValueOnce(
-        mockFetchResponse(makeAnthropicResponse(JSON.stringify(jsonPayload))),
-      );
+      .mockResolvedValueOnce(mockStreamingResponse(events));
     const result = await anthropicChat({
       ...baseOptions,
@@ -110,9 +157,8 @@ describe("anthropicChat", () => {
   it("defaults to json responseFormat when responseFormat is omitted", async () => {
     const jsonPayload = { defaultFormat: true };
-    fetchMock.mockResolvedValue(
-      mockFetchResponse(makeAnthropicResponse(JSON.stringify(jsonPayload))),
-    );
+    const events = makeAnthropicSseEvents(JSON.stringify(jsonPayload));
+    fetchMock.mockResolvedValue(mockStreamingResponse(events));
     // responseFormat defaults to "json" — should not throw
     const result = await anthropicChat({
@@ -123,9 +169,8 @@ describe("anthropicChat", () => {
   it("throws ProviderJsonParseError for non-JSON text in JSON mode", async () => {
     const nonJsonText = "This is plain text, not JSON at all.";
-    fetchMock.mockResolvedValue(
-      mockFetchResponse(makeAnthropicResponse(nonJsonText)),
-    );
+    const events = makeAnthropicSseEvents(nonJsonText);
+    fetchMock.mockResolvedValue(mockStreamingResponse(events));
     try {
       await anthropicChat(baseOptions);
@@ -140,10 +185,8 @@ describe("anthropicChat", () => {
   });
   it("sends correct headers including anthropic-version and x-api-key", async () => {
-    const jsonPayload = { ok: true };
-    fetchMock.mockResolvedValue(
-      mockFetchResponse(makeAnthropicResponse(JSON.stringify(jsonPayload))),
-    );
+    const events = makeAnthropicSseEvents(JSON.stringify({ ok: true }));
+    fetchMock.mockResolvedValue(mockStreamingResponse(events));
     await anthropicChat(baseOptions);
@@ -155,11 +198,21 @@ describe("anthropicChat", () => {
     expect(headers["Content-Type"]).toBe("application/json");
   });
-  it("constructs the request body with system and messages in conversation order", async () => {
-    const jsonPayload = { ok: true };
-    fetchMock.mockResolvedValue(
-      mockFetchResponse(makeAnthropicResponse(JSON.stringify(jsonPayload))),
+  it("sends stream: true in the request body", async () => {
+    const events = makeAnthropicSseEvents(JSON.stringify({ ok: true }));
+    fetchMock.mockResolvedValue(mockStreamingResponse(events));
+    await anthropicChat(baseOptions);
+    const body = JSON.parse(
+      (fetchMock.mock.calls[0] as [string, RequestInit])[1].body as string,
     );
+    expect(body.stream).toBe(true);
+  });
+  it("constructs the request body with system and messages in conversation order", async () => {
+    const events = makeAnthropicSseEvents(JSON.stringify({ ok: true }));
+    fetchMock.mockResolvedValue(mockStreamingResponse(events));
     await anthropicChat({
       messages: [
@@ -186,10 +239,8 @@ describe("anthropicChat", () => {
   });
   it("uses custom model, temperature, and maxTokens when provided", async () => {
-    const jsonPayload = { custom: true };
-    fetchMock.mockResolvedValue(
-      mockFetchResponse(makeAnthropicResponse(JSON.stringify(jsonPayload))),
-    );
+    const events = makeAnthropicSseEvents(JSON.stringify({ custom: true }));
+    fetchMock.mockResolvedValue(mockStreamingResponse(events));
     await anthropicChat({
       ...baseOptions,
@@ -207,9 +258,8 @@ describe("anthropicChat", () => {
   });
   it("passes topP and stop sequences when provided", async () => {
-    fetchMock.mockResolvedValue(
-      mockFetchResponse(makeAnthropicResponse(JSON.stringify({ ok: true }))),
-    );
+    const events = makeAnthropicSseEvents(JSON.stringify({ ok: true }));
+    fetchMock.mockResolvedValue(mockStreamingResponse(events));
     await anthropicChat({
       ...baseOptions,
@@ -224,10 +274,9 @@ describe("anthropicChat", () => {
     expect(body.stop_sequences).toEqual(["END", "STOP"]);
   });
-  it("passes an AbortSignal to fetch", async () => {
-    fetchMock.mockResolvedValue(
-      mockFetchResponse(makeAnthropicResponse(JSON.stringify({ ok: true }))),
-    );
+  it("passes an AbortSignal to fetch (IdleTimeoutController)", async () => {
+    const events = makeAnthropicSseEvents(JSON.stringify({ ok: true }));
+    fetchMock.mockResolvedValue(mockStreamingResponse(events));
     await anthropicChat(baseOptions);
@@ -235,25 +284,66 @@ describe("anthropicChat", () => {
     expect(init.signal).toBeInstanceOf(AbortSignal);
   });
-  it("uses custom requestTimeoutMs for the abort signal", async () => {
-    const timeoutSpy = vi.spyOn(AbortSignal, "timeout");
-    fetchMock.mockResolvedValue(
-      mockFetchResponse(makeAnthropicResponse(JSON.stringify({ ok: true }))),
-    );
-    await anthropicChat({ ...baseOptions, requestTimeoutMs: 5000 });
-    expect(timeoutSpy).toHaveBeenCalledWith(5000);
-    timeoutSpy.mockRestore();
-  });
   it("handles markdown-fenced JSON responses", async () => {
     const fencedJson = '```json\n{"fenced": true}\n```';
-    fetchMock.mockResolvedValue(
-      mockFetchResponse(makeAnthropicResponse(fencedJson)),
-    );
+    const events = makeAnthropicSseEvents(fencedJson);
+    fetchMock.mockResolvedValue(mockStreamingResponse(events));
     const result = await anthropicChat(baseOptions);
     expect(result.content).toEqual({ fenced: true });
   });
+  describe("streaming accumulation", () => {
+    it("accumulates text across multiple content_block_delta events", async () => {
+      const events = [
+        `event: message_start\ndata: ${JSON.stringify({
+          type: "message_start",
+          message: { usage: { input_tokens: 10 } },
+        })}\n\n`,
+        `event: content_block_delta\ndata: ${JSON.stringify({
+          type: "content_block_delta",
+          delta: { text: '{"he' },
+        })}\n\n`,
+        `event: content_block_delta\ndata: ${JSON.stringify({
+          type: "content_block_delta",
+          delta: { text: 'llo":"world"}' },
+        })}\n\n`,
+        `event: message_delta\ndata: ${JSON.stringify({
+          type: "message_delta",
+          usage: { output_tokens: 5 },
+        })}\n\n`,
+        `event: message_stop\ndata: ${JSON.stringify({
+          type: "message_stop",
+        })}\n\n`,
+      ];
+      fetchMock.mockResolvedValue(mockStreamingResponse(events));
+      const result = await anthropicChat(baseOptions);
+      expect(result.content).toEqual({ hello: "world" });
+      expect(result.usage).toEqual({
+        prompt_tokens: 10,
+        completion_tokens: 5,
+        total_tokens: 15,
+      });
+    });
+    it("retries on timeout then succeeds on second attempt", async () => {
+      const events = makeAnthropicSseEvents(JSON.stringify({ ok: true }));
+      fetchMock
+        .mockRejectedValueOnce(
+          new DOMException("signal timed out", "TimeoutError"),
+        )
+        .mockResolvedValueOnce(mockStreamingResponse(events));
+      const result = await anthropicChat({
+        ...baseOptions,
+        maxRetries: 1,
+      });
+      expect(fetchMock).toHaveBeenCalledTimes(2);
+      expect(result.content).toEqual({ ok: true });
+    });
+  });
 });