npm - @polderlabs/bizar-plugin - Versions diffs - 0.8.0 → 0.8.2 - Mend

@polderlabs/bizar-plugin 0.8.0 → 0.8.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

package/README.md +1 -1
package/index.ts +106 -10
package/package.json +2 -2
package/src/background.ts +195 -76
package/src/opencode-runner.ts +47 -19
package/src/reasoning-clean.ts +120 -26
package/tests/attach-handler-bug.test.ts +2 -1
package/tests/background-state.test.ts +1 -1
package/tests/background.test.ts +1 -1
package/tests/reasoning-clean.test.ts +422 -0
package/tests/stall-think.test.ts +6 -6
package/tests/tools/bg-spawn.test.ts +6 -6
package/tests/tools/opencode-runner.test.ts +115 -0
package/tests/update-deadlock.test.ts +1 -0

package/src/reasoning-clean.ts CHANGED Viewed

@@ -37,10 +37,35 @@
  *   is forwarded unchanged — this wrapper must never break a chat.
  */
-const THINK_OPEN = "<think>" as const;
-const THINK_CLOSE = "</think>" as const;
+// All known inline think-style tag names. Each name pairs with itself
+// for the close tag (e.g. `` matches ``, `<thinking>` matches
+// `</thinking>`, etc.). The order does not matter for matching — we
+// search for the earliest occurrence of any of them.
+//
+// The model emits `` (most common) and `<thinking>` (the original
+// dashboard fix targeted this one). `<reasoning>` and `<ant_thinking>`
+// are included for forward compatibility with other providers that use
+// the same anti-slop pattern.
+const THINK_TAG_NAMES = ["think", "thinking", "reasoning", "ant_thinking"] as const;
+type ThinkTagName = (typeof THINK_TAG_NAMES)[number];
-type FetchLike = (input: Parameters<typeof fetch>[0], init?: RequestInit) => Promise<Response>;
+/** Map from open-tag prefix (without `>`) to its matching close tag. */
+const THINK_OPEN_TO_CLOSE: ReadonlyMap<string, string> = new Map(
+  THINK_TAG_NAMES.map((n) => [`<${n}`, `</${n}>`] as const),
+);
+/** All open-tag prefixes — used by the streaming state machine. */
+const ALL_OPENS: readonly string[] = Array.from(THINK_OPEN_TO_CLOSE.keys());
+/** Regex form, used by the non-streaming strip. Backreference matches
+ *  the open-tag name to the close tag. */
+const THINK_TAG_RE = new RegExp(
+  `<(${THINK_TAG_NAMES.join("|")})\\b[^>]*>[\\s\\S]*?</\\1>\\s*`,
+  "gi",
+);
+export type FetchLike = (
+  input: Parameters<typeof fetch>[0],
+  init?: RequestInit,
+) => Promise<Response>;
 export interface ReasoningCleanOptions {
   /** Extra logger for debug lines; defaults to no-op. */
@@ -55,14 +80,16 @@ export interface ReasoningCleanOptions {
 const DEFAULT_PROVIDERS = new Set(["openrouter", "minimax"]);
 /**
- * Strip ``...</think>`` blocks from a plain string. Used for
+ * Strip inline think-style blocks (`<think>…</think>`,
+ * `<thinking>…</thinking>`, `<reasoning>…</reasoning>`,
+ * `<ant_thinking>…</ant_thinking>`) from a plain string. Used for
  * non-streaming responses (or for accumulated streamed content).
  *
- * The trailing whitespace after `</think>` is also consumed so the
+ * The trailing whitespace after the close tag is also consumed so the
  * cleaned content does not start with an extra blank line.
  */
 export function stripInlineThinkBlocks(content: string): string {
-  return content.replace(/<think>[\s\S]*?<\/think>\s*/g, "");
+  return content.replace(THINK_TAG_RE, "");
 }
 /**
@@ -72,9 +99,46 @@ export function stripInlineThinkBlocks(content: string): string {
 class ThinkStripper {
   private state: "NORMAL" | "IN_THINK" = "NORMAL";
   // Buffer of characters that may be the start of a marker but are not
-  // yet complete. Holds at most max(THINK_OPEN.length, THINK_CLOSE.length)
-  // characters from a chunk boundary.
+  // yet complete. Holds at most max(open.length, close.length) chars
+  // from a chunk boundary.
   private pending = "";
+  // The close tag we are looking for while IN_THINK. Set when we find
+  // an open, cleared when we find the matching close. Each open tag
+  // has its own close tag (e.g. `` pairs with ``, not ``).
+  private activeClose: string | null = null;
+  /**
+   * Find the earliest valid open-tag prefix in `input`. A valid match
+   * is `<tagname` followed by `>`, whitespace, or end-of-string — so we
+   * don't accidentally match `` as a substring of `<thinking>`.
+   */
+  private findOpen(input: string): { idx: number; open: string } | null {
+    let best: { idx: number; open: string } | null = null;
+    for (const open of ALL_OPENS) {
+      let from = 0;
+      while (from < input.length) {
+        const idx = input.indexOf(open, from);
+        if (idx === -1) break;
+        const nextPos = idx + open.length;
+        const nextCh = nextPos < input.length ? input.charAt(nextPos) : "";
+        const isBoundary =
+          nextCh === ">" ||
+          nextCh === " " ||
+          nextCh === "\t" ||
+          nextCh === "\n" ||
+          nextCh === "\r" ||
+          nextCh === "";
+        if (isBoundary) {
+          if (best === null || idx < best.idx) {
+            best = { idx, open };
+          }
+          break;
+        }
+        from = idx + 1;
+      }
+    }
+    return best;
+  }
   push(chunk: string): string {
     if (chunk.length === 0) return "";
@@ -84,31 +148,39 @@ class ThinkStripper {
     while (input.length > 0) {
       if (this.state === "NORMAL") {
-        const idx = input.indexOf(THINK_OPEN);
-        if (idx === -1) {
+        const found = this.findOpen(input);
+        if (found === null) {
           // No open marker; might have a partial at the tail.
-          const tail = keepPartialTail(input, [THINK_OPEN]);
+          const tail = keepPartialTail(input, ALL_OPENS);
           out += input.slice(0, input.length - tail.length);
           this.pending = tail;
           input = "";
           break;
         }
-        out += input.slice(0, idx);
-        input = input.slice(idx + THINK_OPEN.length);
+        out += input.slice(0, found.idx);
+        input = input.slice(found.idx + found.open.length);
+        this.activeClose = THINK_OPEN_TO_CLOSE.get(found.open) ?? null;
         this.state = "IN_THINK";
       } else {
         // IN_THINK
-        const idx = input.indexOf(THINK_CLOSE);
+        const closeTag = this.activeClose;
+        if (closeTag === null) {
+          // Defensive: should never happen, but recover gracefully.
+          this.state = "NORMAL";
+          break;
+        }
+        const idx = input.indexOf(closeTag);
         if (idx === -1) {
           // Still inside a think block; might have a partial close at tail.
-          const tail = keepPartialTail(input, [THINK_CLOSE]);
+          const tail = keepPartialTail(input, [closeTag]);
           // Discard everything except the possible partial tail.
           this.pending = tail;
           input = "";
           break;
         }
-        input = input.slice(idx + THINK_CLOSE.length);
+        input = input.slice(idx + closeTag.length);
         this.state = "NORMAL";
+        this.activeClose = null;
         // Drop any whitespace that immediately follows the close tag so
         // the next emitted content does not start with extra blank lines.
         const wsMatch = input.match(/^\s*/);
@@ -126,6 +198,7 @@ class ThinkStripper {
     this.pending = "";
     if (this.state === "IN_THINK") {
       this.state = "NORMAL";
+      this.activeClose = null;
       return tail;
     }
     return tail;
@@ -185,7 +258,7 @@ function cleanNonStreamingJson(text: string): string {
   let touched = false;
   for (const choice of choices) {
     const msg = choice?.message;
-    if (msg && typeof msg.content === "string" && msg.content.includes(THINK_OPEN)) {
+    if (msg && typeof msg.content === "string" && contentHasAnyThinkOpen(msg.content)) {
       const cleaned = stripInlineThinkBlocks(msg.content);
       if (cleaned !== msg.content) {
         msg.content = cleaned;
@@ -196,6 +269,16 @@ function cleanNonStreamingJson(text: string): string {
   return touched ? JSON.stringify(data) : text;
 }
+/** Cheap fast-path check: does `content` contain any of the known
+ *  think-tag open prefixes? Avoids invoking the (more expensive) full
+ *  regex on responses that obviously don't need cleaning. */
+function contentHasAnyThinkOpen(content: string): boolean {
+  for (const open of ALL_OPENS) {
+    if (content.includes(open)) return true;
+  }
+  return false;
+}
 /**
  * Process one SSE event line of the form `data: <payload>`. Mutates the
  * decoded payload in place to strip inline think blocks from
@@ -343,18 +426,29 @@ export function wrapFetchForReasoningCleanup(
       });
     }
     // Non-streaming JSON.
+    let text: string;
     try {
-      const text = await response.text();
-      const cleaned = cleanNonStreamingJson(text);
-      if (cleaned === text) return response;
-      return new Response(cleaned, {
-        status: response.status,
-        statusText: response.statusText,
-        headers: response.headers,
-      });
+      text = await response.text();
     } catch (err) {
-      debug?.(`reasoning-clean: clean failed, passing through: ${(err as Error).message}`);
+      debug?.(`reasoning-clean: read body failed, passing through: ${(err as Error).message}`);
       return response;
     }
+    let cleaned: string;
+    try {
+      cleaned = cleanNonStreamingJson(text);
+    } catch (err) {
+      debug?.(`reasoning-clean: parse failed, passing through original body: ${(err as Error).message}`);
+      // Re-wrap the original text in a fresh Response so the caller
+      // can read the body (we already consumed the original via
+      // .text()). The status/headers are preserved.
+      cleaned = text;
+    }
+    // Always return a fresh Response so the caller can read the body
+    // (the original `response` was consumed by `.text()`).
+    return new Response(cleaned, {
+      status: response.status,
+      statusText: response.statusText,
+      headers: response.headers,
+    });
   };
 }

package/tests/attach-handler-bug.test.ts CHANGED Viewed

@@ -88,7 +88,7 @@ function makeDraft(overrides: Partial<BackgroundState> = {}): BackgroundState {
     agent: "mimir",
     status: "pending",
     startedAt: Date.now(),
-    model: "openrouter/minimax-m3",
+    model: "minimax/minimax-m3",
     promptPreview: "test",
     resultPreview: undefined,
     resultMessageIds: [],
@@ -124,6 +124,7 @@ describe("InstanceManager.add — empty sessionId (BUGFIX v0.5.1)", () => {
         warn: () => {},
         error: () => {},
       } as never,
+      worktree: os.tmpdir(),
       serve: { worktree: os.tmpdir() } as never,
       http: {} as never,
       stream: stream as never,

package/tests/background-state.test.ts CHANGED Viewed

@@ -40,7 +40,7 @@ function makeState(overrides: Partial<BackgroundState> = {}): BackgroundState {
     agent: "mimir",
     status: "running",
     startedAt: Date.now(),
-    model: "openrouter/minimax-m3",
+    model: "minimax/minimax-m3",
     promptPreview: "Do the thing",
     resultPreview: undefined,
     resultMessageIds: [],

package/tests/background.test.ts CHANGED Viewed

@@ -22,7 +22,7 @@ function makeBgState(overrides: Partial<BackgroundState> = {}): BackgroundState
     agent: "mimir",
     status: "pending",
     startedAt: Date.now(),
-    model: "openrouter/minimax-m3",
+    model: "minimax/minimax-m3",
     promptPreview: "Do the thing",
     resultPreview: undefined,
     resultMessageIds: [],

package/tests/reasoning-clean.test.ts ADDED Viewed

@@ -0,0 +1,422 @@
+/**
+ * reasoning-clean unit tests (v0.6.2).
+ *
+ * Covers the inline-think-block stripper used by the global fetch
+ * wrapper. The wrapper exists to defeat the M3-via-OpenRouter pattern
+ * where the model emits its chain-of-thought in BOTH the structured
+ * `reasoning` field AND inline in `message.content`. opencode's
+ * openrouter SDK renders the structured field as a separate "Thought"
+ * panel, but it does NOT strip the inline blocks — so the user sees
+ * the same thinking twice. The wrapper post-processes the response
+ * stream to drop the inline blocks.
+ *
+ * Tests here cover the pure functions in isolation (no opencode, no
+ * fetch, no networking):
+ *   1. `stripInlineThinkBlocks` — regex strip on a full string.
+ *   2. The streaming `ThinkStripper` state machine — verified via the
+ *      `cleanSseLine` public path (since `ThinkStripper` itself is
+ *      private). Cross-chunk boundaries are the interesting case.
+ *   3. `wrapFetchForReasoningCleanup` — provider routing, pass-through
+ *      for non-chat-completions, and the actual JSON / SSE rewriting
+ *      via a fake `fetch`.
+ *
+ * If the file grows beyond ~300 lines, split into multiple files
+ * (one per concern).
+ */
+import { describe, test, expect } from "bun:test";
+import {
+  stripInlineThinkBlocks,
+  wrapFetchForReasoningCleanup,
+  type FetchLike,
+} from "../src/reasoning-clean.js";
+// ---------------------------------------------------------------------------
+// stripInlineThinkBlocks — regex strip
+// ---------------------------------------------------------------------------
+describe("stripInlineThinkBlocks", () => {
+  test("strips <think>…</think>", () => {
+    expect(stripInlineThinkBlocks("<think>secret</think>public")).toBe("public");
+  });
+  test("strips <thinking>…</thinking> (the original dashboard target)", () => {
+    expect(stripInlineThinkBlocks("<thinking>secret</thinking>public")).toBe("public");
+  });
+  test("strips <reasoning>…</reasoning>", () => {
+    expect(stripInlineThinkBlocks("<reasoning>secret</reasoning>public")).toBe("public");
+  });
+  test("strips <ant_thinking>…</ant_thinking>", () => {
+    expect(stripInlineThinkBlocks("<ant_thinking>secret</ant_thinking>public")).toBe("public");
+  });
+  test("consumes trailing whitespace after the close tag", () => {
+    expect(stripInlineThinkBlocks("<think>x</think>\n\n  public")).toBe("public");
+  });
+  test("does not treat <think> as a prefix of <thinking> when the next char is 'i'", () => {
+    // Regression: a naive indexOf("<think") would match the `<think` inside
+    // `<thinking>` and slice past 7 chars, leaving us mid-tag. The
+    // boundary check in the streaming state machine (findOpen) prevents
+    // this for the streaming case; the regex here uses `\b[^>]*>` to
+    // require a proper tag boundary, so it should leave `<thinking>` alone
+    // when the close tag is `</thinking>`.
+    const input = "<thinking>NOT STRIPPED</thinking>after";
+    expect(stripInlineThinkBlocks(input)).toBe("after");
+  });
+  test("handles attributes inside the open tag", () => {
+    expect(stripInlineThinkBlocks('<think> foo="bar" >secret</think>ok')).toBe("ok");
+  });
+  test("handles multiple blocks in one string", () => {
+    expect(
+      stripInlineThinkBlocks(
+        "a<think>x</think>b<thinking>y</thinking>c<reasoning>z</reasoning>d",
+      ),
+    ).toBe("abcd");
+  });
+  test("returns input unchanged when no think tags are present", () => {
+    const input = "just a normal response with no inline thinking";
+    expect(stripInlineThinkBlocks(input)).toBe(input);
+  });
+  test("preserves content that LOOKS like a think tag but is incomplete", () => {
+    // No closing tag → regex should not match (lazy quantifier needs a close).
+    expect(stripInlineThinkBlocks("<think>unfinished")).toBe("<think>unfinished");
+  });
+});
+// ---------------------------------------------------------------------------
+// wrapFetchForReasoningCleanup — provider routing
+// ---------------------------------------------------------------------------
+/** A minimal fake `fetch` that returns a canned `Response` and records
+ *  every URL it was called with. */
+function makeFakeFetch(responder: (url: string) => Response): FetchLike & {
+  calls: string[];
+} {
+  const calls: string[] = [];
+  const fn: FetchLike & { calls: string[] } = Object.assign(
+    async (input: Parameters<typeof fetch>[0], _init?: RequestInit) => {
+      const url =
+        typeof input === "string"
+          ? input
+          : input instanceof URL
+            ? input.toString()
+            : (input as Request).url;
+      calls.push(url);
+      return responder(url);
+    },
+    { calls },
+  );
+  return fn;
+}
+describe("wrapFetchForReasoningCleanup — provider routing", () => {
+  test("passes through non-chat-completions requests", async () => {
+    const fake = makeFakeFetch(
+      (url) =>
+        new Response("not a chat completion", { status: 200, headers: { "content-type": "text/plain" } }),
+    );
+    const wrapped = wrapFetchForReasoningCleanup(fake, {
+      providers: ["openrouter"],
+    });
+    const res = await wrapped("https://example.com/some/other/endpoint");
+    expect(await res.text()).toBe("not a chat completion");
+  });
+  test("passes through chat-completions to a non-targeted provider", async () => {
+    const fake = makeFakeFetch(
+      () =>
+        new Response('{"choices":[{"message":{"content":"<think>x</think>hi"}}]}', {
+          status: 200,
+          headers: { "content-type": "application/json" },
+        }),
+    );
+    const wrapped = wrapFetchForReasoningCleanup(fake, {
+      providers: ["openrouter"],
+    });
+    // Anthropic endpoint — not in the providers list, so no cleaning.
+    const res = await wrapped("https://api.anthropic.com/v1/chat/completions", { method: "POST" });
+    const body = await res.text();
+    expect(body).toContain("<think>x</think>"); // unchanged
+  });
+  test("intercepts chat-completions to the targeted provider (openrouter)", async () => {
+    const fake = makeFakeFetch(
+      () =>
+        new Response('{"choices":[{"message":{"content":"<think>x</think>hi"}}]}', {
+          status: 200,
+          headers: { "content-type": "application/json" },
+        }),
+    );
+    const wrapped = wrapFetchForReasoningCleanup(fake, {
+      providers: ["openrouter"],
+    });
+    const res = await wrapped(
+      "https://openrouter.ai/api/v1/chat/completions",
+      { method: "POST" },
+    );
+    const body = await res.text();
+    expect(body).not.toContain("<think>");
+    expect(body).toContain('"content":"hi"');
+  });
+  test("intercepts chat-completions to the targeted provider (minimax)", async () => {
+    const fake = makeFakeFetch(
+      () =>
+        new Response('{"choices":[{"message":{"content":"<thinking>x</thinking>hi"}}]}', {
+          status: 200,
+          headers: { "content-type": "application/json" },
+        }),
+    );
+    const wrapped = wrapFetchForReasoningCleanup(fake, {
+      providers: ["minimax"],
+    });
+    const res = await wrapped("https://minimax.io/v1/chat/completions", {
+      method: "POST",
+    });
+    const body = await res.text();
+    expect(body).not.toContain("<thinking>");
+    expect(body).toContain('"content":"hi"');
+  });
+});
+// ---------------------------------------------------------------------------
+// wrapFetchForReasoningCleanup — non-streaming JSON rewriting
+// ---------------------------------------------------------------------------
+describe("wrapFetchForReasoningCleanup — non-streaming JSON", () => {
+  test("strips think blocks from a single choice", async () => {
+    const fake = makeFakeFetch(
+      () =>
+        new Response(
+          JSON.stringify({
+            choices: [
+              {
+                message: {
+                  role: "assistant",
+                  content: "<think>step 1\nstep 2</think>The answer is 42.",
+                },
+              },
+            ],
+          }),
+          { status: 200, headers: { "content-type": "application/json" } },
+        ),
+    );
+    const wrapped = wrapFetchForReasoningCleanup(fake);
+    const res = await wrapped(
+      "https://openrouter.ai/api/v1/chat/completions",
+      { method: "POST" },
+    );
+    const body = JSON.parse(await res.text());
+    expect(body.choices[0].message.content).toBe("The answer is 42.");
+  });
+  test("preserves structured reasoning field while stripping inline blocks", async () => {
+    const fake = makeFakeFetch(
+      () =>
+        new Response(
+          JSON.stringify({
+            choices: [
+              {
+                message: {
+                  role: "assistant",
+                  reasoning: "the structured chain of thought",
+                  reasoning_details: [{ type: "reasoning.text", text: "the structured chain of thought" }],
+                  content: "<think>the same text inline</think>final answer",
+                },
+              },
+            ],
+          }),
+          { status: 200, headers: { "content-type": "application/json" } },
+        ),
+    );
+    const wrapped = wrapFetchForReasoningCleanup(fake);
+    const res = await wrapped(
+      "https://openrouter.ai/api/v1/chat/completions",
+      { method: "POST" },
+    );
+    const body = JSON.parse(await res.text());
+    expect(body.choices[0].message.reasoning).toBe("the structured chain of thought");
+    expect(body.choices[0].message.reasoning_details).toEqual([
+      { type: "reasoning.text", text: "the structured chain of thought" },
+    ]);
+    expect(body.choices[0].message.content).toBe("final answer");
+  });
+  test("returns the original response untouched when no think blocks are present", async () => {
+    const original = JSON.stringify({
+      choices: [{ message: { role: "assistant", content: "clean response" } }],
+    });
+    const fake = makeFakeFetch(
+      () =>
+        new Response(original, {
+          status: 200,
+          headers: { "content-type": "application/json" },
+        }),
+    );
+    const wrapped = wrapFetchForReasoningCleanup(fake);
+    const res = await wrapped(
+      "https://openrouter.ai/api/v1/chat/completions",
+      { method: "POST" },
+    );
+    expect(await res.text()).toBe(original);
+  });
+  test("forwards the response unchanged on JSON parse error (safety net)", async () => {
+    const fake = makeFakeFetch(
+      () =>
+        new Response("not json {{{", {
+          status: 200,
+          headers: { "content-type": "application/json" },
+        }),
+    );
+    const wrapped = wrapFetchForReasoningCleanup(fake);
+    const res = await wrapped(
+      "https://openrouter.ai/api/v1/chat/completions",
+      { method: "POST" },
+    );
+    expect(await res.text()).toBe("not json {{{");
+  });
+});
+// ---------------------------------------------------------------------------
+// wrapFetchForReasoningCleanup — SSE streaming
+// ---------------------------------------------------------------------------
+/** Build an SSE Response body from a list of event payloads (without
+ *  the `data: ` prefix — the prefix is added here for convenience). */
+function sseResponse(events: string[], finalChoiceIndex = 0): Response {
+  const lines: string[] = [];
+  for (const payload of events) {
+    lines.push(`data: ${payload}`);
+  }
+  // Add a finish_reason on the last event so the stripper flushes.
+  lines.push(
+    `data: ${JSON.stringify({
+      choices: [{ index: finalChoiceIndex, delta: {}, finish_reason: "stop" }],
+    })}`,
+  );
+  lines.push("data: [DONE]");
+  const body = lines.join("\n\n") + "\n\n";
+  return new Response(body, {
+    status: 200,
+    headers: { "content-type": "text/event-stream" },
+  });
+}
+/** Read the full text of a (possibly transformed) response body. */
+async function readBodyText(res: Response): Promise<string> {
+  if (!res.body) return "";
+  const reader = res.body.getReader();
+  const decoder = new TextDecoder();
+  let out = "";
+  while (true) {
+    const { value, done } = await reader.read();
+    if (done) break;
+    if (value) out += decoder.decode(value, { stream: true });
+  }
+  out += decoder.decode();
+  return out;
+}
+describe("wrapFetchForReasoningCleanup — SSE streaming", () => {
+  test("strips a complete think block split across multiple deltas", async () => {
+    // The model emits a `<think>...</think>` block across many deltas,
+    // then a clean final answer. After cleaning, only the final answer
+    // should remain in the SSE stream.
+    const fake = makeFakeFetch(() =>
+      sseResponse([
+        JSON.stringify({ choices: [{ index: 0, delta: { content: "<think>" } }] }),
+        JSON.stringify({ choices: [{ index: 0, delta: { content: "step 1. " } }] }),
+        JSON.stringify({ choices: [{ index: 0, delta: { content: "step 2. " } }] }),
+        JSON.stringify({ choices: [{ index: 0, delta: { content: "</think>" } }] }),
+        JSON.stringify({ choices: [{ index: 0, delta: { content: "final answer" } }] }),
+      ]),
+    );
+    const wrapped = wrapFetchForReasoningCleanup(fake);
+    const res = await wrapped(
+      "https://openrouter.ai/api/v1/chat/completions",
+      { method: "POST" },
+    );
+    const body = await readBodyText(res);
+    expect(body).not.toContain("<think>");
+    expect(body).not.toContain("</think>");
+    expect(body).not.toContain("step 1.");
+    expect(body).not.toContain("step 2.");
+    expect(body).toContain("final answer");
+  });
+  test("strips a think block split ACROSS byte-level chunk boundaries", async () => {
+    // Simulate a real network: the SSE body is delivered as a stream
+    // of arbitrary byte chunks. The `<think>` open tag itself straddles
+    // two chunks, so the `streamTransformer` must buffer the partial
+    // first event until the `\n\n` boundary arrives in the second
+    // chunk, then run the full event through `cleanSseLine` and
+    // strip the think block.
+    const sseBody =
+      `data: {"choices":[{"index":0,"delta":{"content":"<th` +
+      `ink>step A. step B.</think>The answer is 7."}}]}\n\n` +
+      `data: {"choices":[{"index":0,"delta":{"content":""},"finish_reason":"stop"}]}\n\n` +
+      `data: [DONE]\n\n`;
+    // Split the body at byte offset 80 (which lands inside the `<th`
+    // open tag). The first chunk ends with the open tag half-written;
+    // the second chunk starts with the rest of the open tag and
+    // includes the `\n\n` boundary.
+    const splitAt = 80;
+    const chunk1 = sseBody.slice(0, splitAt);
+    const chunk2 = sseBody.slice(splitAt);
+    const fake = makeFakeFetch(
+      () =>
+        new Response(
+          new ReadableStream({
+            start(controller) {
+              controller.enqueue(new TextEncoder().encode(chunk1));
+              controller.enqueue(new TextEncoder().encode(chunk2));
+              controller.close();
+            },
+          }),
+          {
+            status: 200,
+            headers: { "content-type": "text/event-stream" },
+          },
+        ),
+    );
+    const wrapped = wrapFetchForReasoningCleanup(fake);
+    const res = await wrapped(
+      "https://openrouter.ai/api/v1/chat/completions",
+      { method: "POST" },
+    );
+    const body = await readBodyText(res);
+    expect(body).not.toContain("<think>");
+    expect(body).not.toContain("step A.");
+    expect(body).not.toContain("step B.");
+    expect(body).toContain("The answer is 7.");
+  });
+  test("strips <thinking> (not just <think>) in streaming mode", async () => {
+    const fake = makeFakeFetch(() =>
+      sseResponse([
+        JSON.stringify({ choices: [{ index: 0, delta: { content: "<thinking>step</thinking>" } }] }),
+        JSON.stringify({ choices: [{ index: 0, delta: { content: "after" } }] }),
+      ]),
+    );
+    const wrapped = wrapFetchForReasoningCleanup(fake);
+    const res = await wrapped(
+      "https://openrouter.ai/api/v1/chat/completions",
+      { method: "POST" },
+    );
+    const body = await readBodyText(res);
+    expect(body).not.toContain("<thinking>");
+    expect(body).not.toContain("</thinking>");
+    expect(body).toContain("after");
+  });
+});