npm - @polderlabs/bizar-plugin - Versions diffs - 0.8.1 → 0.8.3 - Mend

@polderlabs/bizar-plugin 0.8.1 → 0.8.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/index.ts +105 -10
package/package.json +2 -2
package/src/reasoning-clean.ts +120 -26
package/tests/config.test.ts +3 -3
package/tests/reasoning-clean.test.ts +422 -0

package/index.ts CHANGED Viewed

@@ -126,7 +126,11 @@ import { SettingsStore } from "./src/settings.js";
 import { parseSlashCommand } from "./src/commands.js";
 import { createPlanActionTool } from "./src/tools/plan-action.js";
 import { createWaitForFeedbackTool } from "./src/tools/wait-for-feedback.js";
-import { stripInlineThinkBlocks } from "./src/reasoning-clean.js";
+import {
+  stripInlineThinkBlocks,
+  wrapFetchForReasoningCleanup,
+  type FetchLike,
+} from "./src/reasoning-clean.js";
 // v0.5.0 — visual plan wiring: side-effect executor + plan-fs
 import { executeSideEffect, type ExecuteOptions } from "./src/commands-impl.js";
@@ -223,6 +227,45 @@ let streamHandle: EventStream | null = null;
 let loggerHandle: Logger | null = null;
 const signalHandlerRefs = new Map<"SIGTERM" | "SIGINT", () => void>();
+/** v0.6.2 — Set to `true` after the first time we wrap `globalThis.fetch`
+ *  with the reasoning-clean wrapper. Subsequent calls in the same process
+ *  are no-ops, so a plugin reload cannot double-wrap. */
+let fetchWrapInstalled = false;
+/**
+ * v0.6.2 — Reasoning directive. Install the reasoning-clean fetch wrap
+ * on `globalThis.fetch`. The wrap strips inline ``...</think>` (and the
+ * other recognised variants — see `src/reasoning-clean.ts`) from
+ * chat-completions responses targeting `openrouter`/`minimax`, while
+ * leaving the structured `reasoning` / `reasoning_details` fields
+ * intact.
+ *
+ * This is the workaround for the fact that opencode 1.17.9 does not
+ * fire the `config` hook in this runtime (the SDK type declares it, but
+ * the host never calls it). By the time the host would call `config`,
+ * the plugin would already be past init — and the AI SDK is already
+ * using the unwrapped fetch. So we wrap fetch once, globally, as the
+ * plugin initialises. Subsequent reloads in the same process are a
+ * no-op thanks to the `fetchWrapInstalled` flag.
+ */
+function installFetchReasoningCleanup(logger: Logger): void {
+  if (fetchWrapInstalled) return;
+  const original = globalThis.fetch;
+  if (typeof original !== "function") {
+    logger.warn("bizar: globalThis.fetch is not a function; reasoning-clean wrap skipped");
+    return;
+  }
+  const wrapped = wrapFetchForReasoningCleanup(
+    original.bind(globalThis) as FetchLike,
+    {
+      debug: (msg) => logger.debug(msg),
+    },
+  );
+  globalThis.fetch = wrapped as typeof globalThis.fetch;
+  fetchWrapInstalled = true;
+  logger.info("bizar: reasoning-clean fetch wrap installed (openrouter/minimax)");
+}
 // --- Plugin entry point ---------------------------------------------------
 /**
@@ -319,6 +362,16 @@ async function init(
     logger.warn(`bizar: ${note}`);
   }
+  // v0.6.2 — Reasoning directive. Wrap globalThis.fetch so that inline
+  // ``...</think>` blocks in chat completions responses
+  // from openrouter/minimax providers are stripped from `content` even
+  // when the model also emits structured reasoning. The `config` hook
+  // in the opencode plugin API is declared in the SDK type but does NOT
+  // fire in 1.17.9 (confirmed via debug probe 2026-06-24), so we wrap
+  // fetch globally as a fallback. Idempotent — only the first call in
+  // this process actually wraps.
+  installFetchReasoningCleanup(logger);
   const stateStore = new StateStore(options.stateDir, logger);
   const settingsStore = new SettingsStore(options.stateDir, logger);
   const logWriter = new LogWriter(options.logDir, options.logRotationBytes, logger);
@@ -758,22 +811,30 @@ function buildHooks(ctx: RuntimeContext, bg: BgDeps): Hooks {
   // sees the same thinking text twice — once in the proper panel and
   // again as visible message text below it.
   //
-  // The opencode plugin API in this version does NOT trigger a
-  // `config` hook (the `wrap-fetch` workaround from v0.6.1 is dead
-  // code in current builds), so we cannot post-process the response
-  // stream. The only working hooks that can help are:
+  // Defence in depth (three layers, in order of impact):
+  //
+  //   1. `installFetchReasoningCleanup` (init-time) — wraps
+  //      `globalThis.fetch` with `wrapFetchForReasoningCleanup` from
+  //      `src/reasoning-clean.ts`. The wrap strips the inline ``
+  //      blocks from chat-completions responses to `openrouter` /
+  //      `minimax` while leaving the structured reasoning fields
+  //      alone. This is the only layer that fixes the CURRENT
+  //      response in-flight. The opencode plugin API in 1.17.9 declares
+  //      a `config` hook in the SDK type but does not actually fire it
+  //      (confirmed via debug probe 2026-06-24), so we wrap fetch
+  //      globally instead.
   //
-  //   1. `experimental.chat.system.transform` — runs every turn; we
+  //   2. `experimental.chat.system.transform` — runs every turn; we
   //      push a directive telling the model to put thinking in the
   //      structured field only.
-  //   2. `experimental.chat.messages.transform` — runs before each
+  //
+  //   3. `experimental.chat.messages.transform` — runs before each
   //      request; we strip `` blocks from previous assistant
   //      messages so the model sees clean history and is less likely
   //      to keep emitting inline ``.
   //
-  // Neither fixes the CURRENT response (the model has already
-  // returned), but together they strongly reduce — and in many cases
-  // eliminate — the duplication on subsequent turns.
+  // Layers 2 and 3 reduce the frequency of the leak; layer 1 strips
+  // any leak that still slips through.
   const REASONING_DIRECTIVE_MARKER = "BIZAR_REASONING_DIRECTIVE_v0.6.2";
   const REASONING_DIRECTIVE = [
     REASONING_DIRECTIVE_MARKER,
@@ -904,6 +965,40 @@ function buildHooks(ctx: RuntimeContext, bg: BgDeps): Hooks {
       }
     },
+    // v0.6.2 — Reasoning directive. Strip inline `` blocks
+    // from the FINAL text of each completed assistant text part. This is
+    // the post-processing layer that fixes the CURRENT response in cases
+    // where the model emits its chain-of-thought in BOTH the structured
+    // `reasoning` field AND inline in `content` (the M3-via-OpenRouter
+    // leak). opencode's openrouter SDK does not strip the inline blocks,
+    // so we do it here at the boundary between the SDK output and the
+    // UI rendering. The `config` hook that the SDK type declares for
+    // fetch-level wrapping does NOT fire in 1.17.9, and the AI SDK
+    // uses `Bun.fetch` (read-only) rather than `globalThis.fetch`, so a
+    // fetch wrap is a no-op in this runtime. `experimental.text.complete`
+    // is the working alternative — it runs on every completed text
+    // part, with mutable `output.text`. Idempotent: stripping already-
+    // cleaned text is a no-op.
+    "experimental.text.complete": async (input, output) => {
+      try {
+        const original = output.text;
+        if (typeof original !== "string" || !original.includes("<think>")) return;
+        const cleaned = stripInlineThinkBlocks(original);
+        if (cleaned !== original) {
+          output.text = cleaned;
+          ctx.logger.debug(
+            `bizar: text.complete stripped think blocks (session=${input.sessionID} message=${input.messageID} part=${input.partID} ${original.length}→${cleaned.length}B)`,
+          );
+        }
+      } catch (err) {
+        ctx.logger.warn(
+          `bizar: text.complete failed (passing through): ${
+            err instanceof Error ? err.message : String(err)
+          }`,
+        );
+      }
+    },
     // §3.1, §4.5.1 — event: track session boundaries. We do NOT create
     // the state file here (canonical lifecycle: file is created at the
     // `chat.message` seed, per spec §4.5.1).

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@polderlabs/bizar-plugin",
-  "version": "0.8.1",
+  "version": "0.8.3",
   "description": "Bizar opencode plugin — loop detection, status reporting, handoff signal, background agents, and slash commands + visual plan flow for subagent activity",
   "type": "module",
   "main": "./index.ts",
@@ -11,7 +11,7 @@
   "scripts": {
     "check:imports": "bash scripts/check-forbidden-imports.sh",
     "typecheck": "tsc --noEmit",
-    "test": "npm run check:imports && bun test tests/loop.test.ts tests/block.test.ts tests/stall-think.test.ts tests/tools/bg-get-comments.test.ts tests/tools/opencode-runner.test.ts tests/settings.test.ts tests/commands.test.ts tests/commands-impl.test.ts tests/tools/plan-action.test.ts tests/tools/wait-for-feedback.test.ts"
+    "test": "npm run check:imports && bun test tests/loop.test.ts tests/block.test.ts tests/stall-think.test.ts tests/tools/bg-get-comments.test.ts tests/tools/opencode-runner.test.ts tests/settings.test.ts tests/commands.test.ts tests/commands-impl.test.ts tests/tools/plan-action.test.ts tests/tools/wait-for-feedback.test.ts tests/reasoning-clean.test.ts"
   },
   "keywords": [
     "opencode",

package/src/reasoning-clean.ts CHANGED Viewed

@@ -37,10 +37,35 @@
  *   is forwarded unchanged — this wrapper must never break a chat.
  */
-const THINK_OPEN = "<think>" as const;
-const THINK_CLOSE = "</think>" as const;
+// All known inline think-style tag names. Each name pairs with itself
+// for the close tag (e.g. `` matches ``, `<thinking>` matches
+// `</thinking>`, etc.). The order does not matter for matching — we
+// search for the earliest occurrence of any of them.
+//
+// The model emits `` (most common) and `<thinking>` (the original
+// dashboard fix targeted this one). `<reasoning>` and `<ant_thinking>`
+// are included for forward compatibility with other providers that use
+// the same anti-slop pattern.
+const THINK_TAG_NAMES = ["think", "thinking", "reasoning", "ant_thinking"] as const;
+type ThinkTagName = (typeof THINK_TAG_NAMES)[number];
-type FetchLike = (input: Parameters<typeof fetch>[0], init?: RequestInit) => Promise<Response>;
+/** Map from open-tag prefix (without `>`) to its matching close tag. */
+const THINK_OPEN_TO_CLOSE: ReadonlyMap<string, string> = new Map(
+  THINK_TAG_NAMES.map((n) => [`<${n}`, `</${n}>`] as const),
+);
+/** All open-tag prefixes — used by the streaming state machine. */
+const ALL_OPENS: readonly string[] = Array.from(THINK_OPEN_TO_CLOSE.keys());
+/** Regex form, used by the non-streaming strip. Backreference matches
+ *  the open-tag name to the close tag. */
+const THINK_TAG_RE = new RegExp(
+  `<(${THINK_TAG_NAMES.join("|")})\\b[^>]*>[\\s\\S]*?</\\1>\\s*`,
+  "gi",
+);
+export type FetchLike = (
+  input: Parameters<typeof fetch>[0],
+  init?: RequestInit,
+) => Promise<Response>;
 export interface ReasoningCleanOptions {
   /** Extra logger for debug lines; defaults to no-op. */
@@ -55,14 +80,16 @@ export interface ReasoningCleanOptions {
 const DEFAULT_PROVIDERS = new Set(["openrouter", "minimax"]);
 /**
- * Strip ``...</think>`` blocks from a plain string. Used for
+ * Strip inline think-style blocks (`<think>…</think>`,
+ * `<thinking>…</thinking>`, `<reasoning>…</reasoning>`,
+ * `<ant_thinking>…</ant_thinking>`) from a plain string. Used for
  * non-streaming responses (or for accumulated streamed content).
  *
- * The trailing whitespace after `</think>` is also consumed so the
+ * The trailing whitespace after the close tag is also consumed so the
  * cleaned content does not start with an extra blank line.
  */
 export function stripInlineThinkBlocks(content: string): string {
-  return content.replace(/<think>[\s\S]*?<\/think>\s*/g, "");
+  return content.replace(THINK_TAG_RE, "");
 }
 /**
@@ -72,9 +99,46 @@ export function stripInlineThinkBlocks(content: string): string {
 class ThinkStripper {
   private state: "NORMAL" | "IN_THINK" = "NORMAL";
   // Buffer of characters that may be the start of a marker but are not
-  // yet complete. Holds at most max(THINK_OPEN.length, THINK_CLOSE.length)
-  // characters from a chunk boundary.
+  // yet complete. Holds at most max(open.length, close.length) chars
+  // from a chunk boundary.
   private pending = "";
+  // The close tag we are looking for while IN_THINK. Set when we find
+  // an open, cleared when we find the matching close. Each open tag
+  // has its own close tag (e.g. `` pairs with ``, not ``).
+  private activeClose: string | null = null;
+  /**
+   * Find the earliest valid open-tag prefix in `input`. A valid match
+   * is `<tagname` followed by `>`, whitespace, or end-of-string — so we
+   * don't accidentally match `` as a substring of `<thinking>`.
+   */
+  private findOpen(input: string): { idx: number; open: string } | null {
+    let best: { idx: number; open: string } | null = null;
+    for (const open of ALL_OPENS) {
+      let from = 0;
+      while (from < input.length) {
+        const idx = input.indexOf(open, from);
+        if (idx === -1) break;
+        const nextPos = idx + open.length;
+        const nextCh = nextPos < input.length ? input.charAt(nextPos) : "";
+        const isBoundary =
+          nextCh === ">" ||
+          nextCh === " " ||
+          nextCh === "\t" ||
+          nextCh === "\n" ||
+          nextCh === "\r" ||
+          nextCh === "";
+        if (isBoundary) {
+          if (best === null || idx < best.idx) {
+            best = { idx, open };
+          }
+          break;
+        }
+        from = idx + 1;
+      }
+    }
+    return best;
+  }
   push(chunk: string): string {
     if (chunk.length === 0) return "";
@@ -84,31 +148,39 @@ class ThinkStripper {
     while (input.length > 0) {
       if (this.state === "NORMAL") {
-        const idx = input.indexOf(THINK_OPEN);
-        if (idx === -1) {
+        const found = this.findOpen(input);
+        if (found === null) {
           // No open marker; might have a partial at the tail.
-          const tail = keepPartialTail(input, [THINK_OPEN]);
+          const tail = keepPartialTail(input, ALL_OPENS);
           out += input.slice(0, input.length - tail.length);
           this.pending = tail;
           input = "";
           break;
         }
-        out += input.slice(0, idx);
-        input = input.slice(idx + THINK_OPEN.length);
+        out += input.slice(0, found.idx);
+        input = input.slice(found.idx + found.open.length);
+        this.activeClose = THINK_OPEN_TO_CLOSE.get(found.open) ?? null;
         this.state = "IN_THINK";
       } else {
         // IN_THINK
-        const idx = input.indexOf(THINK_CLOSE);
+        const closeTag = this.activeClose;
+        if (closeTag === null) {
+          // Defensive: should never happen, but recover gracefully.
+          this.state = "NORMAL";
+          break;
+        }
+        const idx = input.indexOf(closeTag);
         if (idx === -1) {
           // Still inside a think block; might have a partial close at tail.
-          const tail = keepPartialTail(input, [THINK_CLOSE]);
+          const tail = keepPartialTail(input, [closeTag]);
           // Discard everything except the possible partial tail.
           this.pending = tail;
           input = "";
           break;
         }
-        input = input.slice(idx + THINK_CLOSE.length);
+        input = input.slice(idx + closeTag.length);
         this.state = "NORMAL";
+        this.activeClose = null;
         // Drop any whitespace that immediately follows the close tag so
         // the next emitted content does not start with extra blank lines.
         const wsMatch = input.match(/^\s*/);
@@ -126,6 +198,7 @@ class ThinkStripper {
     this.pending = "";
     if (this.state === "IN_THINK") {
       this.state = "NORMAL";
+      this.activeClose = null;
       return tail;
     }
     return tail;
@@ -185,7 +258,7 @@ function cleanNonStreamingJson(text: string): string {
   let touched = false;
   for (const choice of choices) {
     const msg = choice?.message;
-    if (msg && typeof msg.content === "string" && msg.content.includes(THINK_OPEN)) {
+    if (msg && typeof msg.content === "string" && contentHasAnyThinkOpen(msg.content)) {
       const cleaned = stripInlineThinkBlocks(msg.content);
       if (cleaned !== msg.content) {
         msg.content = cleaned;
@@ -196,6 +269,16 @@ function cleanNonStreamingJson(text: string): string {
   return touched ? JSON.stringify(data) : text;
 }
+/** Cheap fast-path check: does `content` contain any of the known
+ *  think-tag open prefixes? Avoids invoking the (more expensive) full
+ *  regex on responses that obviously don't need cleaning. */
+function contentHasAnyThinkOpen(content: string): boolean {
+  for (const open of ALL_OPENS) {
+    if (content.includes(open)) return true;
+  }
+  return false;
+}
 /**
  * Process one SSE event line of the form `data: <payload>`. Mutates the
  * decoded payload in place to strip inline think blocks from
@@ -343,18 +426,29 @@ export function wrapFetchForReasoningCleanup(
       });
     }
     // Non-streaming JSON.
+    let text: string;
     try {
-      const text = await response.text();
-      const cleaned = cleanNonStreamingJson(text);
-      if (cleaned === text) return response;
-      return new Response(cleaned, {
-        status: response.status,
-        statusText: response.statusText,
-        headers: response.headers,
-      });
+      text = await response.text();
     } catch (err) {
-      debug?.(`reasoning-clean: clean failed, passing through: ${(err as Error).message}`);
+      debug?.(`reasoning-clean: read body failed, passing through: ${(err as Error).message}`);
       return response;
     }
+    let cleaned: string;
+    try {
+      cleaned = cleanNonStreamingJson(text);
+    } catch (err) {
+      debug?.(`reasoning-clean: parse failed, passing through original body: ${(err as Error).message}`);
+      // Re-wrap the original text in a fresh Response so the caller
+      // can read the body (we already consumed the original via
+      // .text()). The status/headers are preserved.
+      cleaned = text;
+    }
+    // Always return a fresh Response so the caller can read the body
+    // (the original `response` was consumed by `.text()`).
+    return new Response(cleaned, {
+      status: response.status,
+      statusText: response.statusText,
+      headers: response.headers,
+    });
   };
 }

package/tests/config.test.ts CHANGED Viewed

@@ -5,7 +5,7 @@
  *   1. Every `bizar_*` tool registered in `plugins/bizar/index.ts`
  *      is also present in `config/opencode.json` `tools: { ... }`.
  *   2. No `bizarre_*` (double-r) typos remain in `plugins/bizar/src/`.
- *   3. `plugins/bizar/package.json` version is `0.5.0`.
+ *   3. `plugins/bizar/package.json` version is `0.8.3`.
  */
 import { describe, test, expect } from "bun:test";
@@ -83,9 +83,9 @@ describe("config drift detection", () => {
     ).toEqual([]);
   });
-  test("plugins/bizar/package.json version is 0.8.0", () => {
+  test("plugins/bizar/package.json version is 0.8.3", () => {
     const pkg = JSON.parse(readFileSync(PKG_JSON, "utf-8")) as { version?: string };
-    expect(pkg.version).toBe("0.8.0");
+    expect(pkg.version).toBe("0.8.3");
   });
 });

package/tests/reasoning-clean.test.ts ADDED Viewed

@@ -0,0 +1,422 @@
+/**
+ * reasoning-clean unit tests (v0.6.2).
+ *
+ * Covers the inline-think-block stripper used by the global fetch
+ * wrapper. The wrapper exists to defeat the M3-via-OpenRouter pattern
+ * where the model emits its chain-of-thought in BOTH the structured
+ * `reasoning` field AND inline in `message.content`. opencode's
+ * openrouter SDK renders the structured field as a separate "Thought"
+ * panel, but it does NOT strip the inline blocks — so the user sees
+ * the same thinking twice. The wrapper post-processes the response
+ * stream to drop the inline blocks.
+ *
+ * Tests here cover the pure functions in isolation (no opencode, no
+ * fetch, no networking):
+ *   1. `stripInlineThinkBlocks` — regex strip on a full string.
+ *   2. The streaming `ThinkStripper` state machine — verified via the
+ *      `cleanSseLine` public path (since `ThinkStripper` itself is
+ *      private). Cross-chunk boundaries are the interesting case.
+ *   3. `wrapFetchForReasoningCleanup` — provider routing, pass-through
+ *      for non-chat-completions, and the actual JSON / SSE rewriting
+ *      via a fake `fetch`.
+ *
+ * If the file grows beyond ~300 lines, split into multiple files
+ * (one per concern).
+ */
+import { describe, test, expect } from "bun:test";
+import {
+  stripInlineThinkBlocks,
+  wrapFetchForReasoningCleanup,
+  type FetchLike,
+} from "../src/reasoning-clean.js";
+// ---------------------------------------------------------------------------
+// stripInlineThinkBlocks — regex strip
+// ---------------------------------------------------------------------------
+describe("stripInlineThinkBlocks", () => {
+  test("strips <think>…</think>", () => {
+    expect(stripInlineThinkBlocks("<think>secret</think>public")).toBe("public");
+  });
+  test("strips <thinking>…</thinking> (the original dashboard target)", () => {
+    expect(stripInlineThinkBlocks("<thinking>secret</thinking>public")).toBe("public");
+  });
+  test("strips <reasoning>…</reasoning>", () => {
+    expect(stripInlineThinkBlocks("<reasoning>secret</reasoning>public")).toBe("public");
+  });
+  test("strips <ant_thinking>…</ant_thinking>", () => {
+    expect(stripInlineThinkBlocks("<ant_thinking>secret</ant_thinking>public")).toBe("public");
+  });
+  test("consumes trailing whitespace after the close tag", () => {
+    expect(stripInlineThinkBlocks("<think>x</think>\n\n  public")).toBe("public");
+  });
+  test("does not treat <think> as a prefix of <thinking> when the next char is 'i'", () => {
+    // Regression: a naive indexOf("<think") would match the `<think` inside
+    // `<thinking>` and slice past 7 chars, leaving us mid-tag. The
+    // boundary check in the streaming state machine (findOpen) prevents
+    // this for the streaming case; the regex here uses `\b[^>]*>` to
+    // require a proper tag boundary, so it should leave `<thinking>` alone
+    // when the close tag is `</thinking>`.
+    const input = "<thinking>NOT STRIPPED</thinking>after";
+    expect(stripInlineThinkBlocks(input)).toBe("after");
+  });
+  test("handles attributes inside the open tag", () => {
+    expect(stripInlineThinkBlocks('<think> foo="bar" >secret</think>ok')).toBe("ok");
+  });
+  test("handles multiple blocks in one string", () => {
+    expect(
+      stripInlineThinkBlocks(
+        "a<think>x</think>b<thinking>y</thinking>c<reasoning>z</reasoning>d",
+      ),
+    ).toBe("abcd");
+  });
+  test("returns input unchanged when no think tags are present", () => {
+    const input = "just a normal response with no inline thinking";
+    expect(stripInlineThinkBlocks(input)).toBe(input);
+  });
+  test("preserves content that LOOKS like a think tag but is incomplete", () => {
+    // No closing tag → regex should not match (lazy quantifier needs a close).
+    expect(stripInlineThinkBlocks("<think>unfinished")).toBe("<think>unfinished");
+  });
+});
+// ---------------------------------------------------------------------------
+// wrapFetchForReasoningCleanup — provider routing
+// ---------------------------------------------------------------------------
+/** A minimal fake `fetch` that returns a canned `Response` and records
+ *  every URL it was called with. */
+function makeFakeFetch(responder: (url: string) => Response): FetchLike & {
+  calls: string[];
+} {
+  const calls: string[] = [];
+  const fn: FetchLike & { calls: string[] } = Object.assign(
+    async (input: Parameters<typeof fetch>[0], _init?: RequestInit) => {
+      const url =
+        typeof input === "string"
+          ? input
+          : input instanceof URL
+            ? input.toString()
+            : (input as Request).url;
+      calls.push(url);
+      return responder(url);
+    },
+    { calls },
+  );
+  return fn;
+}
+describe("wrapFetchForReasoningCleanup — provider routing", () => {
+  test("passes through non-chat-completions requests", async () => {
+    const fake = makeFakeFetch(
+      (url) =>
+        new Response("not a chat completion", { status: 200, headers: { "content-type": "text/plain" } }),
+    );
+    const wrapped = wrapFetchForReasoningCleanup(fake, {
+      providers: ["openrouter"],
+    });
+    const res = await wrapped("https://example.com/some/other/endpoint");
+    expect(await res.text()).toBe("not a chat completion");
+  });
+  test("passes through chat-completions to a non-targeted provider", async () => {
+    const fake = makeFakeFetch(
+      () =>
+        new Response('{"choices":[{"message":{"content":"<think>x</think>hi"}}]}', {
+          status: 200,
+          headers: { "content-type": "application/json" },
+        }),
+    );
+    const wrapped = wrapFetchForReasoningCleanup(fake, {
+      providers: ["openrouter"],
+    });
+    // Anthropic endpoint — not in the providers list, so no cleaning.
+    const res = await wrapped("https://api.anthropic.com/v1/chat/completions", { method: "POST" });
+    const body = await res.text();
+    expect(body).toContain("<think>x</think>"); // unchanged
+  });
+  test("intercepts chat-completions to the targeted provider (openrouter)", async () => {
+    const fake = makeFakeFetch(
+      () =>
+        new Response('{"choices":[{"message":{"content":"<think>x</think>hi"}}]}', {
+          status: 200,
+          headers: { "content-type": "application/json" },
+        }),
+    );
+    const wrapped = wrapFetchForReasoningCleanup(fake, {
+      providers: ["openrouter"],
+    });
+    const res = await wrapped(
+      "https://openrouter.ai/api/v1/chat/completions",
+      { method: "POST" },
+    );
+    const body = await res.text();
+    expect(body).not.toContain("<think>");
+    expect(body).toContain('"content":"hi"');
+  });
+  test("intercepts chat-completions to the targeted provider (minimax)", async () => {
+    const fake = makeFakeFetch(
+      () =>
+        new Response('{"choices":[{"message":{"content":"<thinking>x</thinking>hi"}}]}', {
+          status: 200,
+          headers: { "content-type": "application/json" },
+        }),
+    );
+    const wrapped = wrapFetchForReasoningCleanup(fake, {
+      providers: ["minimax"],
+    });
+    const res = await wrapped("https://minimax.io/v1/chat/completions", {
+      method: "POST",
+    });
+    const body = await res.text();
+    expect(body).not.toContain("<thinking>");
+    expect(body).toContain('"content":"hi"');
+  });
+});
+// ---------------------------------------------------------------------------
+// wrapFetchForReasoningCleanup — non-streaming JSON rewriting
+// ---------------------------------------------------------------------------
+describe("wrapFetchForReasoningCleanup — non-streaming JSON", () => {
+  test("strips think blocks from a single choice", async () => {
+    const fake = makeFakeFetch(
+      () =>
+        new Response(
+          JSON.stringify({
+            choices: [
+              {
+                message: {
+                  role: "assistant",
+                  content: "<think>step 1\nstep 2</think>The answer is 42.",
+                },
+              },
+            ],
+          }),
+          { status: 200, headers: { "content-type": "application/json" } },
+        ),
+    );
+    const wrapped = wrapFetchForReasoningCleanup(fake);
+    const res = await wrapped(
+      "https://openrouter.ai/api/v1/chat/completions",
+      { method: "POST" },
+    );
+    const body = JSON.parse(await res.text());
+    expect(body.choices[0].message.content).toBe("The answer is 42.");
+  });
+  test("preserves structured reasoning field while stripping inline blocks", async () => {
+    const fake = makeFakeFetch(
+      () =>
+        new Response(
+          JSON.stringify({
+            choices: [
+              {
+                message: {
+                  role: "assistant",
+                  reasoning: "the structured chain of thought",
+                  reasoning_details: [{ type: "reasoning.text", text: "the structured chain of thought" }],
+                  content: "<think>the same text inline</think>final answer",
+                },
+              },
+            ],
+          }),
+          { status: 200, headers: { "content-type": "application/json" } },
+        ),
+    );
+    const wrapped = wrapFetchForReasoningCleanup(fake);
+    const res = await wrapped(
+      "https://openrouter.ai/api/v1/chat/completions",
+      { method: "POST" },
+    );
+    const body = JSON.parse(await res.text());
+    expect(body.choices[0].message.reasoning).toBe("the structured chain of thought");
+    expect(body.choices[0].message.reasoning_details).toEqual([
+      { type: "reasoning.text", text: "the structured chain of thought" },
+    ]);
+    expect(body.choices[0].message.content).toBe("final answer");
+  });
+  test("returns the original response untouched when no think blocks are present", async () => {
+    const original = JSON.stringify({
+      choices: [{ message: { role: "assistant", content: "clean response" } }],
+    });
+    const fake = makeFakeFetch(
+      () =>
+        new Response(original, {
+          status: 200,
+          headers: { "content-type": "application/json" },
+        }),
+    );
+    const wrapped = wrapFetchForReasoningCleanup(fake);
+    const res = await wrapped(
+      "https://openrouter.ai/api/v1/chat/completions",
+      { method: "POST" },
+    );
+    expect(await res.text()).toBe(original);
+  });
+  test("forwards the response unchanged on JSON parse error (safety net)", async () => {
+    const fake = makeFakeFetch(
+      () =>
+        new Response("not json {{{", {
+          status: 200,
+          headers: { "content-type": "application/json" },
+        }),
+    );
+    const wrapped = wrapFetchForReasoningCleanup(fake);
+    const res = await wrapped(
+      "https://openrouter.ai/api/v1/chat/completions",
+      { method: "POST" },
+    );
+    expect(await res.text()).toBe("not json {{{");
+  });
+});
+// ---------------------------------------------------------------------------
+// wrapFetchForReasoningCleanup — SSE streaming
+// ---------------------------------------------------------------------------
+/** Build an SSE Response body from a list of event payloads (without
+ *  the `data: ` prefix — the prefix is added here for convenience). */
+function sseResponse(events: string[], finalChoiceIndex = 0): Response {
+  const lines: string[] = [];
+  for (const payload of events) {
+    lines.push(`data: ${payload}`);
+  }
+  // Add a finish_reason on the last event so the stripper flushes.
+  lines.push(
+    `data: ${JSON.stringify({
+      choices: [{ index: finalChoiceIndex, delta: {}, finish_reason: "stop" }],
+    })}`,
+  );
+  lines.push("data: [DONE]");
+  const body = lines.join("\n\n") + "\n\n";
+  return new Response(body, {
+    status: 200,
+    headers: { "content-type": "text/event-stream" },
+  });
+}
+/**
+ * Drain a (possibly transformed) response body and return it as text.
+ *
+ * Chunks are decoded in streaming mode, so a multi-byte character that
+ * straddles two chunks is still decoded correctly; the decoder is
+ * flushed once after the last chunk.
+ *
+ * @param res Response whose body should be consumed.
+ * @returns The decoded body, or `""` when the response has no body.
+ */
+async function readBodyText(res: Response): Promise<string> {
+  const stream = res.body;
+  if (!stream) return "";
+  const utf8 = new TextDecoder();
+  const source = stream.getReader();
+  const pieces: string[] = [];
+  for (let next = await source.read(); !next.done; next = await source.read()) {
+    if (next.value) pieces.push(utf8.decode(next.value, { stream: true }));
+  }
+  // Final call without input flushes any bytes the decoder still holds.
+  pieces.push(utf8.decode());
+  return pieces.join("");
+}
+/**
+ * Streaming (SSE) behaviour of `wrapFetchForReasoningCleanup`: inline
+ * think blocks must be removed from `delta.content` whether the block
+ * is spread over several SSE events, arrives in an event that is itself
+ * split across network chunks, or uses the `<thinking>` tag variant.
+ */
+describe("wrapFetchForReasoningCleanup — SSE streaming", () => {
+  test("strips a complete think block split across multiple deltas", async () => {
+    // The model emits a `<think>...</think>` block across many deltas,
+    // then a clean final answer. After cleaning, only the final answer
+    // should remain in the SSE stream.
+    const fake = makeFakeFetch(() =>
+      sseResponse([
+        JSON.stringify({ choices: [{ index: 0, delta: { content: "<think>" } }] }),
+        JSON.stringify({ choices: [{ index: 0, delta: { content: "step 1. " } }] }),
+        JSON.stringify({ choices: [{ index: 0, delta: { content: "step 2. " } }] }),
+        JSON.stringify({ choices: [{ index: 0, delta: { content: "</think>" } }] }),
+        JSON.stringify({ choices: [{ index: 0, delta: { content: "final answer" } }] }),
+      ]),
+    );
+    const wrapped = wrapFetchForReasoningCleanup(fake);
+    const res = await wrapped(
+      "https://openrouter.ai/api/v1/chat/completions",
+      { method: "POST" },
+    );
+    const body = await readBodyText(res);
+    expect(body).not.toContain("<think>");
+    expect(body).not.toContain("</think>");
+    expect(body).not.toContain("step 1.");
+    expect(body).not.toContain("step 2.");
+    expect(body).toContain("final answer");
+  });
+  test("strips a think block split ACROSS byte-level chunk boundaries", async () => {
+    // Simulate a real network: the SSE body is delivered as a stream
+    // of arbitrary byte chunks. The `<think>` open tag itself straddles
+    // two chunks, so the `streamTransformer` must buffer the partial
+    // first event until the `\n\n` boundary arrives in the second
+    // chunk, then run the full event through `cleanSseLine` and
+    // strip the think block.
+    const sseBody =
+      `data: {"choices":[{"index":0,"delta":{"content":"<th` +
+      `ink>step A. step B.</think>The answer is 7."}}]}\n\n` +
+      `data: {"choices":[{"index":0,"delta":{"content":""},"finish_reason":"stop"}]}\n\n` +
+      `data: [DONE]\n\n`;
+    // Split immediately after `<th`, i.e. inside the open tag. The offset
+    // is derived from the tag position rather than hard-coded: a literal
+    // offset of 80 lands after the whole think block, which would leave
+    // the tag intact in the first chunk. The body is pure ASCII, so the
+    // string offset equals the byte offset.
+    const partialOpenTag = "<th";
+    const splitAt = sseBody.indexOf(partialOpenTag) + partialOpenTag.length;
+    const chunk1 = sseBody.slice(0, splitAt);
+    const chunk2 = sseBody.slice(splitAt);
+    // Guard the fixture: the first chunk ends with the open tag
+    // half-written and holds no event boundary; the second chunk starts
+    // with the rest of the tag.
+    expect(chunk1.endsWith(partialOpenTag)).toBe(true);
+    expect(chunk1).not.toContain("\n\n");
+    expect(chunk2.startsWith("ink>")).toBe(true);
+    const encoder = new TextEncoder();
+    const fake = makeFakeFetch(
+      () =>
+        new Response(
+          new ReadableStream({
+            start(controller) {
+              controller.enqueue(encoder.encode(chunk1));
+              controller.enqueue(encoder.encode(chunk2));
+              controller.close();
+            },
+          }),
+          {
+            status: 200,
+            headers: { "content-type": "text/event-stream" },
+          },
+        ),
+    );
+    const wrapped = wrapFetchForReasoningCleanup(fake);
+    const res = await wrapped(
+      "https://openrouter.ai/api/v1/chat/completions",
+      { method: "POST" },
+    );
+    const body = await readBodyText(res);
+    expect(body).not.toContain("<think>");
+    expect(body).not.toContain("</think>");
+    expect(body).not.toContain("step A.");
+    expect(body).not.toContain("step B.");
+    expect(body).toContain("The answer is 7.");
+  });
+  test("strips <thinking> (not just <think>) in streaming mode", async () => {
+    const fake = makeFakeFetch(() =>
+      sseResponse([
+        JSON.stringify({ choices: [{ index: 0, delta: { content: "<thinking>step</thinking>" } }] }),
+        JSON.stringify({ choices: [{ index: 0, delta: { content: "after" } }] }),
+      ]),
+    );
+    const wrapped = wrapFetchForReasoningCleanup(fake);
+    const res = await wrapped(
+      "https://openrouter.ai/api/v1/chat/completions",
+      { method: "POST" },
+    );
+    const body = await readBodyText(res);
+    expect(body).not.toContain("<thinking>");
+    expect(body).not.toContain("</thinking>");
+    expect(body).toContain("after");
+  });
+});