@polderlabs/bizar-plugin 0.8.1 → 0.8.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/index.ts CHANGED
@@ -126,7 +126,11 @@ import { SettingsStore } from "./src/settings.js";
126
126
  import { parseSlashCommand } from "./src/commands.js";
127
127
  import { createPlanActionTool } from "./src/tools/plan-action.js";
128
128
  import { createWaitForFeedbackTool } from "./src/tools/wait-for-feedback.js";
129
- import { stripInlineThinkBlocks } from "./src/reasoning-clean.js";
129
+ import {
130
+ stripInlineThinkBlocks,
131
+ wrapFetchForReasoningCleanup,
132
+ type FetchLike,
133
+ } from "./src/reasoning-clean.js";
130
134
 
131
135
  // v0.5.0 — visual plan wiring: side-effect executor + plan-fs
132
136
  import { executeSideEffect, type ExecuteOptions } from "./src/commands-impl.js";
@@ -223,6 +227,45 @@ let streamHandle: EventStream | null = null;
223
227
  let loggerHandle: Logger | null = null;
224
228
  const signalHandlerRefs = new Map<"SIGTERM" | "SIGINT", () => void>();
225
229
 
230
/**
 * Process-wide marker stamped on the wrapped fetch function. `Symbol.for`
 * resolves through the global symbol registry, so a second evaluation of
 * this module (e.g. a plugin reload that re-imports the file) gets the
 * same key and can recognise a wrap installed by an earlier instance.
 */
const FETCH_WRAP_MARKER = Symbol.for("bizar.reasoning-clean.fetch-wrap");

/** v0.6.2 — Set to `true` after the first time we wrap `globalThis.fetch`
 * with the reasoning-clean wrapper. Subsequent calls through this module
 * instance are no-ops. The flag alone does not survive a re-evaluation of
 * the module; `FETCH_WRAP_MARKER` covers that case. */
let fetchWrapInstalled = false;

/**
 * v0.6.2 — Reasoning directive. Install the reasoning-clean fetch wrap
 * on `globalThis.fetch`. The wrap strips inline `<think>...</think>` (and
 * the other recognised variants — see `src/reasoning-clean.ts`) from
 * chat-completions responses targeting `openrouter`/`minimax`, while
 * leaving the structured `reasoning` / `reasoning_details` fields
 * intact.
 *
 * This is the workaround for the fact that opencode 1.17.9 does not
 * fire the `config` hook in this runtime (the SDK type declares it, but
 * the host never calls it). By the time the host would call `config`,
 * the plugin would already be past init — and the AI SDK is already
 * using the unwrapped fetch. So we wrap fetch once, globally, as the
 * plugin initialises.
 *
 * Idempotence is enforced twice: the module-level `fetchWrapInstalled`
 * flag short-circuits repeat calls, and the `FETCH_WRAP_MARKER` property
 * on the installed function prevents wrapping an already-wrapped fetch
 * when the module itself has been evaluated again in the same process.
 *
 * @param logger Receives a warning when `globalThis.fetch` is unusable,
 *   debug lines from the wrapper, and an info line on installation.
 */
function installFetchReasoningCleanup(logger: Logger): void {
  if (fetchWrapInstalled) return;

  const original = globalThis.fetch;
  if (typeof original !== "function") {
    logger.warn("bizar: globalThis.fetch is not a function; reasoning-clean wrap skipped");
    return;
  }

  // A previous evaluation of this module already wrapped fetch: adopt it
  // instead of stacking a second wrapper on top.
  if (Reflect.get(original, FETCH_WRAP_MARKER) === true) {
    fetchWrapInstalled = true;
    logger.debug("bizar: reasoning-clean fetch wrap already present; not wrapping again");
    return;
  }

  const wrapped = wrapFetchForReasoningCleanup(
    // Bind so the underlying fetch keeps `globalThis` as its receiver.
    original.bind(globalThis) as FetchLike,
    {
      debug: (msg) => logger.debug(msg),
    },
  );
  Object.defineProperty(wrapped, FETCH_WRAP_MARKER, { value: true });

  globalThis.fetch = wrapped as typeof globalThis.fetch;
  fetchWrapInstalled = true;
  logger.info("bizar: reasoning-clean fetch wrap installed (openrouter/minimax)");
}
268
+
226
269
  // --- Plugin entry point ---------------------------------------------------
227
270
 
228
271
  /**
@@ -319,6 +362,16 @@ async function init(
319
362
  logger.warn(`bizar: ${note}`);
320
363
  }
321
364
 
365
+ // v0.6.2 — Reasoning directive. Wrap globalThis.fetch so that inline
366
+ // ``...</think>` blocks in chat completions responses
367
+ // from openrouter/minimax providers are stripped from `content` even
368
+ // when the model also emits structured reasoning. The `config` hook
369
+ // in the opencode plugin API is declared in the SDK type but does NOT
370
+ // fire in 1.17.9 (confirmed via debug probe 2026-06-24), so we wrap
371
+ // fetch globally as a fallback. Idempotent — only the first call in
372
+ // this process actually wraps.
373
+ installFetchReasoningCleanup(logger);
374
+
322
375
  const stateStore = new StateStore(options.stateDir, logger);
323
376
  const settingsStore = new SettingsStore(options.stateDir, logger);
324
377
  const logWriter = new LogWriter(options.logDir, options.logRotationBytes, logger);
@@ -758,22 +811,30 @@ function buildHooks(ctx: RuntimeContext, bg: BgDeps): Hooks {
758
811
  // sees the same thinking text twice — once in the proper panel and
759
812
  // again as visible message text below it.
760
813
  //
761
- // The opencode plugin API in this version does NOT trigger a
762
- // `config` hook (the `wrap-fetch` workaround from v0.6.1 is dead
763
- // code in current builds), so we cannot post-process the response
764
- // stream. The only working hooks that can help are:
814
+ // Defence in depth (three layers, in order of impact):
815
+ //
816
+ // 1. `installFetchReasoningCleanup` (init-time) wraps
817
+ // `globalThis.fetch` with `wrapFetchForReasoningCleanup` from
818
+ // `src/reasoning-clean.ts`. The wrap strips the inline ``
819
+ // blocks from chat-completions responses to `openrouter` /
820
+ // `minimax` while leaving the structured reasoning fields
821
+ // alone. This is the only layer that fixes the CURRENT
822
+ // response in-flight. The opencode plugin API in 1.17.9 declares
823
+ // a `config` hook in the SDK type but does not actually fire it
824
+ // (confirmed via debug probe 2026-06-24), so we wrap fetch
825
+ // globally instead.
765
826
  //
766
- // 1. `experimental.chat.system.transform` — runs every turn; we
827
+ // 2. `experimental.chat.system.transform` — runs every turn; we
767
828
  // push a directive telling the model to put thinking in the
768
829
  // structured field only.
769
- // 2. `experimental.chat.messages.transform` — runs before each
830
+ //
831
+ // 3. `experimental.chat.messages.transform` — runs before each
770
832
  // request; we strip `` blocks from previous assistant
771
833
  // messages so the model sees clean history and is less likely
772
834
  // to keep emitting inline ``.
773
835
  //
774
- // Neither fixes the CURRENT response (the model has already
775
- // returned), but together they strongly reduce — and in many cases
776
- // eliminate — the duplication on subsequent turns.
836
+ // Layers 2 and 3 reduce the frequency of the leak; layer 1 strips
837
+ // any leak that still slips through.
777
838
  const REASONING_DIRECTIVE_MARKER = "BIZAR_REASONING_DIRECTIVE_v0.6.2";
778
839
  const REASONING_DIRECTIVE = [
779
840
  REASONING_DIRECTIVE_MARKER,
@@ -904,6 +965,40 @@ function buildHooks(ctx: RuntimeContext, bg: BgDeps): Hooks {
904
965
  }
905
966
  },
906
967
 
968
// v0.6.2 — Reasoning directive. Strip inline think-style blocks
// (`<think>…</think>` and every other variant that
// `stripInlineThinkBlocks` recognises) from the FINAL text of each
// completed assistant text part. This is the post-processing layer
// that fixes the CURRENT response in cases where the model emits its
// chain-of-thought in BOTH the structured `reasoning` field AND inline
// in `content` (the M3-via-OpenRouter leak). opencode's openrouter SDK
// does not strip the inline blocks, so we do it here at the boundary
// between the SDK output and the UI rendering. The `config` hook that
// the SDK type declares for fetch-level wrapping does NOT fire in
// 1.17.9, and the AI SDK uses `Bun.fetch` (read-only) rather than
// `globalThis.fetch`, so a fetch wrap is a no-op in this runtime.
// `experimental.text.complete` is the working alternative — it runs on
// every completed text part, with mutable `output.text`. Idempotent:
// stripping already-cleaned text is a no-op.
"experimental.text.complete": async (input, output) => {
  try {
    const original = output.text;
    // Fast path: every recognised tag starts with `<`, so text without
    // one cannot hold a block. Probing for the literal `<think>` would
    // skip `<thinking>`, `<reasoning>`, `<ant_thinking>`, tags carrying
    // attributes and upper-case tags, all of which the stripper handles.
    if (typeof original !== "string" || !original.includes("<")) return;

    const cleaned = stripInlineThinkBlocks(original);
    if (cleaned === original) return;

    output.text = cleaned;
    ctx.logger.debug(
      `bizar: text.complete stripped think blocks (session=${input.sessionID} message=${input.messageID} part=${input.partID} ${original.length}→${cleaned.length}B)`,
    );
  } catch (err) {
    // Never let cleanup break a chat: log and leave `output.text` as is.
    ctx.logger.warn(
      `bizar: text.complete failed (passing through): ${
        err instanceof Error ? err.message : String(err)
      }`,
    );
  }
},
1001
+
907
1002
  // §3.1, §4.5.1 — event: track session boundaries. We do NOT create
908
1003
  // the state file here (canonical lifecycle: file is created at the
909
1004
  // `chat.message` seed, per spec §4.5.1).
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@polderlabs/bizar-plugin",
3
- "version": "0.8.1",
3
+ "version": "0.8.3",
4
4
  "description": "Bizar opencode plugin — loop detection, status reporting, handoff signal, background agents, and slash commands + visual plan flow for subagent activity",
5
5
  "type": "module",
6
6
  "main": "./index.ts",
@@ -11,7 +11,7 @@
11
11
  "scripts": {
12
12
  "check:imports": "bash scripts/check-forbidden-imports.sh",
13
13
  "typecheck": "tsc --noEmit",
14
- "test": "npm run check:imports && bun test tests/loop.test.ts tests/block.test.ts tests/stall-think.test.ts tests/tools/bg-get-comments.test.ts tests/tools/opencode-runner.test.ts tests/settings.test.ts tests/commands.test.ts tests/commands-impl.test.ts tests/tools/plan-action.test.ts tests/tools/wait-for-feedback.test.ts"
14
+ "test": "npm run check:imports && bun test tests/loop.test.ts tests/block.test.ts tests/stall-think.test.ts tests/tools/bg-get-comments.test.ts tests/tools/opencode-runner.test.ts tests/settings.test.ts tests/commands.test.ts tests/commands-impl.test.ts tests/tools/plan-action.test.ts tests/tools/wait-for-feedback.test.ts tests/reasoning-clean.test.ts"
15
15
  },
16
16
  "keywords": [
17
17
  "opencode",
@@ -37,10 +37,35 @@
37
37
  * is forwarded unchanged — this wrapper must never break a chat.
38
38
  */
39
39
 
40
- const THINK_OPEN = "<think>" as const;
41
- const THINK_CLOSE = "</think>" as const;
40
+ // All known inline think-style tag names. Each name pairs with itself
41
+ // for the close tag (e.g. `` matches ``, `<thinking>` matches
42
+ // `</thinking>`, etc.). The order does not matter for matching — we
43
+ // search for the earliest occurrence of any of them.
44
+ //
45
+ // The model emits `` (most common) and `<thinking>` (the original
46
+ // dashboard fix targeted this one). `<reasoning>` and `<ant_thinking>`
47
+ // are included for forward compatibility with other providers that use
48
+ // the same anti-slop pattern.
49
+ const THINK_TAG_NAMES = ["think", "thinking", "reasoning", "ant_thinking"] as const;
50
+ type ThinkTagName = (typeof THINK_TAG_NAMES)[number];
42
51
 
43
- type FetchLike = (input: Parameters<typeof fetch>[0], init?: RequestInit) => Promise<Response>;
52
+ /** Map from open-tag prefix (without `>`) to its matching close tag. */
53
+ const THINK_OPEN_TO_CLOSE: ReadonlyMap<string, string> = new Map(
54
+ THINK_TAG_NAMES.map((n) => [`<${n}`, `</${n}>`] as const),
55
+ );
56
+ /** All open-tag prefixes — used by the streaming state machine. */
57
+ const ALL_OPENS: readonly string[] = Array.from(THINK_OPEN_TO_CLOSE.keys());
58
+ /** Regex form, used by the non-streaming strip. Backreference matches
59
+ * the open-tag name to the close tag. */
60
+ const THINK_TAG_RE = new RegExp(
61
+ `<(${THINK_TAG_NAMES.join("|")})\\b[^>]*>[\\s\\S]*?</\\1>\\s*`,
62
+ "gi",
63
+ );
64
+
65
+ export type FetchLike = (
66
+ input: Parameters<typeof fetch>[0],
67
+ init?: RequestInit,
68
+ ) => Promise<Response>;
44
69
 
45
70
  export interface ReasoningCleanOptions {
46
71
  /** Extra logger for debug lines; defaults to no-op. */
@@ -55,14 +80,16 @@ export interface ReasoningCleanOptions {
55
80
  const DEFAULT_PROVIDERS = new Set(["openrouter", "minimax"]);
56
81
 
57
82
  /**
58
- * Strip ``...</think>`` blocks from a plain string. Used for
83
+ * Strip inline think-style blocks (`<think>…</think>`,
84
+ * `<thinking>…</thinking>`, `<reasoning>…</reasoning>`,
85
+ * `<ant_thinking>…</ant_thinking>`) from a plain string. Used for
59
86
  * non-streaming responses (or for accumulated streamed content).
60
87
  *
61
- * The trailing whitespace after `</think>` is also consumed so the
88
+ * The trailing whitespace after the close tag is also consumed so the
62
89
  * cleaned content does not start with an extra blank line.
63
90
  */
64
91
  export function stripInlineThinkBlocks(content: string): string {
65
- return content.replace(/<think>[\s\S]*?<\/think>\s*/g, "");
92
+ return content.replace(THINK_TAG_RE, "");
66
93
  }
67
94
 
68
95
  /**
@@ -72,9 +99,46 @@ export function stripInlineThinkBlocks(content: string): string {
72
99
  class ThinkStripper {
73
100
  private state: "NORMAL" | "IN_THINK" = "NORMAL";
74
101
  // Buffer of characters that may be the start of a marker but are not
75
- // yet complete. Holds at most max(THINK_OPEN.length, THINK_CLOSE.length)
76
- // characters from a chunk boundary.
102
+ // yet complete. Holds at most max(open.length, close.length) chars
103
+ // from a chunk boundary.
77
104
  private pending = "";
105
+ // The close tag we are looking for while IN_THINK. Set when we find
106
+ // an open, cleared when we find the matching close. Each open tag
107
+ // has its own close tag (e.g. `` pairs with ``, not ``).
108
+ private activeClose: string | null = null;
109
+
110
/**
 * Find the earliest valid open-tag prefix in `input`.
 *
 * A candidate `<tagname` is accepted when the character after it is `>`
 * or whitespace (space, tab, LF, CR) — so `<think` is not matched as a
 * substring of `<thinking>`.
 *
 * A candidate that ends exactly at the end of `input` is accepted only
 * when no longer known tag name extends it. `<think` at the end of a
 * chunk may still turn out to be `<thinking` once the next chunk
 * arrives; accepting it early would pair it with `</think>`, which never
 * occurs inside `</thinking>`, and the rest of the stream would be
 * discarded. Such an ambiguous tail is reported as "no match" so the
 * caller can buffer it.
 * NOTE(review): this relies on `keepPartialTail` retaining a tail that is
 * a proper prefix of a longer open tag — confirm against its definition.
 *
 * @param input Text to scan (pending tail plus the current chunk).
 * @returns The index and open-tag prefix of the earliest match, or
 *   `null` when no open tag is present.
 */
private findOpen(input: string): { idx: number; open: string } | null {
  let best: { idx: number; open: string } | null = null;
  for (const open of ALL_OPENS) {
    // True when another known prefix starts with this one and is longer
    // (e.g. `<thinking` extends `<think`).
    const extendable = ALL_OPENS.some(
      (other) => other.length > open.length && other.startsWith(open),
    );
    let from = 0;
    while (from < input.length) {
      const idx = input.indexOf(open, from);
      if (idx === -1) break;
      const nextPos = idx + open.length;
      const atEnd = nextPos >= input.length;
      const nextCh = atEnd ? "" : input.charAt(nextPos);
      const isBoundary = atEnd
        ? !extendable
        : nextCh === ">" ||
          nextCh === " " ||
          nextCh === "\t" ||
          nextCh === "\n" ||
          nextCh === "\r";
      if (isBoundary) {
        if (best === null || idx < best.idx) {
          best = { idx, open };
        }
        // First valid hit for this tag is its earliest; try the next tag.
        break;
      }
      from = idx + 1;
    }
  }
  return best;
}
78
142
 
79
143
  push(chunk: string): string {
80
144
  if (chunk.length === 0) return "";
@@ -84,31 +148,39 @@ class ThinkStripper {
84
148
 
85
149
  while (input.length > 0) {
86
150
  if (this.state === "NORMAL") {
87
- const idx = input.indexOf(THINK_OPEN);
88
- if (idx === -1) {
151
+ const found = this.findOpen(input);
152
+ if (found === null) {
89
153
  // No open marker; might have a partial at the tail.
90
- const tail = keepPartialTail(input, [THINK_OPEN]);
154
+ const tail = keepPartialTail(input, ALL_OPENS);
91
155
  out += input.slice(0, input.length - tail.length);
92
156
  this.pending = tail;
93
157
  input = "";
94
158
  break;
95
159
  }
96
- out += input.slice(0, idx);
97
- input = input.slice(idx + THINK_OPEN.length);
160
+ out += input.slice(0, found.idx);
161
+ input = input.slice(found.idx + found.open.length);
162
+ this.activeClose = THINK_OPEN_TO_CLOSE.get(found.open) ?? null;
98
163
  this.state = "IN_THINK";
99
164
  } else {
100
165
  // IN_THINK
101
- const idx = input.indexOf(THINK_CLOSE);
166
+ const closeTag = this.activeClose;
167
+ if (closeTag === null) {
168
+ // Defensive: should never happen, but recover gracefully.
169
+ this.state = "NORMAL";
170
+ break;
171
+ }
172
+ const idx = input.indexOf(closeTag);
102
173
  if (idx === -1) {
103
174
  // Still inside a think block; might have a partial close at tail.
104
- const tail = keepPartialTail(input, [THINK_CLOSE]);
175
+ const tail = keepPartialTail(input, [closeTag]);
105
176
  // Discard everything except the possible partial tail.
106
177
  this.pending = tail;
107
178
  input = "";
108
179
  break;
109
180
  }
110
- input = input.slice(idx + THINK_CLOSE.length);
181
+ input = input.slice(idx + closeTag.length);
111
182
  this.state = "NORMAL";
183
+ this.activeClose = null;
112
184
  // Drop any whitespace that immediately follows the close tag so
113
185
  // the next emitted content does not start with extra blank lines.
114
186
  const wsMatch = input.match(/^\s*/);
@@ -126,6 +198,7 @@ class ThinkStripper {
126
198
  this.pending = "";
127
199
  if (this.state === "IN_THINK") {
128
200
  this.state = "NORMAL";
201
+ this.activeClose = null;
129
202
  return tail;
130
203
  }
131
204
  return tail;
@@ -185,7 +258,7 @@ function cleanNonStreamingJson(text: string): string {
185
258
  let touched = false;
186
259
  for (const choice of choices) {
187
260
  const msg = choice?.message;
188
- if (msg && typeof msg.content === "string" && msg.content.includes(THINK_OPEN)) {
261
+ if (msg && typeof msg.content === "string" && contentHasAnyThinkOpen(msg.content)) {
189
262
  const cleaned = stripInlineThinkBlocks(msg.content);
190
263
  if (cleaned !== msg.content) {
191
264
  msg.content = cleaned;
@@ -196,6 +269,16 @@ function cleanNonStreamingJson(text: string): string {
196
269
  return touched ? JSON.stringify(data) : text;
197
270
  }
198
271
 
272
/**
 * Cheap fast-path check: does `content` contain any of the known
 * think-tag open prefixes? Avoids invoking the (more expensive) full
 * regex on responses that obviously don't need cleaning.
 *
 * The comparison is case-insensitive because `THINK_TAG_RE` carries the
 * `i` flag: a case-sensitive probe would let upper-case tags skip the
 * cleaning step even though the regex strips them.
 *
 * @param content Message content to probe.
 * @returns `true` when at least one open-tag prefix occurs in `content`.
 */
function contentHasAnyThinkOpen(content: string): boolean {
  // ALL_OPENS entries are lower-case, so lower-casing the haystack once
  // is enough to match regardless of the tag's case.
  const haystack = content.toLowerCase();
  return ALL_OPENS.some((open) => haystack.includes(open));
}
281
+
199
282
  /**
200
283
  * Process one SSE event line of the form `data: <payload>`. Mutates the
201
284
  * decoded payload in place to strip inline think blocks from
@@ -343,18 +426,29 @@ export function wrapFetchForReasoningCleanup(
343
426
  });
344
427
  }
345
428
  // Non-streaming JSON.
429
+ let text: string;
346
430
  try {
347
- const text = await response.text();
348
- const cleaned = cleanNonStreamingJson(text);
349
- if (cleaned === text) return response;
350
- return new Response(cleaned, {
351
- status: response.status,
352
- statusText: response.statusText,
353
- headers: response.headers,
354
- });
431
+ text = await response.text();
355
432
  } catch (err) {
356
- debug?.(`reasoning-clean: clean failed, passing through: ${(err as Error).message}`);
433
+ debug?.(`reasoning-clean: read body failed, passing through: ${(err as Error).message}`);
357
434
  return response;
358
435
  }
436
+ let cleaned: string;
437
+ try {
438
+ cleaned = cleanNonStreamingJson(text);
439
+ } catch (err) {
440
+ debug?.(`reasoning-clean: parse failed, passing through original body: ${(err as Error).message}`);
441
+ // Re-wrap the original text in a fresh Response so the caller
442
+ // can read the body (we already consumed the original via
443
+ // .text()). The status/headers are preserved.
444
+ cleaned = text;
445
+ }
446
+ // Always return a fresh Response so the caller can read the body
447
+ // (the original `response` was consumed by `.text()`).
448
+ return new Response(cleaned, {
449
+ status: response.status,
450
+ statusText: response.statusText,
451
+ headers: response.headers,
452
+ });
359
453
  };
360
454
  }
@@ -5,7 +5,7 @@
5
5
  * 1. Every `bizar_*` tool registered in `plugins/bizar/index.ts`
6
6
  * is also present in `config/opencode.json` `tools: { ... }`.
7
7
  * 2. No `bizarre_*` (double-r) typos remain in `plugins/bizar/src/`.
8
- * 3. `plugins/bizar/package.json` version is `0.5.0`.
8
+ * 3. `plugins/bizar/package.json` version is `0.8.3`.
9
9
  */
10
10
 
11
11
  import { describe, test, expect } from "bun:test";
@@ -83,9 +83,9 @@ describe("config drift detection", () => {
83
83
  ).toEqual([]);
84
84
  });
85
85
 
86
- test("plugins/bizar/package.json version is 0.8.0", () => {
86
+ test("plugins/bizar/package.json version is 0.8.3", () => {
87
87
  const pkg = JSON.parse(readFileSync(PKG_JSON, "utf-8")) as { version?: string };
88
- expect(pkg.version).toBe("0.8.0");
88
+ expect(pkg.version).toBe("0.8.3");
89
89
  });
90
90
  });
91
91
 
@@ -0,0 +1,422 @@
1
+ /**
2
+ * reasoning-clean unit tests (v0.6.2).
3
+ *
4
+ * Covers the inline-think-block stripper used by the global fetch
5
+ * wrapper. The wrapper exists to defeat the M3-via-OpenRouter pattern
6
+ * where the model emits its chain-of-thought in BOTH the structured
7
+ * `reasoning` field AND inline in `message.content`. opencode's
8
+ * openrouter SDK renders the structured field as a separate "Thought"
9
+ * panel, but it does NOT strip the inline blocks — so the user sees
10
+ * the same thinking twice. The wrapper post-processes the response
11
+ * stream to drop the inline blocks.
12
+ *
13
+ * Tests here cover the pure functions in isolation (no opencode, no
14
+ * fetch, no networking):
15
+ * 1. `stripInlineThinkBlocks` — regex strip on a full string.
16
+ * 2. The streaming `ThinkStripper` state machine — verified via the
17
+ * `cleanSseLine` public path (since `ThinkStripper` itself is
18
+ * private). Cross-chunk boundaries are the interesting case.
19
+ * 3. `wrapFetchForReasoningCleanup` — provider routing, pass-through
20
+ * for non-chat-completions, and the actual JSON / SSE rewriting
21
+ * via a fake `fetch`.
22
+ *
23
+ * If the file grows beyond ~300 lines, split into multiple files
24
+ * (one per concern).
25
+ */
26
+
27
+ import { describe, test, expect } from "bun:test";
28
+
29
+ import {
30
+ stripInlineThinkBlocks,
31
+ wrapFetchForReasoningCleanup,
32
+ type FetchLike,
33
+ } from "../src/reasoning-clean.js";
34
+
35
+ // ---------------------------------------------------------------------------
36
+ // stripInlineThinkBlocks — regex strip
37
+ // ---------------------------------------------------------------------------
38
+
39
+ describe("stripInlineThinkBlocks", () => {
40
+ test("strips <think>…</think>", () => {
41
+ expect(stripInlineThinkBlocks("<think>secret</think>public")).toBe("public");
42
+ });
43
+
44
+ test("strips <thinking>…</thinking> (the original dashboard target)", () => {
45
+ expect(stripInlineThinkBlocks("<thinking>secret</thinking>public")).toBe("public");
46
+ });
47
+
48
+ test("strips <reasoning>…</reasoning>", () => {
49
+ expect(stripInlineThinkBlocks("<reasoning>secret</reasoning>public")).toBe("public");
50
+ });
51
+
52
+ test("strips <ant_thinking>…</ant_thinking>", () => {
53
+ expect(stripInlineThinkBlocks("<ant_thinking>secret</ant_thinking>public")).toBe("public");
54
+ });
55
+
56
+ test("consumes trailing whitespace after the close tag", () => {
57
+ expect(stripInlineThinkBlocks("<think>x</think>\n\n public")).toBe("public");
58
+ });
59
+
60
+ test("does not treat <think> as a prefix of <thinking> when the next char is 'i'", () => {
61
+ // Regression: a naive indexOf("<think") would match the `<think` inside
62
+ // `<thinking>` and slice past 7 chars, leaving us mid-tag. The
63
+ // boundary check in the streaming state machine (findOpen) prevents
64
+ // this for the streaming case; the regex here uses `\b[^>]*>` to
65
+ // require a proper tag boundary, so it should leave `<thinking>` alone
66
+ // when the close tag is `</thinking>`.
67
+ const input = "<thinking>NOT STRIPPED</thinking>after";
68
+ expect(stripInlineThinkBlocks(input)).toBe("after");
69
+ });
70
+
71
+ test("handles attributes inside the open tag", () => {
72
+ expect(stripInlineThinkBlocks('<think> foo="bar" >secret</think>ok')).toBe("ok");
73
+ });
74
+
75
+ test("handles multiple blocks in one string", () => {
76
+ expect(
77
+ stripInlineThinkBlocks(
78
+ "a<think>x</think>b<thinking>y</thinking>c<reasoning>z</reasoning>d",
79
+ ),
80
+ ).toBe("abcd");
81
+ });
82
+
83
+ test("returns input unchanged when no think tags are present", () => {
84
+ const input = "just a normal response with no inline thinking";
85
+ expect(stripInlineThinkBlocks(input)).toBe(input);
86
+ });
87
+
88
+ test("preserves content that LOOKS like a think tag but is incomplete", () => {
89
+ // No closing tag → regex should not match (lazy quantifier needs a close).
90
+ expect(stripInlineThinkBlocks("<think>unfinished")).toBe("<think>unfinished");
91
+ });
92
+ });
93
+
94
+ // ---------------------------------------------------------------------------
95
+ // wrapFetchForReasoningCleanup — provider routing
96
+ // ---------------------------------------------------------------------------
97
+
98
/**
 * Build a stand-in for `fetch` that answers every call with whatever
 * `responder` returns for the requested URL. Each URL is appended to the
 * `calls` array exposed on the returned function, in call order.
 */
function makeFakeFetch(responder: (url: string) => Response): FetchLike & {
  calls: string[];
} {
  const calls: string[] = [];

  // Normalise the three accepted `fetch` input shapes to a URL string.
  const toUrl = (input: Parameters<typeof fetch>[0]): string => {
    if (typeof input === "string") return input;
    if (input instanceof URL) return input.toString();
    return (input as Request).url;
  };

  const fake = async (
    input: Parameters<typeof fetch>[0],
    _init?: RequestInit,
  ): Promise<Response> => {
    const url = toUrl(input);
    calls.push(url);
    return responder(url);
  };

  const fn: FetchLike & { calls: string[] } = Object.assign(fake, { calls });
  return fn;
}
119
+
120
+ describe("wrapFetchForReasoningCleanup — provider routing", () => {
121
+ test("passes through non-chat-completions requests", async () => {
122
+ const fake = makeFakeFetch(
123
+ (url) =>
124
+ new Response("not a chat completion", { status: 200, headers: { "content-type": "text/plain" } }),
125
+ );
126
+ const wrapped = wrapFetchForReasoningCleanup(fake, {
127
+ providers: ["openrouter"],
128
+ });
129
+ const res = await wrapped("https://example.com/some/other/endpoint");
130
+ expect(await res.text()).toBe("not a chat completion");
131
+ });
132
+
133
+ test("passes through chat-completions to a non-targeted provider", async () => {
134
+ const fake = makeFakeFetch(
135
+ () =>
136
+ new Response('{"choices":[{"message":{"content":"<think>x</think>hi"}}]}', {
137
+ status: 200,
138
+ headers: { "content-type": "application/json" },
139
+ }),
140
+ );
141
+ const wrapped = wrapFetchForReasoningCleanup(fake, {
142
+ providers: ["openrouter"],
143
+ });
144
+ // Anthropic endpoint — not in the providers list, so no cleaning.
145
+ const res = await wrapped("https://api.anthropic.com/v1/chat/completions", { method: "POST" });
146
+ const body = await res.text();
147
+ expect(body).toContain("<think>x</think>"); // unchanged
148
+ });
149
+
150
+ test("intercepts chat-completions to the targeted provider (openrouter)", async () => {
151
+ const fake = makeFakeFetch(
152
+ () =>
153
+ new Response('{"choices":[{"message":{"content":"<think>x</think>hi"}}]}', {
154
+ status: 200,
155
+ headers: { "content-type": "application/json" },
156
+ }),
157
+ );
158
+ const wrapped = wrapFetchForReasoningCleanup(fake, {
159
+ providers: ["openrouter"],
160
+ });
161
+ const res = await wrapped(
162
+ "https://openrouter.ai/api/v1/chat/completions",
163
+ { method: "POST" },
164
+ );
165
+ const body = await res.text();
166
+ expect(body).not.toContain("<think>");
167
+ expect(body).toContain('"content":"hi"');
168
+ });
169
+
170
+ test("intercepts chat-completions to the targeted provider (minimax)", async () => {
171
+ const fake = makeFakeFetch(
172
+ () =>
173
+ new Response('{"choices":[{"message":{"content":"<thinking>x</thinking>hi"}}]}', {
174
+ status: 200,
175
+ headers: { "content-type": "application/json" },
176
+ }),
177
+ );
178
+ const wrapped = wrapFetchForReasoningCleanup(fake, {
179
+ providers: ["minimax"],
180
+ });
181
+ const res = await wrapped("https://minimax.io/v1/chat/completions", {
182
+ method: "POST",
183
+ });
184
+ const body = await res.text();
185
+ expect(body).not.toContain("<thinking>");
186
+ expect(body).toContain('"content":"hi"');
187
+ });
188
+ });
189
+
190
+ // ---------------------------------------------------------------------------
191
+ // wrapFetchForReasoningCleanup — non-streaming JSON rewriting
192
+ // ---------------------------------------------------------------------------
193
+
194
+ describe("wrapFetchForReasoningCleanup — non-streaming JSON", () => {
195
+ test("strips think blocks from a single choice", async () => {
196
+ const fake = makeFakeFetch(
197
+ () =>
198
+ new Response(
199
+ JSON.stringify({
200
+ choices: [
201
+ {
202
+ message: {
203
+ role: "assistant",
204
+ content: "<think>step 1\nstep 2</think>The answer is 42.",
205
+ },
206
+ },
207
+ ],
208
+ }),
209
+ { status: 200, headers: { "content-type": "application/json" } },
210
+ ),
211
+ );
212
+ const wrapped = wrapFetchForReasoningCleanup(fake);
213
+ const res = await wrapped(
214
+ "https://openrouter.ai/api/v1/chat/completions",
215
+ { method: "POST" },
216
+ );
217
+ const body = JSON.parse(await res.text());
218
+ expect(body.choices[0].message.content).toBe("The answer is 42.");
219
+ });
220
+
221
+ test("preserves structured reasoning field while stripping inline blocks", async () => {
222
+ const fake = makeFakeFetch(
223
+ () =>
224
+ new Response(
225
+ JSON.stringify({
226
+ choices: [
227
+ {
228
+ message: {
229
+ role: "assistant",
230
+ reasoning: "the structured chain of thought",
231
+ reasoning_details: [{ type: "reasoning.text", text: "the structured chain of thought" }],
232
+ content: "<think>the same text inline</think>final answer",
233
+ },
234
+ },
235
+ ],
236
+ }),
237
+ { status: 200, headers: { "content-type": "application/json" } },
238
+ ),
239
+ );
240
+ const wrapped = wrapFetchForReasoningCleanup(fake);
241
+ const res = await wrapped(
242
+ "https://openrouter.ai/api/v1/chat/completions",
243
+ { method: "POST" },
244
+ );
245
+ const body = JSON.parse(await res.text());
246
+ expect(body.choices[0].message.reasoning).toBe("the structured chain of thought");
247
+ expect(body.choices[0].message.reasoning_details).toEqual([
248
+ { type: "reasoning.text", text: "the structured chain of thought" },
249
+ ]);
250
+ expect(body.choices[0].message.content).toBe("final answer");
251
+ });
252
+
253
+ test("returns the original response untouched when no think blocks are present", async () => {
254
+ const original = JSON.stringify({
255
+ choices: [{ message: { role: "assistant", content: "clean response" } }],
256
+ });
257
+ const fake = makeFakeFetch(
258
+ () =>
259
+ new Response(original, {
260
+ status: 200,
261
+ headers: { "content-type": "application/json" },
262
+ }),
263
+ );
264
+ const wrapped = wrapFetchForReasoningCleanup(fake);
265
+ const res = await wrapped(
266
+ "https://openrouter.ai/api/v1/chat/completions",
267
+ { method: "POST" },
268
+ );
269
+ expect(await res.text()).toBe(original);
270
+ });
271
+
272
+ test("forwards the response unchanged on JSON parse error (safety net)", async () => {
273
+ const fake = makeFakeFetch(
274
+ () =>
275
+ new Response("not json {{{", {
276
+ status: 200,
277
+ headers: { "content-type": "application/json" },
278
+ }),
279
+ );
280
+ const wrapped = wrapFetchForReasoningCleanup(fake);
281
+ const res = await wrapped(
282
+ "https://openrouter.ai/api/v1/chat/completions",
283
+ { method: "POST" },
284
+ );
285
+ expect(await res.text()).toBe("not json {{{");
286
+ });
287
+ });
288
+
289
+ // ---------------------------------------------------------------------------
290
+ // wrapFetchForReasoningCleanup — SSE streaming
291
+ // ---------------------------------------------------------------------------
292
+
293
/**
 * Assemble an SSE `Response` from raw event payloads. Each payload is
 * given the `data: ` prefix here; callers pass the payload text only.
 * Two frames are always appended: one carrying `finish_reason: "stop"`
 * for `finalChoiceIndex` (so the stripper flushes), then `[DONE]`.
 */
function sseResponse(events: string[], finalChoiceIndex = 0): Response {
  const finishPayload = JSON.stringify({
    choices: [{ index: finalChoiceIndex, delta: {}, finish_reason: "stop" }],
  });
  const frames = [...events, finishPayload, "[DONE]"].map(
    (payload) => `data: ${payload}`,
  );
  // Frames are separated, and the stream terminated, by a blank line.
  const body = `${frames.join("\n\n")}\n\n`;
  return new Response(body, {
    status: 200,
    headers: { "content-type": "text/event-stream" },
  });
}
313
+
314
/**
 * Read the full text of a (possibly transformed) response body.
 *
 * The body is drained chunk by chunk through a stream reader and decoded
 * incrementally, so a multi-byte character that straddles two chunks is
 * still decoded correctly.
 *
 * @param res - Response whose body should be consumed.
 * @returns The decoded body text, or `""` when the response has no body.
 */
async function readBodyText(res: Response): Promise<string> {
  if (!res.body) return "";
  const reader = res.body.getReader();
  const decoder = new TextDecoder();
  let out = "";
  try {
    while (true) {
      const { value, done } = await reader.read();
      if (done) break;
      // `stream: true` keeps an incomplete trailing byte sequence buffered
      // inside the decoder until the next chunk arrives.
      if (value) out += decoder.decode(value, { stream: true });
    }
    // Final call without `stream` flushes anything still buffered.
    out += decoder.decode();
  } finally {
    // Release the reader even when a read fails, so the body is not left
    // locked behind a reader nobody holds any more.
    reader.releaseLock();
  }
  return out;
}
328
+
329
describe("wrapFetchForReasoningCleanup — SSE streaming", () => {
  test("strips a complete think block split across multiple deltas", async () => {
    // The model emits a `<think>...</think>` block across many deltas,
    // then a clean final answer. After cleaning, only the final answer
    // should remain in the SSE stream.
    const fake = makeFakeFetch(() =>
      sseResponse([
        JSON.stringify({ choices: [{ index: 0, delta: { content: "<think>" } }] }),
        JSON.stringify({ choices: [{ index: 0, delta: { content: "step 1. " } }] }),
        JSON.stringify({ choices: [{ index: 0, delta: { content: "step 2. " } }] }),
        JSON.stringify({ choices: [{ index: 0, delta: { content: "</think>" } }] }),
        JSON.stringify({ choices: [{ index: 0, delta: { content: "final answer" } }] }),
      ]),
    );
    const wrapped = wrapFetchForReasoningCleanup(fake);
    const res = await wrapped(
      "https://openrouter.ai/api/v1/chat/completions",
      { method: "POST" },
    );
    const body = await readBodyText(res);
    expect(body).not.toContain("<think>");
    expect(body).not.toContain("</think>");
    expect(body).not.toContain("step 1.");
    expect(body).not.toContain("step 2.");
    expect(body).toContain("final answer");
  });

  test("strips a think block split ACROSS byte-level chunk boundaries", async () => {
    // Simulate a real network: the SSE body is delivered as a stream of
    // arbitrary byte chunks. The `<think>` open tag itself straddles two
    // chunks, so the wrapper has to hold on to the partial first event
    // until its `\n\n` boundary arrives in the second chunk before it can
    // strip the think block from the complete event.
    const firstFragment = `data: {"choices":[{"index":0,"delta":{"content":"<th`;
    const remainder =
      `ink>step A. step B.</think>The answer is 7."}}]}\n\n` +
      `data: {"choices":[{"index":0,"delta":{"content":""},"finish_reason":"stop"}]}\n\n` +
      `data: [DONE]\n\n`;
    const sseBody = firstFragment + remainder;

    // Derive the split point from the fragment itself rather than using a
    // hard-coded offset, so the cut is guaranteed to fall inside the open
    // tag (`<th` | `ink>`). The body is pure ASCII, so string offsets and
    // byte offsets coincide.
    const splitAt = firstFragment.length;
    const chunk1 = sseBody.slice(0, splitAt);
    const chunk2 = sseBody.slice(splitAt);
    // Sanity-check the fixture: the tag really is cut in half.
    expect(chunk1.endsWith("<th")).toBe(true);
    expect(chunk2.startsWith("ink>")).toBe(true);

    const encoder = new TextEncoder();
    const fake = makeFakeFetch(
      () =>
        new Response(
          new ReadableStream({
            start(controller) {
              controller.enqueue(encoder.encode(chunk1));
              controller.enqueue(encoder.encode(chunk2));
              controller.close();
            },
          }),
          {
            status: 200,
            headers: { "content-type": "text/event-stream" },
          },
        ),
    );
    const wrapped = wrapFetchForReasoningCleanup(fake);
    const res = await wrapped(
      "https://openrouter.ai/api/v1/chat/completions",
      { method: "POST" },
    );
    const body = await readBodyText(res);
    expect(body).not.toContain("<think>");
    expect(body).not.toContain("</think>");
    expect(body).not.toContain("step A.");
    expect(body).not.toContain("step B.");
    expect(body).toContain("The answer is 7.");
  });

  test("strips <thinking> (not just <think>) in streaming mode", async () => {
    // Same scenario as the `<think>` case, but with the longer
    // `<thinking>` tag variant delivered inside a single delta.
    const fake = makeFakeFetch(() =>
      sseResponse([
        JSON.stringify({ choices: [{ index: 0, delta: { content: "<thinking>step</thinking>" } }] }),
        JSON.stringify({ choices: [{ index: 0, delta: { content: "after" } }] }),
      ]),
    );
    const wrapped = wrapFetchForReasoningCleanup(fake);
    const res = await wrapped(
      "https://openrouter.ai/api/v1/chat/completions",
      { method: "POST" },
    );
    const body = await readBodyText(res);
    expect(body).not.toContain("<thinking>");
    expect(body).not.toContain("</thinking>");
    expect(body).toContain("after");
  });
});