@polderlabs/bizar-plugin 0.8.0 → 0.8.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -37,10 +37,35 @@
37
37
  * is forwarded unchanged — this wrapper must never break a chat.
38
38
  */
39
39
 
40
- const THINK_OPEN = "<think>" as const;
41
- const THINK_CLOSE = "</think>" as const;
40
+ // All known inline think-style tag names. Each name pairs with itself
41
+ // for the close tag (e.g. `` matches ``, `<thinking>` matches
42
+ // `</thinking>`, etc.). The order does not matter for matching — we
43
+ // search for the earliest occurrence of any of them.
44
+ //
45
+ // The model emits `` (most common) and `<thinking>` (the original
46
+ // dashboard fix targeted this one). `<reasoning>` and `<ant_thinking>`
47
+ // are included for forward compatibility with other providers that use
48
+ // the same anti-slop pattern.
49
+ const THINK_TAG_NAMES = ["think", "thinking", "reasoning", "ant_thinking"] as const;
50
+ type ThinkTagName = (typeof THINK_TAG_NAMES)[number];
42
51
 
43
- type FetchLike = (input: Parameters<typeof fetch>[0], init?: RequestInit) => Promise<Response>;
52
+ /** Map from open-tag prefix (without `>`) to its matching close tag. */
53
+ const THINK_OPEN_TO_CLOSE: ReadonlyMap<string, string> = new Map(
54
+ THINK_TAG_NAMES.map((n) => [`<${n}`, `</${n}>`] as const),
55
+ );
56
+ /** All open-tag prefixes — used by the streaming state machine. */
57
+ const ALL_OPENS: readonly string[] = Array.from(THINK_OPEN_TO_CLOSE.keys());
58
+ /** Regex form, used by the non-streaming strip. Backreference matches
59
+ * the open-tag name to the close tag. */
60
+ const THINK_TAG_RE = new RegExp(
61
+ `<(${THINK_TAG_NAMES.join("|")})\\b[^>]*>[\\s\\S]*?</\\1>\\s*`,
62
+ "gi",
63
+ );
64
+
65
+ export type FetchLike = (
66
+ input: Parameters<typeof fetch>[0],
67
+ init?: RequestInit,
68
+ ) => Promise<Response>;
44
69
 
45
70
  export interface ReasoningCleanOptions {
46
71
  /** Extra logger for debug lines; defaults to no-op. */
@@ -55,14 +80,16 @@ export interface ReasoningCleanOptions {
55
80
  const DEFAULT_PROVIDERS = new Set(["openrouter", "minimax"]);
56
81
 
57
82
  /**
58
- * Strip ``...</think>`` blocks from a plain string. Used for
83
+ * Strip inline think-style blocks (`<think>…</think>`,
84
+ * `<thinking>…</thinking>`, `<reasoning>…</reasoning>`,
85
+ * `<ant_thinking>…</ant_thinking>`) from a plain string. Used for
59
86
  * non-streaming responses (or for accumulated streamed content).
60
87
  *
61
- * The trailing whitespace after `</think>` is also consumed so the
88
+ * The trailing whitespace after the close tag is also consumed so the
62
89
  * cleaned content does not start with an extra blank line.
63
90
  */
64
91
  export function stripInlineThinkBlocks(content: string): string {
65
- return content.replace(/<think>[\s\S]*?<\/think>\s*/g, "");
92
+ return content.replace(THINK_TAG_RE, "");
66
93
  }
67
94
 
68
95
  /**
@@ -72,9 +99,46 @@ export function stripInlineThinkBlocks(content: string): string {
72
99
  class ThinkStripper {
73
100
  private state: "NORMAL" | "IN_THINK" = "NORMAL";
74
101
  // Buffer of characters that may be the start of a marker but are not
75
- // yet complete. Holds at most max(THINK_OPEN.length, THINK_CLOSE.length)
76
- // characters from a chunk boundary.
102
+ // yet complete. Holds at most max(open.length, close.length) chars
103
+ // from a chunk boundary.
77
104
  private pending = "";
105
+ // The close tag we are looking for while IN_THINK. Set when we find
106
+ // an open, cleared when we find the matching close. Each open tag
107
+ // has its own close tag (e.g. `` pairs with ``, not ``).
108
+ private activeClose: string | null = null;
109
+
110
+ /**
111
+ * Find the earliest valid open-tag prefix in `input`. A valid match
112
+ * is `<tagname` followed by `>`, whitespace, or end-of-string — so we
113
+ * don't accidentally match `` as a substring of `<thinking>`.
114
+ */
115
+ private findOpen(input: string): { idx: number; open: string } | null {
116
+ let best: { idx: number; open: string } | null = null;
117
+ for (const open of ALL_OPENS) {
118
+ let from = 0;
119
+ while (from < input.length) {
120
+ const idx = input.indexOf(open, from);
121
+ if (idx === -1) break;
122
+ const nextPos = idx + open.length;
123
+ const nextCh = nextPos < input.length ? input.charAt(nextPos) : "";
124
+ const isBoundary =
125
+ nextCh === ">" ||
126
+ nextCh === " " ||
127
+ nextCh === "\t" ||
128
+ nextCh === "\n" ||
129
+ nextCh === "\r" ||
130
+ nextCh === "";
131
+ if (isBoundary) {
132
+ if (best === null || idx < best.idx) {
133
+ best = { idx, open };
134
+ }
135
+ break;
136
+ }
137
+ from = idx + 1;
138
+ }
139
+ }
140
+ return best;
141
+ }
78
142
 
79
143
  push(chunk: string): string {
80
144
  if (chunk.length === 0) return "";
@@ -84,31 +148,39 @@ class ThinkStripper {
84
148
 
85
149
  while (input.length > 0) {
86
150
  if (this.state === "NORMAL") {
87
- const idx = input.indexOf(THINK_OPEN);
88
- if (idx === -1) {
151
+ const found = this.findOpen(input);
152
+ if (found === null) {
89
153
  // No open marker; might have a partial at the tail.
90
- const tail = keepPartialTail(input, [THINK_OPEN]);
154
+ const tail = keepPartialTail(input, ALL_OPENS);
91
155
  out += input.slice(0, input.length - tail.length);
92
156
  this.pending = tail;
93
157
  input = "";
94
158
  break;
95
159
  }
96
- out += input.slice(0, idx);
97
- input = input.slice(idx + THINK_OPEN.length);
160
+ out += input.slice(0, found.idx);
161
+ input = input.slice(found.idx + found.open.length);
162
+ this.activeClose = THINK_OPEN_TO_CLOSE.get(found.open) ?? null;
98
163
  this.state = "IN_THINK";
99
164
  } else {
100
165
  // IN_THINK
101
- const idx = input.indexOf(THINK_CLOSE);
166
+ const closeTag = this.activeClose;
167
+ if (closeTag === null) {
168
+ // Defensive: should never happen, but recover gracefully.
169
+ this.state = "NORMAL";
170
+ break;
171
+ }
172
+ const idx = input.indexOf(closeTag);
102
173
  if (idx === -1) {
103
174
  // Still inside a think block; might have a partial close at tail.
104
- const tail = keepPartialTail(input, [THINK_CLOSE]);
175
+ const tail = keepPartialTail(input, [closeTag]);
105
176
  // Discard everything except the possible partial tail.
106
177
  this.pending = tail;
107
178
  input = "";
108
179
  break;
109
180
  }
110
- input = input.slice(idx + THINK_CLOSE.length);
181
+ input = input.slice(idx + closeTag.length);
111
182
  this.state = "NORMAL";
183
+ this.activeClose = null;
112
184
  // Drop any whitespace that immediately follows the close tag so
113
185
  // the next emitted content does not start with extra blank lines.
114
186
  const wsMatch = input.match(/^\s*/);
@@ -126,6 +198,7 @@ class ThinkStripper {
126
198
  this.pending = "";
127
199
  if (this.state === "IN_THINK") {
128
200
  this.state = "NORMAL";
201
+ this.activeClose = null;
129
202
  return tail;
130
203
  }
131
204
  return tail;
@@ -185,7 +258,7 @@ function cleanNonStreamingJson(text: string): string {
185
258
  let touched = false;
186
259
  for (const choice of choices) {
187
260
  const msg = choice?.message;
188
- if (msg && typeof msg.content === "string" && msg.content.includes(THINK_OPEN)) {
261
+ if (msg && typeof msg.content === "string" && contentHasAnyThinkOpen(msg.content)) {
189
262
  const cleaned = stripInlineThinkBlocks(msg.content);
190
263
  if (cleaned !== msg.content) {
191
264
  msg.content = cleaned;
@@ -196,6 +269,16 @@ function cleanNonStreamingJson(text: string): string {
196
269
  return touched ? JSON.stringify(data) : text;
197
270
  }
198
271
 
272
+ /** Cheap fast-path check: does `content` contain any of the known
273
+ * think-tag open prefixes? Avoids invoking the (more expensive) full
274
+ * regex on responses that obviously don't need cleaning. */
275
+ function contentHasAnyThinkOpen(content: string): boolean {
276
+ for (const open of ALL_OPENS) {
277
+ if (content.includes(open)) return true;
278
+ }
279
+ return false;
280
+ }
281
+
199
282
  /**
200
283
  * Process one SSE event line of the form `data: <payload>`. Mutates the
201
284
  * decoded payload in place to strip inline think blocks from
@@ -343,18 +426,29 @@ export function wrapFetchForReasoningCleanup(
343
426
  });
344
427
  }
345
428
  // Non-streaming JSON.
429
+ let text: string;
346
430
  try {
347
- const text = await response.text();
348
- const cleaned = cleanNonStreamingJson(text);
349
- if (cleaned === text) return response;
350
- return new Response(cleaned, {
351
- status: response.status,
352
- statusText: response.statusText,
353
- headers: response.headers,
354
- });
431
+ text = await response.text();
355
432
  } catch (err) {
356
- debug?.(`reasoning-clean: clean failed, passing through: ${(err as Error).message}`);
433
+ debug?.(`reasoning-clean: read body failed, passing through: ${(err as Error).message}`);
357
434
  return response;
358
435
  }
436
+ let cleaned: string;
437
+ try {
438
+ cleaned = cleanNonStreamingJson(text);
439
+ } catch (err) {
440
+ debug?.(`reasoning-clean: parse failed, passing through original body: ${(err as Error).message}`);
441
+ // Re-wrap the original text in a fresh Response so the caller
442
+ // can read the body (we already consumed the original via
443
+ // .text()). The status/headers are preserved.
444
+ cleaned = text;
445
+ }
446
+ // Always return a fresh Response so the caller can read the body
447
+ // (the original `response` was consumed by `.text()`).
448
+ return new Response(cleaned, {
449
+ status: response.status,
450
+ statusText: response.statusText,
451
+ headers: response.headers,
452
+ });
359
453
  };
360
454
  }
@@ -88,7 +88,7 @@ function makeDraft(overrides: Partial<BackgroundState> = {}): BackgroundState {
88
88
  agent: "mimir",
89
89
  status: "pending",
90
90
  startedAt: Date.now(),
91
- model: "openrouter/minimax-m3",
91
+ model: "minimax/minimax-m3",
92
92
  promptPreview: "test",
93
93
  resultPreview: undefined,
94
94
  resultMessageIds: [],
@@ -124,6 +124,7 @@ describe("InstanceManager.add — empty sessionId (BUGFIX v0.5.1)", () => {
124
124
  warn: () => {},
125
125
  error: () => {},
126
126
  } as never,
127
+ worktree: os.tmpdir(),
127
128
  serve: { worktree: os.tmpdir() } as never,
128
129
  http: {} as never,
129
130
  stream: stream as never,
@@ -40,7 +40,7 @@ function makeState(overrides: Partial<BackgroundState> = {}): BackgroundState {
40
40
  agent: "mimir",
41
41
  status: "running",
42
42
  startedAt: Date.now(),
43
- model: "openrouter/minimax-m3",
43
+ model: "minimax/minimax-m3",
44
44
  promptPreview: "Do the thing",
45
45
  resultPreview: undefined,
46
46
  resultMessageIds: [],
@@ -22,7 +22,7 @@ function makeBgState(overrides: Partial<BackgroundState> = {}): BackgroundState
22
22
  agent: "mimir",
23
23
  status: "pending",
24
24
  startedAt: Date.now(),
25
- model: "openrouter/minimax-m3",
25
+ model: "minimax/minimax-m3",
26
26
  promptPreview: "Do the thing",
27
27
  resultPreview: undefined,
28
28
  resultMessageIds: [],
@@ -0,0 +1,422 @@
1
+ /**
2
+ * reasoning-clean unit tests (v0.6.2).
3
+ *
4
+ * Covers the inline-think-block stripper used by the global fetch
5
+ * wrapper. The wrapper exists to defeat the M3-via-OpenRouter pattern
6
+ * where the model emits its chain-of-thought in BOTH the structured
7
+ * `reasoning` field AND inline in `message.content`. opencode's
8
+ * openrouter SDK renders the structured field as a separate "Thought"
9
+ * panel, but it does NOT strip the inline blocks — so the user sees
10
+ * the same thinking twice. The wrapper post-processes the response
11
+ * stream to drop the inline blocks.
12
+ *
13
+ * Tests here cover the pure functions in isolation (no opencode, no
14
+ * fetch, no networking):
15
+ * 1. `stripInlineThinkBlocks` — regex strip on a full string.
16
+ * 2. The streaming `ThinkStripper` state machine — verified via the
17
+ * `cleanSseLine` public path (since `ThinkStripper` itself is
18
+ * private). Cross-chunk boundaries are the interesting case.
19
+ * 3. `wrapFetchForReasoningCleanup` — provider routing, pass-through
20
+ * for non-chat-completions, and the actual JSON / SSE rewriting
21
+ * via a fake `fetch`.
22
+ *
23
+ * If the file grows beyond ~300 lines, split into multiple files
24
+ * (one per concern).
25
+ */
26
+
27
+ import { describe, test, expect } from "bun:test";
28
+
29
+ import {
30
+ stripInlineThinkBlocks,
31
+ wrapFetchForReasoningCleanup,
32
+ type FetchLike,
33
+ } from "../src/reasoning-clean.js";
34
+
35
+ // ---------------------------------------------------------------------------
36
+ // stripInlineThinkBlocks — regex strip
37
+ // ---------------------------------------------------------------------------
38
+
39
+ describe("stripInlineThinkBlocks", () => {
40
+ test("strips <think>…</think>", () => {
41
+ expect(stripInlineThinkBlocks("<think>secret</think>public")).toBe("public");
42
+ });
43
+
44
+ test("strips <thinking>…</thinking> (the original dashboard target)", () => {
45
+ expect(stripInlineThinkBlocks("<thinking>secret</thinking>public")).toBe("public");
46
+ });
47
+
48
+ test("strips <reasoning>…</reasoning>", () => {
49
+ expect(stripInlineThinkBlocks("<reasoning>secret</reasoning>public")).toBe("public");
50
+ });
51
+
52
+ test("strips <ant_thinking>…</ant_thinking>", () => {
53
+ expect(stripInlineThinkBlocks("<ant_thinking>secret</ant_thinking>public")).toBe("public");
54
+ });
55
+
56
+ test("consumes trailing whitespace after the close tag", () => {
57
+ expect(stripInlineThinkBlocks("<think>x</think>\n\n public")).toBe("public");
58
+ });
59
+
60
+ test("does not treat <think> as a prefix of <thinking> when the next char is 'i'", () => {
61
+ // Regression: a naive indexOf("<think") would match the `<think` inside
62
+ // `<thinking>` and slice past 7 chars, leaving us mid-tag. The
63
+ // boundary check in the streaming state machine (findOpen) prevents
64
+ // this for the streaming case; the regex here uses `\b[^>]*>` to
65
+ // require a proper tag boundary, so it should leave `<thinking>` alone
66
+ // when the close tag is `</thinking>`.
67
+ const input = "<thinking>NOT STRIPPED</thinking>after";
68
+ expect(stripInlineThinkBlocks(input)).toBe("after");
69
+ });
70
+
71
+ test("handles attributes inside the open tag", () => {
72
+ expect(stripInlineThinkBlocks('<think> foo="bar" >secret</think>ok')).toBe("ok");
73
+ });
74
+
75
+ test("handles multiple blocks in one string", () => {
76
+ expect(
77
+ stripInlineThinkBlocks(
78
+ "a<think>x</think>b<thinking>y</thinking>c<reasoning>z</reasoning>d",
79
+ ),
80
+ ).toBe("abcd");
81
+ });
82
+
83
+ test("returns input unchanged when no think tags are present", () => {
84
+ const input = "just a normal response with no inline thinking";
85
+ expect(stripInlineThinkBlocks(input)).toBe(input);
86
+ });
87
+
88
+ test("preserves content that LOOKS like a think tag but is incomplete", () => {
89
+ // No closing tag → regex should not match (lazy quantifier needs a close).
90
+ expect(stripInlineThinkBlocks("<think>unfinished")).toBe("<think>unfinished");
91
+ });
92
+ });
93
+
94
+ // ---------------------------------------------------------------------------
95
+ // wrapFetchForReasoningCleanup — provider routing
96
+ // ---------------------------------------------------------------------------
97
+
98
+ /** A minimal fake `fetch` that returns a canned `Response` and records
99
+ * every URL it was called with. */
100
+ function makeFakeFetch(responder: (url: string) => Response): FetchLike & {
101
+ calls: string[];
102
+ } {
103
+ const calls: string[] = [];
104
+ const fn: FetchLike & { calls: string[] } = Object.assign(
105
+ async (input: Parameters<typeof fetch>[0], _init?: RequestInit) => {
106
+ const url =
107
+ typeof input === "string"
108
+ ? input
109
+ : input instanceof URL
110
+ ? input.toString()
111
+ : (input as Request).url;
112
+ calls.push(url);
113
+ return responder(url);
114
+ },
115
+ { calls },
116
+ );
117
+ return fn;
118
+ }
119
+
120
+ describe("wrapFetchForReasoningCleanup — provider routing", () => {
121
+ test("passes through non-chat-completions requests", async () => {
122
+ const fake = makeFakeFetch(
123
+ (url) =>
124
+ new Response("not a chat completion", { status: 200, headers: { "content-type": "text/plain" } }),
125
+ );
126
+ const wrapped = wrapFetchForReasoningCleanup(fake, {
127
+ providers: ["openrouter"],
128
+ });
129
+ const res = await wrapped("https://example.com/some/other/endpoint");
130
+ expect(await res.text()).toBe("not a chat completion");
131
+ });
132
+
133
+ test("passes through chat-completions to a non-targeted provider", async () => {
134
+ const fake = makeFakeFetch(
135
+ () =>
136
+ new Response('{"choices":[{"message":{"content":"<think>x</think>hi"}}]}', {
137
+ status: 200,
138
+ headers: { "content-type": "application/json" },
139
+ }),
140
+ );
141
+ const wrapped = wrapFetchForReasoningCleanup(fake, {
142
+ providers: ["openrouter"],
143
+ });
144
+ // Anthropic endpoint — not in the providers list, so no cleaning.
145
+ const res = await wrapped("https://api.anthropic.com/v1/chat/completions", { method: "POST" });
146
+ const body = await res.text();
147
+ expect(body).toContain("<think>x</think>"); // unchanged
148
+ });
149
+
150
+ test("intercepts chat-completions to the targeted provider (openrouter)", async () => {
151
+ const fake = makeFakeFetch(
152
+ () =>
153
+ new Response('{"choices":[{"message":{"content":"<think>x</think>hi"}}]}', {
154
+ status: 200,
155
+ headers: { "content-type": "application/json" },
156
+ }),
157
+ );
158
+ const wrapped = wrapFetchForReasoningCleanup(fake, {
159
+ providers: ["openrouter"],
160
+ });
161
+ const res = await wrapped(
162
+ "https://openrouter.ai/api/v1/chat/completions",
163
+ { method: "POST" },
164
+ );
165
+ const body = await res.text();
166
+ expect(body).not.toContain("<think>");
167
+ expect(body).toContain('"content":"hi"');
168
+ });
169
+
170
+ test("intercepts chat-completions to the targeted provider (minimax)", async () => {
171
+ const fake = makeFakeFetch(
172
+ () =>
173
+ new Response('{"choices":[{"message":{"content":"<thinking>x</thinking>hi"}}]}', {
174
+ status: 200,
175
+ headers: { "content-type": "application/json" },
176
+ }),
177
+ );
178
+ const wrapped = wrapFetchForReasoningCleanup(fake, {
179
+ providers: ["minimax"],
180
+ });
181
+ const res = await wrapped("https://minimax.io/v1/chat/completions", {
182
+ method: "POST",
183
+ });
184
+ const body = await res.text();
185
+ expect(body).not.toContain("<thinking>");
186
+ expect(body).toContain('"content":"hi"');
187
+ });
188
+ });
189
+
190
+ // ---------------------------------------------------------------------------
191
+ // wrapFetchForReasoningCleanup — non-streaming JSON rewriting
192
+ // ---------------------------------------------------------------------------
193
+
194
+ describe("wrapFetchForReasoningCleanup — non-streaming JSON", () => {
195
+ test("strips think blocks from a single choice", async () => {
196
+ const fake = makeFakeFetch(
197
+ () =>
198
+ new Response(
199
+ JSON.stringify({
200
+ choices: [
201
+ {
202
+ message: {
203
+ role: "assistant",
204
+ content: "<think>step 1\nstep 2</think>The answer is 42.",
205
+ },
206
+ },
207
+ ],
208
+ }),
209
+ { status: 200, headers: { "content-type": "application/json" } },
210
+ ),
211
+ );
212
+ const wrapped = wrapFetchForReasoningCleanup(fake);
213
+ const res = await wrapped(
214
+ "https://openrouter.ai/api/v1/chat/completions",
215
+ { method: "POST" },
216
+ );
217
+ const body = JSON.parse(await res.text());
218
+ expect(body.choices[0].message.content).toBe("The answer is 42.");
219
+ });
220
+
221
+ test("preserves structured reasoning field while stripping inline blocks", async () => {
222
+ const fake = makeFakeFetch(
223
+ () =>
224
+ new Response(
225
+ JSON.stringify({
226
+ choices: [
227
+ {
228
+ message: {
229
+ role: "assistant",
230
+ reasoning: "the structured chain of thought",
231
+ reasoning_details: [{ type: "reasoning.text", text: "the structured chain of thought" }],
232
+ content: "<think>the same text inline</think>final answer",
233
+ },
234
+ },
235
+ ],
236
+ }),
237
+ { status: 200, headers: { "content-type": "application/json" } },
238
+ ),
239
+ );
240
+ const wrapped = wrapFetchForReasoningCleanup(fake);
241
+ const res = await wrapped(
242
+ "https://openrouter.ai/api/v1/chat/completions",
243
+ { method: "POST" },
244
+ );
245
+ const body = JSON.parse(await res.text());
246
+ expect(body.choices[0].message.reasoning).toBe("the structured chain of thought");
247
+ expect(body.choices[0].message.reasoning_details).toEqual([
248
+ { type: "reasoning.text", text: "the structured chain of thought" },
249
+ ]);
250
+ expect(body.choices[0].message.content).toBe("final answer");
251
+ });
252
+
253
+ test("returns the original response untouched when no think blocks are present", async () => {
254
+ const original = JSON.stringify({
255
+ choices: [{ message: { role: "assistant", content: "clean response" } }],
256
+ });
257
+ const fake = makeFakeFetch(
258
+ () =>
259
+ new Response(original, {
260
+ status: 200,
261
+ headers: { "content-type": "application/json" },
262
+ }),
263
+ );
264
+ const wrapped = wrapFetchForReasoningCleanup(fake);
265
+ const res = await wrapped(
266
+ "https://openrouter.ai/api/v1/chat/completions",
267
+ { method: "POST" },
268
+ );
269
+ expect(await res.text()).toBe(original);
270
+ });
271
+
272
+ test("forwards the response unchanged on JSON parse error (safety net)", async () => {
273
+ const fake = makeFakeFetch(
274
+ () =>
275
+ new Response("not json {{{", {
276
+ status: 200,
277
+ headers: { "content-type": "application/json" },
278
+ }),
279
+ );
280
+ const wrapped = wrapFetchForReasoningCleanup(fake);
281
+ const res = await wrapped(
282
+ "https://openrouter.ai/api/v1/chat/completions",
283
+ { method: "POST" },
284
+ );
285
+ expect(await res.text()).toBe("not json {{{");
286
+ });
287
+ });
288
+
289
+ // ---------------------------------------------------------------------------
290
+ // wrapFetchForReasoningCleanup — SSE streaming
291
+ // ---------------------------------------------------------------------------
292
+
293
+ /** Build an SSE Response body from a list of event payloads (without
294
+ * the `data: ` prefix — the prefix is added here for convenience). */
295
+ function sseResponse(events: string[], finalChoiceIndex = 0): Response {
296
+ const lines: string[] = [];
297
+ for (const payload of events) {
298
+ lines.push(`data: ${payload}`);
299
+ }
300
+ // Add a finish_reason on the last event so the stripper flushes.
301
+ lines.push(
302
+ `data: ${JSON.stringify({
303
+ choices: [{ index: finalChoiceIndex, delta: {}, finish_reason: "stop" }],
304
+ })}`,
305
+ );
306
+ lines.push("data: [DONE]");
307
+ const body = lines.join("\n\n") + "\n\n";
308
+ return new Response(body, {
309
+ status: 200,
310
+ headers: { "content-type": "text/event-stream" },
311
+ });
312
+ }
313
+
314
+ /** Read the full text of a (possibly transformed) response body. */
315
+ async function readBodyText(res: Response): Promise<string> {
316
+ if (!res.body) return "";
317
+ const reader = res.body.getReader();
318
+ const decoder = new TextDecoder();
319
+ let out = "";
320
+ while (true) {
321
+ const { value, done } = await reader.read();
322
+ if (done) break;
323
+ if (value) out += decoder.decode(value, { stream: true });
324
+ }
325
+ out += decoder.decode();
326
+ return out;
327
+ }
328
+
329
+ describe("wrapFetchForReasoningCleanup — SSE streaming", () => {
330
+ test("strips a complete think block split across multiple deltas", async () => {
331
+ // The model emits a `<think>...</think>` block across many deltas,
332
+ // then a clean final answer. After cleaning, only the final answer
333
+ // should remain in the SSE stream.
334
+ const fake = makeFakeFetch(() =>
335
+ sseResponse([
336
+ JSON.stringify({ choices: [{ index: 0, delta: { content: "<think>" } }] }),
337
+ JSON.stringify({ choices: [{ index: 0, delta: { content: "step 1. " } }] }),
338
+ JSON.stringify({ choices: [{ index: 0, delta: { content: "step 2. " } }] }),
339
+ JSON.stringify({ choices: [{ index: 0, delta: { content: "</think>" } }] }),
340
+ JSON.stringify({ choices: [{ index: 0, delta: { content: "final answer" } }] }),
341
+ ]),
342
+ );
343
+ const wrapped = wrapFetchForReasoningCleanup(fake);
344
+ const res = await wrapped(
345
+ "https://openrouter.ai/api/v1/chat/completions",
346
+ { method: "POST" },
347
+ );
348
+ const body = await readBodyText(res);
349
+ expect(body).not.toContain("<think>");
350
+ expect(body).not.toContain("</think>");
351
+ expect(body).not.toContain("step 1.");
352
+ expect(body).not.toContain("step 2.");
353
+ expect(body).toContain("final answer");
354
+ });
355
+
356
+ test("strips a think block split ACROSS byte-level chunk boundaries", async () => {
357
+ // Simulate a real network: the SSE body is delivered as a stream
358
+ // of arbitrary byte chunks. The `<think>` open tag itself straddles
359
+ // two chunks, so the `streamTransformer` must buffer the partial
360
+ // first event until the `\n\n` boundary arrives in the second
361
+ // chunk, then run the full event through `cleanSseLine` and
362
+ // strip the think block.
363
+ const sseBody =
364
+ `data: {"choices":[{"index":0,"delta":{"content":"<th` +
365
+ `ink>step A. step B.</think>The answer is 7."}}]}\n\n` +
366
+ `data: {"choices":[{"index":0,"delta":{"content":""},"finish_reason":"stop"}]}\n\n` +
367
+ `data: [DONE]\n\n`;
368
+
369
+ // Split the body at byte offset 80 (which lands inside the `<th`
370
+ // open tag). The first chunk ends with the open tag half-written;
371
+ // the second chunk starts with the rest of the open tag and
372
+ // includes the `\n\n` boundary.
373
+ const splitAt = 80;
374
+ const chunk1 = sseBody.slice(0, splitAt);
375
+ const chunk2 = sseBody.slice(splitAt);
376
+
377
+ const fake = makeFakeFetch(
378
+ () =>
379
+ new Response(
380
+ new ReadableStream({
381
+ start(controller) {
382
+ controller.enqueue(new TextEncoder().encode(chunk1));
383
+ controller.enqueue(new TextEncoder().encode(chunk2));
384
+ controller.close();
385
+ },
386
+ }),
387
+ {
388
+ status: 200,
389
+ headers: { "content-type": "text/event-stream" },
390
+ },
391
+ ),
392
+ );
393
+ const wrapped = wrapFetchForReasoningCleanup(fake);
394
+ const res = await wrapped(
395
+ "https://openrouter.ai/api/v1/chat/completions",
396
+ { method: "POST" },
397
+ );
398
+ const body = await readBodyText(res);
399
+ expect(body).not.toContain("<think>");
400
+ expect(body).not.toContain("step A.");
401
+ expect(body).not.toContain("step B.");
402
+ expect(body).toContain("The answer is 7.");
403
+ });
404
+
405
+ test("strips <thinking> (not just <think>) in streaming mode", async () => {
406
+ const fake = makeFakeFetch(() =>
407
+ sseResponse([
408
+ JSON.stringify({ choices: [{ index: 0, delta: { content: "<thinking>step</thinking>" } }] }),
409
+ JSON.stringify({ choices: [{ index: 0, delta: { content: "after" } }] }),
410
+ ]),
411
+ );
412
+ const wrapped = wrapFetchForReasoningCleanup(fake);
413
+ const res = await wrapped(
414
+ "https://openrouter.ai/api/v1/chat/completions",
415
+ { method: "POST" },
416
+ );
417
+ const body = await readBodyText(res);
418
+ expect(body).not.toContain("<thinking>");
419
+ expect(body).not.toContain("</thinking>");
420
+ expect(body).toContain("after");
421
+ });
422
+ });