multiarena 0.1.1 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,60 @@
1
1
  import OpenAI from "openai";
2
2
  const DEFAULT_MODEL = "gpt-4o";
3
3
  const DEFAULT_TIMEOUT_MS = 120_000;
4
/**
 * Strip `<think>…</think>` reasoning blocks from one streamed text delta
 * (reasoning models such as DeepSeek-R1 or MiniMax emit these inline).
 *
 * The function is pure: it never mutates `state`. Callers yield the returned
 * `text` (when non-empty) and replace their state with the returned `state`.
 *
 * Any number of think blocks inside a single delta are removed, including a
 * complete block that is followed by the start of another one. A closing tag
 * that is split across deltas is handled because everything received while
 * inside a block is accumulated in `state.buf` and re-scanned as a whole.
 *
 * Known limitation: an *opening* tag split across two deltas (e.g. `"<thi"`
 * followed by `"nk>"`) is not recognised, because text outside a think block
 * is passed through immediately rather than held back.
 *
 * @param {string} deltaText - The newly received text fragment. A nullish
 *   value is treated as an empty string.
 * @param {{ inThink: boolean, buf: string }} state - Filter state from the
 *   previous call; start with `{ inThink: false, buf: "" }`.
 * @returns {{ text: string, state: { inThink: boolean, buf: string } }}
 *   The user-visible text for this delta and the state for the next call.
 */
export function filterThinkText(deltaText, state) {
  const OPEN_TAG = "<think>";
  const CLOSE_TAG = "</think>";

  let { inThink, buf } = state;
  let pending = deltaText ?? "";
  let visible = "";

  if (inThink) {
    // Search the accumulated buffer rather than just this delta so that a
    // closing tag split across deltas is still found.
    buf += pending;
    const closeIdx = buf.indexOf(CLOSE_TAG);
    if (closeIdx === -1) {
      return { text: "", state: { inThink: true, buf } };
    }
    pending = buf.slice(closeIdx + CLOSE_TAG.length);
    inThink = false;
    buf = "";
  }

  // Consume every think block in the remaining text, not only the first one;
  // otherwise a second block in the same delta would leak to the user.
  while (pending) {
    const openIdx = pending.indexOf(OPEN_TAG);
    if (openIdx === -1) {
      visible += pending;
      break;
    }
    visible += pending.slice(0, openIdx);
    const rest = pending.slice(openIdx + OPEN_TAG.length);
    const closeIdx = rest.indexOf(CLOSE_TAG);
    if (closeIdx === -1) {
      // The block continues into later deltas: emit what preceded it and
      // keep the partial think content for the next call.
      return { text: visible, state: { inThink: true, buf: rest } };
    }
    pending = rest.slice(closeIdx + CLOSE_TAG.length);
  }

  return { text: visible, state: { inThink, buf } };
}
48
/**
 * End-of-stream flush (invoked once the stream finishes with
 * finish_reason = stop).
 *
 * If the filter is still inside a think block and its buffer contains a
 * closing tag, everything after the first closing tag is returned so it can
 * be shown to the user. In every other case (not inside a block, empty
 * buffer, or an unterminated block) an empty string is returned and the
 * buffered reasoning text is discarded.
 *
 * @param {{ inThink: boolean, buf: string }} state - Current filter state.
 * @returns {string} Trailing visible text, or "" when there is none.
 */
export function flushThinkBuf(state) {
  const closingTag = "</think>";
  const { inThink, buf } = state;
  if (inThink && buf) {
    const tagPos = buf.indexOf(closingTag);
    return tagPos === -1 ? "" : buf.slice(tagPos + closingTag.length);
  }
  return "";
}
4
58
  export class OpenAIProvider {
5
59
  client;
6
60
  activeController = null;
@@ -23,8 +77,7 @@ export class OpenAIProvider {
23
77
  const pendingToolCalls = new Map();
24
78
  let doneYielded = false;
25
79
  // Stateful think-tag filter for reasoning models (e.g. MiniMax, DeepSeek-R1)
26
- let inThink = false;
27
- let thinkBuf = "";
80
+ let thinkState = { inThink: false, buf: "" };
28
81
  for await (const chunk of stream) {
29
82
  if (chunk.usage) {
30
83
  inputTokens = chunk.usage.prompt_tokens;
@@ -54,44 +107,11 @@ export class OpenAIProvider {
54
107
  }
55
108
  // Text delta with think-tag filtering (for reasoning models)
56
109
  if (delta.content) {
57
- let text = delta.content;
58
- if (inThink) {
59
- thinkBuf += text;
60
- const endIdx = thinkBuf.indexOf("</think>");
61
- if (endIdx !== -1) {
62
- inThink = false;
63
- text = thinkBuf.slice(endIdx + "</think>".length);
64
- thinkBuf = "";
65
- if (!text)
66
- continue;
67
- }
68
- else {
69
- continue;
70
- }
110
+ const result = filterThinkText(delta.content, thinkState);
111
+ thinkState = result.state;
112
+ if (result.text) {
113
+ yield { type: "text", content: result.text };
71
114
  }
72
- // Check for <think> opening tag
73
- const startIdx = text.indexOf("<think>");
74
- if (startIdx !== -1) {
75
- const before = text.slice(0, startIdx);
76
- const rest = text.slice(startIdx + "<think>".length);
77
- const endIdx = rest.indexOf("</think>");
78
- if (endIdx !== -1) {
79
- // Complete think block in this chunk
80
- const after = rest.slice(endIdx + "</think>".length);
81
- text = before + after;
82
- if (!text)
83
- continue;
84
- }
85
- else {
86
- // Think block spans chunks
87
- if (before)
88
- yield { type: "text", content: before };
89
- inThink = true;
90
- thinkBuf = rest;
91
- continue;
92
- }
93
- }
94
- yield { type: "text", content: text };
95
115
  }
96
116
  if (choice.finish_reason === "tool_calls") {
97
117
  for (const [, tc] of pendingToolCalls) {
@@ -111,16 +131,10 @@ export class OpenAIProvider {
111
131
  }
112
132
  if (choice.finish_reason === "stop") {
113
133
  // Flush any remaining think buffer
114
- if (inThink && thinkBuf) {
115
- const endIdx = thinkBuf.indexOf("</think>");
116
- if (endIdx !== -1) {
117
- const after = thinkBuf.slice(endIdx + "</think>".length);
118
- if (after)
119
- yield { type: "text", content: after };
120
- }
121
- inThink = false;
122
- thinkBuf = "";
123
- }
134
+ const flushed = flushThinkBuf(thinkState);
135
+ if (flushed)
136
+ yield { type: "text", content: flushed };
137
+ thinkState = { inThink: false, buf: "" };
124
138
  yield {
125
139
  type: "done",
126
140
  usage: { input: inputTokens, output: outputTokens },