multiarena 0.1.0 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. package/CHANGELOG.md +131 -0
  2. package/LICENSE +21 -0
  3. package/README.md +282 -0
  4. package/dist/cli/args.d.ts +11 -0
  5. package/dist/cli/args.js +56 -0
  6. package/dist/config/loader.js +2 -2
  7. package/dist/config/types.d.ts +11 -1
  8. package/dist/core/deliberation.d.ts +53 -0
  9. package/dist/core/deliberation.js +356 -0
  10. package/dist/core/session.d.ts +3 -1
  11. package/dist/core/session.js +20 -17
  12. package/dist/core/turn.d.ts +2 -0
  13. package/dist/core/turn.js +32 -5
  14. package/dist/index.js +3 -49
  15. package/dist/isolation/worktree.d.ts +1 -1
  16. package/dist/isolation/worktree.js +8 -8
  17. package/dist/persistence/session.js +1 -1
  18. package/dist/provider/adapters/openai.d.ts +15 -0
  19. package/dist/provider/adapters/openai.js +67 -8
  20. package/dist/provider/provider.js +4 -0
  21. package/dist/tools/builtin/bash.js +6 -1
  22. package/dist/ui/app.js +426 -46
  23. package/dist/ui/components/BroadcastSummary.d.ts +1 -0
  24. package/dist/ui/components/BroadcastSummary.js +24 -8
  25. package/dist/ui/components/DeliberationView.d.ts +17 -0
  26. package/dist/ui/components/DeliberationView.js +81 -0
  27. package/dist/ui/components/InputBar.d.ts +3 -0
  28. package/dist/ui/components/InputBar.js +18 -8
  29. package/dist/ui/components/ModelDetail.js +16 -4
  30. package/dist/ui/components/OutputArea.d.ts +8 -0
  31. package/dist/ui/components/OutputArea.js +32 -4
  32. package/dist/ui/components/formatTokens.d.ts +1 -0
  33. package/dist/ui/components/formatTokens.js +7 -0
  34. package/dist/ui/modeTransitions.d.ts +80 -0
  35. package/dist/ui/modeTransitions.js +176 -0
  36. package/package.json +13 -8
  37. package/dist/ui/components/StatusBar.d.ts +0 -9
  38. package/dist/ui/components/StatusBar.js +0 -51
@@ -1,6 +1,60 @@
1
1
  import OpenAI from "openai";
2
2
  const DEFAULT_MODEL = "gpt-4o";
3
3
  const DEFAULT_TIMEOUT_MS = 120_000;
4
/**
 * Strip `<think>…</think>` reasoning blocks from one streamed text delta
 * (emitted inline by reasoning models such as DeepSeek-R1 and MiniMax).
 *
 * The filter is stateful across chunks: callers yield the returned `text`
 * and pass the returned `state` back in with the next delta.
 *
 * Every think block in the chunk is removed, not just the first one, so a
 * delta such as `a<think>x</think>b<think>y</think>c` yields `abc`, and a
 * delta that ends inside a second block leaves the filter in the
 * "inside think" state.
 *
 * Limitation: an opening tag that is itself split across two deltas
 * (e.g. `<thi` + `nk>`) is not recognised, because the opening tag is only
 * searched for within the current chunk. A split closing tag IS recognised,
 * because hidden content is accumulated in `state.buf` and re-scanned.
 *
 * @param {string} deltaText - Newly received text fragment.
 * @param {{ inThink: boolean, buf: string }} state - Filter state from the
 *   previous call; `{ inThink: false, buf: "" }` at stream start.
 * @returns {{ text: string, state: { inThink: boolean, buf: string } }}
 *   `text` is the user-visible part of the delta (possibly empty) and
 *   `state` is the state to use for the next call.
 */
export function filterThinkText(deltaText, state) {
    const OPEN_TAG = "<think>";
    const CLOSE_TAG = "</think>";
    let { inThink, buf } = state;
    let pending = deltaText;

    if (inThink) {
        // Still inside a block opened by an earlier chunk: keep buffering so a
        // closing tag that straddles a chunk boundary can still be found.
        buf += pending;
        const closeIdx = buf.indexOf(CLOSE_TAG);
        if (closeIdx === -1) {
            return { text: "", state: { inThink: true, buf } };
        }
        // Block closed; whatever follows the closing tag is scanned below.
        pending = buf.slice(closeIdx + CLOSE_TAG.length);
        inThink = false;
        buf = "";
    }

    let visible = "";
    while (pending) {
        const openIdx = pending.indexOf(OPEN_TAG);
        if (openIdx === -1) {
            // No further think block: the rest is plain output.
            visible += pending;
            break;
        }
        visible += pending.slice(0, openIdx);
        const rest = pending.slice(openIdx + OPEN_TAG.length);
        const closeIdx = rest.indexOf(CLOSE_TAG);
        if (closeIdx === -1) {
            // Block continues into the next chunk; stash its content so far.
            inThink = true;
            buf = rest;
            break;
        }
        // Complete block inside this chunk: drop it and keep scanning.
        pending = rest.slice(closeIdx + CLOSE_TAG.length);
    }
    return { text: visible, state: { inThink, buf } };
}
48
/**
 * End-of-stream hook for the think-tag filter (used when finish_reason is "stop").
 *
 * If the filter is still inside a think block and its buffer holds a closing
 * `</think>` tag, the text after the first such tag is returned so it can be
 * emitted. In every other case (not in a think block, empty buffer, or no
 * closing tag buffered) nothing is flushed.
 *
 * @param {{ inThink: boolean, buf: string }} state - Current filter state.
 * @returns {string} Text following the first buffered `</think>`, or "".
 */
export function flushThinkBuf(state) {
    const closeTag = "</think>";
    const { inThink, buf } = state;
    if (inThink && buf) {
        const closeAt = buf.indexOf(closeTag);
        return closeAt === -1 ? "" : buf.slice(closeAt + closeTag.length);
    }
    return "";
}
4
58
  export class OpenAIProvider {
5
59
  client;
6
60
  activeController = null;
@@ -20,11 +74,11 @@ export class OpenAIProvider {
20
74
  stream: true,
21
75
  stream_options: { include_usage: true },
22
76
  }, { signal: abortController.signal });
23
- // Track pending tool calls across stream chunks
24
77
  const pendingToolCalls = new Map();
25
78
  let doneYielded = false;
79
+ // Stateful think-tag filter for reasoning models (e.g. MiniMax, DeepSeek-R1)
80
+ let thinkState = { inThink: false, buf: "" };
26
81
  for await (const chunk of stream) {
27
- // Token usage chunk (when stream_options.include_usage is true)
28
82
  if (chunk.usage) {
29
83
  inputTokens = chunk.usage.prompt_tokens;
30
84
  outputTokens = chunk.usage.completion_tokens;
@@ -34,7 +88,6 @@ export class OpenAIProvider {
34
88
  if (!choice)
35
89
  continue;
36
90
  const delta = choice.delta;
37
- // Accumulate tool call deltas
38
91
  if (delta.tool_calls) {
39
92
  for (const tc of delta.tool_calls) {
40
93
  const idx = tc.index;
@@ -52,11 +105,14 @@ export class OpenAIProvider {
52
105
  pendingToolCalls.set(idx, existing);
53
106
  }
54
107
  }
55
- // Text delta
108
+ // Text delta with think-tag filtering (for reasoning models)
56
109
  if (delta.content) {
57
- yield { type: "text", content: delta.content };
110
+ const result = filterThinkText(delta.content, thinkState);
111
+ thinkState = result.state;
112
+ if (result.text) {
113
+ yield { type: "text", content: result.text };
114
+ }
58
115
  }
59
- // On finish_reason === "tool_calls", emit all pending tool_calls
60
116
  if (choice.finish_reason === "tool_calls") {
61
117
  for (const [, tc] of pendingToolCalls) {
62
118
  yield {
@@ -73,8 +129,12 @@ export class OpenAIProvider {
73
129
  };
74
130
  doneYielded = true;
75
131
  }
76
- // handle stop
77
132
  if (choice.finish_reason === "stop") {
133
+ // Flush any remaining think buffer
134
+ const flushed = flushThinkBuf(thinkState);
135
+ if (flushed)
136
+ yield { type: "text", content: flushed };
137
+ thinkState = { inThink: false, buf: "" };
78
138
  yield {
79
139
  type: "done",
80
140
  usage: { input: inputTokens, output: outputTokens },
@@ -82,7 +142,6 @@ export class OpenAIProvider {
82
142
  doneYielded = true;
83
143
  }
84
144
  }
85
- // Ensure done is always yielded (e.g. when stream ends on usage chunk)
86
145
  if (!doneYielded) {
87
146
  yield {
88
147
  type: "done",
@@ -15,6 +15,10 @@ export function createProvider(config) {
15
15
  const baseURL = config.endpoint?.replace(/\/v1\/?$/, "") ?? "http://localhost:11434";
16
16
  return new OllamaProvider(baseURL);
17
17
  }
18
+ case "deepseek":
19
+ return new OpenAIProvider(key, config.endpoint ?? "https://api.deepseek.com/v1");
20
+ case "minimax":
21
+ return new OpenAIProvider(key, config.endpoint ?? "https://api.minimax.chat/v1");
18
22
  default:
19
23
  throw new Error(`Unknown provider: ${config.provider}`);
20
24
  }
@@ -13,6 +13,9 @@ export const bashTool = {
13
13
  },
14
14
  async execute(args, worktreePath) {
15
15
  const command = args.command;
16
+ if (!command || command.trim() === "") {
17
+ return "Error: no command provided";
18
+ }
16
19
  const dangerous = ["rm -rf /", "sudo ", "mkfs.", "dd if=", "> /dev/sda"];
17
20
  for (const d of dangerous) {
18
21
  if (command.includes(d))
@@ -24,11 +27,13 @@ export const bashTool = {
24
27
  encoding: "utf-8",
25
28
  timeout: 30000,
26
29
  maxBuffer: 1024 * 1024,
30
+ shell: "bash",
27
31
  });
28
32
  return output || "(no output)";
29
33
  }
30
34
  catch (err) {
31
- return `Command failed (exit ${err.status}): ${err.stderr ?? err.message}`;
35
+ const detail = (err.stderr || err.message || "unknown error").trim();
36
+ return `Command failed (exit ${err.status ?? "?"}): ${detail}\n[cmd] ${command}`;
32
37
  }
33
38
  },
34
39
  };