codemini-cli 0.5.12 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28):
  1. package/codemini-web/dist/assets/{highlighted-body-OFNGDK62-B-G99D0A.js → highlighted-body-OFNGDK62-BX9ap8j1.js} +1 -1
  2. package/codemini-web/dist/assets/index-C3evlvOM.css +2 -0
  3. package/codemini-web/dist/assets/{index-DIGUEzan.js → index-qLKprU0e.js} +98 -93
  4. package/codemini-web/dist/assets/mermaid-GHXKKRXX-Cl-7dYEC.js +1 -0
  5. package/codemini-web/dist/index.html +2 -2
  6. package/codemini-web/dist/logos/chatglm-color.svg +1 -0
  7. package/codemini-web/dist/logos/claude-color.svg +1 -0
  8. package/codemini-web/dist/logos/deepseek-color.svg +1 -0
  9. package/codemini-web/dist/logos/gemini-color.svg +1 -0
  10. package/codemini-web/dist/logos/glm-color.svg +1 -0
  11. package/codemini-web/dist/logos/google-color.svg +1 -0
  12. package/codemini-web/dist/logos/kimi-color.svg +1 -0
  13. package/codemini-web/dist/logos/minimax-color.svg +1 -0
  14. package/codemini-web/dist/logos/moonshot.svg +1 -0
  15. package/codemini-web/dist/logos/nvidia-color.svg +1 -0
  16. package/codemini-web/dist/logos/openai.svg +1 -0
  17. package/codemini-web/dist/logos/qwen-color.svg +1 -0
  18. package/codemini-web/dist/logos/zhipu-color.svg +1 -0
  19. package/codemini-web/lib/runtime-bridge.js +151 -27
  20. package/package.json +1 -1
  21. package/src/core/agent-loop.js +12 -11
  22. package/src/core/chat-runtime.js +342 -51
  23. package/src/core/provider/anthropic.js +12 -9
  24. package/src/core/provider/openai-compatible.js +80 -50
  25. package/src/core/session-store.js +63 -24
  26. package/codemini-web/dist/assets/index-Dkq1DdDX.css +0 -2
  27. package/codemini-web/dist/assets/mermaid-GHXKKRXX-va2Kl89u.js +0 -1
  28. /package/codemini-web/dist/{codemini_logo.png → logos/codemini_logo.png} +0 -0
@@ -83,11 +83,266 @@ function slugify(input) {
83
83
  return base || 'untitled';
84
84
  }
85
85
 
86
- function nowStamp() {
87
- return new Date().toISOString().replace(/[:.]/g, '-');
88
- }
89
-
90
- function prioritizeByPreferredOrder(items, preferredOrder) {
86
/**
 * Produce a stamp for the current instant.
 *
 * @returns {string} The current time as an ISO-8601 UTC string in which every
 *   ':' and '.' has been replaced with '-'.
 */
function nowStamp() {
  const isoNow = new Date().toISOString();
  return isoNow.replace(/[:.]/g, '-');
}
89
+
90
/**
 * Follow `pathParts` into `obj` and return the value found there as a
 * non-negative finite number.
 *
 * Only genuine numbers, bigints and non-blank numeric strings are accepted.
 * `null`, booleans, blank strings, arrays and other objects are reported as
 * absent (`null`) instead of being coerced by `Number()` into 0 or 1; otherwise
 * a placeholder such as `{ prompt_tokens: null }` would look like a real
 * reading of 0 and stop callers that probe several candidate paths from ever
 * reaching the field that holds the actual count.
 *
 * @param {*} obj - Root value to read from.
 * @param {Array<string|number>} pathParts - Property names to follow, in order.
 * @returns {number|null} The value clamped to `>= 0`, or `null` when the path
 *   cannot be followed or does not end in a finite numeric value.
 */
function numberFromPath(obj, pathParts) {
  let current = obj;
  for (const part of pathParts) {
    if (!current || typeof current !== 'object') return null;
    current = current[part];
  }

  let value;
  if (typeof current === 'number') {
    value = current;
  } else if (typeof current === 'bigint') {
    value = Number(current);
  } else if (typeof current === 'string' && current.trim() !== '') {
    value = Number(current);
  } else {
    // null / undefined / boolean / object / blank string: nothing usable here.
    return null;
  }

  // Negative counts make no sense for token accounting; clamp them to zero.
  return Number.isFinite(value) ? Math.max(0, value) : null;
}
99
+
100
/**
 * Probe `obj` with each candidate path in order and return the first numeric
 * hit reported by `numberFromPath`.
 *
 * @param {*} obj - Value to read from.
 * @param {Iterable<Array<string>>} paths - Candidate property paths, tried in order.
 * @returns {number|null} The first non-null result, or `null` if no path matched.
 */
function firstFiniteNumber(obj, paths) {
  for (const candidatePath of paths) {
    const found = numberFromPath(obj, candidatePath);
    if (found === null || found === undefined) continue;
    return found;
  }
  return null;
}
107
+
108
/**
 * Add up the numeric values found at every candidate path of `obj`.
 *
 * @param {*} obj - Value to read from.
 * @param {Iterable<Array<string>>} paths - Property paths whose values are summed.
 * @returns {number|null} Sum of all values `numberFromPath` could read, or
 *   `null` when none of the paths yielded a number.
 */
function sumFiniteNumbers(obj, paths) {
  let total = 0;
  let matched = 0;
  for (const candidatePath of paths) {
    const amount = numberFromPath(obj, candidatePath);
    if (amount == null) continue;
    total += amount;
    matched += 1;
  }
  return matched > 0 ? total : null;
}
120
+
121
/**
 * Gather shallow copies of the raw usage payload(s) behind `usage`.
 *
 * When `usage.raw` is an array, each object entry of that array is copied and
 * non-object entries are dropped. Otherwise `usage` itself is treated as the
 * single raw payload.
 *
 * @param {*} usage - A usage object, possibly carrying a `raw` array.
 * @returns {Array<object>} Shallow copies of the raw payloads; empty when
 *   `usage` is not an object.
 */
function collectRawUsage(usage) {
  const isRecord = (candidate) => Boolean(candidate) && typeof candidate === 'object';
  if (!isRecord(usage)) return [];
  if (!Array.isArray(usage.raw)) return [{ ...usage }];

  const copies = [];
  for (const entry of usage.raw) {
    if (isRecord(entry)) copies.push({ ...entry });
  }
  return copies;
}
130
+
131
/**
 * Normalize a usage payload into a single token-accounting record.
 *
 * The payload is probed under many key spellings (snake_case and camelCase,
 * top-level or nested under `usage`, `usage_metadata`/`usageMetadata`,
 * `token_usage`/`tokenUsage`, `tokens`, `billed_units`/`billedUnits`); the
 * first finite value found for each counter wins.
 *
 * The function is idempotent: a record it produced earlier (recognised by its
 * `raw` array) can be passed back in without losing information. Its
 * `cacheMissInputTokens`, `reasoningOutputTokens` and `requests` fields are
 * read back, so re-normalizing an event whose `usage` was already replaced by
 * a normalized record does not zero those counters or reset the request count.
 *
 * @param {*} usage - Raw provider usage object, or an already-normalized record.
 * @returns {{
 *   inputTokens: number, outputTokens: number, totalTokens: number,
 *   cachedInputTokens: number, cacheMissInputTokens: number,
 *   cacheWriteInputTokens: number, reasoningOutputTokens: number,
 *   requests: number, raw: Array<object>
 * } | null} Rounded counters plus copies of the raw payload(s), or `null` when
 *   `usage` is not an object or carries no input, output, total, cached or
 *   cache-write count.
 */
function normalizeModelUsage(usage) {
  if (!usage || typeof usage !== 'object') return null;

  const promptCacheHitTokens = firstFiniteNumber(usage, [
    ['prompt_cache_hit_tokens'],
    ['promptCacheHitTokens'],
    ['cache_hit_tokens'],
    ['cacheHitTokens']
  ]);
  // Looked up once and used both for the input-token fallback below and for
  // the reported cache-miss counter. The last path is the normalized field name.
  const cacheMissInputTokens = firstFiniteNumber(usage, [
    ['prompt_cache_miss_tokens'],
    ['promptCacheMissTokens'],
    ['cache_miss_tokens'],
    ['cacheMissTokens'],
    ['cacheMissInputTokens']
  ]);
  const explicitInputTokens = firstFiniteNumber(usage, [
    ['prompt_tokens'],
    ['input_tokens'],
    ['inputTokens'],
    ['promptTokens'],
    ['prompt_token_count'],
    ['promptTokenCount'],
    ['input_token_count'],
    ['inputTokenCount'],
    ['input_total_tokens'],
    ['total_input_tokens'],
    ['usage', 'prompt_tokens'],
    ['usage', 'input_tokens'],
    ['usage_metadata', 'prompt_token_count'],
    ['usage_metadata', 'input_token_count'],
    ['usageMetadata', 'promptTokenCount'],
    ['usageMetadata', 'inputTokenCount'],
    ['token_usage', 'prompt_tokens'],
    ['token_usage', 'input_tokens'],
    ['tokenUsage', 'promptTokens'],
    ['tokenUsage', 'inputTokens'],
    ['tokens', 'input_tokens'],
    ['tokens', 'inputTokens'],
    ['tokens', 'prompt_tokens'],
    ['tokens', 'promptTokens'],
    ['billed_units', 'input_tokens'],
    ['billedUnits', 'inputTokens']
  ]);
  // Without an explicit input count, derive it from cache hit + miss counts.
  const inputTokens = explicitInputTokens ?? (
    promptCacheHitTokens != null || cacheMissInputTokens != null
      ? Number(promptCacheHitTokens || 0) + Number(cacheMissInputTokens || 0)
      : null
  );
  const outputTokens = firstFiniteNumber(usage, [
    ['completion_tokens'],
    ['output_tokens'],
    ['outputTokens'],
    ['completionTokens'],
    ['completion_token_count'],
    ['completionTokenCount'],
    ['output_token_count'],
    ['outputTokenCount'],
    ['candidates_token_count'],
    ['candidatesTokenCount'],
    ['usage', 'completion_tokens'],
    ['usage', 'output_tokens'],
    ['usage_metadata', 'candidates_token_count'],
    ['usage_metadata', 'output_token_count'],
    ['usageMetadata', 'candidatesTokenCount'],
    ['usageMetadata', 'outputTokenCount'],
    ['token_usage', 'completion_tokens'],
    ['token_usage', 'output_tokens'],
    ['tokenUsage', 'completionTokens'],
    ['tokenUsage', 'outputTokens'],
    ['tokens', 'output_tokens'],
    ['tokens', 'outputTokens'],
    ['tokens', 'completion_tokens'],
    ['tokens', 'completionTokens'],
    ['billed_units', 'output_tokens'],
    ['billedUnits', 'outputTokens']
  ]);
  const explicitTotal = firstFiniteNumber(usage, [
    ['total_tokens'],
    ['totalTokens'],
    ['total_token_count'],
    ['totalTokenCount'],
    ['usage', 'total_tokens'],
    ['usage_metadata', 'total_token_count'],
    ['usageMetadata', 'totalTokenCount'],
    ['token_usage', 'total_tokens'],
    ['tokenUsage', 'totalTokens'],
    ['tokens', 'total_tokens'],
    ['tokens', 'totalTokens']
  ]);
  const cachedInputTokens = firstFiniteNumber(usage, [
    ['prompt_tokens_details', 'cached_tokens'],
    ['input_tokens_details', 'cached_tokens'],
    ['promptTokensDetails', 'cachedTokens'],
    ['inputTokensDetails', 'cachedTokens'],
    ['cache_read_input_tokens'],
    ['cacheReadInputTokens'],
    ['cache_read_tokens'],
    ['cacheReadTokens'],
    ['cached_tokens'],
    ['cachedTokens'],
    ['cached_input_tokens'],
    ['cachedInputTokens'],
    ['cached_content_token_count'],
    ['cachedContentTokenCount'],
    ['usage', 'prompt_tokens_details', 'cached_tokens'],
    ['usage', 'input_tokens_details', 'cached_tokens'],
    ['usage_metadata', 'cached_content_token_count'],
    ['usageMetadata', 'cachedContentTokenCount'],
    ['token_usage', 'prompt_tokens_details', 'cached_tokens'],
    ['tokenUsage', 'promptTokensDetails', 'cachedTokens'],
    ['tokens', 'cached_tokens'],
    ['tokens', 'cachedTokens'],
    ['prompt_cache_hit_tokens'],
    ['promptCacheHitTokens'],
    ['cache_hit_tokens'],
    ['cacheHitTokens']
  ]);
  // Prefer a single cache-write total; otherwise add up the per-TTL buckets.
  const cacheWriteInputTokens = firstFiniteNumber(usage, [
    ['cache_creation_input_tokens'],
    ['cacheCreationInputTokens'],
    ['cache_write_input_tokens'],
    ['cacheWriteInputTokens'],
    ['cache_creation_tokens'],
    ['cacheCreationTokens'],
    ['usage', 'cache_creation_input_tokens'],
    ['usage', 'cache_write_input_tokens'],
    ['token_usage', 'cache_creation_input_tokens'],
    ['tokenUsage', 'cacheCreationInputTokens']
  ]) ?? sumFiniteNumbers(usage, [
    ['cache_creation', 'ephemeral_5m_input_tokens'],
    ['cache_creation', 'ephemeral_1h_input_tokens'],
    ['cacheCreation', 'ephemeral5mInputTokens'],
    ['cacheCreation', 'ephemeral1hInputTokens'],
    ['usage', 'cache_creation', 'ephemeral_5m_input_tokens'],
    ['usage', 'cache_creation', 'ephemeral_1h_input_tokens']
  ]);
  // The last path is the normalized field name, so a second pass keeps it.
  const reasoningOutputTokens = firstFiniteNumber(usage, [
    ['completion_tokens_details', 'reasoning_tokens'],
    ['output_tokens_details', 'reasoning_tokens'],
    ['completionTokensDetails', 'reasoningTokens'],
    ['outputTokensDetails', 'reasoningTokens'],
    ['reasoning_tokens'],
    ['reasoningTokens'],
    ['thoughts_token_count'],
    ['thoughtsTokenCount'],
    ['usage', 'completion_tokens_details', 'reasoning_tokens'],
    ['usage_metadata', 'thoughts_token_count'],
    ['usageMetadata', 'thoughtsTokenCount'],
    ['reasoningOutputTokens']
  ]);
  const totalTokens = explicitTotal ?? (
    inputTokens != null || outputTokens != null
      ? Number(inputTokens || 0) + Number(outputTokens || 0)
      : null
  );
  if (
    inputTokens == null &&
    outputTokens == null &&
    totalTokens == null &&
    cachedInputTokens == null &&
    cacheWriteInputTokens == null
  ) {
    return null;
  }

  // A raw provider payload always counts as one request. An already-normalized
  // (possibly merged) record keeps the request count it has accumulated.
  const priorRequests = Array.isArray(usage.raw)
    ? firstFiniteNumber(usage, [['requests']])
    : null;
  const requests = priorRequests != null && priorRequests >= 1
    ? Math.round(priorRequests)
    : 1;

  return {
    inputTokens: Math.round(inputTokens || 0),
    outputTokens: Math.round(outputTokens || 0),
    totalTokens: Math.round(totalTokens || 0),
    cachedInputTokens: Math.round(cachedInputTokens || 0),
    cacheMissInputTokens: Math.round(cacheMissInputTokens || 0),
    cacheWriteInputTokens: Math.round(cacheWriteInputTokens || 0),
    reasoningOutputTokens: Math.round(reasoningOutputTokens || 0),
    requests,
    raw: collectRawUsage(usage)
  };
}
311
+
312
/**
 * Copy a normalized usage record, sanitizing every counter on the way.
 *
 * Each counter is coerced to a non-negative integer. Values that are missing,
 * non-numeric or non-finite become 0, so that one corrupt field cannot turn
 * every subsequently merged total into `NaN` or `Infinity`. `raw` entries are
 * shallow-copied and non-object entries are dropped, matching what
 * `collectRawUsage` stores.
 *
 * @param {*} usage - Usage record to copy.
 * @returns {object|null} A fresh record with all counters and a `raw` array,
 *   or `null` when `usage` is not an object.
 */
function cloneModelUsage(usage) {
  if (!usage || typeof usage !== 'object') return null;

  const toCount = (value) => {
    const parsed = Number(value || 0);
    return Number.isFinite(parsed) ? Math.max(0, Math.round(parsed)) : 0;
  };

  return {
    inputTokens: toCount(usage.inputTokens),
    outputTokens: toCount(usage.outputTokens),
    totalTokens: toCount(usage.totalTokens),
    cachedInputTokens: toCount(usage.cachedInputTokens),
    cacheMissInputTokens: toCount(usage.cacheMissInputTokens),
    cacheWriteInputTokens: toCount(usage.cacheWriteInputTokens),
    reasoningOutputTokens: toCount(usage.reasoningOutputTokens),
    requests: toCount(usage.requests),
    raw: Array.isArray(usage.raw)
      ? usage.raw
        .filter((item) => item && typeof item === 'object')
        .map((item) => ({ ...item }))
      : []
  };
}
326
+
327
/**
 * Combine two usage records into a new one.
 *
 * Both sides are first passed through `cloneModelUsage`. If one side yields
 * nothing, the copy of the other is returned; otherwise every counter is added
 * field by field and the `raw` arrays are concatenated (left entries first).
 *
 * @param {*} left - First usage record (may be null/undefined).
 * @param {*} right - Second usage record (may be null/undefined).
 * @returns {object|null} The merged record, or whatever `cloneModelUsage`
 *   returned for the only usable side.
 */
function mergeModelUsage(left, right) {
  const first = cloneModelUsage(left);
  const second = cloneModelUsage(right);
  if (!first) return second;
  if (!second) return first;

  const counterFields = [
    'inputTokens',
    'outputTokens',
    'totalTokens',
    'cachedInputTokens',
    'cacheMissInputTokens',
    'cacheWriteInputTokens',
    'reasoningOutputTokens',
    'requests'
  ];
  const merged = {};
  for (const field of counterFields) {
    merged[field] = first[field] + second[field];
  }
  merged.raw = [...first.raw, ...second.raw];
  return merged;
}
344
+
345
+ function prioritizeByPreferredOrder(items, preferredOrder) {
91
346
  const source = Array.isArray(items) ? items : [];
92
347
  const priorities = new Map((Array.isArray(preferredOrder) ? preferredOrder : []).map((value, index) => [value, index]));
93
348
  return [...source].sort((left, right) => {
@@ -2885,30 +3140,56 @@ async function askModel({
2885
3140
  session.messages.push(stampedMessage('assistant', ''));
2886
3141
  activeAssistantIndex = session.messages.length - 1;
2887
3142
  if (persistSession) scheduleSessionSave();
2888
- } else if (event?.type === 'assistant:delta') {
2889
- if (activeAssistantIndex >= 0 && session.messages[activeAssistantIndex]) {
2890
- const current = session.messages[activeAssistantIndex];
2891
- current.content = `${current.content || ''}${event.text || ''}`;
2892
- current.at = new Date().toISOString();
2893
- if (persistSession) scheduleSessionSave();
2894
- }
2895
- } else if (event?.type === 'assistant:response') {
2896
- if (activeAssistantIndex >= 0 && session.messages[activeAssistantIndex]) {
2897
- const current = session.messages[activeAssistantIndex];
2898
- current.content = event.assistantMessage?.content ?? event.text ?? current.content;
2899
- if (typeof event.assistantMessage?.reasoning_content === 'string' && event.assistantMessage.reasoning_content) {
2900
- current.reasoning_content = event.assistantMessage.reasoning_content;
2901
- }
3143
+ } else if (event?.type === 'assistant:delta') {
3144
+ if (activeAssistantIndex >= 0 && session.messages[activeAssistantIndex]) {
3145
+ const current = session.messages[activeAssistantIndex];
3146
+ current.content = `${current.content || ''}${event.text || ''}`;
3147
+ current.at = new Date().toISOString();
3148
+ if (persistSession) scheduleSessionSave();
3149
+ }
3150
+ } else if (event?.type === 'assistant:reasoning_delta') {
3151
+ if (activeAssistantIndex >= 0 && session.messages[activeAssistantIndex]) {
3152
+ const current = session.messages[activeAssistantIndex];
3153
+ const now = new Date();
3154
+ if (!current.reasoning_started_at) current.reasoning_started_at = now.toISOString();
3155
+ current.reasoning_content = `${current.reasoning_content || ''}${event.text || ''}`;
3156
+ current.reasoning_duration_ms = Math.max(
3157
+ 0,
3158
+ now.getTime() - Date.parse(current.reasoning_started_at)
3159
+ );
3160
+ current.at = now.toISOString();
3161
+ if (persistSession) scheduleSessionSave();
3162
+ }
3163
+ } else if (event?.type === 'assistant:response') {
3164
+ const eventUsage = normalizeModelUsage(event.usage || event.assistantMessage?.usage);
3165
+ if (eventUsage) event.usage = eventUsage;
3166
+ if (activeAssistantIndex >= 0 && session.messages[activeAssistantIndex]) {
3167
+ const current = session.messages[activeAssistantIndex];
3168
+ const now = new Date();
3169
+ current.content = event.assistantMessage?.content ?? event.text ?? current.content;
3170
+ if (typeof event.assistantMessage?.reasoning_content === 'string' && event.assistantMessage.reasoning_content) {
3171
+ current.reasoning_content = event.assistantMessage.reasoning_content;
3172
+ }
2902
3173
  if (Array.isArray(event.assistantMessage?.reasoning_details) && event.assistantMessage.reasoning_details.length > 0) {
2903
3174
  current.reasoning_details = event.assistantMessage.reasoning_details;
2904
3175
  }
2905
- if (Array.isArray(event.assistantMessage?.tool_calls) && event.assistantMessage.tool_calls.length > 0) {
2906
- current.tool_calls = event.assistantMessage.tool_calls;
2907
- }
2908
- current.at = new Date().toISOString();
2909
- if (persistSession) scheduleSessionSave();
2910
- } else {
2911
- const assistantMessage = event.assistantMessage && typeof event.assistantMessage === 'object'
3176
+ if (Array.isArray(event.assistantMessage?.tool_calls) && event.assistantMessage.tool_calls.length > 0) {
3177
+ current.tool_calls = event.assistantMessage.tool_calls;
3178
+ }
3179
+ if (eventUsage) {
3180
+ current.usage = mergeModelUsage(current.usage, eventUsage);
3181
+ }
3182
+ if ((current.reasoning_content || current.reasoning_details) && current.reasoning_started_at) {
3183
+ current.reasoning_ended_at = current.reasoning_ended_at || now.toISOString();
3184
+ current.reasoning_duration_ms = Math.max(
3185
+ Number(current.reasoning_duration_ms || 0),
3186
+ Date.parse(current.reasoning_ended_at) - Date.parse(current.reasoning_started_at)
3187
+ );
3188
+ }
3189
+ current.at = now.toISOString();
3190
+ if (persistSession) scheduleSessionSave();
3191
+ } else {
3192
+ const assistantMessage = event.assistantMessage && typeof event.assistantMessage === 'object'
2912
3193
  ? event.assistantMessage
2913
3194
  : { content: event.text || '' };
2914
3195
  session.messages.push(stampedMessage('assistant', assistantMessage.content || event.text || '', {
@@ -2918,12 +3199,13 @@ async function askModel({
2918
3199
  ...(Array.isArray(assistantMessage.reasoning_details) && assistantMessage.reasoning_details.length > 0
2919
3200
  ? { reasoning_details: assistantMessage.reasoning_details }
2920
3201
  : {}),
2921
- ...(Array.isArray(assistantMessage.tool_calls) && assistantMessage.tool_calls.length > 0
2922
- ? { tool_calls: assistantMessage.tool_calls }
2923
- : {})
2924
- }));
2925
- if (persistSession) scheduleSessionSave();
2926
- }
3202
+ ...(Array.isArray(assistantMessage.tool_calls) && assistantMessage.tool_calls.length > 0
3203
+ ? { tool_calls: assistantMessage.tool_calls }
3204
+ : {}),
3205
+ ...(eventUsage ? { usage: eventUsage } : {})
3206
+ }));
3207
+ if (persistSession) scheduleSessionSave();
3208
+ }
2927
3209
  activeAssistantIndex = -1;
2928
3210
  } else if (event?.type === 'tool:end' || event?.type === 'tool:error' || event?.type === 'tool:blocked') {
2929
3211
  const toolId = String(event.id || '');
@@ -3000,11 +3282,15 @@ async function askModel({
3000
3282
  timeoutMs: config.gateway.timeout_ms || 1800000,
3001
3283
  maxRetries: config.gateway.max_retries ?? 2,
3002
3284
  signal,
3003
- onTextDelta: (delta) => {
3004
- startAssistantStream();
3005
- wrappedAgentEvent({ type: 'assistant:delta', text: delta });
3006
- },
3007
- onToolCallDelta: (toolCall) => {
3285
+ onTextDelta: (delta) => {
3286
+ startAssistantStream();
3287
+ wrappedAgentEvent({ type: 'assistant:delta', text: delta });
3288
+ },
3289
+ onReasoningDelta: (delta) => {
3290
+ startAssistantStream();
3291
+ wrappedAgentEvent({ type: 'assistant:reasoning_delta', text: delta });
3292
+ },
3293
+ onToolCallDelta: (toolCall) => {
3008
3294
  startAssistantStream();
3009
3295
  wrappedAgentEvent({ type: 'assistant:tool_call_delta', toolCall });
3010
3296
  }
@@ -3131,7 +3417,7 @@ async function runSubAgentTask({
3131
3417
  }
3132
3418
  if (
3133
3419
  role !== 'summarizer' &&
3134
- ['assistant:start', 'assistant:delta', 'assistant:response', 'assistant:tool_call_delta'].includes(String(evt?.type || ''))
3420
+ ['assistant:start', 'assistant:delta', 'assistant:reasoning_delta', 'assistant:response', 'assistant:tool_call_delta'].includes(String(evt?.type || ''))
3135
3421
  ) {
3136
3422
  return;
3137
3423
  }
@@ -3171,14 +3457,18 @@ async function runSubAgentTask({
3171
3457
  };
3172
3458
  }
3173
3459
 
3174
- function buildPlanStepTranscript({ stepRecord, stepIndex, totalSteps, messages }) {
3175
- const toolCardsById = new Map();
3176
- const toolCards = [];
3177
- const source = Array.isArray(messages) ? messages : [];
3178
-
3179
- for (const msg of source) {
3180
- if (msg?.role === 'assistant' && Array.isArray(msg.tool_calls)) {
3181
- for (const tc of msg.tool_calls) {
3460
+ function buildPlanStepTranscript({ stepRecord, stepIndex, totalSteps, messages }) {
3461
+ const toolCardsById = new Map();
3462
+ const toolCards = [];
3463
+ const source = Array.isArray(messages) ? messages : [];
3464
+ let usage = null;
3465
+
3466
+ for (const msg of source) {
3467
+ if (msg?.role === 'assistant' && msg.usage) {
3468
+ usage = mergeModelUsage(usage, msg.usage);
3469
+ }
3470
+ if (msg?.role === 'assistant' && Array.isArray(msg.tool_calls)) {
3471
+ for (const tc of msg.tool_calls) {
3182
3472
  const id = String(tc?.id || `tool-${toolCards.length + 1}`);
3183
3473
  if (toolCardsById.has(id)) continue;
3184
3474
  const card = {
@@ -3217,11 +3507,12 @@ function buildPlanStepTranscript({ stepRecord, stepIndex, totalSteps, messages }
3217
3507
  total: totalSteps,
3218
3508
  role: stepRecord.role || 'general',
3219
3509
  title: stepRecord.title || '',
3220
- status: stepRecord.failed ? 'failed' : 'done',
3221
- summary: stepRecord.failed ? stepRecord.failureReason : trimInline(stepRecord.output || '', 160),
3222
- segments
3223
- };
3224
- }
3510
+ status: stepRecord.failed ? 'failed' : 'done',
3511
+ summary: stepRecord.failed ? stepRecord.failureReason : trimInline(stepRecord.output || '', 160),
3512
+ segments,
3513
+ ...(usage ? { usage } : {})
3514
+ };
3515
+ }
3225
3516
 
3226
3517
  async function executePlanWithSubAgents({
3227
3518
  planState,
@@ -403,9 +403,10 @@ export async function createChatCompletionStream({
403
403
  model,
404
404
  messages,
405
405
  temperature = 0.2,
406
- tools,
407
- onTextDelta,
408
- onToolCallDelta,
406
+ tools,
407
+ onTextDelta,
408
+ onReasoningDelta,
409
+ onToolCallDelta,
409
410
  timeoutMs = 1800000,
410
411
  maxTokens = 4096,
411
412
  signal: externalSignal
@@ -476,12 +477,14 @@ export async function createChatCompletionStream({
476
477
  continue;
477
478
  }
478
479
 
479
- if (delta.type === 'thinking_delta') {
480
- const current = thinkingBlocksByIndex.get(index) || { type: 'thinking', thinking: '' };
481
- current.thinking = `${current.thinking || ''}${String(delta.thinking || '')}`;
482
- thinkingBlocksByIndex.set(index, current);
483
- continue;
484
- }
480
+ if (delta.type === 'thinking_delta') {
481
+ const current = thinkingBlocksByIndex.get(index) || { type: 'thinking', thinking: '' };
482
+ const thinkingDelta = String(delta.thinking || '');
483
+ current.thinking = `${current.thinking || ''}${thinkingDelta}`;
484
+ thinkingBlocksByIndex.set(index, current);
485
+ if (thinkingDelta && onReasoningDelta) onReasoningDelta(thinkingDelta);
486
+ continue;
487
+ }
485
488
 
486
489
  if (delta.type === 'signature_delta') {
487
490
  const current = thinkingBlocksByIndex.get(index) || { type: 'thinking', thinking: '' };
@@ -135,17 +135,32 @@ function normalizeToolCallArguments(argumentsText) {
135
135
  return '{}';
136
136
  }
137
137
 
138
- function normalizeIncomingToolCallArguments(argumentsValue) {
139
- if (typeof argumentsValue === 'string') return argumentsValue;
140
- if (argumentsValue == null) return '{}';
138
+ function normalizeIncomingToolCallArguments(argumentsValue) {
139
+ if (typeof argumentsValue === 'string') return argumentsValue;
140
+ if (argumentsValue == null) return '{}';
141
141
  try {
142
142
  return JSON.stringify(argumentsValue);
143
143
  } catch {
144
144
  return '{}';
145
- }
146
- }
147
-
148
- function sanitizeGatewayMessages(messages) {
145
+ }
146
+ }
147
+
148
/**
 * Pick the usage section out of a gateway response or stream chunk.
 *
 * The candidate locations are checked in a fixed order and the first truthy
 * one is returned as-is (no copy, no normalization).
 *
 * @param {*} data - Parsed response body or stream chunk.
 * @returns {*} The first truthy usage-like value, or `null` when `data` is not
 *   an object or none of the locations holds a truthy value.
 */
function extractUsageObject(data) {
  if (!data || typeof data !== 'object') return null;

  const candidates = [
    data.usage,
    data.usage_metadata,
    data.usageMetadata,
    data.token_usage,
    data.tokenUsage,
    data.meta?.tokens,
    data.meta?.billed_units,
    data.meta?.billedUnits,
    data.response?.usage,
    data.response?.usage_metadata
  ];
  for (const candidate of candidates) {
    if (candidate) return candidate;
  }
  return null;
}
162
+
163
+ function sanitizeGatewayMessages(messages) {
149
164
  const source = Array.isArray(messages) ? messages : [];
150
165
  return source
151
166
  .filter((message) => message && typeof message === 'object')
@@ -221,9 +236,10 @@ function buildPayload({ model, temperature, messages, tools, stream = false }) {
221
236
  temperature,
222
237
  messages: isMiniMaxModel(model) ? sanitizeMiniMaxMessages(sanitizedMessages) : sanitizedMessages
223
238
  };
224
- if (stream) {
225
- payload.stream = true;
226
- }
239
+ if (stream) {
240
+ payload.stream = true;
241
+ payload.stream_options = { include_usage: true };
242
+ }
227
243
  if (Array.isArray(tools) && tools.length > 0) {
228
244
  payload.tools = tools;
229
245
  payload.tool_choice = 'auto';
@@ -365,25 +381,25 @@ export async function createChatCompletion({
365
381
  }));
366
382
  const normalizedText = String(text || '').trim();
367
383
 
368
- if (!normalizedText && toolCalls.length === 0) {
369
- if (hasTrailingToolContext(messages)) {
370
- return {
371
- text: '',
372
- toolCalls: [],
373
- usage: data?.usage || null,
374
- incomplete: true
375
- };
376
- }
377
- throw new Error('Gateway returned empty assistant response');
378
- }
379
-
380
- return {
381
- text,
382
- toolCalls,
383
- usage: data?.usage || null,
384
- assistantMessage: buildAssistantMessage({
385
- text,
386
- toolCalls,
384
+ if (!normalizedText && toolCalls.length === 0) {
385
+ if (hasTrailingToolContext(messages)) {
386
+ return {
387
+ text: '',
388
+ toolCalls: [],
389
+ usage: extractUsageObject(data),
390
+ incomplete: true
391
+ };
392
+ }
393
+ throw new Error('Gateway returned empty assistant response');
394
+ }
395
+
396
+ return {
397
+ text,
398
+ toolCalls,
399
+ usage: extractUsageObject(data),
400
+ assistantMessage: buildAssistantMessage({
401
+ text,
402
+ toolCalls,
387
403
  content: message.content ?? text,
388
404
  reasoningContent
389
405
  })
@@ -396,9 +412,10 @@ export async function createChatCompletionStream({
396
412
  model,
397
413
  messages,
398
414
  temperature = 0.2,
399
- tools,
400
- onTextDelta,
401
- onToolCallDelta,
415
+ tools,
416
+ onTextDelta,
417
+ onReasoningDelta,
418
+ onToolCallDelta,
402
419
  timeoutMs = 1800000,
403
420
  maxRetries = 2,
404
421
  signal: externalSignal
@@ -415,33 +432,46 @@ export async function createChatCompletionStream({
415
432
  externalSignal.addEventListener('abort', onAbort, { once: true });
416
433
  }
417
434
  }
418
- const payload = buildPayload({ model, temperature, messages, tools, stream: true });
419
- const response = await fetchWithRetry(buildChatCompletionsUrl(baseUrl), {
420
- method: 'POST',
421
- headers: createHeaders(apiKey),
422
- body: JSON.stringify(payload),
423
- signal: controller.signal
424
- }, { maxRetries });
425
- if (!response.ok || !response.body) {
426
- const text = await response.text().catch(() => '');
427
- throw new Error(`Gateway error ${response.status}: ${text || response.statusText}`);
428
- }
435
+ const url = buildChatCompletionsUrl(baseUrl);
436
+ const payload = buildPayload({ model, temperature, messages, tools, stream: true });
437
+ const buildRequest = (bodyPayload) => ({
438
+ method: 'POST',
439
+ headers: createHeaders(apiKey),
440
+ body: JSON.stringify(bodyPayload),
441
+ signal: controller.signal
442
+ });
443
+ let response = await fetchWithRetry(url, buildRequest(payload), { maxRetries });
444
+ if (!response.ok && payload.stream_options) {
445
+ const errorText = await response.text().catch(() => '');
446
+ if (/\b(stream_options|include_usage|unsupported|unknown|unrecognized|forbidden)\b/i.test(errorText)) {
447
+ const fallbackPayload = { ...payload };
448
+ delete fallbackPayload.stream_options;
449
+ response = await fetchWithRetry(url, buildRequest(fallbackPayload), { maxRetries });
450
+ } else {
451
+ throw new Error(`Gateway error ${response.status}: ${errorText || response.statusText}`);
452
+ }
453
+ }
454
+ if (!response.ok || !response.body) {
455
+ const text = await response.text().catch(() => '');
456
+ throw new Error(`Gateway error ${response.status}: ${text || response.statusText}`);
457
+ }
429
458
  let text = '';
430
459
  let reasoningContent = '';
431
460
  const toolCallsByIndex = new Map();
432
461
  let usage = null;
433
462
  let miniMaxStreamState = { rawContent: '', visibleText: '' };
434
463
 
435
- try {
436
- for await (const chunk of iterateSseEvents(response.body)) {
437
- usage = chunk?.usage || usage;
464
+ try {
465
+ for await (const chunk of iterateSseEvents(response.body)) {
466
+ usage = extractUsageObject(chunk) || usage;
438
467
  const choice0 = chunk?.choices?.[0] || {};
439
468
  const delta = choice0?.delta || {};
440
469
  const content = delta.content;
441
- const reasoningDelta = extractReasoningContent(delta.reasoning_content);
442
- if (reasoningDelta) {
443
- reasoningContent += reasoningDelta;
444
- }
470
+ const reasoningDelta = extractReasoningContent(delta.reasoning_content);
471
+ if (reasoningDelta) {
472
+ reasoningContent += reasoningDelta;
473
+ if (onReasoningDelta) onReasoningDelta(reasoningDelta);
474
+ }
445
475
  if (isMiniMaxModel(model)) {
446
476
  const next = nextMiniMaxVisibleChunk(miniMaxStreamState, content);
447
477
  miniMaxStreamState = next.nextState;