@ebowwa/coder 0.7.64 → 0.7.66

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101)
  1. package/dist/index.js +36233 -32
  2. package/dist/interfaces/ui/terminal/cli/index.js +34318 -158
  3. package/dist/interfaces/ui/terminal/native/README.md +53 -0
  4. package/dist/interfaces/ui/terminal/native/claude_code_native.darwin-x64.node +0 -0
  5. package/dist/interfaces/ui/terminal/native/claude_code_native.dylib +0 -0
  6. package/dist/interfaces/ui/terminal/native/index.d.ts +0 -0
  7. package/dist/interfaces/ui/terminal/native/index.darwin-arm64.node +0 -0
  8. package/dist/interfaces/ui/terminal/native/index.js +43 -0
  9. package/dist/interfaces/ui/terminal/native/index.node +0 -0
  10. package/dist/interfaces/ui/terminal/native/package.json +34 -0
  11. package/dist/native/README.md +53 -0
  12. package/dist/native/claude_code_native.darwin-x64.node +0 -0
  13. package/dist/native/claude_code_native.dylib +0 -0
  14. package/dist/native/index.d.ts +0 -480
  15. package/dist/native/index.darwin-arm64.node +0 -0
  16. package/dist/native/index.js +43 -1625
  17. package/dist/native/index.node +0 -0
  18. package/dist/native/package.json +34 -0
  19. package/native/index.darwin-arm64.node +0 -0
  20. package/native/index.js +33 -19
  21. package/package.json +3 -2
  22. package/packages/src/core/agent-loop/__tests__/compaction.test.ts +17 -14
  23. package/packages/src/core/agent-loop/compaction.ts +6 -2
  24. package/packages/src/core/agent-loop/index.ts +2 -0
  25. package/packages/src/core/agent-loop/loop-state.ts +1 -1
  26. package/packages/src/core/agent-loop/turn-executor.ts +4 -0
  27. package/packages/src/core/agent-loop/types.ts +4 -0
  28. package/packages/src/core/api-client-impl.ts +377 -176
  29. package/packages/src/core/cognitive-security/hooks.ts +2 -1
  30. package/packages/src/core/config/todo +7 -0
  31. package/packages/src/core/context/__tests__/integration.test.ts +334 -0
  32. package/packages/src/core/context/compaction.ts +170 -0
  33. package/packages/src/core/context/constants.ts +58 -0
  34. package/packages/src/core/context/extraction.ts +85 -0
  35. package/packages/src/core/context/index.ts +66 -0
  36. package/packages/src/core/context/summarization.ts +251 -0
  37. package/packages/src/core/context/token-estimation.ts +98 -0
  38. package/packages/src/core/context/types.ts +59 -0
  39. package/packages/src/core/models.ts +81 -4
  40. package/packages/src/core/normalizers/todo +5 -1
  41. package/packages/src/core/providers/README.md +230 -0
  42. package/packages/src/core/providers/__tests__/providers.test.ts +135 -0
  43. package/packages/src/core/providers/index.ts +419 -0
  44. package/packages/src/core/providers/types.ts +132 -0
  45. package/packages/src/core/retry.ts +10 -0
  46. package/packages/src/ecosystem/tools/index.ts +174 -0
  47. package/packages/src/index.ts +23 -2
  48. package/packages/src/interfaces/ui/index.ts +17 -20
  49. package/packages/src/interfaces/ui/spinner.ts +2 -2
  50. package/packages/src/interfaces/ui/terminal/bridge/index.ts +370 -0
  51. package/packages/src/interfaces/ui/terminal/bridge/ipc.ts +829 -0
  52. package/packages/src/interfaces/ui/terminal/bridge/screen-export.ts +968 -0
  53. package/packages/src/interfaces/ui/terminal/bridge/types.ts +226 -0
  54. package/packages/src/interfaces/ui/terminal/bridge/useBridge.ts +210 -0
  55. package/packages/src/interfaces/ui/terminal/cli/bootstrap.ts +132 -0
  56. package/packages/src/interfaces/ui/terminal/cli/index.ts +200 -13
  57. package/packages/src/interfaces/ui/terminal/cli/interactive/index.ts +110 -0
  58. package/packages/src/interfaces/ui/terminal/cli/interactive/input-handler.ts +402 -0
  59. package/packages/src/interfaces/ui/terminal/cli/interactive/interactive-runner.ts +820 -0
  60. package/packages/src/interfaces/ui/terminal/cli/interactive/message-store.ts +299 -0
  61. package/packages/src/interfaces/ui/terminal/cli/interactive/types.ts +274 -0
  62. package/packages/src/interfaces/ui/terminal/shared/index.ts +13 -0
  63. package/packages/src/interfaces/ui/terminal/shared/query.ts +9 -3
  64. package/packages/src/interfaces/ui/terminal/shared/setup.ts +5 -1
  65. package/packages/src/interfaces/ui/terminal/shared/spinner-frames.ts +73 -0
  66. package/packages/src/interfaces/ui/terminal/shared/status-line.ts +10 -2
  67. package/packages/src/native/index.ts +404 -27
  68. package/packages/src/native/tui_v2_types.ts +39 -0
  69. package/packages/src/teammates/coordination.test.ts +279 -0
  70. package/packages/src/teammates/coordination.ts +646 -0
  71. package/packages/src/teammates/index.ts +95 -25
  72. package/packages/src/teammates/integration.test.ts +272 -0
  73. package/packages/src/teammates/runner.test.ts +235 -0
  74. package/packages/src/teammates/runner.ts +750 -0
  75. package/packages/src/teammates/schemas.ts +673 -0
  76. package/packages/src/types/index.ts +1 -0
  77. package/packages/src/core/context-compaction.ts +0 -578
  78. package/packages/src/interfaces/ui/Screenshot 2026-03-02 at 9.23.10 PM.png +0 -0
  79. package/packages/src/interfaces/ui/Screenshot 2026-03-03 at 10.55.11 AM.png +0 -0
  80. package/packages/src/interfaces/ui/terminal/tui/HelpPanel.tsx +0 -262
  81. package/packages/src/interfaces/ui/terminal/tui/InputContext.tsx +0 -232
  82. package/packages/src/interfaces/ui/terminal/tui/InputField.tsx +0 -62
  83. package/packages/src/interfaces/ui/terminal/tui/InteractiveTUI.tsx +0 -537
  84. package/packages/src/interfaces/ui/terminal/tui/MessageArea.tsx +0 -107
  85. package/packages/src/interfaces/ui/terminal/tui/MessageStore.tsx +0 -240
  86. package/packages/src/interfaces/ui/terminal/tui/StatusBar.tsx +0 -54
  87. package/packages/src/interfaces/ui/terminal/tui/commands.ts +0 -438
  88. package/packages/src/interfaces/ui/terminal/tui/components/InteractiveElements.tsx +0 -584
  89. package/packages/src/interfaces/ui/terminal/tui/components/MultilineInput.tsx +0 -614
  90. package/packages/src/interfaces/ui/terminal/tui/components/PaneManager.tsx +0 -333
  91. package/packages/src/interfaces/ui/terminal/tui/components/Sidebar.tsx +0 -604
  92. package/packages/src/interfaces/ui/terminal/tui/components/index.ts +0 -118
  93. package/packages/src/interfaces/ui/terminal/tui/console.ts +0 -49
  94. package/packages/src/interfaces/ui/terminal/tui/index.ts +0 -90
  95. package/packages/src/interfaces/ui/terminal/tui/run.tsx +0 -42
  96. package/packages/src/interfaces/ui/terminal/tui/spinner.ts +0 -69
  97. package/packages/src/interfaces/ui/terminal/tui/tui-app.tsx +0 -390
  98. package/packages/src/interfaces/ui/terminal/tui/tui-footer.ts +0 -422
  99. package/packages/src/interfaces/ui/terminal/tui/types.ts +0 -186
  100. package/packages/src/interfaces/ui/terminal/tui/useInputHandler.ts +0 -104
  101. package/packages/src/interfaces/ui/terminal/tui/useNativeInput.ts +0 -239
@@ -1,5 +1,10 @@
1
1
  /**
2
2
  * API Client - SSE streaming for LLM APIs
3
+ *
4
+ * Supports multiple providers:
5
+ * - Zhipu (Z.AI / GLM models) - OpenAI format
6
+ * - MiniMax (M2.5) - Anthropic format
7
+ * - OpenAI (future)
3
8
  */
4
9
 
5
10
  import type {
@@ -33,6 +38,30 @@ import {
33
38
  DEFAULT_MODEL,
34
39
  supportsExtendedThinking,
35
40
  } from "./models.js";
41
+ import {
42
+ resolveProvider,
43
+ getProviderForModel,
44
+ recordProviderSuccess,
45
+ recordProviderFailure,
46
+ type ProviderName,
47
+ type ProviderConfig,
48
+ } from "./providers/index.js";
49
+
50
/**
 * Translate Anthropic tool definitions into the OpenAI function-calling shape.
 *
 * Anthropic: `{ name, description, input_schema }`
 * OpenAI:    `{ type: "function", function: { name, description, parameters } }`
 *
 * @param tools - Tool definitions in Anthropic format.
 * @returns One OpenAI-style entry per input tool, in the same order.
 */
function convertToolsToOpenAIFormat(tools: APITool[]): unknown[] {
  const converted: unknown[] = [];
  for (const { name, description, input_schema: parameters } of tools) {
    // The JSON schema is passed through untouched; only the envelope differs.
    converted.push({
      type: "function",
      function: { name, description, parameters },
    });
  }
  return converted;
}
36
65
 
37
66
  export interface StreamOptions {
38
67
  apiKey: string;
@@ -50,6 +79,8 @@ export interface StreamOptions {
50
79
  /** Called when redacted thinking is received (data is base64) */
51
80
  onRedactedThinking?: (data: string) => void;
52
81
  onToolUse?: (toolUse: { id: string; name: string; input: unknown }) => void;
82
+ /** Called when a retry is about to start - UI should reset streaming state */
83
+ onRetryStart?: () => void;
53
84
  signal?: AbortSignal;
54
85
  }
55
86
 
@@ -197,153 +228,55 @@ export function calculateCacheMetrics(usage: UsageMetrics): CacheMetrics {
197
228
  }
198
229
 
199
230
  /**
200
- * Create a streaming message request to Anthropic API
231
+ * Callbacks to emit during streaming (passed in, not buffered)
201
232
  */
202
- export async function createMessageStream(
203
- messages: Message[],
204
- options: StreamOptions
205
- ): Promise<StreamResult> {
206
- const {
207
- apiKey,
208
- model = "claude-sonnet-4-6",
209
- maxTokens = 4096,
210
- tools,
211
- systemPrompt,
212
- cacheConfig = DEFAULT_CACHE_CONFIG,
213
- thinking,
214
- extendedThinking,
215
- onToken,
216
- onThinking,
217
- onRedactedThinking,
218
- onToolUse,
219
- signal,
220
- } = options;
221
-
222
- const startTime = Date.now();
223
- let ttft = 0;
224
- let firstToken = true;
225
- let totalThinkingTokens = 0;
226
-
227
- // Build cached messages
228
- const cachedMessages = buildCachedMessages(messages, cacheConfig);
229
-
230
- // Build system prompt with cache control
231
- const cachedSystemPrompt = buildSystemPrompt(systemPrompt, cacheConfig);
232
-
233
- // Build request
234
- const request: APIRequest = {
235
- model,
236
- max_tokens: maxTokens,
237
- messages: cachedMessages.map((m) => ({
238
- role: m.role,
239
- content: m.content,
240
- })),
241
- stream: true,
242
- };
243
-
244
- // Add system prompt if provided
245
- if (cachedSystemPrompt) {
246
- request.system = cachedSystemPrompt;
247
- }
248
-
249
- // Add tools if provided (with optional caching)
250
- if (tools && tools.length > 0) {
251
- request.tools = tools;
252
- }
253
-
254
- // Determine API endpoint (support custom base URL for GLM, etc.)
255
- const baseUrl = process.env.ANTHROPIC_BASE_URL || "https://api.anthropic.com";
256
- const apiEndpoint = `${baseUrl}/v1/messages`;
257
-
258
- // Build headers
259
- const headers: Record<string, string> = {
260
- "Content-Type": "application/json",
261
- "x-api-key": apiKey,
262
- "anthropic-version": "2023-06-01",
263
- };
264
-
265
- // Determine thinking configuration
266
- const shouldUseExtendedThinking =
267
- (extendedThinking?.enabled ?? false) ||
268
- (thinking && thinking.type !== "disabled");
269
-
270
- if (shouldUseExtendedThinking && supportsExtendedThinking(model)) {
271
- // Calculate budget tokens
272
- let budgetTokens: number;
273
-
274
- if (extendedThinking?.budgetTokens) {
275
- budgetTokens = extendedThinking.budgetTokens;
276
- } else if (thinking?.type === "enabled") {
277
- budgetTokens = thinking.budget_tokens;
278
- } else {
279
- // Use effort level to determine budget
280
- const effort = extendedThinking?.effort || "medium";
281
- budgetTokens = calculateBudgetTokens(
282
- {
283
- enabled: true,
284
- effort,
285
- modelMultiplier: model.includes("opus") ? 2 : 1,
286
- },
287
- model
288
- );
289
- }
290
-
291
- // Clamp budget to valid range
292
- budgetTokens = Math.max(1024, Math.min(budgetTokens, 100000));
293
-
294
- request.thinking = {
295
- type: "enabled",
296
- budget_tokens: budgetTokens,
297
- };
298
-
299
- // Add beta headers for extended thinking features
300
- const betaFeatures: string[] = ["extended-thinking-2025-01-24"];
301
-
302
- // Add interleaved thinking support if enabled
303
- if (extendedThinking?.interleaved !== false) {
304
- betaFeatures.push("interleaved-thinking-2025-01-24");
305
- }
306
-
307
- headers["anthropic-beta"] = betaFeatures.join(",");
308
- } else {
309
- // Default beta header
310
- headers["anthropic-beta"] = "max-tokens-3-5-sonnet-2024-07-15";
311
- }
312
-
313
- // Make API request with retry logic
314
- const retryOptions: RetryOptions = {
315
- maxRetries: 3,
316
- baseDelayMs: 1000,
317
- maxDelayMs: 30000,
318
- retryableStatusCodes: [429, 500, 502, 503, 504, 529],
319
- onRetry: (attempt, error, delayMs) => {
320
- console.log(`\x1b[33mAPI retry ${attempt}/3 after ${delayMs}ms: ${error.message}\x1b[0m`);
321
- },
322
- };
233
interface StreamCallbacks {
  /** Receives each text delta as soon as it is parsed from the stream. */
  onToken?: (text: string) => void;
  /** Receives each thinking delta as soon as it is parsed from the stream. */
  onThinking?: (thinking: string) => void;
  /** Receives each redacted-thinking delta (the raw `data` chunk) as it arrives. */
  onRedactedThinking?: (data: string) => void;
  /**
   * Fired once a tool-use block has been finalized. `input` is the parsed JSON
   * arguments, or `{}` when the accumulated argument text fails to parse.
   */
  onToolUse?: (toolUse: { id: string; name: string; input: unknown }) => void;
  /**
   * NOTE(review): createMessageStream does not pass this field; it invokes
   * `options.onRetryStart` from its own retry hook instead. Confirm whether
   * anything reads it here.
   */
  onRetryStart?: () => void;
}
323
240
 
324
- const response = await withRetry(
325
- async () => {
326
- const res = await fetch(apiEndpoint, {
327
- method: "POST",
328
- headers,
329
- body: JSON.stringify(request),
330
- signal,
331
- });
332
-
333
- // Throw for retryable status codes so withRetry can handle them
334
- if (!res.ok && retryOptions.retryableStatusCodes?.includes(res.status)) {
335
- const errorText = await res.text();
336
- throw new Error(`API error: ${res.status} - ${errorText}`);
337
- }
241
/**
 * Internal result from a single stream attempt.
 *
 * Returned by `executeStreamAttempt` and consumed by `createMessageStream`,
 * which derives cost, cache metrics and timing from it.
 */
interface StreamAttemptResult {
  /**
   * The assembled response envelope. Typed as nullable; the visible return path
   * throws before returning when no message was built — TODO confirm no other
   * path yields null.
   */
  message: APIResponse | null;
  /** Content blocks collected during the attempt; the caller assigns these to `message.content`. */
  content: ContentBlock[];
  /** Token usage reported by the API for this attempt. */
  usage: UsageMetrics;
  /** Rough thinking-token estimate (accumulated as ceil(chars / 4) per thinking delta). */
  thinkingTokens: number;
  /** Milliseconds from the caller's start time to the first token; 0 when none was timed. */
  ttftMs: number;
}
338
251
 
339
- return res;
340
- },
341
- retryOptions
342
- );
252
+ /**
253
+ * Execute a single streaming API attempt
254
+ * Emits callbacks in real-time for streaming display
255
+ */
256
+ async function executeStreamAttempt(
257
+ request: APIRequest,
258
+ headers: Record<string, string>,
259
+ apiEndpoint: string,
260
+ signal: AbortSignal | undefined,
261
+ model: string,
262
+ retryableStatusCodes: number[],
263
+ startTime: number,
264
+ callbacks: StreamCallbacks
265
+ ): Promise<StreamAttemptResult> {
266
+ const response = await fetch(apiEndpoint, {
267
+ method: "POST",
268
+ headers,
269
+ body: JSON.stringify(request),
270
+ signal,
271
+ });
343
272
 
273
+ // Throw for retryable status codes so withRetry can handle them
344
274
  if (!response.ok) {
345
- const error = await response.text();
346
- throw new Error(`API error: ${response.status} - ${error}`);
275
+ const errorText = await response.text();
276
+ if (retryableStatusCodes.includes(response.status)) {
277
+ throw new Error(`API error: ${response.status} - ${errorText}`);
278
+ }
279
+ throw new Error(`API error: ${response.status} - ${errorText}`);
347
280
  }
348
281
 
349
282
  if (!response.body) {
@@ -363,7 +296,9 @@ export async function createMessageStream(
363
296
  let currentToolUseBlock: ToolUseBlock | null = null;
364
297
  let toolUseInput = "";
365
298
 
366
- const buffer = "";
299
+ let ttft = 0;
300
+ let firstToken = true;
301
+ let totalThinkingTokens = 0;
367
302
 
368
303
  try {
369
304
  let buffer = "";
@@ -440,7 +375,7 @@ export async function createMessageStream(
440
375
  if (delta.type === "text_delta" && currentTextBlock) {
441
376
  const text = delta.text as string;
442
377
  currentTextBlock.text += text;
443
- onToken?.(text);
378
+ callbacks.onToken?.(text); // Emit in real-time
444
379
 
445
380
  if (firstToken) {
446
381
  ttft = Date.now() - startTime;
@@ -449,14 +384,13 @@ export async function createMessageStream(
449
384
  } else if (delta.type === "thinking_delta" && currentThinkingBlock) {
450
385
  const thinking = delta.thinking as string;
451
386
  currentThinkingBlock.thinking += thinking;
452
- onThinking?.(thinking);
453
- totalThinkingTokens += Math.ceil(thinking.length / 4); // Rough estimate
387
+ callbacks.onThinking?.(thinking); // Emit in real-time
388
+ totalThinkingTokens += Math.ceil(thinking.length / 4);
454
389
  } else if (delta.type === "redacted_thinking_delta" && currentRedactedThinkingBlock) {
455
- // Handle redacted thinking deltas
456
390
  const redactedData = delta.data as string;
457
391
  currentRedactedThinkingBlock.data += redactedData;
458
- onRedactedThinking?.(redactedData);
459
- totalThinkingTokens += Math.ceil(redactedData.length / 4); // Rough estimate
392
+ callbacks.onRedactedThinking?.(redactedData); // Emit in real-time
393
+ totalThinkingTokens += Math.ceil(redactedData.length / 4);
460
394
  } else if (delta.type === "input_json_delta" && currentToolUseBlock) {
461
395
  toolUseInput += delta.partial_json as string;
462
396
  }
@@ -464,8 +398,6 @@ export async function createMessageStream(
464
398
  }
465
399
 
466
400
  case "content_block_stop": {
467
- // content_block_stop event has { index: number }, not the block itself
468
- // We need to check which current block is active and push it
469
401
  if (currentTextBlock !== null) {
470
402
  currentContent.push(currentTextBlock);
471
403
  currentTextBlock = null;
@@ -474,7 +406,6 @@ export async function createMessageStream(
474
406
  currentThinkingBlock = null;
475
407
  } else if (currentRedactedThinkingBlock !== null) {
476
408
  currentContent.push(currentRedactedThinkingBlock);
477
- onRedactedThinking?.(currentRedactedThinkingBlock.data);
478
409
  currentRedactedThinkingBlock = null;
479
410
  } else if (currentToolUseBlock !== null) {
480
411
  try {
@@ -483,11 +414,11 @@ export async function createMessageStream(
483
414
  currentToolUseBlock.input = {};
484
415
  }
485
416
  currentContent.push(currentToolUseBlock);
486
- onToolUse?.({
417
+ callbacks.onToolUse?.({
487
418
  id: currentToolUseBlock.id,
488
419
  name: currentToolUseBlock.name,
489
420
  input: currentToolUseBlock.input,
490
- });
421
+ }); // Emit in real-time
491
422
  currentToolUseBlock = null;
492
423
  toolUseInput = "";
493
424
  }
@@ -506,15 +437,27 @@ export async function createMessageStream(
506
437
  }
507
438
 
508
439
  case "message_stop":
509
- // Message complete
510
440
  break;
511
441
 
512
442
  // OpenAI/Z.AI compatible format (for GLM-5, etc.)
513
- // OpenAI streaming sends chunks with choices array
514
443
  default: {
515
- // Check for OpenAI format: { choices: [{ delta: { content: "..." } }], usage: {...} }
516
444
  if (event.choices && Array.isArray(event.choices)) {
517
- const choice = event.choices[0] as { delta?: { content?: string }; finish_reason?: string } | undefined;
445
+ const choice = event.choices[0] as {
446
+ delta?: {
447
+ content?: string;
448
+ tool_calls?: Array<{
449
+ id?: string;
450
+ index?: number;
451
+ function?: {
452
+ name?: string;
453
+ arguments?: string;
454
+ };
455
+ }>;
456
+ };
457
+ finish_reason?: string;
458
+ } | undefined;
459
+
460
+ // Handle text content
518
461
  if (choice?.delta?.content) {
519
462
  const text = choice.delta.content;
520
463
  if (currentTextBlock) {
@@ -522,18 +465,93 @@ export async function createMessageStream(
522
465
  } else {
523
466
  currentTextBlock = { type: "text", text };
524
467
  }
525
- onToken?.(text);
468
+ callbacks.onToken?.(text); // Emit in real-time
526
469
  if (firstToken) {
527
470
  ttft = Date.now() - startTime;
528
471
  firstToken = false;
529
472
  }
530
473
  }
531
- // Check for finish
474
+
475
+ // Handle tool calls (OpenAI format)
476
+ if (choice?.delta?.tool_calls && Array.isArray(choice.delta.tool_calls)) {
477
+ for (const toolCallDelta of choice.delta.tool_calls) {
478
+ const index = toolCallDelta.index ?? 0;
479
+ const toolCallId = toolCallDelta.id;
480
+
481
+ // Start a new tool call if we got an ID
482
+ if (toolCallId) {
483
+ // Finalize any existing tool use block at this index
484
+ if (currentToolUseBlock) {
485
+ try {
486
+ currentToolUseBlock.input = JSON.parse(toolUseInput);
487
+ } catch {
488
+ currentToolUseBlock.input = {};
489
+ }
490
+ currentContent.push(currentToolUseBlock);
491
+ callbacks.onToolUse?.({
492
+ id: currentToolUseBlock.id,
493
+ name: currentToolUseBlock.name,
494
+ input: currentToolUseBlock.input,
495
+ });
496
+ }
497
+
498
+ // Start new tool use block
499
+ currentToolUseBlock = {
500
+ type: "tool_use",
501
+ id: toolCallId,
502
+ name: toolCallDelta.function?.name || "",
503
+ input: {},
504
+ };
505
+ toolUseInput = "";
506
+
507
+ if (firstToken) {
508
+ ttft = Date.now() - startTime;
509
+ firstToken = false;
510
+ }
511
+ }
512
+
513
+ // Accumulate arguments for current tool call
514
+ if (toolCallDelta.function?.arguments && currentToolUseBlock) {
515
+ toolUseInput += toolCallDelta.function.arguments;
516
+ }
517
+ }
518
+ }
519
+
520
+ // Handle finish reason
532
521
  if (choice?.finish_reason) {
522
+ // Finalize any pending text block
533
523
  if (currentTextBlock) {
534
524
  currentContent.push(currentTextBlock);
535
525
  currentTextBlock = null;
536
526
  }
527
+
528
+ // Finalize any pending tool use block
529
+ if (currentToolUseBlock) {
530
+ try {
531
+ currentToolUseBlock.input = JSON.parse(toolUseInput);
532
+ } catch {
533
+ currentToolUseBlock.input = {};
534
+ }
535
+ currentContent.push(currentToolUseBlock);
536
+ callbacks.onToolUse?.({
537
+ id: currentToolUseBlock.id,
538
+ name: currentToolUseBlock.name,
539
+ input: currentToolUseBlock.input,
540
+ });
541
+ currentToolUseBlock = null;
542
+ toolUseInput = "";
543
+ }
544
+
545
+ // Map finish reasons
546
+ let stopReason: StopReason = "end_turn";
547
+ if (choice.finish_reason === "tool_calls" || choice.finish_reason === "function_call") {
548
+ stopReason = "tool_use";
549
+ } else if (choice.finish_reason === "length") {
550
+ stopReason = "max_tokens";
551
+ } else if (choice.finish_reason === "stop") {
552
+ stopReason = "end_turn";
553
+ }
554
+
537
555
  if (!message) {
538
556
  message = {
539
557
  id: `msg-${Date.now()}`,
@@ -541,16 +559,15 @@ export async function createMessageStream(
541
559
  role: "assistant",
542
560
  content: currentContent,
543
561
  model: model,
544
- stop_reason: (choice.finish_reason === "stop" ? "end_turn" : choice.finish_reason === "length" ? "max_tokens" : "end_turn") as StopReason,
562
+ stop_reason: stopReason,
545
563
  stop_sequence: null,
546
564
  usage: { input_tokens: 0, output_tokens: 0 },
547
565
  };
548
566
  } else {
549
- message.stop_reason = (choice.finish_reason === "stop" ? "end_turn" : choice.finish_reason === "length" ? "max_tokens" : "end_turn") as StopReason;
567
+ message.stop_reason = stopReason;
550
568
  }
551
569
  }
552
570
  }
553
- // OpenAI usage format (often in final chunk)
554
571
  if (event.usage) {
555
572
  const openaiUsage = event.usage as { prompt_tokens?: number; completion_tokens?: number; total_tokens?: number };
556
573
  usage.input_tokens = openaiUsage.prompt_tokens || 0;
@@ -560,11 +577,13 @@ export async function createMessageStream(
560
577
  }
561
578
  }
562
579
  } catch (err: unknown) {
563
- // Log the parse error with more detail
580
+ // Only rethrow if it's an API error, not a JSON parse error
581
+ if (err instanceof Error && err.message.startsWith("API error:")) {
582
+ throw err;
583
+ }
564
584
  if (process.env.DEBUG_API === '1') {
565
585
  console.error('\x1b[91m[DEBUG] JSON parse error:', err);
566
586
  console.error('\x1b[91m[DEBUG] Error parsing SSE data:', data.substring(0, 200));
567
- console.error('\x1b[91m[DEBUG] Original buffer:', buffer.substring(0, 500));
568
587
  }
569
588
  }
570
589
  }
@@ -573,8 +592,8 @@ export async function createMessageStream(
573
592
  reader.releaseLock();
574
593
  }
575
594
 
595
+ // Handle "No message received" case - this is retryable
576
596
  if (!message) {
577
- // If we received content via OpenAI format but no message_start, create a message
578
597
  if (currentContent.length > 0) {
579
598
  message = {
580
599
  id: `msg-${Date.now()}`,
@@ -587,31 +606,213 @@ export async function createMessageStream(
587
606
  usage: { input_tokens: 0, output_tokens: 0 },
588
607
  };
589
608
  } else {
590
- // Debug: Log what we did receive
591
- if (process.env.DEBUG_API === '1') {
592
- console.log('\x1b[91m[DEBUG] No message_start event received. Buffer:\x1b[0m', buffer.substring(0, 500));
593
- }
609
+ // This is a transient error - throw to trigger retry
594
610
  throw new Error("No message received from API");
595
611
  }
596
612
  }
597
613
 
598
- message.content = currentContent;
614
+ return {
615
+ message,
616
+ content: currentContent,
617
+ usage,
618
+ thinkingTokens: totalThinkingTokens,
619
+ ttftMs: ttft,
620
+ };
621
+ }
622
+
623
/**
 * Create a streaming message request against the provider that serves `model`.
 *
 * The provider is resolved from the model name. When one is found, its endpoint,
 * auth header and wire format ("anthropic" or "openai") are used; otherwise the
 * legacy path posts to `${ANTHROPIC_BASE_URL || "https://api.anthropic.com"}/v1/messages`
 * with `options.apiKey`. The whole attempt (fetch + stream parsing) runs inside
 * `withRetry`, so stream-level failures are retried as well as HTTP ones.
 *
 * Streaming callbacks (`onToken`, `onThinking`, `onRedactedThinking`, `onToolUse`)
 * fire in real time during every attempt. `onRetryStart` fires before each retry
 * so the UI can discard partial output from the failed attempt.
 *
 * @param messages - Conversation history to send.
 * @param options - API key, model, token limit, tools, thinking settings, callbacks, abort signal.
 * @returns The final message plus usage, cache metrics, cost and timing. `ttftMs`
 *          falls back to the total duration when no first token was timed.
 * @throws Error when the attempt fails and is not (or no longer) retried —
 *         presumably `withRetry` rethrows the last error; verify against retry.ts.
 */
export async function createMessageStream(
  messages: Message[],
  options: StreamOptions
): Promise<StreamResult> {
  const {
    apiKey,
    model = "claude-sonnet-4-6",
    maxTokens = 4096,
    tools,
    systemPrompt,
    cacheConfig = DEFAULT_CACHE_CONFIG,
    thinking,
    extendedThinking,
    onToken,
    onThinking,
    onRedactedThinking,
    onToolUse,
    onRetryStart,
    signal,
  } = options;

  // Shared across retries, so duration and TTFT include any backoff time.
  const startTime = Date.now();

  // Build cached messages
  const cachedMessages = buildCachedMessages(messages, cacheConfig);

  // Build system prompt with cache control
  const cachedSystemPrompt = buildSystemPrompt(systemPrompt, cacheConfig);

  // Build request
  const request: APIRequest = {
    model,
    max_tokens: maxTokens,
    messages: cachedMessages.map((m) => ({
      role: m.role,
      content: m.content,
    })),
    stream: true,
  };

  if (cachedSystemPrompt) {
    request.system = cachedSystemPrompt;
  }

  // Tools are attached further down, once the wire format is known.

  // Resolve provider based on model name
  const providerInfo = resolveProvider(model);

  // Determine API endpoint and headers based on provider
  let apiEndpoint: string;
  let headers: Record<string, string>;
  let apiFormat: "anthropic" | "openai";

  if (providerInfo) {
    // Use provider-specific configuration
    apiEndpoint = providerInfo.endpoint;
    apiFormat = providerInfo.config.format;

    if (apiFormat === "anthropic") {
      // Anthropic/MiniMax format: raw key in the provider's auth header
      headers = {
        "Content-Type": "application/json",
        [providerInfo.config.authHeader]: providerInfo.apiKey,
        "anthropic-version": "2023-06-01",
      };
    } else {
      // OpenAI/Zhipu format: bearer token
      headers = {
        "Content-Type": "application/json",
        [providerInfo.config.authHeader]: `Bearer ${providerInfo.apiKey}`,
      };
    }
  } else {
    // Fallback to environment-based configuration (legacy)
    const baseUrl = process.env.ANTHROPIC_BASE_URL || "https://api.anthropic.com";
    apiEndpoint = `${baseUrl}/v1/messages`;
    apiFormat = "anthropic";

    headers = {
      "Content-Type": "application/json",
      "x-api-key": apiKey,
      "anthropic-version": "2023-06-01",
    };
  }

  // Set tools with format conversion if needed
  if (tools && tools.length > 0) {
    if (apiFormat === "openai") {
      // Convert Anthropic-style tools to OpenAI format
      // Cast needed because OpenAI format differs from APITool
      (request as unknown as Record<string, unknown>).tools = convertToolsToOpenAIFormat(tools);
    } else {
      // Keep Anthropic format as-is
      request.tools = tools;
    }
  }

  const shouldUseExtendedThinking =
    (extendedThinking?.enabled ?? false) ||
    (thinking && thinking.type !== "disabled");

  if (shouldUseExtendedThinking && supportsExtendedThinking(model)) {
    let budgetTokens: number;

    if (extendedThinking?.budgetTokens) {
      // An explicit budget wins over everything else.
      budgetTokens = extendedThinking.budgetTokens;
    } else if (thinking?.type === "enabled") {
      budgetTokens = thinking.budget_tokens;
    } else {
      // Otherwise derive the budget from the effort level.
      const effort = extendedThinking?.effort || "medium";
      budgetTokens = calculateBudgetTokens(
        { enabled: true, effort, modelMultiplier: model.includes("opus") ? 2 : 1 },
        model
      );
    }

    // Clamp budget to the accepted range
    budgetTokens = Math.max(1024, Math.min(budgetTokens, 100000));

    request.thinking = { type: "enabled", budget_tokens: budgetTokens };

    const betaFeatures: string[] = ["extended-thinking-2025-01-24"];
    if (extendedThinking?.interleaved !== false) {
      betaFeatures.push("interleaved-thinking-2025-01-24");
    }
    headers["anthropic-beta"] = betaFeatures.join(",");
  } else if (apiFormat === "anthropic") {
    headers["anthropic-beta"] = "max-tokens-3-5-sonnet-2024-07-15";
  }

  // Single source of truth for the retry limit, so the log line below cannot
  // drift from the configured value.
  const maxRetries = 10;

  // Retry options - covers the entire fetch + stream parsing attempt
  const retryOptions: RetryOptions = {
    maxRetries,
    baseDelayMs: 1000,
    maxDelayMs: 60000,
    retryableStatusCodes: [429, 500, 502, 503, 504, 529],
    onRetry: (attempt, error, delayMs) => {
      console.log(`\x1b[33mAPI retry ${attempt}/${maxRetries} after ${delayMs}ms: ${error.message}\x1b[0m`);
      // Notify UI to reset streaming state before retry
      onRetryStart?.();
      // Track provider failure on retry
      const providerName = getProviderForModel(model);
      if (providerName) {
        recordProviderFailure(providerName);
      }
    },
  };

  // Execute with retry - wraps entire fetch + stream parsing.
  // Callbacks are emitted in real time during streaming.
  const result = await withRetry(
    () => executeStreamAttempt(
      request,
      headers,
      apiEndpoint,
      signal,
      model,
      retryOptions.retryableStatusCodes ?? [],
      startTime,
      { onToken, onThinking, onRedactedThinking, onToolUse }
    ),
    retryOptions
  );

  // Build final message. The result type allows null, so guard explicitly
  // rather than asserting non-null.
  const message = result.message;
  if (!message) {
    throw new Error("No message received from API");
  }
  message.content = result.content;

  // Calculate cost and cache metrics
  const { costUSD, estimatedSavingsUSD } = calculateCost(model, result.usage);
  const cacheMetrics = calculateCacheMetrics(result.usage);
  cacheMetrics.estimatedSavingsUSD = estimatedSavingsUSD;

  const durationMs = Date.now() - startTime;

  // Track provider health on success
  const providerName = getProviderForModel(model);
  if (providerName) {
    recordProviderSuccess(providerName, durationMs);
  }

  return {
    message,
    usage: result.usage,
    cacheMetrics,
    costUSD,
    durationMs,
    // A TTFT of 0 means no token was timed; report the full duration instead.
    ttftMs: result.ttftMs || durationMs,
    thinkingTokens: result.thinkingTokens,
  };
}
617
818