@ebowwa/coder 0.7.64 → 0.7.66
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +36233 -32
- package/dist/interfaces/ui/terminal/cli/index.js +34318 -158
- package/dist/interfaces/ui/terminal/native/README.md +53 -0
- package/dist/interfaces/ui/terminal/native/claude_code_native.darwin-x64.node +0 -0
- package/dist/interfaces/ui/terminal/native/claude_code_native.dylib +0 -0
- package/dist/interfaces/ui/terminal/native/index.d.ts +0 -0
- package/dist/interfaces/ui/terminal/native/index.darwin-arm64.node +0 -0
- package/dist/interfaces/ui/terminal/native/index.js +43 -0
- package/dist/interfaces/ui/terminal/native/index.node +0 -0
- package/dist/interfaces/ui/terminal/native/package.json +34 -0
- package/dist/native/README.md +53 -0
- package/dist/native/claude_code_native.darwin-x64.node +0 -0
- package/dist/native/claude_code_native.dylib +0 -0
- package/dist/native/index.d.ts +0 -480
- package/dist/native/index.darwin-arm64.node +0 -0
- package/dist/native/index.js +43 -1625
- package/dist/native/index.node +0 -0
- package/dist/native/package.json +34 -0
- package/native/index.darwin-arm64.node +0 -0
- package/native/index.js +33 -19
- package/package.json +3 -2
- package/packages/src/core/agent-loop/__tests__/compaction.test.ts +17 -14
- package/packages/src/core/agent-loop/compaction.ts +6 -2
- package/packages/src/core/agent-loop/index.ts +2 -0
- package/packages/src/core/agent-loop/loop-state.ts +1 -1
- package/packages/src/core/agent-loop/turn-executor.ts +4 -0
- package/packages/src/core/agent-loop/types.ts +4 -0
- package/packages/src/core/api-client-impl.ts +377 -176
- package/packages/src/core/cognitive-security/hooks.ts +2 -1
- package/packages/src/core/config/todo +7 -0
- package/packages/src/core/context/__tests__/integration.test.ts +334 -0
- package/packages/src/core/context/compaction.ts +170 -0
- package/packages/src/core/context/constants.ts +58 -0
- package/packages/src/core/context/extraction.ts +85 -0
- package/packages/src/core/context/index.ts +66 -0
- package/packages/src/core/context/summarization.ts +251 -0
- package/packages/src/core/context/token-estimation.ts +98 -0
- package/packages/src/core/context/types.ts +59 -0
- package/packages/src/core/models.ts +81 -4
- package/packages/src/core/normalizers/todo +5 -1
- package/packages/src/core/providers/README.md +230 -0
- package/packages/src/core/providers/__tests__/providers.test.ts +135 -0
- package/packages/src/core/providers/index.ts +419 -0
- package/packages/src/core/providers/types.ts +132 -0
- package/packages/src/core/retry.ts +10 -0
- package/packages/src/ecosystem/tools/index.ts +174 -0
- package/packages/src/index.ts +23 -2
- package/packages/src/interfaces/ui/index.ts +17 -20
- package/packages/src/interfaces/ui/spinner.ts +2 -2
- package/packages/src/interfaces/ui/terminal/bridge/index.ts +370 -0
- package/packages/src/interfaces/ui/terminal/bridge/ipc.ts +829 -0
- package/packages/src/interfaces/ui/terminal/bridge/screen-export.ts +968 -0
- package/packages/src/interfaces/ui/terminal/bridge/types.ts +226 -0
- package/packages/src/interfaces/ui/terminal/bridge/useBridge.ts +210 -0
- package/packages/src/interfaces/ui/terminal/cli/bootstrap.ts +132 -0
- package/packages/src/interfaces/ui/terminal/cli/index.ts +200 -13
- package/packages/src/interfaces/ui/terminal/cli/interactive/index.ts +110 -0
- package/packages/src/interfaces/ui/terminal/cli/interactive/input-handler.ts +402 -0
- package/packages/src/interfaces/ui/terminal/cli/interactive/interactive-runner.ts +820 -0
- package/packages/src/interfaces/ui/terminal/cli/interactive/message-store.ts +299 -0
- package/packages/src/interfaces/ui/terminal/cli/interactive/types.ts +274 -0
- package/packages/src/interfaces/ui/terminal/shared/index.ts +13 -0
- package/packages/src/interfaces/ui/terminal/shared/query.ts +9 -3
- package/packages/src/interfaces/ui/terminal/shared/setup.ts +5 -1
- package/packages/src/interfaces/ui/terminal/shared/spinner-frames.ts +73 -0
- package/packages/src/interfaces/ui/terminal/shared/status-line.ts +10 -2
- package/packages/src/native/index.ts +404 -27
- package/packages/src/native/tui_v2_types.ts +39 -0
- package/packages/src/teammates/coordination.test.ts +279 -0
- package/packages/src/teammates/coordination.ts +646 -0
- package/packages/src/teammates/index.ts +95 -25
- package/packages/src/teammates/integration.test.ts +272 -0
- package/packages/src/teammates/runner.test.ts +235 -0
- package/packages/src/teammates/runner.ts +750 -0
- package/packages/src/teammates/schemas.ts +673 -0
- package/packages/src/types/index.ts +1 -0
- package/packages/src/core/context-compaction.ts +0 -578
- package/packages/src/interfaces/ui/Screenshot 2026-03-02 at 9.23.10 PM.png +0 -0
- package/packages/src/interfaces/ui/Screenshot 2026-03-03 at 10.55.11 AM.png +0 -0
- package/packages/src/interfaces/ui/terminal/tui/HelpPanel.tsx +0 -262
- package/packages/src/interfaces/ui/terminal/tui/InputContext.tsx +0 -232
- package/packages/src/interfaces/ui/terminal/tui/InputField.tsx +0 -62
- package/packages/src/interfaces/ui/terminal/tui/InteractiveTUI.tsx +0 -537
- package/packages/src/interfaces/ui/terminal/tui/MessageArea.tsx +0 -107
- package/packages/src/interfaces/ui/terminal/tui/MessageStore.tsx +0 -240
- package/packages/src/interfaces/ui/terminal/tui/StatusBar.tsx +0 -54
- package/packages/src/interfaces/ui/terminal/tui/commands.ts +0 -438
- package/packages/src/interfaces/ui/terminal/tui/components/InteractiveElements.tsx +0 -584
- package/packages/src/interfaces/ui/terminal/tui/components/MultilineInput.tsx +0 -614
- package/packages/src/interfaces/ui/terminal/tui/components/PaneManager.tsx +0 -333
- package/packages/src/interfaces/ui/terminal/tui/components/Sidebar.tsx +0 -604
- package/packages/src/interfaces/ui/terminal/tui/components/index.ts +0 -118
- package/packages/src/interfaces/ui/terminal/tui/console.ts +0 -49
- package/packages/src/interfaces/ui/terminal/tui/index.ts +0 -90
- package/packages/src/interfaces/ui/terminal/tui/run.tsx +0 -42
- package/packages/src/interfaces/ui/terminal/tui/spinner.ts +0 -69
- package/packages/src/interfaces/ui/terminal/tui/tui-app.tsx +0 -390
- package/packages/src/interfaces/ui/terminal/tui/tui-footer.ts +0 -422
- package/packages/src/interfaces/ui/terminal/tui/types.ts +0 -186
- package/packages/src/interfaces/ui/terminal/tui/useInputHandler.ts +0 -104
- package/packages/src/interfaces/ui/terminal/tui/useNativeInput.ts +0 -239
|
@@ -1,5 +1,10 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* API Client - SSE streaming for LLM APIs
|
|
3
|
+
*
|
|
4
|
+
* Supports multiple providers:
|
|
5
|
+
* - Zhipu (Z.AI / GLM models) - OpenAI format
|
|
6
|
+
* - MiniMax (M2.5) - Anthropic format
|
|
7
|
+
* - OpenAI (future)
|
|
3
8
|
*/
|
|
4
9
|
|
|
5
10
|
import type {
|
|
@@ -33,6 +38,30 @@ import {
|
|
|
33
38
|
DEFAULT_MODEL,
|
|
34
39
|
supportsExtendedThinking,
|
|
35
40
|
} from "./models.js";
|
|
41
|
+
import {
|
|
42
|
+
resolveProvider,
|
|
43
|
+
getProviderForModel,
|
|
44
|
+
recordProviderSuccess,
|
|
45
|
+
recordProviderFailure,
|
|
46
|
+
type ProviderName,
|
|
47
|
+
type ProviderConfig,
|
|
48
|
+
} from "./providers/index.js";
|
|
49
|
+
|
|
50
|
+
/**
|
|
51
|
+
* Convert Anthropic-style tools to OpenAI-style tools
|
|
52
|
+
* Anthropic: { name, description, input_schema }
|
|
53
|
+
* OpenAI: { type: "function", function: { name, description, parameters } }
|
|
54
|
+
*/
|
|
55
|
+
function convertToolsToOpenAIFormat(tools: APITool[]): unknown[] {
|
|
56
|
+
return tools.map((tool) => ({
|
|
57
|
+
type: "function",
|
|
58
|
+
function: {
|
|
59
|
+
name: tool.name,
|
|
60
|
+
description: tool.description,
|
|
61
|
+
parameters: tool.input_schema,
|
|
62
|
+
},
|
|
63
|
+
}));
|
|
64
|
+
}
|
|
36
65
|
|
|
37
66
|
export interface StreamOptions {
|
|
38
67
|
apiKey: string;
|
|
@@ -50,6 +79,8 @@ export interface StreamOptions {
|
|
|
50
79
|
/** Called when redacted thinking is received (data is base64) */
|
|
51
80
|
onRedactedThinking?: (data: string) => void;
|
|
52
81
|
onToolUse?: (toolUse: { id: string; name: string; input: unknown }) => void;
|
|
82
|
+
/** Called when a retry is about to start - UI should reset streaming state */
|
|
83
|
+
onRetryStart?: () => void;
|
|
53
84
|
signal?: AbortSignal;
|
|
54
85
|
}
|
|
55
86
|
|
|
@@ -197,153 +228,55 @@ export function calculateCacheMetrics(usage: UsageMetrics): CacheMetrics {
|
|
|
197
228
|
}
|
|
198
229
|
|
|
199
230
|
/**
|
|
200
|
-
*
|
|
231
|
+
* Callbacks to emit during streaming (passed in, not buffered)
|
|
201
232
|
*/
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
maxTokens = 4096,
|
|
210
|
-
tools,
|
|
211
|
-
systemPrompt,
|
|
212
|
-
cacheConfig = DEFAULT_CACHE_CONFIG,
|
|
213
|
-
thinking,
|
|
214
|
-
extendedThinking,
|
|
215
|
-
onToken,
|
|
216
|
-
onThinking,
|
|
217
|
-
onRedactedThinking,
|
|
218
|
-
onToolUse,
|
|
219
|
-
signal,
|
|
220
|
-
} = options;
|
|
221
|
-
|
|
222
|
-
const startTime = Date.now();
|
|
223
|
-
let ttft = 0;
|
|
224
|
-
let firstToken = true;
|
|
225
|
-
let totalThinkingTokens = 0;
|
|
226
|
-
|
|
227
|
-
// Build cached messages
|
|
228
|
-
const cachedMessages = buildCachedMessages(messages, cacheConfig);
|
|
229
|
-
|
|
230
|
-
// Build system prompt with cache control
|
|
231
|
-
const cachedSystemPrompt = buildSystemPrompt(systemPrompt, cacheConfig);
|
|
232
|
-
|
|
233
|
-
// Build request
|
|
234
|
-
const request: APIRequest = {
|
|
235
|
-
model,
|
|
236
|
-
max_tokens: maxTokens,
|
|
237
|
-
messages: cachedMessages.map((m) => ({
|
|
238
|
-
role: m.role,
|
|
239
|
-
content: m.content,
|
|
240
|
-
})),
|
|
241
|
-
stream: true,
|
|
242
|
-
};
|
|
243
|
-
|
|
244
|
-
// Add system prompt if provided
|
|
245
|
-
if (cachedSystemPrompt) {
|
|
246
|
-
request.system = cachedSystemPrompt;
|
|
247
|
-
}
|
|
248
|
-
|
|
249
|
-
// Add tools if provided (with optional caching)
|
|
250
|
-
if (tools && tools.length > 0) {
|
|
251
|
-
request.tools = tools;
|
|
252
|
-
}
|
|
253
|
-
|
|
254
|
-
// Determine API endpoint (support custom base URL for GLM, etc.)
|
|
255
|
-
const baseUrl = process.env.ANTHROPIC_BASE_URL || "https://api.anthropic.com";
|
|
256
|
-
const apiEndpoint = `${baseUrl}/v1/messages`;
|
|
257
|
-
|
|
258
|
-
// Build headers
|
|
259
|
-
const headers: Record<string, string> = {
|
|
260
|
-
"Content-Type": "application/json",
|
|
261
|
-
"x-api-key": apiKey,
|
|
262
|
-
"anthropic-version": "2023-06-01",
|
|
263
|
-
};
|
|
264
|
-
|
|
265
|
-
// Determine thinking configuration
|
|
266
|
-
const shouldUseExtendedThinking =
|
|
267
|
-
(extendedThinking?.enabled ?? false) ||
|
|
268
|
-
(thinking && thinking.type !== "disabled");
|
|
269
|
-
|
|
270
|
-
if (shouldUseExtendedThinking && supportsExtendedThinking(model)) {
|
|
271
|
-
// Calculate budget tokens
|
|
272
|
-
let budgetTokens: number;
|
|
273
|
-
|
|
274
|
-
if (extendedThinking?.budgetTokens) {
|
|
275
|
-
budgetTokens = extendedThinking.budgetTokens;
|
|
276
|
-
} else if (thinking?.type === "enabled") {
|
|
277
|
-
budgetTokens = thinking.budget_tokens;
|
|
278
|
-
} else {
|
|
279
|
-
// Use effort level to determine budget
|
|
280
|
-
const effort = extendedThinking?.effort || "medium";
|
|
281
|
-
budgetTokens = calculateBudgetTokens(
|
|
282
|
-
{
|
|
283
|
-
enabled: true,
|
|
284
|
-
effort,
|
|
285
|
-
modelMultiplier: model.includes("opus") ? 2 : 1,
|
|
286
|
-
},
|
|
287
|
-
model
|
|
288
|
-
);
|
|
289
|
-
}
|
|
290
|
-
|
|
291
|
-
// Clamp budget to valid range
|
|
292
|
-
budgetTokens = Math.max(1024, Math.min(budgetTokens, 100000));
|
|
293
|
-
|
|
294
|
-
request.thinking = {
|
|
295
|
-
type: "enabled",
|
|
296
|
-
budget_tokens: budgetTokens,
|
|
297
|
-
};
|
|
298
|
-
|
|
299
|
-
// Add beta headers for extended thinking features
|
|
300
|
-
const betaFeatures: string[] = ["extended-thinking-2025-01-24"];
|
|
301
|
-
|
|
302
|
-
// Add interleaved thinking support if enabled
|
|
303
|
-
if (extendedThinking?.interleaved !== false) {
|
|
304
|
-
betaFeatures.push("interleaved-thinking-2025-01-24");
|
|
305
|
-
}
|
|
306
|
-
|
|
307
|
-
headers["anthropic-beta"] = betaFeatures.join(",");
|
|
308
|
-
} else {
|
|
309
|
-
// Default beta header
|
|
310
|
-
headers["anthropic-beta"] = "max-tokens-3-5-sonnet-2024-07-15";
|
|
311
|
-
}
|
|
312
|
-
|
|
313
|
-
// Make API request with retry logic
|
|
314
|
-
const retryOptions: RetryOptions = {
|
|
315
|
-
maxRetries: 3,
|
|
316
|
-
baseDelayMs: 1000,
|
|
317
|
-
maxDelayMs: 30000,
|
|
318
|
-
retryableStatusCodes: [429, 500, 502, 503, 504, 529],
|
|
319
|
-
onRetry: (attempt, error, delayMs) => {
|
|
320
|
-
console.log(`\x1b[33mAPI retry ${attempt}/3 after ${delayMs}ms: ${error.message}\x1b[0m`);
|
|
321
|
-
},
|
|
322
|
-
};
|
|
233
|
+
interface StreamCallbacks {
|
|
234
|
+
onToken?: (text: string) => void;
|
|
235
|
+
onThinking?: (thinking: string) => void;
|
|
236
|
+
onRedactedThinking?: (data: string) => void;
|
|
237
|
+
onToolUse?: (toolUse: { id: string; name: string; input: unknown }) => void;
|
|
238
|
+
onRetryStart?: () => void;
|
|
239
|
+
}
|
|
323
240
|
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
if (!res.ok && retryOptions.retryableStatusCodes?.includes(res.status)) {
|
|
335
|
-
const errorText = await res.text();
|
|
336
|
-
throw new Error(`API error: ${res.status} - ${errorText}`);
|
|
337
|
-
}
|
|
241
|
+
/**
|
|
242
|
+
* Internal result from a single stream attempt
|
|
243
|
+
*/
|
|
244
|
+
interface StreamAttemptResult {
|
|
245
|
+
message: APIResponse | null;
|
|
246
|
+
content: ContentBlock[];
|
|
247
|
+
usage: UsageMetrics;
|
|
248
|
+
thinkingTokens: number;
|
|
249
|
+
ttftMs: number;
|
|
250
|
+
}
|
|
338
251
|
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
252
|
+
/**
|
|
253
|
+
* Execute a single streaming API attempt
|
|
254
|
+
* Emits callbacks in real-time for streaming display
|
|
255
|
+
*/
|
|
256
|
+
async function executeStreamAttempt(
|
|
257
|
+
request: APIRequest,
|
|
258
|
+
headers: Record<string, string>,
|
|
259
|
+
apiEndpoint: string,
|
|
260
|
+
signal: AbortSignal | undefined,
|
|
261
|
+
model: string,
|
|
262
|
+
retryableStatusCodes: number[],
|
|
263
|
+
startTime: number,
|
|
264
|
+
callbacks: StreamCallbacks
|
|
265
|
+
): Promise<StreamAttemptResult> {
|
|
266
|
+
const response = await fetch(apiEndpoint, {
|
|
267
|
+
method: "POST",
|
|
268
|
+
headers,
|
|
269
|
+
body: JSON.stringify(request),
|
|
270
|
+
signal,
|
|
271
|
+
});
|
|
343
272
|
|
|
273
|
+
// Throw for retryable status codes so withRetry can handle them
|
|
344
274
|
if (!response.ok) {
|
|
345
|
-
const
|
|
346
|
-
|
|
275
|
+
const errorText = await response.text();
|
|
276
|
+
if (retryableStatusCodes.includes(response.status)) {
|
|
277
|
+
throw new Error(`API error: ${response.status} - ${errorText}`);
|
|
278
|
+
}
|
|
279
|
+
throw new Error(`API error: ${response.status} - ${errorText}`);
|
|
347
280
|
}
|
|
348
281
|
|
|
349
282
|
if (!response.body) {
|
|
@@ -363,7 +296,9 @@ export async function createMessageStream(
|
|
|
363
296
|
let currentToolUseBlock: ToolUseBlock | null = null;
|
|
364
297
|
let toolUseInput = "";
|
|
365
298
|
|
|
366
|
-
|
|
299
|
+
let ttft = 0;
|
|
300
|
+
let firstToken = true;
|
|
301
|
+
let totalThinkingTokens = 0;
|
|
367
302
|
|
|
368
303
|
try {
|
|
369
304
|
let buffer = "";
|
|
@@ -440,7 +375,7 @@ export async function createMessageStream(
|
|
|
440
375
|
if (delta.type === "text_delta" && currentTextBlock) {
|
|
441
376
|
const text = delta.text as string;
|
|
442
377
|
currentTextBlock.text += text;
|
|
443
|
-
onToken?.(text);
|
|
378
|
+
callbacks.onToken?.(text); // Emit in real-time
|
|
444
379
|
|
|
445
380
|
if (firstToken) {
|
|
446
381
|
ttft = Date.now() - startTime;
|
|
@@ -449,14 +384,13 @@ export async function createMessageStream(
|
|
|
449
384
|
} else if (delta.type === "thinking_delta" && currentThinkingBlock) {
|
|
450
385
|
const thinking = delta.thinking as string;
|
|
451
386
|
currentThinkingBlock.thinking += thinking;
|
|
452
|
-
onThinking?.(thinking);
|
|
453
|
-
totalThinkingTokens += Math.ceil(thinking.length / 4);
|
|
387
|
+
callbacks.onThinking?.(thinking); // Emit in real-time
|
|
388
|
+
totalThinkingTokens += Math.ceil(thinking.length / 4);
|
|
454
389
|
} else if (delta.type === "redacted_thinking_delta" && currentRedactedThinkingBlock) {
|
|
455
|
-
// Handle redacted thinking deltas
|
|
456
390
|
const redactedData = delta.data as string;
|
|
457
391
|
currentRedactedThinkingBlock.data += redactedData;
|
|
458
|
-
onRedactedThinking?.(redactedData);
|
|
459
|
-
totalThinkingTokens += Math.ceil(redactedData.length / 4);
|
|
392
|
+
callbacks.onRedactedThinking?.(redactedData); // Emit in real-time
|
|
393
|
+
totalThinkingTokens += Math.ceil(redactedData.length / 4);
|
|
460
394
|
} else if (delta.type === "input_json_delta" && currentToolUseBlock) {
|
|
461
395
|
toolUseInput += delta.partial_json as string;
|
|
462
396
|
}
|
|
@@ -464,8 +398,6 @@ export async function createMessageStream(
|
|
|
464
398
|
}
|
|
465
399
|
|
|
466
400
|
case "content_block_stop": {
|
|
467
|
-
// content_block_stop event has { index: number }, not the block itself
|
|
468
|
-
// We need to check which current block is active and push it
|
|
469
401
|
if (currentTextBlock !== null) {
|
|
470
402
|
currentContent.push(currentTextBlock);
|
|
471
403
|
currentTextBlock = null;
|
|
@@ -474,7 +406,6 @@ export async function createMessageStream(
|
|
|
474
406
|
currentThinkingBlock = null;
|
|
475
407
|
} else if (currentRedactedThinkingBlock !== null) {
|
|
476
408
|
currentContent.push(currentRedactedThinkingBlock);
|
|
477
|
-
onRedactedThinking?.(currentRedactedThinkingBlock.data);
|
|
478
409
|
currentRedactedThinkingBlock = null;
|
|
479
410
|
} else if (currentToolUseBlock !== null) {
|
|
480
411
|
try {
|
|
@@ -483,11 +414,11 @@ export async function createMessageStream(
|
|
|
483
414
|
currentToolUseBlock.input = {};
|
|
484
415
|
}
|
|
485
416
|
currentContent.push(currentToolUseBlock);
|
|
486
|
-
onToolUse?.({
|
|
417
|
+
callbacks.onToolUse?.({
|
|
487
418
|
id: currentToolUseBlock.id,
|
|
488
419
|
name: currentToolUseBlock.name,
|
|
489
420
|
input: currentToolUseBlock.input,
|
|
490
|
-
});
|
|
421
|
+
}); // Emit in real-time
|
|
491
422
|
currentToolUseBlock = null;
|
|
492
423
|
toolUseInput = "";
|
|
493
424
|
}
|
|
@@ -506,15 +437,27 @@ export async function createMessageStream(
|
|
|
506
437
|
}
|
|
507
438
|
|
|
508
439
|
case "message_stop":
|
|
509
|
-
// Message complete
|
|
510
440
|
break;
|
|
511
441
|
|
|
512
442
|
// OpenAI/Z.AI compatible format (for GLM-5, etc.)
|
|
513
|
-
// OpenAI streaming sends chunks with choices array
|
|
514
443
|
default: {
|
|
515
|
-
// Check for OpenAI format: { choices: [{ delta: { content: "..." } }], usage: {...} }
|
|
516
444
|
if (event.choices && Array.isArray(event.choices)) {
|
|
517
|
-
const choice = event.choices[0] as {
|
|
445
|
+
const choice = event.choices[0] as {
|
|
446
|
+
delta?: {
|
|
447
|
+
content?: string;
|
|
448
|
+
tool_calls?: Array<{
|
|
449
|
+
id?: string;
|
|
450
|
+
index?: number;
|
|
451
|
+
function?: {
|
|
452
|
+
name?: string;
|
|
453
|
+
arguments?: string;
|
|
454
|
+
};
|
|
455
|
+
}>;
|
|
456
|
+
};
|
|
457
|
+
finish_reason?: string;
|
|
458
|
+
} | undefined;
|
|
459
|
+
|
|
460
|
+
// Handle text content
|
|
518
461
|
if (choice?.delta?.content) {
|
|
519
462
|
const text = choice.delta.content;
|
|
520
463
|
if (currentTextBlock) {
|
|
@@ -522,18 +465,93 @@ export async function createMessageStream(
|
|
|
522
465
|
} else {
|
|
523
466
|
currentTextBlock = { type: "text", text };
|
|
524
467
|
}
|
|
525
|
-
onToken?.(text);
|
|
468
|
+
callbacks.onToken?.(text); // Emit in real-time
|
|
526
469
|
if (firstToken) {
|
|
527
470
|
ttft = Date.now() - startTime;
|
|
528
471
|
firstToken = false;
|
|
529
472
|
}
|
|
530
473
|
}
|
|
531
|
-
|
|
474
|
+
|
|
475
|
+
// Handle tool calls (OpenAI format)
|
|
476
|
+
if (choice?.delta?.tool_calls && Array.isArray(choice.delta.tool_calls)) {
|
|
477
|
+
for (const toolCallDelta of choice.delta.tool_calls) {
|
|
478
|
+
const index = toolCallDelta.index ?? 0;
|
|
479
|
+
const toolCallId = toolCallDelta.id;
|
|
480
|
+
|
|
481
|
+
// Start a new tool call if we got an ID
|
|
482
|
+
if (toolCallId) {
|
|
483
|
+
// Finalize any existing tool use block at this index
|
|
484
|
+
if (currentToolUseBlock) {
|
|
485
|
+
try {
|
|
486
|
+
currentToolUseBlock.input = JSON.parse(toolUseInput);
|
|
487
|
+
} catch {
|
|
488
|
+
currentToolUseBlock.input = {};
|
|
489
|
+
}
|
|
490
|
+
currentContent.push(currentToolUseBlock);
|
|
491
|
+
callbacks.onToolUse?.({
|
|
492
|
+
id: currentToolUseBlock.id,
|
|
493
|
+
name: currentToolUseBlock.name,
|
|
494
|
+
input: currentToolUseBlock.input,
|
|
495
|
+
});
|
|
496
|
+
}
|
|
497
|
+
|
|
498
|
+
// Start new tool use block
|
|
499
|
+
currentToolUseBlock = {
|
|
500
|
+
type: "tool_use",
|
|
501
|
+
id: toolCallId,
|
|
502
|
+
name: toolCallDelta.function?.name || "",
|
|
503
|
+
input: {},
|
|
504
|
+
};
|
|
505
|
+
toolUseInput = "";
|
|
506
|
+
|
|
507
|
+
if (firstToken) {
|
|
508
|
+
ttft = Date.now() - startTime;
|
|
509
|
+
firstToken = false;
|
|
510
|
+
}
|
|
511
|
+
}
|
|
512
|
+
|
|
513
|
+
// Accumulate arguments for current tool call
|
|
514
|
+
if (toolCallDelta.function?.arguments && currentToolUseBlock) {
|
|
515
|
+
toolUseInput += toolCallDelta.function.arguments;
|
|
516
|
+
}
|
|
517
|
+
}
|
|
518
|
+
}
|
|
519
|
+
|
|
520
|
+
// Handle finish reason
|
|
532
521
|
if (choice?.finish_reason) {
|
|
522
|
+
// Finalize any pending text block
|
|
533
523
|
if (currentTextBlock) {
|
|
534
524
|
currentContent.push(currentTextBlock);
|
|
535
525
|
currentTextBlock = null;
|
|
536
526
|
}
|
|
527
|
+
|
|
528
|
+
// Finalize any pending tool use block
|
|
529
|
+
if (currentToolUseBlock) {
|
|
530
|
+
try {
|
|
531
|
+
currentToolUseBlock.input = JSON.parse(toolUseInput);
|
|
532
|
+
} catch {
|
|
533
|
+
currentToolUseBlock.input = {};
|
|
534
|
+
}
|
|
535
|
+
currentContent.push(currentToolUseBlock);
|
|
536
|
+
callbacks.onToolUse?.({
|
|
537
|
+
id: currentToolUseBlock.id,
|
|
538
|
+
name: currentToolUseBlock.name,
|
|
539
|
+
input: currentToolUseBlock.input,
|
|
540
|
+
});
|
|
541
|
+
currentToolUseBlock = null;
|
|
542
|
+
toolUseInput = "";
|
|
543
|
+
}
|
|
544
|
+
|
|
545
|
+
// Map finish reasons
|
|
546
|
+
let stopReason: StopReason = "end_turn";
|
|
547
|
+
if (choice.finish_reason === "tool_calls" || choice.finish_reason === "function_call") {
|
|
548
|
+
stopReason = "tool_use";
|
|
549
|
+
} else if (choice.finish_reason === "length") {
|
|
550
|
+
stopReason = "max_tokens";
|
|
551
|
+
} else if (choice.finish_reason === "stop") {
|
|
552
|
+
stopReason = "end_turn";
|
|
553
|
+
}
|
|
554
|
+
|
|
537
555
|
if (!message) {
|
|
538
556
|
message = {
|
|
539
557
|
id: `msg-${Date.now()}`,
|
|
@@ -541,16 +559,15 @@ export async function createMessageStream(
|
|
|
541
559
|
role: "assistant",
|
|
542
560
|
content: currentContent,
|
|
543
561
|
model: model,
|
|
544
|
-
stop_reason:
|
|
562
|
+
stop_reason: stopReason,
|
|
545
563
|
stop_sequence: null,
|
|
546
564
|
usage: { input_tokens: 0, output_tokens: 0 },
|
|
547
565
|
};
|
|
548
566
|
} else {
|
|
549
|
-
message.stop_reason =
|
|
567
|
+
message.stop_reason = stopReason;
|
|
550
568
|
}
|
|
551
569
|
}
|
|
552
570
|
}
|
|
553
|
-
// OpenAI usage format (often in final chunk)
|
|
554
571
|
if (event.usage) {
|
|
555
572
|
const openaiUsage = event.usage as { prompt_tokens?: number; completion_tokens?: number; total_tokens?: number };
|
|
556
573
|
usage.input_tokens = openaiUsage.prompt_tokens || 0;
|
|
@@ -560,11 +577,13 @@ export async function createMessageStream(
|
|
|
560
577
|
}
|
|
561
578
|
}
|
|
562
579
|
} catch (err: unknown) {
|
|
563
|
-
//
|
|
580
|
+
// Only rethrow if it's an API error, not a JSON parse error
|
|
581
|
+
if (err instanceof Error && err.message.startsWith("API error:")) {
|
|
582
|
+
throw err;
|
|
583
|
+
}
|
|
564
584
|
if (process.env.DEBUG_API === '1') {
|
|
565
585
|
console.error('\x1b[91m[DEBUG] JSON parse error:', err);
|
|
566
586
|
console.error('\x1b[91m[DEBUG] Error parsing SSE data:', data.substring(0, 200));
|
|
567
|
-
console.error('\x1b[91m[DEBUG] Original buffer:', buffer.substring(0, 500));
|
|
568
587
|
}
|
|
569
588
|
}
|
|
570
589
|
}
|
|
@@ -573,8 +592,8 @@ export async function createMessageStream(
|
|
|
573
592
|
reader.releaseLock();
|
|
574
593
|
}
|
|
575
594
|
|
|
595
|
+
// Handle "No message received" case - this is retryable
|
|
576
596
|
if (!message) {
|
|
577
|
-
// If we received content via OpenAI format but no message_start, create a message
|
|
578
597
|
if (currentContent.length > 0) {
|
|
579
598
|
message = {
|
|
580
599
|
id: `msg-${Date.now()}`,
|
|
@@ -587,31 +606,213 @@ export async function createMessageStream(
|
|
|
587
606
|
usage: { input_tokens: 0, output_tokens: 0 },
|
|
588
607
|
};
|
|
589
608
|
} else {
|
|
590
|
-
//
|
|
591
|
-
if (process.env.DEBUG_API === '1') {
|
|
592
|
-
console.log('\x1b[91m[DEBUG] No message_start event received. Buffer:\x1b[0m', buffer.substring(0, 500));
|
|
593
|
-
}
|
|
609
|
+
// This is a transient error - throw to trigger retry
|
|
594
610
|
throw new Error("No message received from API");
|
|
595
611
|
}
|
|
596
612
|
}
|
|
597
613
|
|
|
598
|
-
|
|
614
|
+
return {
|
|
615
|
+
message,
|
|
616
|
+
content: currentContent,
|
|
617
|
+
usage,
|
|
618
|
+
thinkingTokens: totalThinkingTokens,
|
|
619
|
+
ttftMs: ttft,
|
|
620
|
+
};
|
|
621
|
+
}
|
|
622
|
+
|
|
623
|
+
/**
|
|
624
|
+
* Create a streaming message request to Anthropic API
|
|
625
|
+
* Full retry support including stream parsing errors
|
|
626
|
+
*/
|
|
627
|
+
export async function createMessageStream(
|
|
628
|
+
messages: Message[],
|
|
629
|
+
options: StreamOptions
|
|
630
|
+
): Promise<StreamResult> {
|
|
631
|
+
const {
|
|
632
|
+
apiKey,
|
|
633
|
+
model = "claude-sonnet-4-6",
|
|
634
|
+
maxTokens = 4096,
|
|
635
|
+
tools,
|
|
636
|
+
systemPrompt,
|
|
637
|
+
cacheConfig = DEFAULT_CACHE_CONFIG,
|
|
638
|
+
thinking,
|
|
639
|
+
extendedThinking,
|
|
640
|
+
onToken,
|
|
641
|
+
onThinking,
|
|
642
|
+
onRedactedThinking,
|
|
643
|
+
onToolUse,
|
|
644
|
+
onRetryStart,
|
|
645
|
+
signal,
|
|
646
|
+
} = options;
|
|
647
|
+
|
|
648
|
+
const startTime = Date.now();
|
|
649
|
+
|
|
650
|
+
// Build cached messages
|
|
651
|
+
const cachedMessages = buildCachedMessages(messages, cacheConfig);
|
|
652
|
+
|
|
653
|
+
// Build system prompt with cache control
|
|
654
|
+
const cachedSystemPrompt = buildSystemPrompt(systemPrompt, cacheConfig);
|
|
655
|
+
|
|
656
|
+
// Build request
|
|
657
|
+
const request: APIRequest = {
|
|
658
|
+
model,
|
|
659
|
+
max_tokens: maxTokens,
|
|
660
|
+
messages: cachedMessages.map((m) => ({
|
|
661
|
+
role: m.role,
|
|
662
|
+
content: m.content,
|
|
663
|
+
})),
|
|
664
|
+
stream: true,
|
|
665
|
+
};
|
|
666
|
+
|
|
667
|
+
if (cachedSystemPrompt) {
|
|
668
|
+
request.system = cachedSystemPrompt;
|
|
669
|
+
}
|
|
670
|
+
|
|
671
|
+
// Tools will be set after determining API format (for format conversion)
|
|
672
|
+
|
|
673
|
+
// Resolve provider based on model name
|
|
674
|
+
const providerInfo = resolveProvider(model);
|
|
675
|
+
|
|
676
|
+
// Determine API endpoint and headers based on provider
|
|
677
|
+
let apiEndpoint: string;
|
|
678
|
+
let headers: Record<string, string>;
|
|
679
|
+
let apiFormat: "anthropic" | "openai";
|
|
680
|
+
|
|
681
|
+
if (providerInfo) {
|
|
682
|
+
// Use provider-specific configuration
|
|
683
|
+
apiEndpoint = providerInfo.endpoint;
|
|
684
|
+
apiFormat = providerInfo.config.format;
|
|
685
|
+
|
|
686
|
+
if (apiFormat === "anthropic") {
|
|
687
|
+
// Anthropic/MiniMax format
|
|
688
|
+
headers = {
|
|
689
|
+
"Content-Type": "application/json",
|
|
690
|
+
[providerInfo.config.authHeader]: providerInfo.apiKey,
|
|
691
|
+
"anthropic-version": "2023-06-01",
|
|
692
|
+
};
|
|
693
|
+
} else {
|
|
694
|
+
// OpenAI/Zhipu format
|
|
695
|
+
headers = {
|
|
696
|
+
"Content-Type": "application/json",
|
|
697
|
+
[providerInfo.config.authHeader]: `Bearer ${providerInfo.apiKey}`,
|
|
698
|
+
};
|
|
699
|
+
}
|
|
700
|
+
} else {
|
|
701
|
+
// Fallback to environment-based configuration (legacy)
|
|
702
|
+
const baseUrl = process.env.ANTHROPIC_BASE_URL || "https://api.anthropic.com";
|
|
703
|
+
apiEndpoint = `${baseUrl}/v1/messages`;
|
|
704
|
+
apiFormat = "anthropic";
|
|
705
|
+
|
|
706
|
+
headers = {
|
|
707
|
+
"Content-Type": "application/json",
|
|
708
|
+
"x-api-key": apiKey,
|
|
709
|
+
"anthropic-version": "2023-06-01",
|
|
710
|
+
};
|
|
711
|
+
}
|
|
712
|
+
|
|
713
|
+
// Set tools with format conversion if needed
|
|
714
|
+
if (tools && tools.length > 0) {
|
|
715
|
+
if (apiFormat === "openai") {
|
|
716
|
+
// Convert Anthropic-style tools to OpenAI format
|
|
717
|
+
// Cast needed because OpenAI format differs from APITool
|
|
718
|
+
(request as unknown as Record<string, unknown>).tools = convertToolsToOpenAIFormat(tools);
|
|
719
|
+
} else {
|
|
720
|
+
// Keep Anthropic format as-is
|
|
721
|
+
request.tools = tools;
|
|
722
|
+
}
|
|
723
|
+
}
|
|
724
|
+
|
|
725
|
+
const shouldUseExtendedThinking =
|
|
726
|
+
(extendedThinking?.enabled ?? false) ||
|
|
727
|
+
(thinking && thinking.type !== "disabled");
|
|
728
|
+
|
|
729
|
+
if (shouldUseExtendedThinking && supportsExtendedThinking(model)) {
|
|
730
|
+
let budgetTokens: number;
|
|
731
|
+
|
|
732
|
+
if (extendedThinking?.budgetTokens) {
|
|
733
|
+
budgetTokens = extendedThinking.budgetTokens;
|
|
734
|
+
} else if (thinking?.type === "enabled") {
|
|
735
|
+
budgetTokens = thinking.budget_tokens;
|
|
736
|
+
} else {
|
|
737
|
+
const effort = extendedThinking?.effort || "medium";
|
|
738
|
+
budgetTokens = calculateBudgetTokens(
|
|
739
|
+
{ enabled: true, effort, modelMultiplier: model.includes("opus") ? 2 : 1 },
|
|
740
|
+
model
|
|
741
|
+
);
|
|
742
|
+
}
|
|
743
|
+
|
|
744
|
+
budgetTokens = Math.max(1024, Math.min(budgetTokens, 100000));
|
|
745
|
+
|
|
746
|
+
request.thinking = { type: "enabled", budget_tokens: budgetTokens };
|
|
747
|
+
|
|
748
|
+
const betaFeatures: string[] = ["extended-thinking-2025-01-24"];
|
|
749
|
+
if (extendedThinking?.interleaved !== false) {
|
|
750
|
+
betaFeatures.push("interleaved-thinking-2025-01-24");
|
|
751
|
+
}
|
|
752
|
+
headers["anthropic-beta"] = betaFeatures.join(",");
|
|
753
|
+
} else if (apiFormat === "anthropic") {
|
|
754
|
+
headers["anthropic-beta"] = "max-tokens-3-5-sonnet-2024-07-15";
|
|
755
|
+
}
|
|
756
|
+
|
|
757
|
+
// Retry options - now covers entire stream parsing
|
|
758
|
+
const retryOptions: RetryOptions = {
|
|
759
|
+
maxRetries: 10,
|
|
760
|
+
baseDelayMs: 1000,
|
|
761
|
+
maxDelayMs: 60000,
|
|
762
|
+
retryableStatusCodes: [429, 500, 502, 503, 504, 529],
|
|
763
|
+
onRetry: (attempt, error, delayMs) => {
|
|
764
|
+
console.log(`\x1b[33mAPI retry ${attempt}/10 after ${delayMs}ms: ${error.message}\x1b[0m`);
|
|
765
|
+
// Notify UI to reset streaming state before retry
|
|
766
|
+
onRetryStart?.();
|
|
767
|
+
// Track provider failure on retry
|
|
768
|
+
const providerName = getProviderForModel(model);
|
|
769
|
+
if (providerName) {
|
|
770
|
+
recordProviderFailure(providerName);
|
|
771
|
+
}
|
|
772
|
+
},
|
|
773
|
+
};
|
|
774
|
+
|
|
775
|
+
// Execute with retry - wraps entire fetch + stream parsing
|
|
776
|
+
// Callbacks are emitted in real-time during streaming
|
|
777
|
+
const result = await withRetry(
|
|
778
|
+
() => executeStreamAttempt(
|
|
779
|
+
request,
|
|
780
|
+
headers,
|
|
781
|
+
apiEndpoint,
|
|
782
|
+
signal,
|
|
783
|
+
model,
|
|
784
|
+
retryOptions.retryableStatusCodes ?? [],
|
|
785
|
+
startTime,
|
|
786
|
+
{ onToken, onThinking, onRedactedThinking, onToolUse }
|
|
787
|
+
),
|
|
788
|
+
retryOptions
|
|
789
|
+
);
|
|
790
|
+
|
|
791
|
+
// Build final message
|
|
792
|
+
const message = result.message!;
|
|
793
|
+
message.content = result.content;
|
|
599
794
|
|
|
600
795
|
// Calculate cost and cache metrics
|
|
601
|
-
const { costUSD, estimatedSavingsUSD } = calculateCost(model, usage);
|
|
602
|
-
const cacheMetrics = calculateCacheMetrics(usage);
|
|
796
|
+
const { costUSD, estimatedSavingsUSD } = calculateCost(model, result.usage);
|
|
797
|
+
const cacheMetrics = calculateCacheMetrics(result.usage);
|
|
603
798
|
cacheMetrics.estimatedSavingsUSD = estimatedSavingsUSD;
|
|
604
799
|
|
|
605
800
|
const durationMs = Date.now() - startTime;
|
|
606
801
|
|
|
802
|
+
// Track provider health on success
|
|
803
|
+
const providerName = getProviderForModel(model);
|
|
804
|
+
if (providerName) {
|
|
805
|
+
recordProviderSuccess(providerName, durationMs);
|
|
806
|
+
}
|
|
807
|
+
|
|
607
808
|
return {
|
|
608
809
|
message,
|
|
609
|
-
usage,
|
|
810
|
+
usage: result.usage,
|
|
610
811
|
cacheMetrics,
|
|
611
812
|
costUSD,
|
|
612
813
|
durationMs,
|
|
613
|
-
ttftMs:
|
|
614
|
-
thinkingTokens:
|
|
814
|
+
ttftMs: result.ttftMs || durationMs,
|
|
815
|
+
thinkingTokens: result.thinkingTokens,
|
|
615
816
|
};
|
|
616
817
|
}
|
|
617
818
|
|