skyloom 1.14.8 → 1.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (156) hide show
  1. package/.github/workflows/ci.yml +2 -2
  2. package/.github/workflows/publish.yml +51 -4
  3. package/CONVERSION_PLAN.md +191 -191
  4. package/config/default.yaml +46 -43
  5. package/config/models.yaml +928 -155
  6. package/config/providers.yaml +109 -6
  7. package/dist/agents/snow.d.ts +2 -0
  8. package/dist/agents/snow.d.ts.map +1 -1
  9. package/dist/agents/snow.js +36 -5
  10. package/dist/agents/snow.js.map +1 -1
  11. package/dist/cli/loom_chat.d.ts.map +1 -1
  12. package/dist/cli/loom_chat.js +207 -1
  13. package/dist/cli/loom_chat.js.map +1 -1
  14. package/dist/cli/main.js +190 -40
  15. package/dist/cli/main.js.map +1 -1
  16. package/dist/cli/tui.d.ts.map +1 -1
  17. package/dist/cli/tui.js +6 -31
  18. package/dist/cli/tui.js.map +1 -1
  19. package/dist/core/agent.d.ts +6 -4
  20. package/dist/core/agent.d.ts.map +1 -1
  21. package/dist/core/agent.js +61 -20
  22. package/dist/core/agent.js.map +1 -1
  23. package/dist/core/catalog.d.ts.map +1 -1
  24. package/dist/core/catalog.js +30 -9
  25. package/dist/core/catalog.js.map +1 -1
  26. package/dist/core/commands.d.ts +110 -0
  27. package/dist/core/commands.d.ts.map +1 -0
  28. package/dist/core/commands.js +633 -0
  29. package/dist/core/commands.js.map +1 -0
  30. package/dist/core/concurrency.d.ts +38 -0
  31. package/dist/core/concurrency.d.ts.map +1 -0
  32. package/dist/core/concurrency.js +65 -0
  33. package/dist/core/concurrency.js.map +1 -0
  34. package/dist/core/factory.js +16 -16
  35. package/dist/core/file_checkpoint.d.ts +9 -0
  36. package/dist/core/file_checkpoint.d.ts.map +1 -1
  37. package/dist/core/file_checkpoint.js +33 -1
  38. package/dist/core/file_checkpoint.js.map +1 -1
  39. package/dist/core/llm.d.ts.map +1 -1
  40. package/dist/core/llm.js +66 -13
  41. package/dist/core/llm.js.map +1 -1
  42. package/dist/core/memory.js +51 -51
  43. package/dist/core/schemas.d.ts +16 -0
  44. package/dist/core/schemas.d.ts.map +1 -1
  45. package/dist/core/schemas.js +32 -0
  46. package/dist/core/schemas.js.map +1 -1
  47. package/dist/core/security.d.ts.map +1 -1
  48. package/dist/core/security.js +27 -0
  49. package/dist/core/security.js.map +1 -1
  50. package/dist/core/skymd.js +14 -14
  51. package/dist/core/trace.d.ts +105 -0
  52. package/dist/core/trace.d.ts.map +1 -0
  53. package/dist/core/trace.js +213 -0
  54. package/dist/core/trace.js.map +1 -0
  55. package/dist/tools/builtin.d.ts +2 -6
  56. package/dist/tools/builtin.d.ts.map +1 -1
  57. package/dist/tools/builtin.js +18 -111
  58. package/dist/tools/builtin.js.map +1 -1
  59. package/dist/tools/extra.d.ts +13 -0
  60. package/dist/tools/extra.d.ts.map +1 -0
  61. package/dist/tools/extra.js +827 -0
  62. package/dist/tools/extra.js.map +1 -0
  63. package/dist/tools/guards.d.ts +12 -0
  64. package/dist/tools/guards.d.ts.map +1 -0
  65. package/dist/tools/guards.js +143 -0
  66. package/dist/tools/guards.js.map +1 -0
  67. package/dist/tools/model_tool.d.ts.map +1 -1
  68. package/dist/tools/model_tool.js +24 -4
  69. package/dist/tools/model_tool.js.map +1 -1
  70. package/dist/web/markdown.d.ts +32 -0
  71. package/dist/web/markdown.d.ts.map +1 -0
  72. package/dist/web/markdown.js +202 -0
  73. package/dist/web/markdown.js.map +1 -0
  74. package/dist/web/server.d.ts +4 -0
  75. package/dist/web/server.d.ts.map +1 -1
  76. package/dist/web/server.js +14 -582
  77. package/dist/web/server.js.map +1 -1
  78. package/dist/web/ui.d.ts +31 -0
  79. package/dist/web/ui.d.ts.map +1 -0
  80. package/dist/web/ui.js +1009 -0
  81. package/dist/web/ui.js.map +1 -0
  82. package/docs/AESTHETIC_DESIGN.md +152 -152
  83. package/docs/OPTIMIZATION_PLAN.md +178 -178
  84. package/package.json +1 -1
  85. package/src/agents/snow.ts +38 -5
  86. package/src/cli/commands_md.ts +112 -112
  87. package/src/cli/input_macros.ts +83 -83
  88. package/src/cli/loom.ts +1041 -1041
  89. package/src/cli/loom_chat.ts +772 -603
  90. package/src/cli/main.ts +853 -723
  91. package/src/cli/tui.ts +264 -289
  92. package/src/core/agent/guard.ts +133 -133
  93. package/src/core/agent/task.ts +100 -100
  94. package/src/core/agent.ts +1630 -1590
  95. package/src/core/agent_helpers.ts +500 -500
  96. package/src/core/bus.ts +221 -221
  97. package/src/core/cache.ts +153 -153
  98. package/src/core/catalog.ts +199 -178
  99. package/src/core/circuit_breaker.ts +119 -119
  100. package/src/core/commands.ts +704 -0
  101. package/src/core/concurrency.ts +73 -0
  102. package/src/core/config.ts +365 -365
  103. package/src/core/constants.ts +95 -95
  104. package/src/core/factory.ts +656 -656
  105. package/src/core/file_checkpoint.ts +163 -136
  106. package/src/core/hooks.ts +126 -126
  107. package/src/core/llm.ts +972 -915
  108. package/src/core/logger.ts +143 -143
  109. package/src/core/mcp.ts +1001 -1001
  110. package/src/core/memory.ts +1201 -1201
  111. package/src/core/middleware.ts +350 -350
  112. package/src/core/model_config.ts +159 -159
  113. package/src/core/pipelines.ts +424 -424
  114. package/src/core/schemas.ts +319 -282
  115. package/src/core/security.ts +27 -0
  116. package/src/core/semantic.ts +211 -211
  117. package/src/core/skill.ts +384 -384
  118. package/src/core/skymd.ts +143 -143
  119. package/src/core/theme.ts +65 -65
  120. package/src/core/tool.ts +457 -457
  121. package/src/core/trace.ts +236 -0
  122. package/src/core/verify.ts +71 -71
  123. package/src/plugins/loader.ts +91 -91
  124. package/src/skills/loader.ts +75 -75
  125. package/src/tools/builtin.ts +571 -642
  126. package/src/tools/computer.ts +279 -279
  127. package/src/tools/extra.ts +662 -0
  128. package/src/tools/guards.ts +82 -0
  129. package/src/tools/model_tool.ts +93 -74
  130. package/src/tools/todo.ts +76 -76
  131. package/src/web/markdown.ts +193 -0
  132. package/src/web/server.ts +117 -693
  133. package/src/web/ui.ts +949 -0
  134. package/tests/agent.test.ts +211 -159
  135. package/tests/agent_helpers.test.ts +48 -48
  136. package/tests/catalog.test.ts +86 -86
  137. package/tests/checkpoint_commands.test.ts +124 -124
  138. package/tests/claude_compat.test.ts +110 -110
  139. package/tests/commands.test.ts +103 -0
  140. package/tests/concurrency.test.ts +102 -0
  141. package/tests/config.test.ts +41 -41
  142. package/tests/extra_tools.test.ts +212 -0
  143. package/tests/fence_plugin.test.ts +52 -52
  144. package/tests/guard.test.ts +75 -75
  145. package/tests/loom.test.ts +337 -337
  146. package/tests/memory.test.ts +170 -170
  147. package/tests/model_config.test.ts +109 -109
  148. package/tests/skymd.test.ts +146 -146
  149. package/tests/ssrf.test.ts +38 -38
  150. package/tests/structured_retry.test.ts +87 -0
  151. package/tests/task.test.ts +60 -60
  152. package/tests/todo_toolstats.test.ts +94 -94
  153. package/tests/trace.test.ts +128 -0
  154. package/tests/tui.test.ts +67 -67
  155. package/tests/web.test.ts +169 -0
  156. package/tsconfig.json +38 -38
package/src/core/llm.ts CHANGED
@@ -1,915 +1,972 @@
1
- /**
2
- * LLM abstraction layer with LiteLLM-compatible routing, retry, fallback, cost tracking, and budget control.
3
- *
4
- * Provides unified interface for multiple LLM providers (OpenAI, Anthropic, DeepSeek, etc.)
5
- * with automatic fallback chains, prompt caching for Anthropic, and cost estimation.
6
- */
7
-
8
- import type { Logger } from "./logger";
9
- import { LLMCache } from "./cache";
10
- import type { ToolRegistry } from "./tool";
11
-
12
- /**
13
- * LLM response from completion.
14
- */
15
- export interface LLMResponse {
16
- content: string;
17
- toolCalls: ToolCall[];
18
- model: string;
19
- usage: UsageStats;
20
- cost: number;
21
- reasoningContent?: string;
22
- // True when LLM loop ran out of iterations before producing a tool-call-free answer
23
- truncated: boolean;
24
- }
25
-
26
- /**
27
- * Tool call extracted from LLM response.
28
- */
29
- export interface ToolCall {
30
- id: string;
31
- type: string;
32
- function: {
33
- name: string;
34
- arguments: string;
35
- };
36
- }
37
-
38
- /**
39
- * Token usage statistics.
40
- */
41
- export interface UsageStats {
42
- promptTokens: number;
43
- completionTokens: number;
44
- }
45
-
46
- /**
47
- * Streaming event from LLM.
48
- */
49
- export interface StreamEvent {
50
- type: "content" | "tool_call" | "done" | "error" | "reasoning";
51
- text?: string;
52
- toolCall?: ToolCall;
53
- usage?: UsageStats;
54
- reasoningContent?: string;
55
- }
56
-
57
- /**
58
- * Split model string into provider and model name (e.g., "anthropic/claude-3-opus" → ["anthropic", "claude-3-opus"]).
59
- */
60
- function splitProvider(model: string): [string | null, string] {
61
- if (!model.includes("/")) {
62
- return [null, model];
63
- }
64
- const [head, ...rest] = model.split("/");
65
- const provider = head.toLowerCase();
66
- const knownProviders = getKnownProviders();
67
- if (knownProviders.has(provider)) {
68
- return [provider, rest.join("/")];
69
- }
70
- return [null, model];
71
- }
72
-
73
- /**
74
- * Get set of known provider ID prefixes.
75
- */
76
- function getKnownProviders(): Set<string> {
77
- return new Set([
78
- "openai",
79
- "azure",
80
- "anthropic",
81
- "deepseek",
82
- "ollama",
83
- "groq",
84
- "mistral",
85
- "cohere",
86
- "together_ai",
87
- "openrouter",
88
- "gemini",
89
- "vertex_ai",
90
- ]);
91
- }
92
-
93
- /**
94
- * Get provider-to-env-var mapping.
95
- */
96
- function getProviderEnvMap(): Map<string, string> {
97
- const envMap = new Map([
98
- ["openai", "OPENAI_API_KEY"],
99
- ["anthropic", "ANTHROPIC_API_KEY"],
100
- ["deepseek", "DEEPSEEK_API_KEY"],
101
- ["groq", "GROQ_API_KEY"],
102
- ["mistral", "MISTRAL_API_KEY"],
103
- ["cohere", "COHERE_API_KEY"],
104
- ["openrouter", "OPENROUTER_API_KEY"],
105
- ["gemini", "GEMINI_API_KEY"],
106
- ]);
107
- return envMap;
108
- }
109
-
110
- /**
111
- * Check if model targets Anthropic's API.
112
- */
113
- function isAnthropicModel(model: string): boolean {
114
- const lowered = model.toLowerCase();
115
- if (lowered.startsWith("anthropic/") || lowered.startsWith("claude")) {
116
- return true;
117
- }
118
- const [provider] = splitProvider(model);
119
- return provider === "anthropic";
120
- }
121
-
122
- /**
123
- * Check if model targets DeepSeek's API.
124
- */
125
- function isDeepseekModel(model: string): boolean {
126
- const [provider, stripped] = splitProvider(model);
127
- const lowered = model.toLowerCase();
128
- return (
129
- provider === "deepseek" ||
130
- lowered.startsWith("deepseek") ||
131
- stripped.startsWith("deepseek")
132
- );
133
- }
134
-
135
- /**
136
- * Check if DeepSeek model supports tool calls.
137
- * Reasoning models are not reliable function-call models.
138
- */
139
- function deepseekSupportsTools(model: string): boolean {
140
- const lowered = model.toLowerCase();
141
- return !["reasoner", "-r1", "/r1"].some((part) => lowered.includes(part));
142
- }
143
-
144
- /**
145
- * Filter models by tool compatibility.
146
- */
147
- function toolCompatibleModels(
148
- primary: string,
149
- models: string[],
150
- needsTools: boolean
151
- ): string[] {
152
- if (!needsTools) {
153
- return models;
154
- }
155
-
156
- const compatible = models.filter(
157
- (m) => !isDeepseekModel(m) || deepseekSupportsTools(m)
158
- );
159
-
160
- if (compatible.length > 0) {
161
- return compatible;
162
- }
163
-
164
- if (isDeepseekModel(primary)) {
165
- return ["deepseek/deepseek-chat"];
166
- }
167
-
168
- return models;
169
- }
170
-
171
- /**
172
- * Apply Anthropic ephemeral cache markers to messages and tools.
173
- *
174
- * Anthropic charges full input tokens for repeated identical prefixes.
175
- * Adding `cache_control: {"type": "ephemeral"}` to system prompt and tools
176
- * enables 5-minute KV cache, reducing input cost ~80% on subsequent turns.
177
- */
178
- function _applyAnthropicCacheControl(
179
- model: string,
180
- messages: Record<string, unknown>[],
181
- toolSchemas: Record<string, unknown>[] | null
182
- ): [Record<string, unknown>[], Record<string, unknown>[] | null] {
183
- if (!isAnthropicModel(model)) {
184
- return [messages, toolSchemas];
185
- }
186
-
187
- // Process messages to add cache_control to system message
188
- const newMessages: Record<string, unknown>[] = [];
189
- let cachedSystem = false;
190
-
191
- for (const msg of messages) {
192
- if (
193
- !cachedSystem &&
194
- msg.role === "system" &&
195
- typeof msg.content === "string"
196
- ) {
197
- const content = msg.content as string;
198
- if (content) {
199
- newMessages.push({
200
- role: "system",
201
- content: [
202
- {
203
- type: "text",
204
- text: content,
205
- cache_control: { type: "ephemeral" },
206
- },
207
- ],
208
- });
209
- cachedSystem = true;
210
- continue;
211
- }
212
- }
213
-
214
- if (
215
- !cachedSystem &&
216
- msg.role === "system" &&
217
- Array.isArray(msg.content)
218
- ) {
219
- const content = msg.content as Record<string, unknown>[];
220
- if (content.length > 0) {
221
- const newBlocks = content.map((block) => ({ ...block }));
222
- const lastBlock = newBlocks[newBlocks.length - 1];
223
- newBlocks[newBlocks.length - 1] = {
224
- ...lastBlock,
225
- cache_control: { type: "ephemeral" },
226
- };
227
- newMessages.push({
228
- ...msg,
229
- content: newBlocks,
230
- });
231
- cachedSystem = true;
232
- continue;
233
- }
234
- }
235
-
236
- newMessages.push(msg);
237
- }
238
-
239
- // Add cache_control to tool schemas
240
- let newTools: Record<string, unknown>[] | null = null;
241
- if (toolSchemas && toolSchemas.length > 0) {
242
- newTools = toolSchemas.map((t) => ({ ...t }));
243
- const lastTool = newTools[newTools.length - 1];
244
- newTools[newTools.length - 1] = {
245
- ...lastTool,
246
- cache_control: { type: "ephemeral" },
247
- };
248
- }
249
-
250
- return [newMessages, newTools];
251
- }
252
-
253
- /**
254
- * Estimate token count for mixed CJK/English text.
255
- * CJK characters ~2 tokens each, non-CJK ~4 chars per token.
256
- */
257
- function _estimateTokens(text: string): number {
258
- // Count CJK characters (simplified check)
259
- const cjkRegex = /[\u4E00-\u9FFF\u3040-\u309F\uAC00-\uD7AF]/g;
260
- const cjkCount = (text.match(cjkRegex) || []).length;
261
- const otherCount = text.length - cjkCount;
262
- return Math.max(1, cjkCount * 2 + Math.floor(otherCount / 4));
263
- }
264
-
265
- /**
266
- * Cost per 1K tokens (input / output) — USD.
267
- */
268
- const MODEL_COST_ESTIMATES: Map<string, [number, number]> = new Map([
269
- ["gpt-4o", [0.0025, 0.01]],
270
- ["gpt-4o-mini", [0.00015, 0.0006]],
271
- ["gpt-4.1", [0.002, 0.008]],
272
- ["gpt-4.1-mini", [0.0004, 0.0016]],
273
- ["gpt-4.1-nano", [0.0001, 0.0004]],
274
- ["o3", [0.01, 0.04]],
275
- ["o4-mini", [0.0011, 0.0044]],
276
- ["claude-sonnet-4-6", [0.003, 0.015]],
277
- ["claude-opus-4-7", [0.005, 0.025]],
278
- ["claude-haiku-4-5", [0.0008, 0.004]],
279
- ["deepseek-chat", [0.00027, 0.0011]],
280
- ["deepseek-reasoner", [0.00055, 0.00219]],
281
- ["deepseek-v4-flash", [0.00014, 0.00028]],
282
- ["deepseek-v4-pro", [0.00174, 0.00348]],
283
- ["deepseek/deepseek-chat", [0.00027, 0.0011]],
284
- ["deepseek/deepseek-reasoner", [0.00055, 0.00219]],
285
- ["deepseek/deepseek-v4-flash", [0.00014, 0.00028]],
286
- ["deepseek/deepseek-v4-pro", [0.00174, 0.00348]],
287
- ["gemini/gemini-2.5-flash", [0.0003, 0.0025]],
288
- ["gemini/gemini-2.5-pro", [0.00125, 0.01]],
289
- ["ollama/llama3", [0.0, 0.0]],
290
- ["ollama/qwen2.5", [0.0, 0.0]],
291
- ]);
292
-
293
- /**
294
- * Fallback chains for model availability.
295
- */
296
- const FALLBACK_CHAINS: Map<string, string[]> = new Map([
297
- ["gpt-4o", ["gpt-4o-mini"]],
298
- ["gpt-4o-mini", ["gpt-4o"]],
299
- ["gpt-4.1", ["gpt-4.1-mini", "gpt-4o-mini"]],
300
- ["gpt-4.1-mini", ["gpt-4o-mini"]],
301
- ["gpt-4.1-nano", ["gpt-4.1-mini"]],
302
- ["o3", ["o4-mini", "gpt-4.1"]],
303
- ["o4-mini", ["gpt-4.1-mini"]],
304
- ["claude-sonnet-4-6", ["claude-haiku-4-5", "gpt-4.1-mini"]],
305
- ["claude-opus-4-7", ["claude-sonnet-4-6", "gpt-4.1"]],
306
- ["claude-haiku-4-5", ["gpt-4.1-mini"]],
307
- ["deepseek-chat", ["deepseek/deepseek-chat", "gpt-4.1-mini"]],
308
- ["deepseek-reasoner", ["deepseek/deepseek-chat", "gpt-4.1-mini"]],
309
- ["deepseek-v4-flash", ["deepseek/deepseek-chat", "gpt-4.1-mini"]],
310
- ["deepseek-v4-pro", ["deepseek-v4-flash", "deepseek/deepseek-chat", "gpt-4.1-mini"]],
311
- ["deepseek/deepseek-chat", ["gpt-4.1-mini"]],
312
- ["deepseek/deepseek-reasoner", ["deepseek/deepseek-chat", "gpt-4.1-mini"]],
313
- ["deepseek/deepseek-v4-flash", ["deepseek/deepseek-chat", "gpt-4.1-mini"]],
314
- ["deepseek/deepseek-v4-pro", [
315
- "deepseek/deepseek-v4-flash",
316
- "deepseek/deepseek-chat",
317
- "gpt-4.1-mini",
318
- ]],
319
- ["gemini/gemini-2.5-flash", ["gemini/gemini-2.5-pro", "gpt-4.1-mini"]],
320
- ["gemini/gemini-2.5-pro", ["gpt-4.1"]],
321
- ]);
322
-
323
- /**
324
- * HTTP status codes that are considered transient errors (worth retrying).
325
- */
326
- const RETRYABLE_STATUSES = new Set([408, 425, 429, 500, 502, 503, 504]);
327
-
328
- /**
329
- * Check if an exception is worth retrying.
330
- */
331
- function isTransientError(err: unknown): boolean {
332
- if (err instanceof Error) {
333
- const status =
334
- (err as any).status_code || (err as any).http_status || 0;
335
- if (status && RETRYABLE_STATUSES.has(status)) {
336
- return true;
337
- }
338
-
339
- if (err.name === "TimeoutError") {
340
- return true;
341
- }
342
-
343
- const errName = err.constructor.name.toLowerCase();
344
- return [
345
- "ratelimiterror",
346
- "apitimeouterror",
347
- "apiconnectionerror",
348
- "serviceunavailableerror",
349
- "internalservererror",
350
- "timeout",
351
- ].includes(errName);
352
- }
353
-
354
- return false;
355
- }
356
-
357
- /**
358
- * Estimate cost for LLM API call.
359
- */
360
- export function estimateCost(
361
- model: string,
362
- promptTokens: number,
363
- completionTokens: number
364
- ): number {
365
- const costs = MODEL_COST_ESTIMATES.get(model) || [0.001, 0.002];
366
- return (
367
- (promptTokens / 1000) * costs[0] + (completionTokens / 1000) * costs[1]
368
- );
369
- }
370
-
371
- /**
372
- * Format user-facing error message for LLM failures.
373
- */
374
- function formatUserFacingError(model: string, err: unknown): string {
375
- const text = err instanceof Error ? err.message : String(err);
376
- const lowered = text.toLowerCase();
377
- const [provider] = splitProvider(model);
378
-
379
- // Missing API key
380
- if (
381
- lowered.includes("api_key") ||
382
- lowered.includes("authentication") ||
383
- lowered.includes("unauthorized")
384
- ) {
385
- const envMap = getProviderEnvMap();
386
- const envVar = envMap.get(provider || "") || "the appropriate *_API_KEY";
387
- const configured = Array.from(envMap.entries())
388
- .filter(([, e]) => process.env[e])
389
- .map(([p]) => p)
390
- .join(", ");
391
- const hint = configured
392
- ? `已配置: ${configured}。`
393
- : "未配置任何 API key。";
394
- return (
395
- `❌ ${model} 调用失败:缺少或无效的 API key。\n` +
396
- `请确认 \`${envVar}\` 已设置,或运行 \`sky init\` 重新配置。${hint}`
397
- );
398
- }
399
-
400
- if (lowered.includes("rate limit") || text.includes("429")) {
401
- return `❌ ${model} 速率受限,请稍后重试。`;
402
- }
403
-
404
- if (lowered.includes("timeout")) {
405
- return `❌ ${model} 请求超时,请稍后重试或调高超时时间。`;
406
- }
407
-
408
- if (
409
- lowered.includes("model") &&
410
- (lowered.includes("not found") || lowered.includes("does not exist"))
411
- ) {
412
- return (
413
- `❌ ${model} 不是该 provider 的有效模型 ID。\n` +
414
- `请运行配置检查或 \`sky init\` 重新选择。`
415
- );
416
- }
417
-
418
- // Content filtering / safety
419
- if (
420
- [
421
- "content exists risk",
422
- "content_policy",
423
- "content_filter",
424
- "content_filtered",
425
- "safety",
426
- "blocked by safety",
427
- "responsibleaipolicyviolation",
428
- "policy_violation",
429
- ].some((kw) => lowered.includes(kw))
430
- ) {
431
- const short = text.split("\n")[0].slice(0, 200);
432
- return (
433
- `❌ ${model} 拒绝该请求 (内容审核):${short}\n` +
434
- `原因:provider 的内容安全过滤判定此次提问/上下文敏感。\n` +
435
- `建议:\n` +
436
- ` - 换一个 provider(如 OpenAI / Anthropic)\n` +
437
- ` - 把敏感关键词改写得更通用后重发`
438
- );
439
- }
440
-
441
- // Bad request / malformed sequence
442
- if (
443
- [
444
- "bad request",
445
- "invalid_request",
446
- "tool_calls",
447
- "tool messages",
448
- ].some((kw) => lowered.includes(kw)) ||
449
- err instanceof Error && err.constructor.name.toLowerCase().includes("badrequest")
450
- ) {
451
- const short = text.split("\n")[0].slice(0, 200);
452
- return (
453
- `❌ ${model} 调用失败 (Bad Request):${short}\n` +
454
- `会话消息序列可能损坏,请清理后重试。`
455
- );
456
- }
457
-
458
- const short = text.split("\n")[0].slice(0, 200) || (err instanceof Error ? err.name : "Unknown error");
459
- return `❌ ${model} 调用失败:${short}`;
460
- }
461
-
462
- /**
463
- * Unified LLM client with retry, fallback chains, caching, cost tracking, and budget control.
464
- */
465
- export class LLMClient {
466
- private config: any;
467
- private _toolRegistry: ToolRegistry;
468
- private _cache: LLMCache;
469
- private usageStats: Map<string, Record<string, number>> = new Map();
470
- private totalCost: number = 0;
471
- private costLimit: number | null;
472
- private log: Logger | null = null;
473
-
474
- constructor(
475
- config: any,
476
- toolRegistry: ToolRegistry,
477
- costLimit: number | null = null
478
- ) {
479
- this.config = config;
480
- this._toolRegistry = toolRegistry;
481
- this._cache = new LLMCache(256, 120);
482
- this.costLimit = costLimit;
483
- }
484
-
485
- /**
486
- * Set logger instance for event tracking.
487
- */
488
- setLogger(log: Logger): void {
489
- this.log = log;
490
- }
491
-
492
- /**
493
- * Get model for a specific agent or default.
494
- */
495
- private getModel(agentName?: string): string {
496
- if (agentName) {
497
- const agentCfg = (this.config.agents as any)?.[agentName];
498
- if (agentCfg?.model) {
499
- return String(agentCfg.model);
500
- }
501
- }
502
- // Honor the user's configured default (set by the /setup wizard). YAML uses
503
- // snake_case; the legacy camelCase read is kept as a last resort.
504
- const c: any = this.config;
505
- return c.default_model || c.llm?.default_model || c.llm?.defaultModel || "gpt-4o";
506
- }
507
-
508
- /**
509
- * Get max retries from config.
510
- */
511
- private _getRetries(): number {
512
- return (this.config.llm as any)?.maxRetries ?? 2;
513
- }
514
-
515
- /**
516
- * Track token usage and cost.
517
- */
518
- private trackUsage(
519
- agentName: string | undefined,
520
- model: string,
521
- promptTokens: number,
522
- completionTokens: number
523
- ): void {
524
- const key = agentName || "default";
525
- if (!this.usageStats.has(key)) {
526
- this.usageStats.set(key, {
527
- prompt_tokens: 0,
528
- completion_tokens: 0,
529
- calls: 0,
530
- cost: 0,
531
- });
532
- }
533
-
534
- const stats = this.usageStats.get(key)!;
535
- stats.prompt_tokens += promptTokens;
536
- stats.completion_tokens += completionTokens;
537
- stats.calls += 1;
538
-
539
- const cost = estimateCost(model, promptTokens, completionTokens);
540
- stats.cost += cost;
541
- this.totalCost += cost;
542
- }
543
-
544
- /**
545
- * Check if cost limit exceeded.
546
- */
547
- private checkBudget(): void {
548
- if (this.costLimit !== null && this.totalCost >= this.costLimit) {
549
- throw new Error(
550
- `Cost limit exceeded: $${this.totalCost.toFixed(4)} >= $${this.costLimit.toFixed(4)}`
551
- );
552
- }
553
- }
554
-
555
- /**
556
- * Check if API key is available for model.
557
- */
558
- private hasKeyForModel(model: string): boolean {
559
- let [provider] = splitProvider(model);
560
-
561
- if (!provider) {
562
- const lowered = model.toLowerCase();
563
- for (const p of getKnownProviders()) {
564
- if (lowered.includes(p)) {
565
- provider = p;
566
- break;
567
- }
568
- }
569
- }
570
-
571
- if (!provider) {
572
- return true; // Can't determine; don't skip
573
- }
574
-
575
- const envMap = getProviderEnvMap();
576
- const envVar =
577
- envMap.get(provider) || `${provider.toUpperCase()}_API_KEY`;
578
- return !!process.env[envVar];
579
- }
580
-
581
- /**
582
- * Get usage statistics.
583
- */
584
- getUsageStats(): Map<string, Record<string, number>> {
585
- return new Map(this.usageStats);
586
- }
587
-
588
- /**
589
- * Get total cost.
590
- */
591
- getTotalCost(): number {
592
- return this.totalCost;
593
- }
594
-
595
- /**
596
- * Reset usage statistics and cost.
597
- */
598
- resetUsageStats(): void {
599
- this.usageStats.clear();
600
- this.totalCost = 0;
601
- }
602
-
603
- /**
604
- * Complete a prompt (dummy implementation).
605
- *
606
- * Note: Full implementation requires integrating with an actual LLM API provider.
607
- * This is a placeholder that shows the structure and interface.
608
- */
609
- async complete(
610
- messages: Record<string, unknown>[],
611
- agentName?: string,
612
- tools?: string[],
613
- stream: boolean = false,
614
- overrides?: Record<string, unknown>
615
- ): Promise<LLMResponse> {
616
- this.checkBudget();
617
-
618
- const ov = overrides || {};
619
- const rawModel = ov.model;
620
- const model: string =
621
- typeof rawModel === "string" ? rawModel : this.getModel(agentName);
622
-
623
- // Build fallback chain
624
- const fallbackModels =
625
- FALLBACK_CHAINS.get(model)?.filter((m) => this.hasKeyForModel(m)) || [];
626
- const modelsToTry = toolCompatibleModels(
627
- model,
628
- [model, ...fallbackModels],
629
- !!tools
630
- );
631
-
632
- // Try each model in sequence
633
- let lastError: Error | null = null;
634
- for (const attemptModel of modelsToTry) {
635
- try {
636
- this.checkBudget();
637
- return await this.completeWithRetry(
638
- attemptModel,
639
- messages,
640
- agentName,
641
- tools,
642
- stream,
643
- overrides
644
- );
645
- } catch (e) {
646
- lastError = e instanceof Error ? e : new Error(String(e));
647
- this.log?.warn("llm_fallback", {
648
- model: attemptModel,
649
- agent: agentName,
650
- error: lastError.message,
651
- });
652
- continue;
653
- }
654
- }
655
-
656
- // All models failed
657
- return {
658
- content: formatUserFacingError(model, lastError),
659
- toolCalls: [],
660
- model,
661
- usage: { promptTokens: 0, completionTokens: 0 },
662
- cost: 0,
663
- truncated: false,
664
- };
665
- }
666
-
667
- /**
668
- * Complete with retry logic — real HTTP call to LLM API.
669
- */
670
- private async completeWithRetry(
671
- model: string,
672
- messages: Record<string, unknown>[],
673
- agentName?: string,
674
- tools?: string[],
675
- stream: boolean = false,
676
- overrides?: Record<string, unknown>
677
- ): Promise<LLMResponse> {
678
- const temperature = (overrides?.temperature as number) ?? 0.7;
679
- const maxTokens = (overrides?.maxTokens as number) ?? 4096;
680
- const maxRetries = (this.config.llm as any)?.maxRetries ?? 2;
681
- const isAnthropic = model.includes("claude") || model.startsWith("anthropic/");
682
-
683
- let lastError: Error | null = null;
684
- for (let attempt = 0; attempt <= maxRetries; attempt++) {
685
- try {
686
- if (attempt > 0) await new Promise(r => setTimeout(r, 1000 * Math.pow(2, attempt - 1)));
687
-
688
- let content: string;
689
- let toolCalls: ToolCall[] = [];
690
- let usage: UsageStats = { promptTokens: 0, completionTokens: 0 };
691
-
692
- if (isAnthropic) {
693
- const r = await this.callAnthropic(model, messages, tools, temperature, maxTokens, agentName);
694
- content = r.content; toolCalls = r.toolCalls; usage = r.usage;
695
- } else {
696
- const r = await this.callOpenAI(model, messages, tools, temperature, maxTokens, agentName);
697
- content = r.content; toolCalls = r.toolCalls; usage = r.usage;
698
- }
699
-
700
- const name = agentName || "default";
701
- if (!this.usageStats.has(name)) this.usageStats.set(name, { prompt_tokens: 0, completion_tokens: 0, calls: 0, cost: 0 });
702
- const s = this.usageStats.get(name)!;
703
- s.prompt_tokens += usage.promptTokens; s.completion_tokens += usage.completionTokens; s.calls += 1;
704
- const cost = estimateCost(model, usage.promptTokens, usage.completionTokens);
705
- s.cost += cost; this.totalCost += cost;
706
-
707
- return { content, toolCalls, model, usage, cost, truncated: false };
708
- } catch (e: any) {
709
- lastError = e;
710
- if (attempt >= maxRetries) throw e;
711
- }
712
- }
713
- throw lastError || new Error("Unknown error");
714
- }
715
-
716
- private async callOpenAI(
717
- m: string, messages: Record<string, unknown>[], tools?: string[], temp?: number, maxTok?: number, agentName?: string
718
- ): Promise<{ content: string; toolCalls: ToolCall[]; usage: UsageStats }> {
719
- const apiKey = this.getApiKey(m, agentName);
720
- const baseUrl = this.getBaseUrl(m);
721
- const body: Record<string, unknown> = { model: m, messages, temperature: temp ?? 0.7, max_tokens: maxTok ?? 4096 };
722
- if (tools?.length) {
723
- const defs = tools.map(t => this._toolRegistry.get(t)).filter(Boolean) as any[];
724
- if (defs.length) body.tools = defs.map(t => ({ type: "function", function: { name: t.name, description: t.description, parameters: this.paramsToSchema(t.parameters || []) } }));
725
- }
726
- const resp = await fetch(baseUrl + "/chat/completions", { method: "POST", headers: { "Content-Type": "application/json", Authorization: "Bearer " + apiKey }, body: JSON.stringify(body) });
727
- if (!resp.ok) { const e: any = new Error("API " + resp.status + ": " + ((await resp.text()).slice(0, 200))); e.status_code = resp.status; throw e; }
728
- const data: any = await resp.json();
729
- const msg = data.choices?.[0]?.message || {};
730
- return { content: msg.content || "", toolCalls: (msg.tool_calls || []).map((tc: any) => ({ id: tc.id, type: "function", function: { name: tc.function?.name || "", arguments: tc.function?.arguments || "{}" } })), usage: { promptTokens: data.usage?.prompt_tokens || 0, completionTokens: data.usage?.completion_tokens || 0 } };
731
- }
732
-
733
/**
 * Send one non-streaming request to the Anthropic Messages API.
 *
 * System-role messages are lifted out of `messages` into the request's
 * top-level `system` field; text blocks of the reply are concatenated and
 * `tool_use` blocks are converted into `ToolCall` objects.
 *
 * @param m Model id; a leading `anthropic/` routing prefix is removed before sending.
 * @param messages Chat messages; entries with role "system" are not sent in `messages`.
 * @param tools Names looked up in the tool registry; unknown names are ignored.
 * @param temp Sampling temperature (defaults to 0.7).
 * @param maxTok Maximum output tokens (defaults to 4096).
 * @param agentName Agent whose per-agent API key override should be honored.
 * @returns Reply text, requested tool calls, and token usage.
 * @throws Error carrying `status_code` when the HTTP response is not OK.
 */
private async callAnthropic(
  m: string, messages: Record<string, unknown>[], tools?: string[], temp?: number, maxTok?: number, agentName?: string
): Promise<{ content: string; toolCalls: ToolCall[]; usage: UsageStats }> {
  // The Messages API expects the bare model id, not the "anthropic/" routing prefix.
  const modelId = m.replace(/^anthropic\//i, "");
  // Resolve the key via an "anthropic/"-prefixed name: a bare "anthropic" has no slash
  // and no "claude" substring, so key resolution would fall back to the OpenAI key.
  const apiKey = this.getApiKey("anthropic/" + modelId, agentName);

  const body: Record<string, unknown> = {
    model: modelId,
    max_tokens: maxTok ?? 4096,
    messages: messages.filter(msg => msg.role !== "system"),
    temperature: temp ?? 0.7,
  };

  const systemMsgs = messages.filter(msg => msg.role === "system");
  if (systemMsgs.length === 1) {
    body.system = systemMsgs[0].content;
  } else if (systemMsgs.length > 1) {
    // Keep every system message instead of silently dropping all but the first.
    const blocks: unknown[] = [];
    for (const msg of systemMsgs) {
      if (typeof msg.content === "string") {
        if (msg.content) blocks.push({ type: "text", text: msg.content });
      } else if (Array.isArray(msg.content)) {
        blocks.push(...msg.content);
      }
    }
    if (blocks.length) body.system = blocks;
  }

  if (tools?.length) {
    const defs = tools.map(t => this._toolRegistry.get(t)).filter(Boolean) as any[];
    if (defs.length) {
      body.tools = defs.map(t => ({
        name: t.name,
        description: t.description,
        input_schema: this.paramsToSchema(t.parameters || []),
      }));
    }
  }

  const resp = await fetch("https://api.anthropic.com/v1/messages", {
    method: "POST",
    headers: { "Content-Type": "application/json", "x-api-key": apiKey, "anthropic-version": "2023-06-01" },
    body: JSON.stringify(body),
  });
  if (!resp.ok) {
    const e: any = new Error("API " + resp.status + ": " + ((await resp.text()).slice(0, 200)));
    e.status_code = resp.status;
    throw e;
  }

  const data: any = await resp.json();
  let content = "";
  const toolCalls: ToolCall[] = [];
  for (const b of data.content || []) {
    if (b.type === "text") content += b.text;
    if (b.type === "tool_use") {
      // `?? {}` keeps `arguments` a JSON string even if the block has no input.
      toolCalls.push({ id: b.id, type: "function", function: { name: b.name, arguments: JSON.stringify(b.input ?? {}) } });
    }
  }
  return {
    content,
    toolCalls,
    usage: { promptTokens: data.usage?.input_tokens || 0, completionTokens: data.usage?.output_tokens || 0 },
  };
}
749
-
750
/**
 * Convert a tool's parameter list into a JSON-Schema object description.
 *
 * "integer", "number" and "boolean" are kept as-is; every other declared type
 * is emitted as "string". `required` is only present when at least one
 * parameter is marked required.
 */
private paramsToSchema(params: any[]): Record<string, any> {
  const keptTypes = new Set(["integer", "number", "boolean"]);
  const properties: Record<string, any> = {};
  const required: string[] = [];

  for (const param of params) {
    properties[param.name] = {
      type: keptTypes.has(param.type) ? param.type : "string",
      description: param.description,
    };
  }
  for (const param of params) {
    if (param.required) required.push(param.name);
  }

  const schema: Record<string, any> = { type: "object", properties };
  if (required.length > 0) schema.required = required;
  return schema;
}
756
-
757
/**
 * Resolve the API key to use for a model (or bare provider id).
 *
 * Resolution order:
 *   0. `agents.<agentName>.api_key` from the client config,
 *   1. the provider's environment variable,
 *   2. `api_keys.<provider>` in `~/.skyloom/config.yaml`.
 *
 * The provider is taken from a known `provider/` prefix, from a bare provider
 * id (e.g. "anthropic"), or guessed from substrings of the model name;
 * anything unrecognized is treated as "openai".
 *
 * @param model Model id such as "deepseek/deepseek-chat", or a bare provider id.
 * @param agentName Agent whose per-agent key override should win.
 * @returns The resolved API key.
 * @throws Error when no key can be found for a provider that needs one.
 */
private getApiKey(model: string, agentName?: string): string {
  // 0. Per-agent override (agents.<name>.api_key) beats everything
  if (agentName) {
    const agentKey = (this.config.agents as any)?.[agentName]?.api_key;
    if (agentKey) return String(agentKey);
  }

  const envMap = getProviderEnvMap();
  const lowered = model.toLowerCase();
  const [prefixed] = splitProvider(model);

  let provider = "openai";
  if (prefixed) provider = prefixed;
  // A bare provider id (callers pass e.g. "anthropic") has no slash and would
  // otherwise fall through to "openai" and return the wrong provider's key.
  else if (envMap.has(lowered)) provider = lowered;
  else if (lowered.includes("claude")) provider = "anthropic";
  else if (lowered.includes("deepseek")) provider = "deepseek";
  else if (lowered.includes("groq")) provider = "groq";
  else if (lowered.includes("openrouter")) provider = "openrouter";
  else if (lowered.includes("gemini")) provider = "gemini";

  const envVar = envMap.get(provider) || (provider.toUpperCase() + "_API_KEY");

  // 1. Check environment variable first
  const envKey = process.env[envVar];
  if (envKey) return envKey;

  // 2. Check config file (~/.skyloom/config.yaml)
  try {
    const fs = require("fs"); const path = require("path"); const yaml = require("yaml");
    const cfgPath = path.join(require("os").homedir(), ".skyloom", "config.yaml");
    if (fs.existsSync(cfgPath)) {
      const cfg = yaml.parse(fs.readFileSync(cfgPath, "utf-8")) || {};
      const keys = cfg.api_keys || {};
      if (keys[provider]) return String(keys[provider]);
    }
  } catch { /* best effort: an unreadable or invalid config file just means "no key here" */ }

  // 3. A local Ollama server does not need a real key; send a placeholder
  //    rather than refusing to call it.
  if (provider === "ollama") return "ollama";

  throw new Error("Missing " + envVar + ". Run: sky apikey set " + provider + " YOUR_KEY");
}
786
-
787
/**
 * Pick the OpenAI-compatible base URL for a model.
 *
 * A known `provider/` prefix decides the endpoint; without one the model name
 * is searched for a few well-known substrings. Anything unrecognized uses the
 * OpenAI endpoint.
 *
 * @param model Model id, optionally prefixed with a provider (e.g. "groq/...").
 * @returns Base URL without a trailing slash; callers append "/chat/completions".
 */
private getBaseUrl(model: string): string {
  // Tolerate a trailing slash in OLLAMA_HOST so the result never contains "//v1".
  const ollamaUrl = (): string =>
    (process.env.OLLAMA_HOST || "http://localhost:11434").replace(/\/+$/, "") + "/v1";

  const [provider] = splitProvider(model);

  if (!provider) {
    const l = model.toLowerCase();
    if (l.includes("claude")) return "https://api.anthropic.com/v1";
    if (l.includes("deepseek")) return "https://api.deepseek.com/v1";
    if (l.includes("groq")) return "https://api.groq.com/openai/v1";
    if (l.includes("openrouter")) return "https://openrouter.ai/api/v1";
    if (l.includes("ollama")) return ollamaUrl();
    return "https://api.openai.com/v1";
  }

  switch (provider) {
    case "deepseek": return "https://api.deepseek.com/v1";
    case "groq": return "https://api.groq.com/openai/v1";
    case "openrouter": return "https://openrouter.ai/api/v1";
    case "ollama": return ollamaUrl();
    // Previously these fell through to the OpenAI endpoint, so the request
    // (and the provider's key) went to the wrong host.
    case "mistral": return "https://api.mistral.ai/v1";
    case "xai": return "https://api.x.ai/v1";
    default: return "https://api.openai.com/v1";
  }
}
796
-
797
/**
 * Single-chunk "stream": waits for `complete()` to finish and yields the
 * whole reply text as one item.
 */
async *stream(
  messages: Record<string, unknown>[], agentName?: string
): AsyncGenerator<string> {
  const { content } = await this.complete(messages, agentName);
  yield content;
}
803
-
804
/**
 * Real SSE token streaming for OpenAI-compatible providers (openai, deepseek,
 * groq, openrouter, mistral, xai, ollama). Content + reasoning deltas are
 * yielded as they arrive; tool-call deltas are accumulated by index and
 * emitted once complete. Usage comes from the final `stream_options` chunk.
 *
 * @param m Model id sent to the provider.
 * @param messages Chat messages, sent as-is.
 * @param tools Names looked up in the tool registry; unknown names are ignored.
 * @param temp Sampling temperature (defaults to 0.7).
 * @param maxTok Maximum output tokens (defaults to 4096).
 * @param signal Abort signal forwarded to `fetch`.
 * @param agentName Agent whose per-agent API key override should be honored.
 * @throws Error carrying `status_code` when the response is not OK or has no body.
 */
private async *callOpenAIStream(
  m: string, messages: Record<string, unknown>[], tools?: string[], temp?: number, maxTok?: number, signal?: AbortSignal, agentName?: string
): AsyncGenerator<StreamEvent> {
  // Pass agentName so the per-agent key override applies to streaming too,
  // matching the non-streaming call path.
  const apiKey = this.getApiKey(m, agentName);
  const baseUrl = this.getBaseUrl(m);
  const body: Record<string, unknown> = {
    model: m, messages, temperature: temp ?? 0.7, max_tokens: maxTok ?? 4096,
    stream: true, stream_options: { include_usage: true },
  };
  if (tools?.length) {
    const defs = tools.map(t => this._toolRegistry.get(t)).filter(Boolean) as any[];
    if (defs.length) body.tools = defs.map(t => ({ type: "function", function: { name: t.name, description: t.description, parameters: this.paramsToSchema(t.parameters || []) } }));
  }
  const resp = await fetch(baseUrl + "/chat/completions", { method: "POST", headers: { "Content-Type": "application/json", Authorization: "Bearer " + apiKey }, body: JSON.stringify(body), signal });
  if (!resp.ok || !resp.body) { const e: any = new Error("API " + resp.status + ": " + ((await resp.text()).slice(0, 200))); e.status_code = resp.status; throw e; }

  const reader = (resp.body as any).getReader();
  const decoder = new TextDecoder();
  const toolAcc = new Map<number, { id: string; name: string; args: string }>();
  let usage: UsageStats = { promptTokens: 0, completionTokens: 0 };
  let reasoning = "";
  let buf = "";

  // Parse one SSE line, fold it into the accumulators, and return the events to emit.
  const handleLine = (line: string): StreamEvent[] => {
    const t = line.trim();
    if (!t.startsWith("data:")) return [];
    const data = t.slice(5).trim();
    if (data === "[DONE]") return [];
    let json: any;
    try { json = JSON.parse(data); } catch { return []; } // skip malformed or partial chunks
    if (json.usage) usage = { promptTokens: json.usage.prompt_tokens || 0, completionTokens: json.usage.completion_tokens || 0 };
    const delta = json.choices?.[0]?.delta;
    if (!delta) return [];

    const events: StreamEvent[] = [];
    if (delta.content) events.push({ type: "content", text: delta.content });
    if (delta.reasoning_content) {
      reasoning += delta.reasoning_content;
      events.push({ type: "reasoning", text: delta.reasoning_content });
    }
    if (Array.isArray(delta.tool_calls)) {
      for (const tc of delta.tool_calls) {
        const idx = tc.index ?? 0;
        const acc = toolAcc.get(idx) || { id: "", name: "", args: "" };
        if (tc.id) acc.id = tc.id;
        if (tc.function?.name) acc.name = tc.function.name;
        if (tc.function?.arguments) acc.args += tc.function.arguments;
        toolAcc.set(idx, acc);
      }
    }
    return events;
  };

  let finished = false;
  try {
    while (true) {
      const { done, value } = await reader.read();
      if (done) break;
      buf += decoder.decode(value, { stream: true });
      const lines = buf.split("\n");
      buf = lines.pop() || ""; // keep the incomplete last line for the next chunk
      for (const line of lines) yield* handleLine(line);
    }
    // Flush the decoder and process a final line that had no trailing newline.
    buf += decoder.decode();
    if (buf) yield* handleLine(buf);
    finished = true;
  } finally {
    // On abort, error, or early consumer exit, stop the underlying HTTP stream.
    if (!finished) {
      try { await reader.cancel(); } catch { /* stream already closed or errored */ }
    }
  }

  for (const acc of toolAcc.values()) {
    if (acc.name) yield { type: "tool_call", toolCall: { id: acc.id || ("call_" + acc.name), type: "function", function: { name: acc.name, arguments: acc.args || "{}" } } };
  }
  yield { type: "done", usage, reasoningContent: reasoning || undefined };
}
867
-
868
/**
 * Stream a completion (with optional tools) as `StreamEvent`s.
 *
 * Models detected as Anthropic are served through the blocking `complete()`
 * path and replayed as events. Other models use SSE streaming; if that fails
 * before any content or tool call was emitted, the blocking path is used
 * instead. An abort ends the stream with a plain "done" event; a failure
 * after output has started emits "error" followed by "done". Token usage and
 * cost are recorded only when the SSE stream finishes normally.
 */
async *streamWithTools(
  messages: Record<string, unknown>[], agentName?: string, tools?: string[],
  _toolRegistry?: ToolRegistry, overrides?: Record<string, unknown>, signal?: AbortSignal
): AsyncGenerator<StreamEvent> {
  this.checkBudget();

  const opts = overrides || {};
  const model: string = typeof opts.model === "string" ? opts.model : this.getModel(agentName);
  const temperature = (opts.temperature as number) ?? 0.7;
  const maxTokens = (opts.maxTokens as number) ?? 4096;

  // Replays a blocking completion as stream events (keeps fallback chain + retry).
  const client = this;
  async function* replayBlocking(): AsyncGenerator<StreamEvent> {
    const reply = await client.complete(messages, agentName, tools, false, overrides);
    if (reply.content) {
      yield { type: "content", text: reply.content };
    }
    for (const call of reply.toolCalls || []) {
      yield { type: "tool_call", toolCall: call };
    }
    yield { type: "done", usage: reply.usage, reasoningContent: reply.reasoningContent };
  }

  // Anthropic uses a different wire format, so it never takes the SSE path.
  if (model.includes("claude") || model.startsWith("anthropic/")) {
    yield* replayBlocking();
    return;
  }

  let emittedOutput = false;
  let usage: UsageStats = { promptTokens: 0, completionTokens: 0 };
  try {
    const events = this.callOpenAIStream(model, messages, tools, temperature, maxTokens, signal, agentName);
    for await (const event of events) {
      if (event.type === "content" || event.type === "tool_call") {
        emittedOutput = true;
      }
      if (event.type === "done" && event.usage) {
        usage = event.usage;
      }
      yield event;
    }
  } catch (e: any) {
    // User interrupt (Ctrl-C): stop cleanly, keeping whatever already streamed.
    if (signal?.aborted || e?.name === "AbortError") {
      yield { type: "done", usage };
      return;
    }
    if (emittedOutput) {
      yield { type: "error", text: String(e?.message || e) };
      yield { type: "done", usage };
      return;
    }
    this.log?.warn("stream_failed_fallback", { model, error: String(e?.message || e) });
    yield* replayBlocking();
    return;
  }

  // Usage + cost bookkeeping for the successfully streamed call.
  const statsKey = agentName || "default";
  let stats = this.usageStats.get(statsKey);
  if (!stats) {
    stats = { prompt_tokens: 0, completion_tokens: 0, calls: 0, cost: 0 };
    this.usageStats.set(statsKey, stats);
  }
  stats.prompt_tokens += usage.promptTokens;
  stats.completion_tokens += usage.completionTokens;
  stats.calls += 1;
  const cost = estimateCost(model, usage.promptTokens, usage.completionTokens);
  stats.cost += cost;
  this.totalCost += cost;
}
915
- }
1
+ /**
2
+ * LLM abstraction layer with LiteLLM-compatible routing, retry, fallback, cost tracking, and budget control.
3
+ *
4
+ * Provides unified interface for multiple LLM providers (OpenAI, Anthropic, DeepSeek, etc.)
5
+ * with automatic fallback chains, prompt caching for Anthropic, and cost estimation.
6
+ */
7
+
8
+ import type { Logger } from "./logger";
9
+ import { LLMCache } from "./cache";
10
+ import type { ToolRegistry } from "./tool";
11
+
12
/**
 * Token usage statistics for a single LLM call.
 */
export interface UsageStats {
  /** Tokens counted for the prompt (input). */
  promptTokens: number;
  /** Tokens counted for the completion (output). */
  completionTokens: number;
}

/**
 * Tool call extracted from an LLM response.
 */
export interface ToolCall {
  id: string;
  type: string;
  function: {
    /** Name of the tool to invoke. */
    name: string;
    /** Tool arguments as a string (this file fills it with JSON text). */
    arguments: string;
  };
}

/**
 * Result of a completed LLM call.
 */
export interface LLMResponse {
  content: string;
  toolCalls: ToolCall[];
  model: string;
  usage: UsageStats;
  cost: number;
  reasoningContent?: string;
  /** True when LLM loop ran out of iterations before producing a tool-call-free answer. */
  truncated: boolean;
}

/**
 * One event emitted while streaming an LLM response.
 */
export interface StreamEvent {
  type: "content" | "tool_call" | "done" | "error" | "reasoning";
  text?: string;
  toolCall?: ToolCall;
  usage?: UsageStats;
  reasoningContent?: string;
}
56
+
57
/**
 * Separate a known provider prefix from a model id.
 *
 * "anthropic/claude-3-opus" → ["anthropic", "claude-3-opus"]. When there is
 * no slash, or the text before the first slash is not a known provider, the
 * result is [null, model] with the model left untouched.
 */
function splitProvider(model: string): [string | null, string] {
  const slashAt = model.indexOf("/");
  if (slashAt === -1) {
    return [null, model];
  }

  // Provider ids are matched case-insensitively; the remainder keeps its case.
  const candidate = model.slice(0, slashAt).toLowerCase();
  return getKnownProviders().has(candidate)
    ? [candidate, model.slice(slashAt + 1)]
    : [null, model];
}
72
+
73
/**
 * Build the set of provider ids recognized as a `provider/` model prefix.
 * A fresh Set is returned on every call.
 */
function getKnownProviders(): Set<string> {
  const providerIds: string[] = [
    "openai", "azure", "anthropic", "deepseek", "ollama",
    "groq", "mistral", "cohere", "together", "openrouter",
    "google", "vertex_ai", "xai", "perplexity", "fireworks",
    "reka", "nvidia", "sambanova", "qwen", "zhipu",
    "lingyiwanwu", "minimax", "moonshot", "baidu", "baichuan",
    "stepfun", "lmstudio", "vllm", "litellm",
  ];
  return new Set(providerIds);
}
109
+
110
/**
 * Build the provider → API-key environment variable mapping.
 *
 * Every listed provider uses `<PROVIDER>_API_KEY`, except "google", which
 * reads `GEMINI_API_KEY`. A fresh Map is returned on every call.
 */
function getProviderEnvMap(): Map<string, string> {
  const providers = [
    "openai", "anthropic", "google", "deepseek", "xai", "mistral",
    "groq", "cohere", "perplexity", "fireworks", "together", "openrouter",
    "reka", "nvidia", "sambanova", "qwen", "zhipu", "lingyiwanwu",
    "minimax", "moonshot", "baidu", "baichuan", "stepfun",
  ];

  const envMap = new Map<string, string>();
  for (const id of providers) {
    const envVar = id === "google" ? "GEMINI_API_KEY" : `${id.toUpperCase()}_API_KEY`;
    envMap.set(id, envVar);
  }
  return envMap;
}
141
+
142
/**
 * Tell whether a model id targets Anthropic: it starts with "anthropic/" or
 * "claude" (case-insensitive), or its provider prefix resolves to "anthropic".
 */
function isAnthropicModel(model: string): boolean {
  const name = model.toLowerCase();
  return (
    name.startsWith("anthropic/") ||
    name.startsWith("claude") ||
    splitProvider(model)[0] === "anthropic"
  );
}
153
+
154
/**
 * Check whether a model id refers to a DeepSeek model.
 *
 * True when the provider prefix is "deepseek", or when the full id or the id
 * with its known provider prefix removed starts with "deepseek". All name
 * comparisons are case-insensitive.
 */
function isDeepseekModel(model: string): boolean {
  const [provider, stripped] = splitProvider(model);
  if (provider === "deepseek") {
    return true;
  }
  // `stripped` keeps the caller's casing, so lower-case it as well; otherwise
  // e.g. "openrouter/DeepSeek-R1" is missed while "DeepSeek-R1" is detected.
  return (
    model.toLowerCase().startsWith("deepseek") ||
    stripped.toLowerCase().startsWith("deepseek")
  );
}
166
+
167
/**
 * Decide whether a DeepSeek model may be given tools.
 * Ids containing "reasoner", "-r1" or "/r1" (case-insensitive) are treated as
 * reasoning models, which are not reliable function-call models.
 */
function deepseekSupportsTools(model: string): boolean {
  const name = model.toLowerCase();
  for (const marker of ["reasoner", "-r1", "/r1"]) {
    if (name.includes(marker)) {
      return false;
    }
  }
  return true;
}
175
+
176
/**
 * Narrow a candidate model list to models usable with tools.
 *
 * Without tools the list is returned unchanged. With tools, DeepSeek models
 * that do not support tool calls are removed. If nothing is left, a DeepSeek
 * primary falls back to "deepseek/deepseek-chat"; otherwise the original list
 * is returned.
 */
function toolCompatibleModels(
  primary: string,
  models: string[],
  needsTools: boolean
): string[] {
  if (needsTools) {
    const usable = models.filter(
      (m) => !(isDeepseekModel(m) && !deepseekSupportsTools(m))
    );
    if (usable.length > 0) {
      return usable;
    }
    if (isDeepseekModel(primary)) {
      return ["deepseek/deepseek-chat"];
    }
  }
  return models;
}
202
+
203
/**
 * Add Anthropic ephemeral cache markers to the system prompt and tool list.
 *
 * For non-Anthropic models both inputs are returned untouched. Otherwise the
 * first system message with non-empty content gets
 * `cache_control: { type: "ephemeral" }` (a string prompt is wrapped in a
 * single text block; for block arrays the last block is marked), and the last
 * tool schema is marked the same way. Inputs are not mutated. The returned
 * tool list is `null` when no tools were supplied.
 */
function _applyAnthropicCacheControl(
  model: string,
  messages: Record<string, unknown>[],
  toolSchemas: Record<string, unknown>[] | null
): [Record<string, unknown>[], Record<string, unknown>[] | null] {
  if (!isAnthropicModel(model)) {
    return [messages, toolSchemas];
  }

  // Shallow-copies every item and attaches the cache marker to the last one.
  const markLast = (items: Record<string, unknown>[]): Record<string, unknown>[] =>
    items.map((item, i) =>
      i === items.length - 1
        ? { ...item, cache_control: { type: "ephemeral" } }
        : { ...item }
    );

  let systemMarked = false;
  const outMessages = messages.map((msg): Record<string, unknown> => {
    if (systemMarked || msg.role !== "system") {
      return msg;
    }

    const body = msg.content;
    if (typeof body === "string" && body) {
      systemMarked = true;
      return {
        role: "system",
        content: [
          { type: "text", text: body, cache_control: { type: "ephemeral" } },
        ],
      };
    }
    if (Array.isArray(body) && body.length > 0) {
      systemMarked = true;
      return { ...msg, content: markLast(body as Record<string, unknown>[]) };
    }

    // Empty system content: leave it alone and keep looking for a later one.
    return msg;
  });

  const outTools =
    toolSchemas && toolSchemas.length > 0 ? markLast(toolSchemas) : null;

  return [outMessages, outTools];
}
284
+
285
/**
 * Estimate token count for mixed CJK/English text.
 *
 * Heuristic: each CJK character counts as ~2 tokens, and every 4 remaining
 * characters count as ~1 token. The result is never below 1, including for
 * the empty string.
 *
 * @param text Text to estimate.
 * @returns Estimated number of tokens (>= 1).
 */
function _estimateTokens(text: string): number {
  // CJK Unified Ideographs, Hiragana, Katakana, Hangul syllables.
  // Katakana (U+30A0–U+30FF) was previously missing, so Japanese text written
  // in Katakana was counted at the much cheaper non-CJK rate.
  const cjkRegex = /[\u4E00-\u9FFF\u3040-\u309F\u30A0-\u30FF\uAC00-\uD7AF]/g;
  const cjkCount = (text.match(cjkRegex) || []).length;
  const otherCount = text.length - cjkCount;
  return Math.max(1, cjkCount * 2 + Math.floor(otherCount / 4));
}
296
+
297
/**
 * Estimated price per 1K tokens in USD, keyed by model id.
 * Each value is [input price, output price].
 */
const MODEL_COST_ESTIMATES: Map<string, [number, number]> = (() => {
  // Rows are: model id, input $/1K tokens, output $/1K tokens.
  const rows: Array<[string, number, number]> = [
    ["gpt-4o", 0.0025, 0.01],
    ["gpt-4o-mini", 0.00015, 0.0006],
    ["gpt-4.1", 0.002, 0.008],
    ["gpt-4.1-mini", 0.0004, 0.0016],
    ["gpt-4.1-nano", 0.0001, 0.0004],
    ["o3", 0.01, 0.04],
    ["o4-mini", 0.0011, 0.0044],
    ["claude-sonnet-4-6", 0.003, 0.015],
    ["claude-opus-4-7", 0.005, 0.025],
    ["claude-haiku-4-5", 0.0008, 0.004],
    ["deepseek-chat", 0.00027, 0.0011],
    ["deepseek-reasoner", 0.00055, 0.00219],
    ["deepseek-v4-flash", 0.00014, 0.00028],
    ["deepseek-v4-pro", 0.00174, 0.00348],
    ["deepseek/deepseek-chat", 0.00027, 0.0011],
    ["deepseek/deepseek-reasoner", 0.00055, 0.00219],
    ["deepseek/deepseek-v4-flash", 0.00014, 0.00028],
    ["deepseek/deepseek-v4-pro", 0.00174, 0.00348],
    ["gemini/gemini-2.5-flash", 0.0003, 0.0025],
    ["gemini/gemini-2.5-pro", 0.00125, 0.01],
    ["ollama/llama3", 0.0, 0.0],
    ["ollama/qwen2.5", 0.0, 0.0],
  ];
  return new Map(
    rows.map(([model, input, output]): [string, [number, number]] => [model, [input, output]])
  );
})();
324
+
325
/**
 * For each model id, the ordered list of models to try when it fails.
 */
const FALLBACK_CHAINS: Map<string, string[]> = new Map(
  Object.entries({
    "gpt-4o": ["gpt-4o-mini"],
    "gpt-4o-mini": ["gpt-4o"],
    "gpt-4.1": ["gpt-4.1-mini", "gpt-4o-mini"],
    "gpt-4.1-mini": ["gpt-4o-mini"],
    "gpt-4.1-nano": ["gpt-4.1-mini"],
    "o3": ["o4-mini", "gpt-4.1"],
    "o4-mini": ["gpt-4.1-mini"],
    "claude-sonnet-4-6": ["claude-haiku-4-5", "gpt-4.1-mini"],
    "claude-opus-4-7": ["claude-sonnet-4-6", "gpt-4.1"],
    "claude-haiku-4-5": ["gpt-4.1-mini"],
    "deepseek-chat": ["deepseek/deepseek-chat", "gpt-4.1-mini"],
    "deepseek-reasoner": ["deepseek/deepseek-chat", "gpt-4.1-mini"],
    "deepseek-v4-flash": ["deepseek/deepseek-chat", "gpt-4.1-mini"],
    "deepseek-v4-pro": ["deepseek-v4-flash", "deepseek/deepseek-chat", "gpt-4.1-mini"],
    "deepseek/deepseek-chat": ["gpt-4.1-mini"],
    "deepseek/deepseek-reasoner": ["deepseek/deepseek-chat", "gpt-4.1-mini"],
    "deepseek/deepseek-v4-flash": ["deepseek/deepseek-chat", "gpt-4.1-mini"],
    "deepseek/deepseek-v4-pro": ["deepseek/deepseek-v4-flash", "deepseek/deepseek-chat", "gpt-4.1-mini"],
    "gemini/gemini-2.5-flash": ["gemini/gemini-2.5-pro", "gpt-4.1-mini"],
    "gemini/gemini-2.5-pro": ["gpt-4.1"],
  })
);
354
+
355
/**
 * HTTP status codes that are considered transient errors (worth retrying).
 */
const RETRYABLE_STATUSES = new Set([408, 425, 429, 500, 502, 503, 504]);

/**
 * Check if an exception is worth retrying.
 *
 * An `Error` is transient when it carries a retryable HTTP status
 * (`status_code`, `http_status` or `status`), or when its `name` or class
 * name is one of the known transient error names (case-insensitive).
 * Values that are not `Error` instances are never considered transient.
 *
 * @param err The caught value.
 * @returns True when a retry has a chance of succeeding.
 */
function isTransientError(err: unknown): boolean {
  if (!(err instanceof Error)) {
    return false;
  }

  const fields = err as any;
  // `status` is accepted as well because many HTTP client errors expose the
  // code under that field rather than `status_code`.
  const status = Number(fields.status_code || fields.http_status || fields.status || 0);
  if (status && RETRYABLE_STATUSES.has(status)) {
    return true;
  }

  const transientNames = [
    "timeouterror",
    "ratelimiterror",
    "apitimeouterror",
    "apiconnectionerror",
    "serviceunavailableerror",
    "internalservererror",
    "timeout",
  ];
  // Check `name` too: errors often set it explicitly without subclassing, and
  // class names do not survive minification.
  const candidates = [err.name, err.constructor?.name];
  return candidates.some(
    (n) => typeof n === "string" && transientNames.includes(n.toLowerCase())
  );
}
388
+
389
/**
 * Estimate the USD cost of an LLM API call.
 *
 * The model id is looked up in `MODEL_COST_ESTIMATES`. If the exact id is not
 * listed, leading `segment/` prefixes are removed one at a time and the
 * lookup is retried, so "anthropic/claude-sonnet-4-6" is priced like
 * "claude-sonnet-4-6". Unknown models use a default of $0.001 / $0.002 per
 * 1K input / output tokens.
 *
 * @param model Model id, with or without a provider prefix.
 * @param promptTokens Number of input tokens.
 * @param completionTokens Number of output tokens.
 * @returns Estimated cost in USD.
 */
export function estimateCost(
  model: string,
  promptTokens: number,
  completionTokens: number
): number {
  let rates = MODEL_COST_ESTIMATES.get(model);

  // Exact ids win; otherwise peel prefixes until a listed model is found.
  let name = model;
  while (!rates) {
    const slashAt = name.indexOf("/");
    if (slashAt === -1) break;
    name = name.slice(slashAt + 1);
    rates = MODEL_COST_ESTIMATES.get(name);
  }

  const [inputRate, outputRate] = rates ?? [0.001, 0.002];
  return (promptTokens / 1000) * inputRate + (completionTokens / 1000) * outputRate;
}
402
+
403
/**
 * Turn an LLM failure into a short message for the end user.
 *
 * The error text is matched against keyword groups in this order: API key /
 * authentication, rate limiting, timeout, unknown model, content filtering,
 * bad request. The first match selects the message; anything else falls back
 * to a generic message containing the first line of the error.
 */
function formatUserFacingError(model: string, err: unknown): string {
  const text = err instanceof Error ? err.message : String(err);
  const lowered = text.toLowerCase();

  const mentionsAny = (keywords: string[]): boolean =>
    keywords.some((kw) => lowered.includes(kw));
  // First line of the raw error, capped at 200 characters.
  const firstLine = (): string => text.split("\n")[0].slice(0, 200);

  // Missing API key
  if (mentionsAny(["api_key", "authentication", "unauthorized"])) {
    const [provider] = splitProvider(model);
    const envMap = getProviderEnvMap();
    const envVar = envMap.get(provider || "") || "the appropriate *_API_KEY";

    // List the providers whose key variable is currently set, as a hint.
    const withKey: string[] = [];
    for (const [name, variable] of envMap.entries()) {
      if (process.env[variable]) withKey.push(name);
    }
    const configured = withKey.join(", ");
    const hint = configured
      ? `已配置: ${configured}。`
      : "未配置任何 API key。";

    return (
      `❌ ${model} 调用失败:缺少或无效的 API key。\n` +
      `请确认 \`${envVar}\` 已设置,或运行 \`sky init\` 重新配置。${hint}`
    );
  }

  if (lowered.includes("rate limit") || text.includes("429")) {
    return `❌ ${model} 速率受限,请稍后重试。`;
  }

  if (lowered.includes("timeout")) {
    return `❌ ${model} 请求超时,请稍后重试或调高超时时间。`;
  }

  const modelMissing =
    lowered.includes("not found") || lowered.includes("does not exist");
  if (lowered.includes("model") && modelMissing) {
    return (
      `❌ ${model} 不是该 provider 的有效模型 ID。\n` +
      `请运行配置检查或 \`sky init\` 重新选择。`
    );
  }

  // Content filtering / safety
  const contentFilterKeywords = [
    "content exists risk",
    "content_policy",
    "content_filter",
    "content_filtered",
    "safety",
    "blocked by safety",
    "responsibleaipolicyviolation",
    "policy_violation",
  ];
  if (mentionsAny(contentFilterKeywords)) {
    const short = firstLine();
    return (
      `❌ ${model} 拒绝该请求 (内容审核):${short}\n` +
      `原因:provider 的内容安全过滤判定此次提问/上下文敏感。\n` +
      `建议:\n` +
      ` - 换一个 provider(如 OpenAI / Anthropic)\n` +
      ` - 把敏感关键词改写得更通用后重发`
    );
  }

  // Bad request / malformed sequence
  const badRequestKeywords = ["bad request", "invalid_request", "tool_calls", "tool messages"];
  const badRequestClass =
    err instanceof Error && err.constructor.name.toLowerCase().includes("badrequest");
  if (mentionsAny(badRequestKeywords) || badRequestClass) {
    const short = firstLine();
    return (
      `❌ ${model} 调用失败 (Bad Request):${short}\n` +
      `会话消息序列可能损坏,请清理后重试。`
    );
  }

  const short = firstLine() || (err instanceof Error ? err.name : "Unknown error");
  return `❌ ${model} 调用失败:${short}`;
}
493
+
494
+ /**
495
+ * Unified LLM client with retry, fallback chains, caching, cost tracking, and budget control.
496
+ */
497
+ export class LLMClient {
498
+ private config: any;
499
+ private _toolRegistry: ToolRegistry;
500
+ private _cache: LLMCache;
501
+ private usageStats: Map<string, Record<string, number>> = new Map();
502
+ private totalCost: number = 0;
503
+ private costLimit: number | null;
504
+ private log: Logger | null = null;
505
+
506
/**
 * Create a client.
 *
 * @param config Parsed configuration object (agents, llm settings, default model).
 * @param toolRegistry Registry used to look up tool definitions by name.
 * @param costLimit Budget in USD; `null` (the default) disables the budget check.
 */
constructor(
  config: any,
  toolRegistry: ToolRegistry,
  costLimit: number | null = null
) {
  this.costLimit = costLimit;
  this._toolRegistry = toolRegistry;
  this.config = config;
  // NOTE(review): the meaning of the two LLMCache arguments is defined in ./cache — confirm there.
  this._cache = new LLMCache(256, 120);
}
516
+
517
/**
 * Attach the logger that receives this client's warning events.
 */
setLogger(log: Logger): void {
  this.log = log;
}
523
+
524
/**
 * Resolve the model id for an agent.
 *
 * Order: `agents.<agentName>.model`, then the configured default
 * (`default_model`, `llm.default_model`, legacy `llm.defaultModel`), then
 * "gpt-4o".
 */
private getModel(agentName?: string): string {
  const cfg: any = this.config;

  const agentModel = agentName ? cfg.agents?.[agentName]?.model : undefined;
  if (agentModel) {
    return String(agentModel);
  }

  // Honor the user's configured default (set by the /setup wizard). YAML uses
  // snake_case; the camelCase key is only read as a last resort.
  return cfg.default_model || cfg.llm?.default_model || cfg.llm?.defaultModel || "gpt-4o";
}
539
+
540
/**
 * Get the maximum number of retries from config.
 *
 * Reads `llm.max_retries` (the snake_case form used by the YAML config) and
 * falls back to the legacy camelCase `llm.maxRetries`. Anything that is not a
 * non-negative integer is ignored in favor of the default.
 *
 * @returns Retry count; 2 when unset or invalid.
 */
private _getRetries(): number {
  const llm = (this.config.llm as any) ?? {};
  const configured = llm.max_retries ?? llm.maxRetries;
  if (configured == null) {
    return 2;
  }
  // A negative or non-numeric value would make a retry loop run zero times.
  const retries = Number(configured);
  return Number.isInteger(retries) && retries >= 0 ? retries : 2;
}
546
+
547
/**
 * Record one call's token usage and estimated cost.
 * Stats are kept per agent (key "default" when no agent name is given), and
 * the cost is also added to the client-wide total.
 */
private trackUsage(
  agentName: string | undefined,
  model: string,
  promptTokens: number,
  completionTokens: number
): void {
  const key = agentName || "default";

  let stats = this.usageStats.get(key);
  if (!stats) {
    stats = { prompt_tokens: 0, completion_tokens: 0, calls: 0, cost: 0 };
    this.usageStats.set(key, stats);
  }

  const cost = estimateCost(model, promptTokens, completionTokens);
  stats.prompt_tokens += promptTokens;
  stats.completion_tokens += completionTokens;
  stats.calls += 1;
  stats.cost += cost;
  this.totalCost += cost;
}
575
+
576
/**
 * Enforce the cost limit.
 *
 * @throws Error when a limit is set and the accumulated cost has reached it.
 */
private checkBudget(): void {
  const limit = this.costLimit;
  if (limit === null || !(this.totalCost >= limit)) {
    return;
  }
  throw new Error(
    `Cost limit exceeded: $${this.totalCost.toFixed(4)} >= $${limit.toFixed(4)}`
  );
}
586
+
587
/**
 * Report whether an API key environment variable is set for a model.
 *
 * The provider comes from a known `provider/` prefix or, failing that, from
 * the first known provider id contained in the model name. When no provider
 * can be determined the answer is `true`, so the model is not skipped.
 */
private hasKeyForModel(model: string): boolean {
  const [prefixed] = splitProvider(model);
  const name = model.toLowerCase();

  const provider =
    prefixed ?? Array.from(getKnownProviders()).find((p) => name.includes(p)) ?? null;
  if (!provider) {
    return true; // Can't determine; don't skip
  }

  const envVar = getProviderEnvMap().get(provider) || `${provider.toUpperCase()}_API_KEY`;
  return Boolean(process.env[envVar]);
}
612
+
613
/**
 * Return the per-agent usage statistics as a new Map.
 * The Map is a shallow copy: the per-agent records are the live objects.
 */
getUsageStats(): Map<string, Record<string, number>> {
  const snapshot = new Map(this.usageStats);
  return snapshot;
}
619
+
620
/**
 * Return the estimated cost accumulated across all tracked calls, in USD.
 */
getTotalCost(): number {
  return this.totalCost;
}
626
+
627
/**
 * Forget all recorded usage: zero the total cost and drop the per-agent stats.
 */
resetUsageStats(): void {
  this.totalCost = 0;
  this.usageStats.clear();
}
634
+
635
/**
 * Complete a prompt, falling back to alternative models on failure.
 *
 * The model comes from `overrides.model` or the agent/default configuration.
 * Its fallback chain (restricted to models whose API key is available, and to
 * tool-capable models when tools are requested) is tried in order; each
 * attempt goes through `completeWithRetry`.
 *
 * Failures are not thrown: if every model fails, the returned response holds
 * a user-facing error message in `content` with zero usage and cost.
 *
 * @param messages Chat messages.
 * @param agentName Agent used for model selection and usage tracking.
 * @param tools Tool names to expose to the model.
 * @param stream Passed through to `completeWithRetry`.
 * @param overrides Per-call overrides (`model`, `temperature`, `maxTokens`).
 * @throws Error when the cost limit has been reached before the call starts.
 */
async complete(
  messages: Record<string, unknown>[],
  agentName?: string,
  tools?: string[],
  stream: boolean = false,
  overrides?: Record<string, unknown>
): Promise<LLMResponse> {
  this.checkBudget();

  const requested = overrides?.model;
  const model: string =
    typeof requested === "string" ? requested : this.getModel(agentName);

  // Build fallback chain
  const fallbackModels =
    FALLBACK_CHAINS.get(model)?.filter((m) => this.hasKeyForModel(m)) || [];
  // An empty tools array sends no tools, so it must not trigger the
  // tool-compatibility filter (which can swap out reasoning models).
  const needsTools = !!tools?.length;
  const modelsToTry = toolCompatibleModels(
    model,
    [model, ...fallbackModels],
    needsTools
  );

  // Try each model in sequence
  let lastError: Error | null = null;
  for (const attemptModel of modelsToTry) {
    try {
      this.checkBudget();
      return await this.completeWithRetry(
        attemptModel,
        messages,
        agentName,
        tools,
        stream,
        overrides
      );
    } catch (e) {
      lastError = e instanceof Error ? e : new Error(String(e));
      this.log?.warn("llm_fallback", {
        model: attemptModel,
        agent: agentName,
        error: lastError.message,
      });
    }
  }

  // All models failed: report the last error as the response text.
  return {
    content: formatUserFacingError(model, lastError),
    toolCalls: [],
    model,
    usage: { promptTokens: 0, completionTokens: 0 },
    cost: 0,
    truncated: false,
  };
}
698
+
699
/**
 * Call one model over HTTP, retrying failed attempts with exponential backoff
 * (1s, 2s, 4s, ...).
 *
 * Errors carrying an HTTP status that is not in `RETRYABLE_STATUSES` (for
 * example 401 or 404) are rethrown immediately, because repeating the same
 * request cannot succeed. Errors without a status (such as network failures)
 * are retried up to the configured maximum.
 *
 * @param model Model id to call.
 * @param messages Chat messages.
 * @param agentName Agent used for key lookup and usage tracking.
 * @param tools Tool names to expose to the model.
 * @param stream Unused here; kept for signature compatibility.
 * @param overrides Per-call overrides (`temperature`, `maxTokens`).
 * @returns The model's response with usage and estimated cost filled in.
 * @throws The last error once retries are exhausted or the error is not retryable.
 */
private async completeWithRetry(
  model: string,
  messages: Record<string, unknown>[],
  agentName?: string,
  tools?: string[],
  stream: boolean = false,
  overrides?: Record<string, unknown>
): Promise<LLMResponse> {
  const temperature = (overrides?.temperature as number) ?? 0.7;
  const maxTokens = (overrides?.maxTokens as number) ?? 4096;
  const maxRetries = this._getRetries();

  // An explicit provider prefix decides routing, so "openrouter/...claude..."
  // goes to OpenRouter rather than straight to Anthropic. Without a known
  // prefix the name-based check is used.
  const [provider] = splitProvider(model);
  const isAnthropic = provider
    ? provider === "anthropic"
    : model.includes("claude") || model.startsWith("anthropic/");

  let lastError: Error | null = null;
  for (let attempt = 0; attempt <= maxRetries; attempt++) {
    try {
      if (attempt > 0) await new Promise(r => setTimeout(r, 1000 * Math.pow(2, attempt - 1)));

      const r = isAnthropic
        ? await this.callAnthropic(model, messages, tools, temperature, maxTokens, agentName)
        : await this.callOpenAI(model, messages, tools, temperature, maxTokens, agentName);
      const { content, toolCalls, usage } = r;

      // Record usage through the shared helper instead of duplicating it here.
      this.trackUsage(agentName, model, usage.promptTokens, usage.completionTokens);
      const cost = estimateCost(model, usage.promptTokens, usage.completionTokens);

      return { content, toolCalls, model, usage, cost, truncated: false };
    } catch (e: any) {
      lastError = e instanceof Error ? e : new Error(String(e));
      const status = Number(e?.status_code) || 0;
      const permanent = status !== 0 && !RETRYABLE_STATUSES.has(status);
      if (permanent || attempt >= maxRetries) throw e;
    }
  }
  throw lastError || new Error("Unknown error");
}
747
+
748
/**
 * Send one non-streaming chat completion request to an OpenAI-compatible
 * endpoint and normalize the reply.
 *
 * @param m Model id sent to the provider.
 * @param messages Chat messages, sent as-is.
 * @param tools Names looked up in the tool registry; unknown names are ignored.
 * @param temp Sampling temperature (defaults to 0.7).
 * @param maxTok Maximum output tokens (defaults to 4096).
 * @param agentName Agent whose per-agent API key override should be honored.
 * @returns Reply text, requested tool calls, and token usage.
 * @throws Error carrying `status_code` when the HTTP response is not OK.
 */
private async callOpenAI(
  m: string, messages: Record<string, unknown>[], tools?: string[], temp?: number, maxTok?: number, agentName?: string
): Promise<{ content: string; toolCalls: ToolCall[]; usage: UsageStats }> {
  const apiKey = this.getApiKey(m, agentName);
  const endpoint = this.getBaseUrl(m) + "/chat/completions";

  const payload: Record<string, unknown> = {
    model: m,
    messages,
    temperature: temp ?? 0.7,
    max_tokens: maxTok ?? 4096,
  };
  if (tools?.length) {
    const registered = tools.map(t => this._toolRegistry.get(t)).filter(Boolean) as any[];
    if (registered.length) {
      payload.tools = registered.map(t => ({
        type: "function",
        function: {
          name: t.name,
          description: t.description,
          parameters: this.paramsToSchema(t.parameters || []),
        },
      }));
    }
  }

  const resp = await fetch(endpoint, {
    method: "POST",
    headers: { "Content-Type": "application/json", Authorization: "Bearer " + apiKey },
    body: JSON.stringify(payload),
  });
  if (!resp.ok) {
    // Keep only the start of the response body in the error message.
    const detail = (await resp.text()).slice(0, 200);
    const failure: any = new Error("API " + resp.status + ": " + detail);
    failure.status_code = resp.status;
    throw failure;
  }

  const data: any = await resp.json();
  const message = data.choices?.[0]?.message || {};

  const toolCalls: ToolCall[] = [];
  for (const call of message.tool_calls || []) {
    toolCalls.push({
      id: call.id,
      type: "function",
      function: { name: call.function?.name || "", arguments: call.function?.arguments || "{}" },
    });
  }

  return {
    content: message.content || "",
    toolCalls,
    usage: {
      promptTokens: data.usage?.prompt_tokens || 0,
      completionTokens: data.usage?.completion_tokens || 0,
    },
  };
}
764
+
765
/**
 * Sends one blocking request to the Anthropic Messages API.
 *
 * System messages are lifted out of `messages` into the top-level `system`
 * field. When every system message has string content they are joined with a
 * blank line so none is lost; otherwise the first system message's content is
 * forwarded as-is.
 *
 * @param m - Model identifier; a leading "anthropic/" routing prefix is removed
 *   before the id is sent.
 * @param messages - Conversation messages; non-system entries are forwarded unchanged.
 * @param tools - Names of tools to expose; names missing from the registry are skipped.
 * @param temp - Sampling temperature (0.7 when omitted).
 * @param maxTok - Output token limit (4096 when omitted).
 * @param agentName - Agent name forwarded to the API-key lookup.
 * @returns Concatenated text blocks, tool calls (arguments JSON-encoded) and token usage.
 * @throws Error carrying a `status_code` property when the response is not OK.
 */
private async callAnthropic(
  m: string, messages: Record<string, unknown>[], tools?: string[], temp?: number, maxTok?: number, agentName?: string
): Promise<{ content: string; toolCalls: ToolCall[]; usage: UsageStats }> {
  const apiKey = this.getApiKey("anthropic", agentName);

  // "anthropic/<id>" is a routing prefix, not part of the model id the API expects.
  const routingPrefix = "anthropic/";
  const modelId = m.startsWith(routingPrefix) ? m.slice(routingPrefix.length) : m;

  const systemParts = messages.filter(msg => msg.role === "system").map(msg => msg.content);
  const body: Record<string, unknown> = {
    model: modelId,
    max_tokens: maxTok ?? 4096,
    messages: messages.filter(msg => msg.role !== "system"),
    temperature: temp ?? 0.7,
  };
  if (systemParts.length > 0) {
    // Every system message is removed from `messages`, so forward all of them
    // rather than just the first whenever they can be merged as plain text.
    body.system = systemParts.every(part => typeof part === "string")
      ? systemParts.join("\n\n")
      : systemParts[0];
  }

  if (tools?.length) {
    const defs = tools.map(t => this._toolRegistry.get(t)).filter(Boolean) as any[];
    if (defs.length) {
      body.tools = defs.map(t => ({
        name: t.name,
        description: t.description,
        input_schema: this.paramsToSchema(t.parameters || []),
      }));
    }
  }

  const resp = await fetch("https://api.anthropic.com/v1/messages", {
    method: "POST",
    headers: { "Content-Type": "application/json", "x-api-key": apiKey, "anthropic-version": "2023-06-01" },
    body: JSON.stringify(body),
  });
  if (!resp.ok) {
    const e: any = new Error("API " + resp.status + ": " + ((await resp.text()).slice(0, 200)));
    e.status_code = resp.status;
    throw e;
  }

  const data: any = await resp.json();
  let content = "";
  const toolCalls: ToolCall[] = [];
  for (const b of data.content || []) {
    if (b.type === "text") content += b.text ?? "";
    if (b.type === "tool_use") {
      // Default to an empty object so `arguments` is always a JSON string.
      toolCalls.push({ id: b.id, type: "function", function: { name: b.name, arguments: JSON.stringify(b.input ?? {}) } });
    }
  }
  return {
    content,
    toolCalls,
    usage: { promptTokens: data.usage?.input_tokens || 0, completionTokens: data.usage?.output_tokens || 0 },
  };
}
781
+
782
/**
 * Builds a JSON-Schema object description from a list of tool parameters.
 *
 * "integer", "number" and "boolean" parameter types are kept; every other
 * type is declared as "string". The `required` key is emitted only when at
 * least one parameter is flagged as required.
 *
 * @param params - Parameter descriptors read via `name`, `type`, `description` and `required`.
 * @returns A schema of the form `{ type: "object", properties, required? }`.
 */
private paramsToSchema(params: any[]): Record<string, any> {
  const passthroughTypes = ["integer", "number", "boolean"];
  const properties: Record<string, any> = {};
  const requiredNames: any[] = [];

  params.forEach(param => {
    const schemaType = passthroughTypes.includes(param.type) ? param.type : "string";
    properties[param.name] = { type: schemaType, description: param.description };
  });
  params.forEach(param => {
    if (param.required) requiredNames.push(param.name);
  });

  const schema: Record<string, any> = { type: "object", properties };
  if (requiredNames.length > 0) schema.required = requiredNames;
  return schema;
}
788
+
789
/**
 * Resolves the API key to use for a model.
 *
 * Lookup order:
 *   0. `agents.<agentName>.api_key` from the client config,
 *   1. the provider's environment variable,
 *   2. `api_keys.<provider>` in `~/.skyloom/config.yaml`.
 *
 * The provider comes from `splitProvider(model)` when it yields one; otherwise
 * it is guessed from substrings of the model name and defaults to "openai".
 *
 * @param model - Model identifier (or a bare provider name).
 * @param agentName - Optional agent whose own key overrides all other sources.
 * @returns The resolved key.
 * @throws Error naming the missing environment variable when no key is found.
 */
private getApiKey(model: string, agentName?: string): string {
  // 0. A key configured directly on the agent wins over every other source.
  const agentKey = agentName ? (this.config.agents as any)?.[agentName]?.api_key : undefined;
  if (agentKey) return String(agentKey);

  const [explicitProvider] = splitProvider(model);
  let provider = "openai";
  if (explicitProvider) {
    provider = explicitProvider;
  } else {
    // No explicit provider: take the first substring hint that matches.
    const lowered = model.toLowerCase();
    const hints: Array<[string, string]> = [
      ["claude", "anthropic"],
      ["deepseek", "deepseek"],
      ["groq", "groq"],
      ["openrouter", "openrouter"],
      ["gemini", "gemini"],
    ];
    const match = hints.find(([needle]) => lowered.includes(needle));
    if (match) provider = match[1];
  }

  const envVar = getProviderEnvMap().get(provider) || (provider.toUpperCase() + "_API_KEY");

  // 1. Environment variable.
  const fromEnv = process.env[envVar];
  if (fromEnv) return fromEnv;

  // 2. User config file (~/.skyloom/config.yaml); any read or parse failure is ignored.
  try {
    const fs = require("fs");
    const path = require("path");
    const yaml = require("yaml");
    const cfgPath = path.join(require("os").homedir(), ".skyloom", "config.yaml");
    if (fs.existsSync(cfgPath)) {
      const parsed = yaml.parse(fs.readFileSync(cfgPath, "utf-8")) || {};
      const stored = (parsed.api_keys || {})[provider];
      if (stored) return stored;
    }
  } catch { /* ignore */ }

  throw new Error("Missing " + envVar + ". Run: sky apikey set " + provider + " YOUR_KEY");
}
818
+
819
/**
 * Resolves the base URL of the chat endpoint for a model.
 *
 * The provider comes from `splitProvider(model)` when it yields one; otherwise
 * it is guessed from substrings of the model name. Unknown providers fall back
 * to the OpenAI URL. Callers append "/chat/completions" to the result.
 *
 * @param model - Model identifier, optionally carrying a provider.
 * @returns Base URL without a trailing slash.
 */
private getBaseUrl(model: string): string {
  // Local servers: honour the *_HOST override and drop trailing slashes so the
  // result never contains "//v1".
  const localBase = (host: string | undefined, fallback: string): string =>
    (host || fallback).replace(/\/+$/, "") + "/v1";

  // Google's OpenAI-compatible surface lives under ".../v1beta/openai".
  const googleUrl = "https://generativelanguage.googleapis.com/v1beta/openai";

  let provider = "openai";
  const [pr] = splitProvider(model);
  if (pr) {
    provider = pr;
  } else {
    const l = model.toLowerCase();
    if (l.includes("claude")) return "https://api.anthropic.com/v1";
    if (l.includes("deepseek")) return "https://api.deepseek.com/v1";
    if (l.includes("groq")) return "https://api.groq.com/openai/v1";
    if (l.includes("openrouter")) return "https://openrouter.ai/api/v1";
    // Route bare "gemini*" models to Google instead of letting them fall
    // through to the OpenAI URL.
    if (l.includes("gemini")) return googleUrl;
    if (l.includes("ollama")) return localBase(process.env.OLLAMA_HOST, "http://localhost:11434");
  }

  const urls: Record<string, string> = {
    openai: "https://api.openai.com/v1",
    anthropic: "https://api.anthropic.com/v1",
    google: googleUrl,
    gemini: googleUrl,
    deepseek: "https://api.deepseek.com/v1",
    xai: "https://api.x.ai/v1",
    mistral: "https://api.mistral.ai/v1",
    groq: "https://api.groq.com/openai/v1",
    cohere: "https://api.cohere.ai/v1",
    perplexity: "https://api.perplexity.ai",
    fireworks: "https://api.fireworks.ai/inference/v1",
    together: "https://api.together.xyz/v1",
    openrouter: "https://openrouter.ai/api/v1",
    reka: "https://api.reka.ai/v1",
    nvidia: "https://integrate.api.nvidia.com/v1",
    sambanova: "https://api.sambanova.ai/v1",
    qwen: "https://dashscope.aliyuncs.com/compatible-mode/v1",
    zhipu: "https://open.bigmodel.cn/api/paas/v4",
    lingyiwanwu: "https://api.lingyiwanwu.com/v1",
    minimax: "https://api.minimax.chat/v1",
    moonshot: "https://api.moonshot.cn/v1",
    baidu: "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop",
    baichuan: "https://api.baichuan-ai.com/v1",
    stepfun: "https://api.stepfun.com/v1",
    ollama: localBase(process.env.OLLAMA_HOST, "http://localhost:11434"),
    lmstudio: localBase(process.env.LMSTUDIO_HOST, "http://localhost:1234"),
    vllm: localBase(process.env.VLLM_HOST, "http://localhost:8000"),
    litellm: localBase(process.env.LITELLM_HOST, "http://localhost:4000"),
  };
  return urls[provider] || urls.openai;
}
853
+
854
/**
 * Single-chunk "stream": awaits a full completion and yields its text once.
 *
 * @param messages - Conversation messages passed to `complete`.
 * @param agentName - Optional agent name passed to `complete`.
 * @returns An async generator that yields the completion content exactly once.
 */
async *stream(
  messages: Record<string, unknown>[], agentName?: string
): AsyncGenerator<string> {
  const { content } = await this.complete(messages, agentName);
  yield content;
}
860
+
861
/**
 * Real SSE token streaming for OpenAI-compatible providers (openai, deepseek,
 * groq, openrouter, mistral, xai, ollama). Content + reasoning deltas are
 * yielded as they arrive; tool-call deltas are accumulated by index and
 * emitted once complete. Usage comes from the final `stream_options` chunk.
 *
 * @param m - Model identifier; also used to resolve the API key and base URL.
 * @param messages - Conversation messages, forwarded unchanged.
 * @param tools - Names of tools to expose; names missing from the registry are skipped.
 * @param temp - Sampling temperature (0.7 when omitted).
 * @param maxTok - Completion token limit (4096 when omitted).
 * @param signal - Optional abort signal passed to `fetch`.
 * @param agentName - Agent name forwarded to the API-key lookup so a per-agent
 *   key applies to streaming requests as well.
 * @returns Stream events ending with a single "done" event carrying usage.
 * @throws Error carrying a `status_code` property when the response is not OK
 *   or has no body.
 */
private async *callOpenAIStream(
  m: string, messages: Record<string, unknown>[], tools?: string[], temp?: number, maxTok?: number, signal?: AbortSignal, agentName?: string
): AsyncGenerator<StreamEvent> {
  const apiKey = this.getApiKey(m, agentName);
  const baseUrl = this.getBaseUrl(m);
  const body: Record<string, unknown> = {
    model: m, messages, temperature: temp ?? 0.7, max_tokens: maxTok ?? 4096,
    stream: true, stream_options: { include_usage: true },
  };
  if (tools?.length) {
    const defs = tools.map(t => this._toolRegistry.get(t)).filter(Boolean) as any[];
    if (defs.length) body.tools = defs.map(t => ({ type: "function", function: { name: t.name, description: t.description, parameters: this.paramsToSchema(t.parameters || []) } }));
  }
  const resp = await fetch(baseUrl + "/chat/completions", { method: "POST", headers: { "Content-Type": "application/json", Authorization: "Bearer " + apiKey }, body: JSON.stringify(body), signal });
  if (!resp.ok || !resp.body) { const e: any = new Error("API " + resp.status + ": " + ((await resp.text()).slice(0, 200))); e.status_code = resp.status; throw e; }

  const reader = (resp.body as any).getReader();
  const decoder = new TextDecoder();
  let buf = "";
  const toolAcc = new Map<number, { id: string; name: string; args: string }>();
  let usage: UsageStats = { promptTokens: 0, completionTokens: 0 };
  let reasoning = "";

  // Parses one SSE line, updates the accumulators and returns the events to
  // emit for it. Non-data lines, "[DONE]" and malformed JSON produce nothing.
  const handleLine = (line: string): StreamEvent[] => {
    const events: StreamEvent[] = [];
    const t = line.trim();
    if (!t.startsWith("data:")) return events;
    const data = t.slice(5).trim();
    if (data === "[DONE]") return events;
    let json: any;
    try { json = JSON.parse(data); } catch { return events; }
    if (json.usage) usage = { promptTokens: json.usage.prompt_tokens || 0, completionTokens: json.usage.completion_tokens || 0 };
    const delta = json.choices?.[0]?.delta;
    if (!delta) return events;
    if (delta.content) events.push({ type: "content", text: delta.content });
    if (delta.reasoning_content) {
      reasoning += delta.reasoning_content;
      events.push({ type: "reasoning", text: delta.reasoning_content });
    }
    if (Array.isArray(delta.tool_calls)) {
      for (const tc of delta.tool_calls) {
        // Fragments of one tool call share an index; argument text arrives in pieces.
        const idx = tc.index ?? 0;
        const acc = toolAcc.get(idx) || { id: "", name: "", args: "" };
        if (tc.id) acc.id = tc.id;
        if (tc.function?.name) acc.name = tc.function.name;
        if (tc.function?.arguments) acc.args += tc.function.arguments;
        toolAcc.set(idx, acc);
      }
    }
    return events;
  };

  try {
    while (true) {
      const { done, value } = await reader.read();
      if (done) break;
      buf += decoder.decode(value, { stream: true });
      const lines = buf.split("\n");
      // The last element may be a partial line; keep it for the next chunk.
      buf = lines.pop() || "";
      for (const line of lines) yield* handleLine(line);
    }
    // Flush the decoder and parse a final line that had no trailing newline.
    buf += decoder.decode();
    if (buf) yield* handleLine(buf);
  } finally {
    // Release the connection when the consumer stops early or an error
    // propagates; cancelling a closed or errored stream is harmless.
    try { await reader.cancel(); } catch { /* already closed or errored */ }
  }

  for (const acc of toolAcc.values()) {
    if (acc.name) yield { type: "tool_call", toolCall: { id: acc.id || ("call_" + acc.name), type: "function", function: { name: acc.name, arguments: acc.args || "{}" } } };
  }
  yield { type: "done", usage, reasoningContent: reasoning || undefined };
}
924
+
925
/**
 * Streams a completion, with tool support, as a sequence of stream events.
 *
 * Models routed to Anthropic use a blocking completion re-emitted as events.
 * All other models use SSE streaming; if streaming fails before any content
 * or tool call was emitted, the blocking path is used instead.
 *
 * Usage and cost are recorded before the final "done" event is yielded, so
 * the call is counted even when the consumer stops iterating on "done".
 *
 * @param messages - Conversation messages.
 * @param agentName - Agent used for model selection and usage attribution ("default" when omitted).
 * @param tools - Names of tools to expose to the model.
 * @param _toolRegistry - Unused; kept for interface compatibility.
 * @param overrides - Optional `model`, `temperature` and `maxTokens` overrides.
 * @param signal - Abort signal; on abort the stream ends with a "done" event and no error.
 * @returns Stream events ending with a "done" event.
 */
async *streamWithTools(
  messages: Record<string, unknown>[], agentName?: string, tools?: string[],
  _toolRegistry?: ToolRegistry, overrides?: Record<string, unknown>, signal?: AbortSignal
): AsyncGenerator<StreamEvent> {
  this.checkBudget();
  const ov = overrides || {};
  const model: string = typeof ov.model === "string" ? ov.model : this.getModel(agentName);
  const temperature = (ov.temperature as number) ?? 0.7;
  const maxTokens = (ov.maxTokens as number) ?? 4096;
  const isAnthropic = model.includes("claude") || model.startsWith("anthropic/");

  // Blocking fallback used for Anthropic (different wire format) and on
  // failures before any content has streamed (preserves fallback chain + retry).
  const blockingFallback = async function* (this: LLMClient): AsyncGenerator<StreamEvent> {
    const response = await this.complete(messages, agentName, tools, false, overrides);
    if (response.content) yield { type: "content", text: response.content };
    for (const tc of response.toolCalls || []) yield { type: "tool_call", toolCall: tc };
    yield { type: "done", usage: response.usage, reasoningContent: response.reasoningContent };
  }.bind(this);

  if (isAnthropic) { yield* blockingFallback(); return; }

  let started = false;
  let usage: UsageStats = { promptTokens: 0, completionTokens: 0 };
  // The terminal event is held back until bookkeeping has run (see below).
  let doneEvent: StreamEvent | undefined;
  try {
    for await (const ev of this.callOpenAIStream(model, messages, tools, temperature, maxTokens, signal, agentName)) {
      if (ev.type === "done") {
        if (ev.usage) usage = ev.usage;
        doneEvent = ev;
        continue;
      }
      if (ev.type === "content" || ev.type === "tool_call") started = true;
      yield ev;
    }
  } catch (e: any) {
    // User interrupt (Ctrl-C): stop cleanly — keep whatever streamed, no error, no fallback.
    if (signal?.aborted || e?.name === "AbortError") { yield { type: "done", usage }; return; }
    // Output already reached the consumer: report the error instead of re-requesting.
    if (started) { yield { type: "error", text: String(e?.message || e) }; yield { type: "done", usage }; return; }
    this.log?.warn("stream_failed_fallback", { model, error: String(e?.message || e) });
    yield* blockingFallback();
    return;
  }

  // Usage + cost bookkeeping. This runs BEFORE "done" is yielded: a consumer
  // that stops iterating on "done" never resumes the generator, so code placed
  // after that yield would be skipped and the call would go uncounted.
  const name = agentName || "default";
  if (!this.usageStats.has(name)) this.usageStats.set(name, { prompt_tokens: 0, completion_tokens: 0, calls: 0, cost: 0 });
  const s = this.usageStats.get(name)!;
  s.prompt_tokens += usage.promptTokens; s.completion_tokens += usage.completionTokens; s.calls += 1;
  const cost = estimateCost(model, usage.promptTokens, usage.completionTokens);
  s.cost += cost; this.totalCost += cost;

  if (doneEvent) yield doneEvent;
}
972
+ }