skyloom 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (225) hide show
  1. package/.github/workflows/ci.yml +36 -0
  2. package/CONVERSION_PLAN.md +191 -0
  3. package/README.md +67 -0
  4. package/dist/agents/dew.d.ts +15 -0
  5. package/dist/agents/dew.d.ts.map +1 -0
  6. package/dist/agents/dew.js +74 -0
  7. package/dist/agents/dew.js.map +1 -0
  8. package/dist/agents/fair.d.ts +15 -0
  9. package/dist/agents/fair.d.ts.map +1 -0
  10. package/dist/agents/fair.js +106 -0
  11. package/dist/agents/fair.js.map +1 -0
  12. package/dist/agents/fog.d.ts +15 -0
  13. package/dist/agents/fog.d.ts.map +1 -0
  14. package/dist/agents/fog.js +52 -0
  15. package/dist/agents/fog.js.map +1 -0
  16. package/dist/agents/frost.d.ts +15 -0
  17. package/dist/agents/frost.d.ts.map +1 -0
  18. package/dist/agents/frost.js +54 -0
  19. package/dist/agents/frost.js.map +1 -0
  20. package/dist/agents/rain.d.ts +15 -0
  21. package/dist/agents/rain.d.ts.map +1 -0
  22. package/dist/agents/rain.js +54 -0
  23. package/dist/agents/rain.js.map +1 -0
  24. package/dist/agents/snow.d.ts +27 -0
  25. package/dist/agents/snow.d.ts.map +1 -0
  26. package/dist/agents/snow.js +226 -0
  27. package/dist/agents/snow.js.map +1 -0
  28. package/dist/cli/main.d.ts +7 -0
  29. package/dist/cli/main.d.ts.map +1 -0
  30. package/dist/cli/main.js +402 -0
  31. package/dist/cli/main.js.map +1 -0
  32. package/dist/cli/mode.d.ts +17 -0
  33. package/dist/cli/mode.d.ts.map +1 -0
  34. package/dist/cli/mode.js +56 -0
  35. package/dist/cli/mode.js.map +1 -0
  36. package/dist/core/agent.d.ts +174 -0
  37. package/dist/core/agent.d.ts.map +1 -0
  38. package/dist/core/agent.js +1332 -0
  39. package/dist/core/agent.js.map +1 -0
  40. package/dist/core/agent_helpers.d.ts +51 -0
  41. package/dist/core/agent_helpers.d.ts.map +1 -0
  42. package/dist/core/agent_helpers.js +477 -0
  43. package/dist/core/agent_helpers.js.map +1 -0
  44. package/dist/core/bus.d.ts +99 -0
  45. package/dist/core/bus.d.ts.map +1 -0
  46. package/dist/core/bus.js +191 -0
  47. package/dist/core/bus.js.map +1 -0
  48. package/dist/core/cache.d.ts +63 -0
  49. package/dist/core/cache.d.ts.map +1 -0
  50. package/dist/core/cache.js +121 -0
  51. package/dist/core/cache.js.map +1 -0
  52. package/dist/core/checkpoint.d.ts +19 -0
  53. package/dist/core/checkpoint.d.ts.map +1 -0
  54. package/dist/core/checkpoint.js +120 -0
  55. package/dist/core/checkpoint.js.map +1 -0
  56. package/dist/core/circuit_breaker.d.ts +46 -0
  57. package/dist/core/circuit_breaker.d.ts.map +1 -0
  58. package/dist/core/circuit_breaker.js +99 -0
  59. package/dist/core/circuit_breaker.js.map +1 -0
  60. package/dist/core/config.d.ts +97 -0
  61. package/dist/core/config.d.ts.map +1 -0
  62. package/dist/core/config.js +281 -0
  63. package/dist/core/config.js.map +1 -0
  64. package/dist/core/constants.d.ts +78 -0
  65. package/dist/core/constants.d.ts.map +1 -0
  66. package/dist/core/constants.js +84 -0
  67. package/dist/core/constants.js.map +1 -0
  68. package/dist/core/factory.d.ts +63 -0
  69. package/dist/core/factory.d.ts.map +1 -0
  70. package/dist/core/factory.js +537 -0
  71. package/dist/core/factory.js.map +1 -0
  72. package/dist/core/icons.d.ts +28 -0
  73. package/dist/core/icons.d.ts.map +1 -0
  74. package/dist/core/icons.js +86 -0
  75. package/dist/core/icons.js.map +1 -0
  76. package/dist/core/index.d.ts +29 -0
  77. package/dist/core/index.d.ts.map +1 -0
  78. package/dist/core/index.js +54 -0
  79. package/dist/core/index.js.map +1 -0
  80. package/dist/core/llm.d.ts +121 -0
  81. package/dist/core/llm.d.ts.map +1 -0
  82. package/dist/core/llm.js +532 -0
  83. package/dist/core/llm.js.map +1 -0
  84. package/dist/core/logger.d.ts +57 -0
  85. package/dist/core/logger.d.ts.map +1 -0
  86. package/dist/core/logger.js +122 -0
  87. package/dist/core/logger.js.map +1 -0
  88. package/dist/core/mcp.d.ts +190 -0
  89. package/dist/core/mcp.d.ts.map +1 -0
  90. package/dist/core/mcp.js +822 -0
  91. package/dist/core/mcp.js.map +1 -0
  92. package/dist/core/mcp_server.d.ts +26 -0
  93. package/dist/core/mcp_server.d.ts.map +1 -0
  94. package/dist/core/mcp_server.js +211 -0
  95. package/dist/core/mcp_server.js.map +1 -0
  96. package/dist/core/memory.d.ts +190 -0
  97. package/dist/core/memory.d.ts.map +1 -0
  98. package/dist/core/memory.js +988 -0
  99. package/dist/core/memory.js.map +1 -0
  100. package/dist/core/middleware.d.ts +114 -0
  101. package/dist/core/middleware.d.ts.map +1 -0
  102. package/dist/core/middleware.js +248 -0
  103. package/dist/core/middleware.js.map +1 -0
  104. package/dist/core/pipelines.d.ts +87 -0
  105. package/dist/core/pipelines.d.ts.map +1 -0
  106. package/dist/core/pipelines.js +301 -0
  107. package/dist/core/pipelines.js.map +1 -0
  108. package/dist/core/profile.d.ts +23 -0
  109. package/dist/core/profile.d.ts.map +1 -0
  110. package/dist/core/profile.js +289 -0
  111. package/dist/core/profile.js.map +1 -0
  112. package/dist/core/router.d.ts +24 -0
  113. package/dist/core/router.d.ts.map +1 -0
  114. package/dist/core/router.js +111 -0
  115. package/dist/core/router.js.map +1 -0
  116. package/dist/core/schemas.d.ts +82 -0
  117. package/dist/core/schemas.d.ts.map +1 -0
  118. package/dist/core/schemas.js +200 -0
  119. package/dist/core/schemas.js.map +1 -0
  120. package/dist/core/semantic.d.ts +92 -0
  121. package/dist/core/semantic.d.ts.map +1 -0
  122. package/dist/core/semantic.js +175 -0
  123. package/dist/core/semantic.js.map +1 -0
  124. package/dist/core/skill.d.ts +68 -0
  125. package/dist/core/skill.d.ts.map +1 -0
  126. package/dist/core/skill.js +350 -0
  127. package/dist/core/skill.js.map +1 -0
  128. package/dist/core/tool.d.ts +99 -0
  129. package/dist/core/tool.d.ts.map +1 -0
  130. package/dist/core/tool.js +341 -0
  131. package/dist/core/tool.js.map +1 -0
  132. package/dist/core/tool_router.d.ts +29 -0
  133. package/dist/core/tool_router.d.ts.map +1 -0
  134. package/dist/core/tool_router.js +172 -0
  135. package/dist/core/tool_router.js.map +1 -0
  136. package/dist/core/workspace.d.ts +48 -0
  137. package/dist/core/workspace.d.ts.map +1 -0
  138. package/dist/core/workspace.js +179 -0
  139. package/dist/core/workspace.js.map +1 -0
  140. package/dist/plugins/loader.d.ts +17 -0
  141. package/dist/plugins/loader.d.ts.map +1 -0
  142. package/dist/plugins/loader.js +96 -0
  143. package/dist/plugins/loader.js.map +1 -0
  144. package/dist/skills/loader.d.ts +9 -0
  145. package/dist/skills/loader.d.ts.map +1 -0
  146. package/dist/skills/loader.js +78 -0
  147. package/dist/skills/loader.js.map +1 -0
  148. package/dist/tools/builtin.d.ts +10 -0
  149. package/dist/tools/builtin.d.ts.map +1 -0
  150. package/dist/tools/builtin.js +414 -0
  151. package/dist/tools/builtin.js.map +1 -0
  152. package/dist/tools/computer.d.ts +12 -0
  153. package/dist/tools/computer.d.ts.map +1 -0
  154. package/dist/tools/computer.js +326 -0
  155. package/dist/tools/computer.js.map +1 -0
  156. package/dist/tools/delegate.d.ts +10 -0
  157. package/dist/tools/delegate.d.ts.map +1 -0
  158. package/dist/tools/delegate.js +45 -0
  159. package/dist/tools/delegate.js.map +1 -0
  160. package/dist/web/server.d.ts +5 -0
  161. package/dist/web/server.d.ts.map +1 -0
  162. package/dist/web/server.js +647 -0
  163. package/dist/web/server.js.map +1 -0
  164. package/dist/web/tts.d.ts +33 -0
  165. package/dist/web/tts.d.ts.map +1 -0
  166. package/dist/web/tts.js +69 -0
  167. package/dist/web/tts.js.map +1 -0
  168. package/package.json +60 -0
  169. package/scripts/install.js +48 -0
  170. package/scripts/link.js +10 -0
  171. package/setup.bat +79 -0
  172. package/skill-test-ty2fOA/test.md +10 -0
  173. package/src/agents/dew.ts +70 -0
  174. package/src/agents/fair.ts +102 -0
  175. package/src/agents/fog.ts +48 -0
  176. package/src/agents/frost.ts +50 -0
  177. package/src/agents/rain.ts +50 -0
  178. package/src/agents/snow.ts +239 -0
  179. package/src/cli/main.ts +405 -0
  180. package/src/cli/mode.ts +58 -0
  181. package/src/core/agent.ts +1506 -0
  182. package/src/core/agent_helpers.ts +461 -0
  183. package/src/core/bus.ts +221 -0
  184. package/src/core/cache.ts +153 -0
  185. package/src/core/checkpoint.ts +94 -0
  186. package/src/core/circuit_breaker.ts +119 -0
  187. package/src/core/config.ts +341 -0
  188. package/src/core/constants.ts +95 -0
  189. package/src/core/factory.ts +627 -0
  190. package/src/core/icons.ts +53 -0
  191. package/src/core/index.ts +31 -0
  192. package/src/core/llm.ts +724 -0
  193. package/src/core/logger.ts +144 -0
  194. package/src/core/mcp.ts +953 -0
  195. package/src/core/mcp_server.ts +176 -0
  196. package/src/core/memory.ts +1169 -0
  197. package/src/core/middleware.ts +350 -0
  198. package/src/core/pipelines.ts +424 -0
  199. package/src/core/profile.ts +255 -0
  200. package/src/core/router.ts +124 -0
  201. package/src/core/schemas.ts +282 -0
  202. package/src/core/semantic.ts +211 -0
  203. package/src/core/skill.ts +342 -0
  204. package/src/core/tool.ts +427 -0
  205. package/src/core/tool_router.ts +193 -0
  206. package/src/core/workspace.ts +150 -0
  207. package/src/plugins/loader.ts +66 -0
  208. package/src/skills/loader.ts +46 -0
  209. package/src/sql.js.d.ts +29 -0
  210. package/src/tools/builtin.ts +382 -0
  211. package/src/tools/computer.ts +269 -0
  212. package/src/tools/delegate.ts +49 -0
  213. package/src/web/server.ts +634 -0
  214. package/src/web/tts.ts +93 -0
  215. package/tests/bus.test.ts +121 -0
  216. package/tests/icons.test.ts +45 -0
  217. package/tests/router.test.ts +86 -0
  218. package/tests/schemas.test.ts +51 -0
  219. package/tests/semantic.test.ts +83 -0
  220. package/tests/setup.ts +10 -0
  221. package/tests/skill.test.ts +172 -0
  222. package/tests/tool.test.ts +108 -0
  223. package/tests/tool_router.test.ts +71 -0
  224. package/tsconfig.json +37 -0
  225. package/vitest.config.ts +17 -0
@@ -0,0 +1,724 @@
1
+ /**
2
+ * LLM abstraction layer with LiteLLM-compatible routing, retry, fallback, cost tracking, and budget control.
3
+ *
4
+ * Provides unified interface for multiple LLM providers (OpenAI, Anthropic, DeepSeek, etc.)
5
+ * with automatic fallback chains, prompt caching for Anthropic, and cost estimation.
6
+ */
7
+
8
+ import type { Logger } from "./logger";
9
+ import { LLMCache } from "./cache";
10
+ import type { ToolRegistry } from "./tool";
11
+
12
/**
 * Shape of the value returned by an LLM completion call.
 */
export interface LLMResponse {
  /** Text content of the response. */
  content: string;
  /** Tool calls attached to the response. */
  toolCalls: ToolCall[];
  /** Model identifier associated with the response. */
  model: string;
  /** Prompt / completion token counts. */
  usage: UsageStats;
  /** Cost figure for the call. */
  cost: number;
  /** Optional reasoning text. */
  reasoningContent?: string;
  /** True when LLM loop ran out of iterations before producing a tool-call-free answer. */
  truncated: boolean;
}
25
+
26
/**
 * A single tool invocation extracted from an LLM response.
 */
export interface ToolCall {
  /** Identifier of the call. */
  id: string;
  /** Kind of call, kept as a free-form string. */
  type: string;
  /** The function being invoked. */
  function: {
    /** Function name. */
    name: string;
    /** Function arguments, carried as a string. */
    arguments: string;
  };
}
37
+
38
/**
 * Token counters for one LLM call.
 */
export interface UsageStats {
  /** Number of tokens counted for the prompt. */
  promptTokens: number;
  /** Number of tokens counted for the completion. */
  completionTokens: number;
}
45
+
46
/**
 * One event emitted while streaming an LLM response.
 *
 * `type` is the discriminator; the remaining fields are all optional.
 */
export interface StreamEvent {
  /** Event kind. */
  type: "content" | "tool_call" | "done" | "error" | "reasoning";
  /** Optional text payload. */
  text?: string;
  /** Optional tool call payload. */
  toolCall?: ToolCall;
  /** Optional token usage payload. */
  usage?: UsageStats;
  /** Optional reasoning text payload. */
  reasoningContent?: string;
}
56
+
57
/**
 * Separate a leading provider prefix from a model identifier.
 *
 * Example: "anthropic/claude-3-opus" → ["anthropic", "claude-3-opus"].
 *
 * @param model Model identifier, optionally prefixed with "<provider>/".
 * @returns `[provider, rest]` when the text before the first "/" (lowercased)
 *   is a known provider; otherwise `[null, model]` with the input untouched.
 */
function splitProvider(model: string): [string | null, string] {
  const slashAt = model.indexOf("/");
  if (slashAt === -1) {
    return [null, model];
  }

  // Only the first segment is a candidate prefix; everything after the first
  // slash (including further slashes) belongs to the model name.
  const prefix = model.slice(0, slashAt).toLowerCase();
  return getKnownProviders().has(prefix)
    ? [prefix, model.slice(slashAt + 1)]
    : [null, model];
}
72
+
73
/**
 * Build the set of provider ID prefixes recognised in model identifiers.
 *
 * @returns A new Set on every call.
 */
function getKnownProviders(): Set<string> {
  const providers = new Set<string>();
  for (const id of [
    "openai",
    "azure",
    "anthropic",
    "deepseek",
    "ollama",
    "groq",
    "mistral",
    "cohere",
    "together_ai",
    "openrouter",
    "gemini",
    "vertex_ai",
  ]) {
    providers.add(id);
  }
  return providers;
}
92
+
93
/**
 * Build the mapping from provider ID to the environment variable that holds
 * its API key.
 *
 * @returns A new Map on every call.
 */
function getProviderEnvMap(): Map<string, string> {
  const pairs: [string, string][] = Object.entries({
    openai: "OPENAI_API_KEY",
    anthropic: "ANTHROPIC_API_KEY",
    deepseek: "DEEPSEEK_API_KEY",
    groq: "GROQ_API_KEY",
    mistral: "MISTRAL_API_KEY",
    cohere: "COHERE_API_KEY",
    openrouter: "OPENROUTER_API_KEY",
    gemini: "GEMINI_API_KEY",
  });
  return new Map(pairs);
}
109
+
110
/**
 * Decide whether a model identifier targets Anthropic's API.
 *
 * @param model Model identifier, with or without a provider prefix.
 * @returns True when the lowercased identifier starts with "anthropic/" or
 *   "claude", or when its provider prefix resolves to "anthropic".
 */
function isAnthropicModel(model: string): boolean {
  const normalized = model.toLowerCase();
  const looksAnthropic =
    normalized.startsWith("anthropic/") || normalized.startsWith("claude");
  return looksAnthropic || splitProvider(model)[0] === "anthropic";
}
121
+
122
/**
 * Decide whether a model identifier targets a DeepSeek model.
 *
 * All comparisons are case-insensitive, including the part that follows a
 * recognised provider prefix (so "openrouter/DeepSeek-R1" is detected).
 *
 * @param model Model identifier, with or without a provider prefix.
 * @returns True when the provider prefix is "deepseek", or when either the
 *   full identifier or the prefix-stripped name starts with "deepseek".
 */
function isDeepseekModel(model: string): boolean {
  const [provider, stripped] = splitProvider(model);
  if (provider === "deepseek") {
    return true;
  }
  return (
    model.toLowerCase().startsWith("deepseek") ||
    // `stripped` keeps the caller's casing, so normalise it before matching.
    stripped.toLowerCase().startsWith("deepseek")
  );
}
134
+
135
/**
 * Report whether a DeepSeek model may be used with tool calls.
 *
 * Reasoning models are not reliable function-call models, so any identifier
 * containing "reasoner", "-r1" or "/r1" (case-insensitive) is rejected.
 *
 * @param model Model identifier.
 * @returns False for reasoning-model identifiers, true otherwise.
 */
function deepseekSupportsTools(model: string): boolean {
  const id = model.toLowerCase();
  for (const marker of ["reasoner", "-r1", "/r1"]) {
    if (id.includes(marker)) {
      return false;
    }
  }
  return true;
}
143
+
144
/**
 * Narrow a candidate model list to models that can be used with tools.
 *
 * @param primary The originally requested model.
 * @param models Candidate models, in the order they should be tried.
 * @param needsTools Whether the request involves tools.
 * @returns `models` unchanged when tools are not needed. Otherwise the
 *   candidates minus DeepSeek models that do not support tools; if nothing is
 *   left, `["deepseek/deepseek-chat"]` when `primary` is a DeepSeek model, or
 *   the original `models` list as a last resort.
 */
function toolCompatibleModels(
  primary: string,
  models: string[],
  needsTools: boolean
): string[] {
  if (needsTools) {
    const usable: string[] = [];
    for (const candidate of models) {
      if (isDeepseekModel(candidate) && !deepseekSupportsTools(candidate)) {
        continue;
      }
      usable.push(candidate);
    }

    if (usable.length > 0) {
      return usable;
    }
    if (isDeepseekModel(primary)) {
      return ["deepseek/deepseek-chat"];
    }
  }

  return models;
}
170
+
171
/**
 * Add Anthropic ephemeral cache markers to the system prompt and tool schemas.
 *
 * Anthropic charges full input tokens for repeated identical prefixes.
 * Adding `cache_control: {"type": "ephemeral"}` to system prompt and tools
 * enables 5-minute KV cache, reducing input cost ~80% on subsequent turns.
 *
 * Inputs are never mutated; marked messages, blocks and tools are copies.
 *
 * @param model Model identifier; non-Anthropic models pass through untouched.
 * @param messages Chat messages.
 * @param toolSchemas Tool schemas, or null.
 * @returns `[messages, tools]`. For Anthropic models only the first system
 *   message with non-empty content (string or block array) is marked, and the
 *   tools entry is null when `toolSchemas` is null or empty.
 */
function _applyAnthropicCacheControl(
  model: string,
  messages: Record<string, unknown>[],
  toolSchemas: Record<string, unknown>[] | null
): [Record<string, unknown>[], Record<string, unknown>[] | null] {
  if (!isAnthropicModel(model)) {
    return [messages, toolSchemas];
  }

  // Fresh marker object per use so no two blocks share a reference.
  const ephemeral = (): Record<string, unknown> => ({ type: "ephemeral" });

  let systemMarked = false;
  const outMessages: Record<string, unknown>[] = messages.map(
    (msg): Record<string, unknown> => {
      if (systemMarked || msg.role !== "system") {
        return msg;
      }

      const body = msg.content;

      // Plain-string system prompt: wrap it in a single marked text block.
      if (typeof body === "string" && body) {
        systemMarked = true;
        return {
          role: "system",
          content: [
            {
              type: "text",
              text: body,
              cache_control: ephemeral(),
            },
          ],
        };
      }

      // Block-array system prompt: copy the blocks and mark the last one.
      if (Array.isArray(body) && body.length > 0) {
        systemMarked = true;
        const blocks = (body as Record<string, unknown>[]).map((block) => ({
          ...block,
        }));
        blocks[blocks.length - 1] = {
          ...blocks[blocks.length - 1],
          cache_control: ephemeral(),
        };
        return { ...msg, content: blocks };
      }

      // Empty or otherwise-shaped system content is passed through unmarked.
      return msg;
    }
  );

  // Copy every tool schema and mark only the final one.
  let outTools: Record<string, unknown>[] | null = null;
  if (toolSchemas && toolSchemas.length > 0) {
    const lastIndex = toolSchemas.length - 1;
    outTools = toolSchemas.map((schema, index) =>
      index === lastIndex
        ? { ...schema, cache_control: ephemeral() }
        : { ...schema }
    );
  }

  return [outMessages, outTools];
}
252
+
253
/**
 * Estimate token count for mixed CJK/English text.
 *
 * CJK characters count as ~2 tokens each; all other characters count as
 * ~4 characters per token. The CJK class covers CJK Unified Ideographs,
 * CJK Extension A, Hiragana, Katakana and Hangul syllables. Katakana and
 * Extension A were previously missing, so that text was priced at the
 * 4-chars-per-token rate.
 *
 * @param text Text to measure.
 * @returns Estimated token count, never less than 1 (even for empty input).
 */
function _estimateTokens(text: string): number {
  // Ranges: Ext A, Unified Ideographs, Hiragana, Katakana, Hangul syllables.
  const cjkRegex = /[\u3400-\u4DBF\u4E00-\u9FFF\u3040-\u309F\u30A0-\u30FF\uAC00-\uD7AF]/g;
  const cjkCount = text.match(cjkRegex)?.length ?? 0;
  const otherCount = text.length - cjkCount;
  return Math.max(1, cjkCount * 2 + Math.floor(otherCount / 4));
}
264
+
265
/**
 * Price table in USD per 1K tokens, keyed by model ID.
 * Each value is `[input rate, output rate]`.
 */
const MODEL_COST_ESTIMATES = new Map<string, [number, number]>([
  // OpenAI
  ["gpt-4o", [0.0025, 0.01]],
  ["gpt-4o-mini", [0.00015, 0.0006]],
  ["gpt-4.1", [0.002, 0.008]],
  ["gpt-4.1-mini", [0.0004, 0.0016]],
  ["gpt-4.1-nano", [0.0001, 0.0004]],
  ["o3", [0.01, 0.04]],
  ["o4-mini", [0.0011, 0.0044]],
  // Anthropic
  ["claude-sonnet-4-6", [0.003, 0.015]],
  ["claude-opus-4-7", [0.005, 0.025]],
  ["claude-haiku-4-5", [0.0008, 0.004]],
  // DeepSeek (bare IDs)
  ["deepseek-chat", [0.00027, 0.0011]],
  ["deepseek-reasoner", [0.00055, 0.00219]],
  ["deepseek-v4-flash", [0.00014, 0.00028]],
  ["deepseek-v4-pro", [0.00174, 0.00348]],
  // DeepSeek (provider-prefixed IDs, same rates)
  ["deepseek/deepseek-chat", [0.00027, 0.0011]],
  ["deepseek/deepseek-reasoner", [0.00055, 0.00219]],
  ["deepseek/deepseek-v4-flash", [0.00014, 0.00028]],
  ["deepseek/deepseek-v4-pro", [0.00174, 0.00348]],
  // Gemini
  ["gemini/gemini-2.5-flash", [0.0003, 0.0025]],
  ["gemini/gemini-2.5-pro", [0.00125, 0.01]],
  // Ollama entries carry zero rates
  ["ollama/llama3", [0.0, 0.0]],
  ["ollama/qwen2.5", [0.0, 0.0]],
]);
292
+
293
/**
 * Fallback chains: for each model ID, the ordered list of alternative models
 * to try after it.
 */
const FALLBACK_CHAINS = new Map<string, string[]>([
  // OpenAI
  ["gpt-4o", ["gpt-4o-mini"]],
  ["gpt-4o-mini", ["gpt-4o"]],
  ["gpt-4.1", ["gpt-4.1-mini", "gpt-4o-mini"]],
  ["gpt-4.1-mini", ["gpt-4o-mini"]],
  ["gpt-4.1-nano", ["gpt-4.1-mini"]],
  ["o3", ["o4-mini", "gpt-4.1"]],
  ["o4-mini", ["gpt-4.1-mini"]],
  // Anthropic
  ["claude-sonnet-4-6", ["claude-haiku-4-5", "gpt-4.1-mini"]],
  ["claude-opus-4-7", ["claude-sonnet-4-6", "gpt-4.1"]],
  ["claude-haiku-4-5", ["gpt-4.1-mini"]],
  // DeepSeek (bare IDs)
  ["deepseek-chat", ["deepseek/deepseek-chat", "gpt-4.1-mini"]],
  ["deepseek-reasoner", ["deepseek/deepseek-chat", "gpt-4.1-mini"]],
  ["deepseek-v4-flash", ["deepseek/deepseek-chat", "gpt-4.1-mini"]],
  [
    "deepseek-v4-pro",
    ["deepseek-v4-flash", "deepseek/deepseek-chat", "gpt-4.1-mini"],
  ],
  // DeepSeek (provider-prefixed IDs)
  ["deepseek/deepseek-chat", ["gpt-4.1-mini"]],
  ["deepseek/deepseek-reasoner", ["deepseek/deepseek-chat", "gpt-4.1-mini"]],
  ["deepseek/deepseek-v4-flash", ["deepseek/deepseek-chat", "gpt-4.1-mini"]],
  [
    "deepseek/deepseek-v4-pro",
    ["deepseek/deepseek-v4-flash", "deepseek/deepseek-chat", "gpt-4.1-mini"],
  ],
  // Gemini
  ["gemini/gemini-2.5-flash", ["gemini/gemini-2.5-pro", "gpt-4.1-mini"]],
  ["gemini/gemini-2.5-pro", ["gpt-4.1"]],
]);
322
+
323
/**
 * HTTP status codes that are considered transient errors (worth retrying).
 */
const RETRYABLE_STATUSES = new Set([408, 425, 429, 500, 502, 503, 504]);

/**
 * Check if an exception is worth retrying.
 *
 * An error is transient when any of these holds:
 *  - it carries a numeric HTTP status in `status_code`, `http_status`,
 *    `status` or `statusCode` that is listed in RETRYABLE_STATUSES (the last
 *    two spellings are the ones JavaScript HTTP/SDK errors commonly use);
 *  - its `name` or its constructor's name (case-insensitive) is one of the
 *    known rate-limit / timeout / connection / server error names.
 *
 * @param err The thrown value.
 * @returns True for transient failures; false otherwise, including for any
 *   thrown value that is not an Error instance.
 */
function isTransientError(err: unknown): boolean {
  if (!(err instanceof Error)) {
    return false;
  }

  // Probe every status field spelling; only numeric values are considered.
  for (const field of ["status_code", "http_status", "status", "statusCode"]) {
    const value: unknown = Reflect.get(err, field);
    if (typeof value === "number" && RETRYABLE_STATUSES.has(value)) {
      return true;
    }
  }

  const transientNames = new Set([
    "timeouterror",
    "ratelimiterror",
    "apitimeouterror",
    "apiconnectionerror",
    "serviceunavailableerror",
    "internalservererror",
    "timeout",
  ]);

  // `name` survives class-name minification when the error sets it explicitly,
  // so check it alongside the constructor name.
  return (
    transientNames.has(err.name.toLowerCase()) ||
    transientNames.has(err.constructor.name.toLowerCase())
  );
}
356
+
357
/**
 * Estimate cost (USD) for an LLM API call.
 *
 * The model is looked up in MODEL_COST_ESTIMATES by exact ID first. If that
 * misses, leading "<segment>/" prefixes are stripped one at a time and the
 * lookup is retried, so provider-qualified IDs such as "openai/gpt-4o" or
 * "openrouter/deepseek/deepseek-chat" are priced like the model they wrap
 * instead of silently getting the default rate.
 *
 * @param model Model identifier, with or without provider prefix(es).
 * @param promptTokens Number of input tokens.
 * @param completionTokens Number of output tokens.
 * @returns Estimated cost; unknown models use the default rates of
 *   0.001 (input) / 0.002 (output) per 1K tokens.
 */
export function estimateCost(
  model: string,
  promptTokens: number,
  completionTokens: number
): number {
  let candidate = model;
  let rates = MODEL_COST_ESTIMATES.get(candidate);
  while (!rates) {
    const slashAt = candidate.indexOf("/");
    if (slashAt === -1) {
      break;
    }
    candidate = candidate.slice(slashAt + 1);
    rates = MODEL_COST_ESTIMATES.get(candidate);
  }

  const [inputPer1K, outputPer1K] = rates ?? [0.001, 0.002];
  return (
    (promptTokens / 1000) * inputPer1K + (completionTokens / 1000) * outputPer1K
  );
}
370
+
371
/**
 * Turn an LLM failure into a message suitable for showing to the user.
 *
 * The error text is classified by case-insensitive keyword matching, and the
 * first matching category wins, in this order: missing/invalid API key, rate
 * limit, timeout, unknown model, content filtering, bad request, generic.
 *
 * @param model Model identifier used in the message.
 * @param err The thrown value; non-Error values are stringified.
 * @returns The formatted message.
 */
function formatUserFacingError(model: string, err: unknown): string {
  const text = err instanceof Error ? err.message : String(err);
  const lowered = text.toLowerCase();

  const mentions = (needle: string): boolean => lowered.includes(needle);
  // First line of the raw error text, capped at 200 characters.
  const headline = (): string => text.split("\n")[0].slice(0, 200);

  // Missing API key
  if (["api_key", "authentication", "unauthorized"].some((kw) => mentions(kw))) {
    const [provider] = splitProvider(model);
    const envMap = getProviderEnvMap();
    const envVar = envMap.get(provider || "") || "the appropriate *_API_KEY";

    // Providers whose key variable is currently set in the environment.
    const configuredProviders: string[] = [];
    for (const [name, variable] of envMap) {
      if (process.env[variable]) {
        configuredProviders.push(name);
      }
    }
    const configured = configuredProviders.join(", ");

    let hint: string;
    if (configured) {
      hint = `已配置: ${configured}。`;
    } else {
      hint = "未配置任何 API key。";
    }

    return [
      `❌ ${model} 调用失败:缺少或无效的 API key。\n`,
      `请确认 \`${envVar}\` 已设置,或运行 \`sky init\` 重新配置。${hint}`,
    ].join("");
  }

  if (mentions("rate limit") || text.includes("429")) {
    return `❌ ${model} 速率受限,请稍后重试。`;
  }

  if (mentions("timeout")) {
    return `❌ ${model} 请求超时,请稍后重试或调高超时时间。`;
  }

  const modelMissing =
    mentions("model") && (mentions("not found") || mentions("does not exist"));
  if (modelMissing) {
    return [
      `❌ ${model} 不是该 provider 的有效模型 ID。\n`,
      `请运行配置检查或 \`sky init\` 重新选择。`,
    ].join("");
  }

  // Content filtering / safety
  const safetyKeywords = [
    "content exists risk",
    "content_policy",
    "content_filter",
    "content_filtered",
    "safety",
    "blocked by safety",
    "responsibleaipolicyviolation",
    "policy_violation",
  ];
  if (safetyKeywords.some((kw) => mentions(kw))) {
    const short = headline();
    return [
      `❌ ${model} 拒绝该请求 (内容审核):${short}\n`,
      `原因:provider 的内容安全过滤判定此次提问/上下文敏感。\n`,
      `建议:\n`,
      ` - 换一个 provider(如 OpenAI / Anthropic)\n`,
      ` - 把敏感关键词改写得更通用后重发`,
    ].join("");
  }

  // Bad request / malformed sequence
  const badRequestKeywords = [
    "bad request",
    "invalid_request",
    "tool_calls",
    "tool messages",
  ];
  const badRequestByClass =
    err instanceof Error &&
    err.constructor.name.toLowerCase().includes("badrequest");
  if (badRequestKeywords.some((kw) => mentions(kw)) || badRequestByClass) {
    const short = headline();
    return [
      `❌ ${model} 调用失败 (Bad Request):${short}\n`,
      `会话消息序列可能损坏,请清理后重试。`,
    ].join("");
  }

  // Generic fallback: use the error's name when the message text is empty.
  const short =
    headline() || (err instanceof Error ? err.name : "Unknown error");
  return `❌ ${model} 调用失败:${short}`;
}
461
+
462
/**
 * Unified LLM client with retry, fallback chains, caching, cost tracking, and budget control.
 *
 * The actual provider call is still a placeholder (see `completeWithRetry`);
 * model selection, fallback ordering, usage accounting and budget checks
 * around it are implemented here.
 */
export class LLMClient {
  private config: any;
  private _toolRegistry: ToolRegistry;
  private _cache: LLMCache;
  /** Per-agent counters: prompt_tokens, completion_tokens, calls, cost. */
  private usageStats: Map<string, Record<string, number>> = new Map();
  /** Sum of estimated cost over all tracked calls. */
  private totalCost: number = 0;
  /** Budget ceiling; null disables the budget check. */
  private costLimit: number | null;
  private log: Logger | null = null;

  /**
   * @param config Application configuration; `config.agents[name].model`,
   *   `config.llm.defaultModel` and `config.llm.maxRetries` are read.
   * @param toolRegistry Tool registry kept for the client.
   * @param costLimit Optional budget ceiling; null means unlimited.
   */
  constructor(
    config: any,
    toolRegistry: ToolRegistry,
    costLimit: number | null = null
  ) {
    this.config = config;
    this._toolRegistry = toolRegistry;
    // NOTE(review): meaning of the LLMCache arguments (256, 120) is defined in ./cache — confirm there.
    this._cache = new LLMCache(256, 120);
    this.costLimit = costLimit;
  }

  /**
   * Set logger instance for event tracking.
   */
  setLogger(log: Logger): void {
    this.log = log;
  }

  /**
   * Resolve the model for an agent: the agent's configured model if present,
   * else `config.llm.defaultModel`, else "gpt-4o".
   */
  private getModel(agentName?: string): string {
    if (agentName) {
      const agentCfg = (this.config.agents as any)?.[agentName];
      if (agentCfg?.model) {
        return String(agentCfg.model);
      }
    }
    return this.config.llm?.defaultModel || "gpt-4o";
  }

  /**
   * Get max retries from config (`config.llm.maxRetries`, default 2).
   */
  private _getRetries(): number {
    return (this.config.llm as any)?.maxRetries ?? 2;
  }

  /**
   * Record token usage and estimated cost for one call.
   *
   * @param agentName Agent the call belongs to; undefined is filed under "default".
   * @param model Model used, for cost estimation.
   * @param promptTokens Input tokens consumed.
   * @param completionTokens Output tokens produced.
   */
  private trackUsage(
    agentName: string | undefined,
    model: string,
    promptTokens: number,
    completionTokens: number
  ): void {
    const key = agentName || "default";
    let stats = this.usageStats.get(key);
    if (!stats) {
      stats = {
        prompt_tokens: 0,
        completion_tokens: 0,
        calls: 0,
        cost: 0,
      };
      this.usageStats.set(key, stats);
    }

    stats.prompt_tokens += promptTokens;
    stats.completion_tokens += completionTokens;
    stats.calls += 1;

    const cost = estimateCost(model, promptTokens, completionTokens);
    stats.cost += cost;
    this.totalCost += cost;
  }

  /**
   * Throw when a cost limit is set and the accumulated cost has reached it.
   *
   * @throws Error with a "Cost limit exceeded" message.
   */
  private checkBudget(): void {
    if (this.costLimit !== null && this.totalCost >= this.costLimit) {
      throw new Error(
        `Cost limit exceeded: $${this.totalCost.toFixed(4)} >= $${this.costLimit.toFixed(4)}`
      );
    }
  }

  /**
   * Check if an API key is available for a model.
   *
   * The provider comes from the model's prefix, or failing that from the
   * first known provider name contained in the identifier. When no provider
   * can be determined the model is assumed usable.
   */
  private hasKeyForModel(model: string): boolean {
    let [provider] = splitProvider(model);

    if (!provider) {
      const lowered = model.toLowerCase();
      for (const p of getKnownProviders()) {
        if (lowered.includes(p)) {
          provider = p;
          break;
        }
      }
    }

    if (!provider) {
      return true; // Can't determine; don't skip
    }

    const envMap = getProviderEnvMap();
    const envVar =
      envMap.get(provider) || `${provider.toUpperCase()}_API_KEY`;
    return !!process.env[envVar];
  }

  /**
   * Get a snapshot of usage statistics.
   *
   * Both the map and each per-agent record are copied, so callers cannot
   * alter the client's internal counters through the returned value.
   */
  getUsageStats(): Map<string, Record<string, number>> {
    const snapshot = new Map<string, Record<string, number>>();
    for (const [key, stats] of this.usageStats) {
      snapshot.set(key, { ...stats });
    }
    return snapshot;
  }

  /**
   * Get total cost.
   */
  getTotalCost(): number {
    return this.totalCost;
  }

  /**
   * Reset usage statistics and cost.
   */
  resetUsageStats(): void {
    this.usageStats.clear();
    this.totalCost = 0;
  }

  /**
   * Complete a prompt, trying the requested model and then its fallbacks.
   *
   * Note: the provider call itself is still a placeholder; this method shows
   * the structure and interface.
   *
   * @param messages Chat messages.
   * @param agentName Agent whose configured model should be used.
   * @param tools Tool names; a missing or empty list means no tools are needed.
   * @param stream Forwarded to the completion call.
   * @param overrides Per-call overrides; a string `model` replaces the configured model.
   * @returns The first successful response. When every model fails, a
   *   response whose `content` is a user-facing error message.
   * @throws Error when the cost limit is already reached on entry.
   */
  async complete(
    messages: Record<string, unknown>[],
    agentName?: string,
    tools?: string[],
    stream: boolean = false,
    overrides?: Record<string, unknown>
  ): Promise<LLMResponse> {
    this.checkBudget();

    const ov = overrides || {};
    const rawModel = ov.model;
    const model: string =
      typeof rawModel === "string" ? rawModel : this.getModel(agentName);

    // An empty tools array carries no tools, so it must not trigger the
    // tool-compatibility filter (a bare truthiness test would).
    const needsTools = Array.isArray(tools) && tools.length > 0;

    // Build fallback chain, skipping fallbacks whose API key is not configured.
    const fallbackModels =
      FALLBACK_CHAINS.get(model)?.filter((m) => this.hasKeyForModel(m)) || [];
    const modelsToTry = toolCompatibleModels(
      model,
      [model, ...fallbackModels],
      needsTools
    );

    // Try each model in sequence
    let lastError: Error | null = null;
    for (const attemptModel of modelsToTry) {
      try {
        this.checkBudget();
        return await this.completeWithRetry(
          attemptModel,
          messages,
          agentName,
          tools,
          stream,
          overrides
        );
      } catch (e) {
        lastError = e instanceof Error ? e : new Error(String(e));
        this.log?.warn("llm_fallback", {
          model: attemptModel,
          agent: agentName,
          error: lastError.message,
        });
        continue;
      }
    }

    // All models failed
    return {
      content: formatUserFacingError(model, lastError),
      toolCalls: [],
      model,
      usage: { promptTokens: 0, completionTokens: 0 },
      cost: 0,
      truncated: false,
    };
  }

  /**
   * Complete with retry logic (placeholder).
   *
   * Returns a fixed dummy response, but records its usage so the totals and
   * the budget check reflect the calls that were made.
   */
  private async completeWithRetry(
    model: string,
    _messages: Record<string, unknown>[],
    agentName?: string,
    _tools?: string[],
    _stream: boolean = false,
    overrides?: Record<string, unknown>
  ): Promise<LLMResponse> {
    // This is a placeholder. Real implementation would:
    // 1. Validate cache
    // 2. Call actual LLM API (OpenAI, Anthropic, etc.)
    // 3. Apply Anthropic cache control if needed
    // 4. Handle retry logic with exponential backoff
    // 5. Track usage and cost
    // 6. Cache results if appropriate

    const _temperature = (overrides?.temperature as number) ?? 0.7;
    const _maxTokens = (overrides?.maxTokens as number) ?? 2000;

    // For now, return a dummy response with fixed token counts.
    const promptTokens = 100;
    const completionTokens = 50;

    // Step 5 above: without this, totalCost never grows and checkBudget()
    // can never fire.
    this.trackUsage(agentName, model, promptTokens, completionTokens);

    return {
      content: "Placeholder response from LLM",
      toolCalls: [],
      model,
      usage: { promptTokens, completionTokens },
      cost: estimateCost(model, promptTokens, completionTokens),
      truncated: false,
    };
  }

  /**
   * Stream a completion (placeholder).
   */
  async *stream(
    _messages: Record<string, unknown>[],
    _agentName?: string
  ): AsyncGenerator<string> {
    // Placeholder implementation
    yield "Streaming response...";
  }

  /**
   * Stream completion with tool awareness (placeholder).
   */
  async *streamWithTools(
    _messages: Record<string, unknown>[],
    _agentName?: string,
    _tools?: string[],
    _toolRegistry?: ToolRegistry,
    _overrides?: Record<string, unknown>
  ): AsyncGenerator<StreamEvent> {
    // Placeholder implementation
    yield {
      type: "content",
      text: "Tool-aware streaming response...",
    };
  }
}