skyloom 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/ci.yml +36 -0
- package/CONVERSION_PLAN.md +191 -0
- package/README.md +67 -0
- package/dist/agents/dew.d.ts +15 -0
- package/dist/agents/dew.d.ts.map +1 -0
- package/dist/agents/dew.js +74 -0
- package/dist/agents/dew.js.map +1 -0
- package/dist/agents/fair.d.ts +15 -0
- package/dist/agents/fair.d.ts.map +1 -0
- package/dist/agents/fair.js +106 -0
- package/dist/agents/fair.js.map +1 -0
- package/dist/agents/fog.d.ts +15 -0
- package/dist/agents/fog.d.ts.map +1 -0
- package/dist/agents/fog.js +52 -0
- package/dist/agents/fog.js.map +1 -0
- package/dist/agents/frost.d.ts +15 -0
- package/dist/agents/frost.d.ts.map +1 -0
- package/dist/agents/frost.js +54 -0
- package/dist/agents/frost.js.map +1 -0
- package/dist/agents/rain.d.ts +15 -0
- package/dist/agents/rain.d.ts.map +1 -0
- package/dist/agents/rain.js +54 -0
- package/dist/agents/rain.js.map +1 -0
- package/dist/agents/snow.d.ts +27 -0
- package/dist/agents/snow.d.ts.map +1 -0
- package/dist/agents/snow.js +226 -0
- package/dist/agents/snow.js.map +1 -0
- package/dist/cli/main.d.ts +7 -0
- package/dist/cli/main.d.ts.map +1 -0
- package/dist/cli/main.js +402 -0
- package/dist/cli/main.js.map +1 -0
- package/dist/cli/mode.d.ts +17 -0
- package/dist/cli/mode.d.ts.map +1 -0
- package/dist/cli/mode.js +56 -0
- package/dist/cli/mode.js.map +1 -0
- package/dist/core/agent.d.ts +174 -0
- package/dist/core/agent.d.ts.map +1 -0
- package/dist/core/agent.js +1332 -0
- package/dist/core/agent.js.map +1 -0
- package/dist/core/agent_helpers.d.ts +51 -0
- package/dist/core/agent_helpers.d.ts.map +1 -0
- package/dist/core/agent_helpers.js +477 -0
- package/dist/core/agent_helpers.js.map +1 -0
- package/dist/core/bus.d.ts +99 -0
- package/dist/core/bus.d.ts.map +1 -0
- package/dist/core/bus.js +191 -0
- package/dist/core/bus.js.map +1 -0
- package/dist/core/cache.d.ts +63 -0
- package/dist/core/cache.d.ts.map +1 -0
- package/dist/core/cache.js +121 -0
- package/dist/core/cache.js.map +1 -0
- package/dist/core/checkpoint.d.ts +19 -0
- package/dist/core/checkpoint.d.ts.map +1 -0
- package/dist/core/checkpoint.js +120 -0
- package/dist/core/checkpoint.js.map +1 -0
- package/dist/core/circuit_breaker.d.ts +46 -0
- package/dist/core/circuit_breaker.d.ts.map +1 -0
- package/dist/core/circuit_breaker.js +99 -0
- package/dist/core/circuit_breaker.js.map +1 -0
- package/dist/core/config.d.ts +97 -0
- package/dist/core/config.d.ts.map +1 -0
- package/dist/core/config.js +281 -0
- package/dist/core/config.js.map +1 -0
- package/dist/core/constants.d.ts +78 -0
- package/dist/core/constants.d.ts.map +1 -0
- package/dist/core/constants.js +84 -0
- package/dist/core/constants.js.map +1 -0
- package/dist/core/factory.d.ts +63 -0
- package/dist/core/factory.d.ts.map +1 -0
- package/dist/core/factory.js +537 -0
- package/dist/core/factory.js.map +1 -0
- package/dist/core/icons.d.ts +28 -0
- package/dist/core/icons.d.ts.map +1 -0
- package/dist/core/icons.js +86 -0
- package/dist/core/icons.js.map +1 -0
- package/dist/core/index.d.ts +29 -0
- package/dist/core/index.d.ts.map +1 -0
- package/dist/core/index.js +54 -0
- package/dist/core/index.js.map +1 -0
- package/dist/core/llm.d.ts +121 -0
- package/dist/core/llm.d.ts.map +1 -0
- package/dist/core/llm.js +532 -0
- package/dist/core/llm.js.map +1 -0
- package/dist/core/logger.d.ts +57 -0
- package/dist/core/logger.d.ts.map +1 -0
- package/dist/core/logger.js +122 -0
- package/dist/core/logger.js.map +1 -0
- package/dist/core/mcp.d.ts +190 -0
- package/dist/core/mcp.d.ts.map +1 -0
- package/dist/core/mcp.js +822 -0
- package/dist/core/mcp.js.map +1 -0
- package/dist/core/mcp_server.d.ts +26 -0
- package/dist/core/mcp_server.d.ts.map +1 -0
- package/dist/core/mcp_server.js +211 -0
- package/dist/core/mcp_server.js.map +1 -0
- package/dist/core/memory.d.ts +190 -0
- package/dist/core/memory.d.ts.map +1 -0
- package/dist/core/memory.js +988 -0
- package/dist/core/memory.js.map +1 -0
- package/dist/core/middleware.d.ts +114 -0
- package/dist/core/middleware.d.ts.map +1 -0
- package/dist/core/middleware.js +248 -0
- package/dist/core/middleware.js.map +1 -0
- package/dist/core/pipelines.d.ts +87 -0
- package/dist/core/pipelines.d.ts.map +1 -0
- package/dist/core/pipelines.js +301 -0
- package/dist/core/pipelines.js.map +1 -0
- package/dist/core/profile.d.ts +23 -0
- package/dist/core/profile.d.ts.map +1 -0
- package/dist/core/profile.js +289 -0
- package/dist/core/profile.js.map +1 -0
- package/dist/core/router.d.ts +24 -0
- package/dist/core/router.d.ts.map +1 -0
- package/dist/core/router.js +111 -0
- package/dist/core/router.js.map +1 -0
- package/dist/core/schemas.d.ts +82 -0
- package/dist/core/schemas.d.ts.map +1 -0
- package/dist/core/schemas.js +200 -0
- package/dist/core/schemas.js.map +1 -0
- package/dist/core/semantic.d.ts +92 -0
- package/dist/core/semantic.d.ts.map +1 -0
- package/dist/core/semantic.js +175 -0
- package/dist/core/semantic.js.map +1 -0
- package/dist/core/skill.d.ts +68 -0
- package/dist/core/skill.d.ts.map +1 -0
- package/dist/core/skill.js +350 -0
- package/dist/core/skill.js.map +1 -0
- package/dist/core/tool.d.ts +99 -0
- package/dist/core/tool.d.ts.map +1 -0
- package/dist/core/tool.js +341 -0
- package/dist/core/tool.js.map +1 -0
- package/dist/core/tool_router.d.ts +29 -0
- package/dist/core/tool_router.d.ts.map +1 -0
- package/dist/core/tool_router.js +172 -0
- package/dist/core/tool_router.js.map +1 -0
- package/dist/core/workspace.d.ts +48 -0
- package/dist/core/workspace.d.ts.map +1 -0
- package/dist/core/workspace.js +179 -0
- package/dist/core/workspace.js.map +1 -0
- package/dist/plugins/loader.d.ts +17 -0
- package/dist/plugins/loader.d.ts.map +1 -0
- package/dist/plugins/loader.js +96 -0
- package/dist/plugins/loader.js.map +1 -0
- package/dist/skills/loader.d.ts +9 -0
- package/dist/skills/loader.d.ts.map +1 -0
- package/dist/skills/loader.js +78 -0
- package/dist/skills/loader.js.map +1 -0
- package/dist/tools/builtin.d.ts +10 -0
- package/dist/tools/builtin.d.ts.map +1 -0
- package/dist/tools/builtin.js +414 -0
- package/dist/tools/builtin.js.map +1 -0
- package/dist/tools/computer.d.ts +12 -0
- package/dist/tools/computer.d.ts.map +1 -0
- package/dist/tools/computer.js +326 -0
- package/dist/tools/computer.js.map +1 -0
- package/dist/tools/delegate.d.ts +10 -0
- package/dist/tools/delegate.d.ts.map +1 -0
- package/dist/tools/delegate.js +45 -0
- package/dist/tools/delegate.js.map +1 -0
- package/dist/web/server.d.ts +5 -0
- package/dist/web/server.d.ts.map +1 -0
- package/dist/web/server.js +647 -0
- package/dist/web/server.js.map +1 -0
- package/dist/web/tts.d.ts +33 -0
- package/dist/web/tts.d.ts.map +1 -0
- package/dist/web/tts.js +69 -0
- package/dist/web/tts.js.map +1 -0
- package/package.json +60 -0
- package/scripts/install.js +48 -0
- package/scripts/link.js +10 -0
- package/setup.bat +79 -0
- package/skill-test-ty2fOA/test.md +10 -0
- package/src/agents/dew.ts +70 -0
- package/src/agents/fair.ts +102 -0
- package/src/agents/fog.ts +48 -0
- package/src/agents/frost.ts +50 -0
- package/src/agents/rain.ts +50 -0
- package/src/agents/snow.ts +239 -0
- package/src/cli/main.ts +405 -0
- package/src/cli/mode.ts +58 -0
- package/src/core/agent.ts +1506 -0
- package/src/core/agent_helpers.ts +461 -0
- package/src/core/bus.ts +221 -0
- package/src/core/cache.ts +153 -0
- package/src/core/checkpoint.ts +94 -0
- package/src/core/circuit_breaker.ts +119 -0
- package/src/core/config.ts +341 -0
- package/src/core/constants.ts +95 -0
- package/src/core/factory.ts +627 -0
- package/src/core/icons.ts +53 -0
- package/src/core/index.ts +31 -0
- package/src/core/llm.ts +724 -0
- package/src/core/logger.ts +144 -0
- package/src/core/mcp.ts +953 -0
- package/src/core/mcp_server.ts +176 -0
- package/src/core/memory.ts +1169 -0
- package/src/core/middleware.ts +350 -0
- package/src/core/pipelines.ts +424 -0
- package/src/core/profile.ts +255 -0
- package/src/core/router.ts +124 -0
- package/src/core/schemas.ts +282 -0
- package/src/core/semantic.ts +211 -0
- package/src/core/skill.ts +342 -0
- package/src/core/tool.ts +427 -0
- package/src/core/tool_router.ts +193 -0
- package/src/core/workspace.ts +150 -0
- package/src/plugins/loader.ts +66 -0
- package/src/skills/loader.ts +46 -0
- package/src/sql.js.d.ts +29 -0
- package/src/tools/builtin.ts +382 -0
- package/src/tools/computer.ts +269 -0
- package/src/tools/delegate.ts +49 -0
- package/src/web/server.ts +634 -0
- package/src/web/tts.ts +93 -0
- package/tests/bus.test.ts +121 -0
- package/tests/icons.test.ts +45 -0
- package/tests/router.test.ts +86 -0
- package/tests/schemas.test.ts +51 -0
- package/tests/semantic.test.ts +83 -0
- package/tests/setup.ts +10 -0
- package/tests/skill.test.ts +172 -0
- package/tests/tool.test.ts +108 -0
- package/tests/tool_router.test.ts +71 -0
- package/tsconfig.json +37 -0
- package/vitest.config.ts +17 -0
package/src/core/llm.ts
ADDED
|
@@ -0,0 +1,724 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* LLM abstraction layer with LiteLLM-compatible routing, retry, fallback, cost tracking, and budget control.
|
|
3
|
+
*
|
|
4
|
+
* Provides unified interface for multiple LLM providers (OpenAI, Anthropic, DeepSeek, etc.)
|
|
5
|
+
* with automatic fallback chains, prompt caching for Anthropic, and cost estimation.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import type { Logger } from "./logger";
|
|
9
|
+
import { LLMCache } from "./cache";
|
|
10
|
+
import type { ToolRegistry } from "./tool";
|
|
11
|
+
|
|
12
|
+
/**
 * Result of one LLM completion, as produced by `LLMClient.complete`.
 */
export interface LLMResponse {
  /**
   * Text of the reply. When every candidate model fails, `LLMClient.complete`
   * stores the formatted user-facing error message here instead.
   */
  content: string;
  /** Tool calls extracted from the response (empty when there are none). */
  toolCalls: ToolCall[];
  /** Model identifier this response is attributed to. */
  model: string;
  /** Prompt/completion token counts for the call. */
  usage: UsageStats;
  /** Estimated cost of the call (USD, per the cost table used by `estimateCost`). */
  cost: number;
  /** Optional reasoning text; presumably emitted by reasoning models — TODO confirm against the producer. */
  reasoningContent?: string;
  // True when LLM loop ran out of iterations before producing a tool-call-free answer
  truncated: boolean;
}
|
|
25
|
+
|
|
26
|
+
/**
 * Tool call extracted from an LLM response.
 */
export interface ToolCall {
  /** Identifier of this call. */
  id: string;
  /** Kind of call; presumably always "function" (OpenAI-style payload) — TODO confirm. */
  type: string;
  /** The function the model wants invoked. */
  function: {
    /** Name of the tool/function to invoke. */
    name: string;
    /** Arguments as a string; presumably JSON-encoded — verify against the caller that parses it. */
    arguments: string;
  };
}
|
|
37
|
+
|
|
38
|
+
/**
 * Token usage statistics for a single call.
 */
export interface UsageStats {
  /** Number of tokens in the prompt (input side). */
  promptTokens: number;
  /** Number of tokens in the completion (output side). */
  completionTokens: number;
}
|
|
45
|
+
|
|
46
|
+
/**
 * Event yielded while streaming an LLM response.
 *
 * `type` says which kind of event this is; the remaining fields are optional.
 * NOTE(review): which optional field accompanies which `type` is not enforced
 * by this declaration — confirm against the stream producer.
 */
export interface StreamEvent {
  /** Kind of event. */
  type: "content" | "tool_call" | "done" | "error" | "reasoning";
  /** Text payload (set on "content" events by `streamWithTools`). */
  text?: string;
  /** Tool call payload; presumably set on "tool_call" events — TODO confirm. */
  toolCall?: ToolCall;
  /** Token usage; presumably reported once the stream finishes — TODO confirm. */
  usage?: UsageStats;
  /** Reasoning text; presumably set on "reasoning" events — TODO confirm. */
  reasoningContent?: string;
}
|
|
56
|
+
|
|
57
|
+
/**
 * Separate a model string into `[provider, modelName]`.
 *
 * The text before the first "/" is lower-cased and treated as the provider
 * only if it is one of the known provider IDs; everything after that first
 * "/" is the model name. In every other case the result is `[null, model]`
 * with the input left untouched.
 *
 * Example: "anthropic/claude-3-opus" → ["anthropic", "claude-3-opus"].
 */
function splitProvider(model: string): [string | null, string] {
  const slashAt = model.indexOf("/");
  if (slashAt === -1) {
    return [null, model];
  }

  const prefix = model.slice(0, slashAt).toLowerCase();
  const remainder = model.slice(slashAt + 1);

  return getKnownProviders().has(prefix) ? [prefix, remainder] : [null, model];
}
|
|
72
|
+
|
|
73
|
+
/**
 * Build the set of provider ID prefixes recognised in "provider/model" strings.
 *
 * A fresh Set is returned on every call, so callers may mutate it freely.
 */
function getKnownProviders(): Set<string> {
  const providers = new Set<string>();
  const ids = [
    "openai",
    "azure",
    "anthropic",
    "deepseek",
    "ollama",
    "groq",
    "mistral",
    "cohere",
    "together_ai",
    "openrouter",
    "gemini",
    "vertex_ai",
  ];
  for (const id of ids) {
    providers.add(id);
  }
  return providers;
}
|
|
92
|
+
|
|
93
|
+
/**
 * Build the mapping from provider ID to the environment variable that holds
 * its API key.
 *
 * A fresh Map is returned on every call; entries keep the declaration order.
 */
function getProviderEnvMap(): Map<string, string> {
  return new Map(
    Object.entries({
      openai: "OPENAI_API_KEY",
      anthropic: "ANTHROPIC_API_KEY",
      deepseek: "DEEPSEEK_API_KEY",
      groq: "GROQ_API_KEY",
      mistral: "MISTRAL_API_KEY",
      cohere: "COHERE_API_KEY",
      openrouter: "OPENROUTER_API_KEY",
      gemini: "GEMINI_API_KEY",
    })
  );
}
|
|
109
|
+
|
|
110
|
+
/**
 * Tell whether a model string targets Anthropic's API.
 *
 * True when the lower-cased name starts with "anthropic/" or "claude", or when
 * the provider prefix parsed from the name is "anthropic".
 */
function isAnthropicModel(model: string): boolean {
  const normalized = model.toLowerCase();
  const hasAnthropicPrefix = ["anthropic/", "claude"].some((prefix) =>
    normalized.startsWith(prefix)
  );
  if (hasAnthropicPrefix) {
    return true;
  }
  return splitProvider(model)[0] === "anthropic";
}
|
|
121
|
+
|
|
122
|
+
/**
 * Tell whether a model string targets a DeepSeek model.
 *
 * Matches, case-insensitively, any of:
 *  - an explicit "deepseek/" provider prefix,
 *  - a bare model name starting with "deepseek",
 *  - a model name starting with "deepseek" behind another known provider
 *    prefix (e.g. "openrouter/DeepSeek-R1").
 *
 * @param model Model identifier, optionally prefixed with "provider/".
 * @returns True when the identifier refers to a DeepSeek model.
 */
function isDeepseekModel(model: string): boolean {
  const [provider, stripped] = splitProvider(model);
  const lowered = model.toLowerCase();
  return (
    provider === "deepseek" ||
    lowered.startsWith("deepseek") ||
    // Lower-case the stripped name too: splitProvider only normalises the
    // provider part, so mixed-case names behind a prefix were missed before.
    stripped.toLowerCase().startsWith("deepseek")
  );
}
|
|
134
|
+
|
|
135
|
+
/**
 * Tell whether a DeepSeek model can be used for tool calls.
 *
 * Reasoning variants are treated as unreliable for function calling: any name
 * containing "reasoner", "-r1" or "/r1" (case-insensitive) is rejected.
 */
function deepseekSupportsTools(model: string): boolean {
  const name = model.toLowerCase();
  for (const marker of ["reasoner", "-r1", "/r1"]) {
    if (name.includes(marker)) {
      return false;
    }
  }
  return true;
}
|
|
143
|
+
|
|
144
|
+
/**
 * Narrow a candidate model list to those usable for tool calls.
 *
 * When tools are not needed the list is returned as-is. Otherwise DeepSeek
 * models that do not support tools are dropped. If that leaves nothing, a
 * DeepSeek primary is replaced by "deepseek/deepseek-chat"; for any other
 * primary the original list is returned unchanged.
 */
function toolCompatibleModels(
  primary: string,
  models: string[],
  needsTools: boolean
): string[] {
  if (!needsTools) {
    return models;
  }

  const usable: string[] = [];
  for (const candidate of models) {
    if (isDeepseekModel(candidate) && !deepseekSupportsTools(candidate)) {
      continue;
    }
    usable.push(candidate);
  }

  if (usable.length > 0) {
    return usable;
  }

  return isDeepseekModel(primary) ? ["deepseek/deepseek-chat"] : models;
}
|
|
170
|
+
|
|
171
|
+
/**
 * Return a copy of `items` whose last element carries an ephemeral
 * `cache_control` marker. Elements are shallow-copied; the input is not mutated.
 * Callers must pass a non-empty array.
 */
function _markLastEphemeral(
  items: Record<string, unknown>[]
): Record<string, unknown>[] {
  const copies: Record<string, unknown>[] = items.map((item) => ({ ...item }));
  const lastIndex = copies.length - 1;
  copies[lastIndex] = {
    ...copies[lastIndex],
    cache_control: { type: "ephemeral" },
  };
  return copies;
}

/**
 * Apply Anthropic ephemeral cache markers to messages and tools.
 *
 * For Anthropic models, `cache_control: { type: "ephemeral" }` is attached to
 * the first non-empty system message (its string content is wrapped in a
 * single text block, or the last block of array content is marked) and to the
 * last tool schema. Per the original author's note this lets Anthropic reuse
 * the repeated prompt prefix across turns and so reduces input cost.
 *
 * Inputs are never mutated. For non-Anthropic models both arguments are
 * returned untouched.
 *
 * @param model Model identifier used to decide whether markers apply.
 * @param messages Chat messages (objects with `role` and `content`).
 * @param toolSchemas Tool schemas, or null when no tools are sent.
 * @returns `[messages, toolSchemas]` with cache markers applied where relevant.
 *   An empty or null `toolSchemas` is passed through unchanged.
 */
function _applyAnthropicCacheControl(
  model: string,
  messages: Record<string, unknown>[],
  toolSchemas: Record<string, unknown>[] | null
): [Record<string, unknown>[], Record<string, unknown>[] | null] {
  if (!isAnthropicModel(model)) {
    return [messages, toolSchemas];
  }

  // Only the first system message with non-empty content gets the marker.
  let cachedSystem = false;

  const newMessages = messages.map((msg): Record<string, unknown> => {
    if (cachedSystem || msg.role !== "system") {
      return msg;
    }

    const content = msg.content;

    if (typeof content === "string" && content) {
      cachedSystem = true;
      // Spread msg so extra fields on the system message survive the
      // rewrite, matching the array-content branch below.
      return {
        ...msg,
        content: [
          {
            type: "text",
            text: content,
            cache_control: { type: "ephemeral" },
          },
        ],
      };
    }

    if (Array.isArray(content) && content.length > 0) {
      cachedSystem = true;
      return {
        ...msg,
        content: _markLastEphemeral(content as Record<string, unknown>[]),
      };
    }

    // Empty or unsupported content: leave the message alone and keep looking.
    return msg;
  });

  // Keep null / empty tool lists exactly as given, so the Anthropic path
  // agrees with the non-Anthropic path for callers that distinguish [] from null.
  const newTools =
    toolSchemas && toolSchemas.length > 0
      ? _markLastEphemeral(toolSchemas)
      : toolSchemas;

  return [newMessages, newTools];
}
|
|
252
|
+
|
|
253
|
+
/**
 * Estimate the token count of mixed CJK/English text.
 *
 * Heuristic: each CJK character counts as 2 tokens and every 4 other
 * characters count as 1 token. The result is never below 1, so empty text
 * yields 1.
 *
 * Characters counted as CJK: Hiragana and Katakana (U+3040–U+30FF), CJK
 * Unified Ideographs Extension A (U+3400–U+4DBF), CJK Unified Ideographs
 * (U+4E00–U+9FFF) and Hangul syllables (U+AC00–U+D7AF).
 *
 * @param text Text to measure.
 * @returns Estimated number of tokens (at least 1).
 */
function _estimateTokens(text: string): number {
  // Katakana and Extension A are included so Japanese text and rarer
  // ideographs are not under-counted at the 4-chars-per-token rate.
  const cjkRegex = /[\u3040-\u30FF\u3400-\u4DBF\u4E00-\u9FFF\uAC00-\uD7AF]/g;
  const cjkCount = (text.match(cjkRegex) || []).length;
  const otherCount = text.length - cjkCount;
  return Math.max(1, cjkCount * 2 + Math.floor(otherCount / 4));
}
|
|
264
|
+
|
|
265
|
+
/**
 * Cost per 1K tokens (input / output) — USD.
 *
 * Keys are model identifiers exactly as they are looked up by `estimateCost`;
 * each value is an `[inputRate, outputRate]` pair. Some models are listed both
 * bare and with a "provider/" prefix so that either spelling resolves.
 * These are estimates, not billing-grade figures.
 */
const MODEL_COST_ESTIMATES: Map<string, [number, number]> = new Map([
  // gpt-* and o-series models
  ["gpt-4o", [0.0025, 0.01]],
  ["gpt-4o-mini", [0.00015, 0.0006]],
  ["gpt-4.1", [0.002, 0.008]],
  ["gpt-4.1-mini", [0.0004, 0.0016]],
  ["gpt-4.1-nano", [0.0001, 0.0004]],
  ["o3", [0.01, 0.04]],
  ["o4-mini", [0.0011, 0.0044]],
  // claude-* models
  ["claude-sonnet-4-6", [0.003, 0.015]],
  ["claude-opus-4-7", [0.005, 0.025]],
  ["claude-haiku-4-5", [0.0008, 0.004]],
  // deepseek models, bare identifiers
  ["deepseek-chat", [0.00027, 0.0011]],
  ["deepseek-reasoner", [0.00055, 0.00219]],
  ["deepseek-v4-flash", [0.00014, 0.00028]],
  ["deepseek-v4-pro", [0.00174, 0.00348]],
  // deepseek models, provider-prefixed identifiers (same rates as above)
  ["deepseek/deepseek-chat", [0.00027, 0.0011]],
  ["deepseek/deepseek-reasoner", [0.00055, 0.00219]],
  ["deepseek/deepseek-v4-flash", [0.00014, 0.00028]],
  ["deepseek/deepseek-v4-pro", [0.00174, 0.00348]],
  // gemini models (prefixed identifiers only)
  ["gemini/gemini-2.5-flash", [0.0003, 0.0025]],
  ["gemini/gemini-2.5-pro", [0.00125, 0.01]],
  // ollama models are priced at zero
  ["ollama/llama3", [0.0, 0.0]],
  ["ollama/qwen2.5", [0.0, 0.0]],
]);
|
|
292
|
+
|
|
293
|
+
/**
 * Fallback chains for model availability.
 *
 * Maps a primary model identifier to the ordered list of models to try after
 * it. `LLMClient.complete` looks up the requested model here, drops fallbacks
 * for which no API key is available, and then tries the primary followed by
 * the remaining fallbacks in order. Models without an entry have no fallback.
 */
const FALLBACK_CHAINS: Map<string, string[]> = new Map([
  // gpt-* and o-series models
  ["gpt-4o", ["gpt-4o-mini"]],
  ["gpt-4o-mini", ["gpt-4o"]],
  ["gpt-4.1", ["gpt-4.1-mini", "gpt-4o-mini"]],
  ["gpt-4.1-mini", ["gpt-4o-mini"]],
  ["gpt-4.1-nano", ["gpt-4.1-mini"]],
  ["o3", ["o4-mini", "gpt-4.1"]],
  ["o4-mini", ["gpt-4.1-mini"]],
  // claude-* models
  ["claude-sonnet-4-6", ["claude-haiku-4-5", "gpt-4.1-mini"]],
  ["claude-opus-4-7", ["claude-sonnet-4-6", "gpt-4.1"]],
  ["claude-haiku-4-5", ["gpt-4.1-mini"]],
  // deepseek models, bare identifiers
  ["deepseek-chat", ["deepseek/deepseek-chat", "gpt-4.1-mini"]],
  ["deepseek-reasoner", ["deepseek/deepseek-chat", "gpt-4.1-mini"]],
  ["deepseek-v4-flash", ["deepseek/deepseek-chat", "gpt-4.1-mini"]],
  ["deepseek-v4-pro", ["deepseek-v4-flash", "deepseek/deepseek-chat", "gpt-4.1-mini"]],
  // deepseek models, provider-prefixed identifiers
  ["deepseek/deepseek-chat", ["gpt-4.1-mini"]],
  ["deepseek/deepseek-reasoner", ["deepseek/deepseek-chat", "gpt-4.1-mini"]],
  ["deepseek/deepseek-v4-flash", ["deepseek/deepseek-chat", "gpt-4.1-mini"]],
  ["deepseek/deepseek-v4-pro", [
    "deepseek/deepseek-v4-flash",
    "deepseek/deepseek-chat",
    "gpt-4.1-mini",
  ]],
  // gemini models
  ["gemini/gemini-2.5-flash", ["gemini/gemini-2.5-pro", "gpt-4.1-mini"]],
  ["gemini/gemini-2.5-pro", ["gpt-4.1"]],
]);
|
|
322
|
+
|
|
323
|
+
/**
 * HTTP status codes that are considered transient errors (worth retrying).
 */
const RETRYABLE_STATUSES = new Set([408, 425, 429, 500, 502, 503, 504]);

/**
 * Property names under which an error may expose its HTTP status, checked in
 * this order. The snake_case names are kept for backward compatibility; the
 * camelCase ones are what JavaScript SDK and HTTP client errors typically use.
 */
const TRANSIENT_STATUS_FIELDS = ["status_code", "http_status", "status", "statusCode"];

/**
 * Lower-cased error class / `name` values that indicate a transient failure.
 */
const TRANSIENT_ERROR_NAMES = new Set([
  "ratelimiterror",
  "apitimeouterror",
  "apiconnectiontimeouterror",
  "apiconnectionerror",
  "serviceunavailableerror",
  "internalservererror",
  "timeout",
]);

/**
 * Check if an exception is worth retrying.
 *
 * An error is transient when any of the following holds:
 *  - the first truthy status field on it (see `TRANSIENT_STATUS_FIELDS`) is a
 *    number contained in `RETRYABLE_STATUSES`;
 *  - its `name` is "TimeoutError";
 *  - its constructor name or its `name`, lower-cased, is one of
 *    `TRANSIENT_ERROR_NAMES`.
 *
 * @param err The caught value; anything that is not an `Error` is never transient.
 * @returns True when a retry is reasonable.
 */
function isTransientError(err: unknown): boolean {
  if (!(err instanceof Error)) {
    return false;
  }

  const fields = err as Error & Record<string, unknown>;
  const status = TRANSIENT_STATUS_FIELDS.map((key) => fields[key]).find(Boolean);
  if (typeof status === "number" && RETRYABLE_STATUSES.has(status)) {
    return true;
  }

  if (err.name === "TimeoutError") {
    return true;
  }

  // Check `name` as well as the constructor name: libraries often set `name`
  // explicitly, and it survives minification where class names do not.
  return (
    TRANSIENT_ERROR_NAMES.has(err.constructor.name.toLowerCase()) ||
    TRANSIENT_ERROR_NAMES.has(String(err.name).toLowerCase())
  );
}
|
|
356
|
+
|
|
357
|
+
/**
 * Estimate the USD cost of an LLM API call.
 *
 * Rates come from `MODEL_COST_ESTIMATES` (USD per 1K tokens). The model is
 * looked up by its exact identifier first; if that misses and the identifier
 * has a "prefix/" part, the text after the first "/" is tried as well, so
 * "openai/gpt-4o" is priced like "gpt-4o". Models still not found are priced
 * at a default of 0.001 (input) / 0.002 (output) per 1K tokens.
 *
 * @param model Model identifier, optionally provider-prefixed.
 * @param promptTokens Number of input tokens.
 * @param completionTokens Number of output tokens.
 * @returns Estimated cost in USD.
 */
export function estimateCost(
  model: string,
  promptTokens: number,
  completionTokens: number
): number {
  const slashAt = model.indexOf("/");
  const unprefixed = slashAt >= 0 ? model.slice(slashAt + 1) : null;

  const [inputRate, outputRate] =
    MODEL_COST_ESTIMATES.get(model) ??
    (unprefixed !== null ? MODEL_COST_ESTIMATES.get(unprefixed) : undefined) ??
    [0.001, 0.002];

  return (promptTokens / 1000) * inputRate + (completionTokens / 1000) * outputRate;
}
|
|
370
|
+
|
|
371
|
+
/**
 * Turn an LLM failure into a short message suitable for showing to the user.
 *
 * The error text is matched (case-insensitively unless noted) against a fixed
 * sequence of categories, and the first match wins: missing/invalid API key,
 * rate limiting (also a literal "429"), timeout, unknown model, content
 * filtering, bad request. Anything else produces a generic failure line made
 * from the first line of the error text, capped at 200 characters.
 */
function formatUserFacingError(model: string, err: unknown): string {
  const message = err instanceof Error ? err.message : String(err);
  const haystack = message.toLowerCase();
  const mentions = (needle: string): boolean => haystack.includes(needle);
  const firstLine = message.split("\n")[0].slice(0, 200);

  // Missing API key
  if (["api_key", "authentication", "unauthorized"].some((kw) => mentions(kw))) {
    const provider = splitProvider(model)[0];
    const envByProvider = getProviderEnvMap();
    const envVar =
      envByProvider.get(provider || "") || "the appropriate *_API_KEY";

    // List the providers whose key variable is currently set.
    const configuredProviders: string[] = [];
    for (const [name, variable] of envByProvider.entries()) {
      if (process.env[variable]) {
        configuredProviders.push(name);
      }
    }
    const configured = configuredProviders.join(", ");

    let hint: string;
    if (configured) {
      hint = `已配置: ${configured}。`;
    } else {
      hint = "未配置任何 API key。";
    }

    return [
      `❌ ${model} 调用失败:缺少或无效的 API key。\n`,
      `请确认 \`${envVar}\` 已设置,或运行 \`sky init\` 重新配置。${hint}`,
    ].join("");
  }

  // Rate limiting ("429" is matched against the raw, non-lower-cased text)
  if (mentions("rate limit") || message.includes("429")) {
    return `❌ ${model} 速率受限,请稍后重试。`;
  }

  if (mentions("timeout")) {
    return `❌ ${model} 请求超时,请稍后重试或调高超时时间。`;
  }

  // Unknown model ID
  const modelMissing = mentions("not found") || mentions("does not exist");
  if (mentions("model") && modelMissing) {
    return [
      `❌ ${model} 不是该 provider 的有效模型 ID。\n`,
      `请运行配置检查或 \`sky init\` 重新选择。`,
    ].join("");
  }

  // Content filtering / safety
  const filterKeywords = [
    "content exists risk",
    "content_policy",
    "content_filter",
    "content_filtered",
    "safety",
    "blocked by safety",
    "responsibleaipolicyviolation",
    "policy_violation",
  ];
  if (filterKeywords.some((kw) => mentions(kw))) {
    return [
      `❌ ${model} 拒绝该请求 (内容审核):${firstLine}\n`,
      `原因:provider 的内容安全过滤判定此次提问/上下文敏感。\n`,
      `建议:\n`,
      ` - 换一个 provider(如 OpenAI / Anthropic)\n`,
      ` - 把敏感关键词改写得更通用后重发`,
    ].join("");
  }

  // Bad request / malformed sequence
  const badRequestKeywords = [
    "bad request",
    "invalid_request",
    "tool_calls",
    "tool messages",
  ];
  const badRequestByText = badRequestKeywords.some((kw) => mentions(kw));
  const badRequestByClass =
    err instanceof Error &&
    err.constructor.name.toLowerCase().includes("badrequest");
  if (badRequestByText || badRequestByClass) {
    return [
      `❌ ${model} 调用失败 (Bad Request):${firstLine}\n`,
      `会话消息序列可能损坏,请清理后重试。`,
    ].join("");
  }

  // Generic fallback; an empty first line falls back to the error's name.
  const short =
    firstLine || (err instanceof Error ? err.name : "Unknown error");
  return `❌ ${model} 调用失败:${short}`;
}
|
|
461
|
+
|
|
462
|
+
/**
 * Unified LLM client with fallback chains, cost tracking, and budget control.
 *
 * The actual provider call (`completeWithRetry`) and both streaming methods
 * are placeholders in this version: they return fixed dummy output. The
 * surrounding model selection, fallback ordering, usage accounting and budget
 * enforcement are implemented.
 */
export class LLMClient {
  /** Application configuration; `agents[<name>].model`, `llm.defaultModel` and `llm.maxRetries` are read from it. */
  private config: any;
  /** Tool registry supplied by the caller (not used by the placeholder implementation). */
  private _toolRegistry: ToolRegistry;
  /** Response cache (not used by the placeholder implementation). */
  private _cache: LLMCache;
  /** Per-agent counters keyed by agent name ("default" when unnamed). */
  private usageStats: Map<string, Record<string, number>> = new Map();
  /** Running total of estimated cost across all tracked calls (USD). */
  private totalCost: number = 0;
  /** Spending cap in USD, or null for no cap. */
  private costLimit: number | null;
  private log: Logger | null = null;

  /**
   * @param config Application configuration object.
   * @param toolRegistry Registry of tools available to the model.
   * @param costLimit Optional spending cap in USD; null disables the cap.
   */
  constructor(
    config: any,
    toolRegistry: ToolRegistry,
    costLimit: number | null = null
  ) {
    this.config = config;
    this._toolRegistry = toolRegistry;
    // NOTE(review): arguments are presumably (max entries, TTL seconds) — confirm against LLMCache.
    this._cache = new LLMCache(256, 120);
    this.costLimit = costLimit;
  }

  /**
   * Set logger instance for event tracking.
   */
  setLogger(log: Logger): void {
    this.log = log;
  }

  /**
   * Resolve the model for an agent: the agent's configured model if present,
   * otherwise `config.llm.defaultModel`, otherwise "gpt-4o".
   */
  private getModel(agentName?: string): string {
    if (agentName) {
      const agentCfg = (this.config.agents as any)?.[agentName];
      if (agentCfg?.model) {
        return String(agentCfg.model);
      }
    }
    return this.config.llm?.defaultModel || "gpt-4o";
  }

  /**
   * Get max retries from config (`config.llm.maxRetries`, default 2).
   */
  private _getRetries(): number {
    return (this.config.llm as any)?.maxRetries ?? 2;
  }

  /**
   * Add one call's token usage and estimated cost to the per-agent counters
   * and to the running total.
   */
  private trackUsage(
    agentName: string | undefined,
    model: string,
    promptTokens: number,
    completionTokens: number
  ): void {
    const key = agentName || "default";
    if (!this.usageStats.has(key)) {
      this.usageStats.set(key, {
        prompt_tokens: 0,
        completion_tokens: 0,
        calls: 0,
        cost: 0,
      });
    }

    const stats = this.usageStats.get(key)!;
    stats.prompt_tokens += promptTokens;
    stats.completion_tokens += completionTokens;
    stats.calls += 1;

    const cost = estimateCost(model, promptTokens, completionTokens);
    stats.cost += cost;
    this.totalCost += cost;
  }

  /**
   * Throw when a cost limit is set and the running total has reached it.
   *
   * @throws Error with a "Cost limit exceeded" message.
   */
  private checkBudget(): void {
    if (this.costLimit !== null && this.totalCost >= this.costLimit) {
      throw new Error(
        `Cost limit exceeded: $${this.totalCost.toFixed(4)} >= $${this.costLimit.toFixed(4)}`
      );
    }
  }

  /**
   * Check if an API key is available for a model.
   *
   * The provider is taken from the "provider/" prefix, or guessed from a known
   * provider ID appearing anywhere in the model name. When no provider can be
   * determined the model is assumed usable. The key is read from the mapped
   * environment variable, or `<PROVIDER>_API_KEY` for unmapped providers.
   */
  private hasKeyForModel(model: string): boolean {
    let [provider] = splitProvider(model);

    if (!provider) {
      const lowered = model.toLowerCase();
      for (const p of getKnownProviders()) {
        if (lowered.includes(p)) {
          provider = p;
          break;
        }
      }
    }

    if (!provider) {
      return true; // Can't determine; don't skip
    }

    const envMap = getProviderEnvMap();
    const envVar =
      envMap.get(provider) || `${provider.toUpperCase()}_API_KEY`;
    return !!process.env[envVar];
  }

  /**
   * Get a snapshot of the usage statistics.
   *
   * Both the map and each per-agent record are copied, so callers cannot
   * alter the client's internal counters through the returned value.
   */
  getUsageStats(): Map<string, Record<string, number>> {
    const snapshot = new Map<string, Record<string, number>>();
    for (const [key, stats] of this.usageStats) {
      snapshot.set(key, { ...stats });
    }
    return snapshot;
  }

  /**
   * Get total estimated cost so far (USD).
   */
  getTotalCost(): number {
    return this.totalCost;
  }

  /**
   * Reset usage statistics and cost.
   */
  resetUsageStats(): void {
    this.usageStats.clear();
    this.totalCost = 0;
  }

  /**
   * Complete a prompt, trying the requested model and then its fallbacks.
   *
   * The model is `overrides.model` when that is a string, otherwise the
   * agent's configured model. Fallbacks without an API key are skipped, and
   * when tools are actually requested (a non-empty `tools` list) DeepSeek
   * models that cannot do tool calls are filtered out.
   *
   * If every candidate fails, no exception is thrown for the provider
   * failure: the returned response carries a user-facing error message in
   * `content`, with zero usage and cost.
   *
   * Note: the underlying provider call is still a placeholder.
   *
   * @param messages Chat messages to send.
   * @param agentName Agent whose configured model and usage bucket apply.
   * @param tools Names of tools to expose to the model.
   * @param stream Whether streaming was requested (forwarded as-is).
   * @param overrides Per-call overrides such as `model`.
   * @throws Error when the cost limit has been reached, both before the first
   *   attempt and before each fallback attempt.
   */
  async complete(
    messages: Record<string, unknown>[],
    agentName?: string,
    tools?: string[],
    stream: boolean = false,
    overrides?: Record<string, unknown>
  ): Promise<LLMResponse> {
    this.checkBudget();

    const requestedModel = overrides?.model;
    const model: string =
      typeof requestedModel === "string"
        ? requestedModel
        : this.getModel(agentName);

    // Build fallback chain
    const fallbackModels = (FALLBACK_CHAINS.get(model) ?? []).filter((m) =>
      this.hasKeyForModel(m)
    );
    // An empty tools list means "no tools"; it must not trigger the
    // tool-compatibility filter and silently swap out a reasoning model.
    const needsTools = Array.isArray(tools) && tools.length > 0;
    const modelsToTry = toolCompatibleModels(
      model,
      [model, ...fallbackModels],
      needsTools
    );

    // Try each model in sequence
    let lastError: Error | null = null;
    for (const attemptModel of modelsToTry) {
      // Kept outside the try block so a budget overrun propagates to the
      // caller like the entry check, instead of being logged as a fallback
      // failure and retried against every remaining model.
      this.checkBudget();
      try {
        return await this.completeWithRetry(
          attemptModel,
          messages,
          agentName,
          tools,
          stream,
          overrides
        );
      } catch (e) {
        lastError = e instanceof Error ? e : new Error(String(e));
        this.log?.warn("llm_fallback", {
          model: attemptModel,
          agent: agentName,
          error: lastError.message,
        });
      }
    }

    // All models failed
    return {
      content: formatUserFacingError(model, lastError),
      toolCalls: [],
      model,
      usage: { promptTokens: 0, completionTokens: 0 },
      cost: 0,
      truncated: false,
    };
  }

  /**
   * Complete with retry logic (placeholder).
   *
   * Currently returns a fixed dummy response attributed to `model`, with a
   * nominal usage of 100 prompt / 50 completion tokens.
   */
  private async completeWithRetry(
    model: string,
    _messages: Record<string, unknown>[],
    _agentName?: string,
    _tools?: string[],
    _stream: boolean = false,
    overrides?: Record<string, unknown>
  ): Promise<LLMResponse> {
    // This is a placeholder. Real implementation would:
    // 1. Validate cache
    // 2. Call actual LLM API (OpenAI, Anthropic, etc.)
    // 3. Apply Anthropic cache control if needed
    // 4. Handle retry logic with exponential backoff
    // 5. Track usage and cost
    // 6. Cache results if appropriate

    // Not used yet; kept to record the intended per-call defaults.
    const _temperature = (overrides?.temperature as number) ?? 0.7;
    const _maxTokens = (overrides?.maxTokens as number) ?? 2000;

    // For now, return a dummy response
    return {
      content: "Placeholder response from LLM",
      toolCalls: [],
      model,
      usage: { promptTokens: 100, completionTokens: 50 },
      cost: estimateCost(model, 100, 50),
      truncated: false,
    };
  }

  /**
   * Stream a completion (placeholder): yields a single fixed string.
   */
  async *stream(
    _messages: Record<string, unknown>[],
    _agentName?: string
  ): AsyncGenerator<string> {
    // Placeholder implementation
    yield "Streaming response...";
  }

  /**
   * Stream completion with tool awareness (placeholder): yields a single
   * fixed "content" event.
   */
  async *streamWithTools(
    _messages: Record<string, unknown>[],
    _agentName?: string,
    _tools?: string[],
    _toolRegistry?: ToolRegistry,
    _overrides?: Record<string, unknown>
  ): AsyncGenerator<StreamEvent> {
    // Placeholder implementation
    yield {
      type: "content",
      text: "Tool-aware streaming response...",
    };
  }
}
|