@4djs/assistant 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/core/chat-history.d.ts.map +1 -1
- package/dist/core/create-assistant-store.d.ts +2 -0
- package/dist/core/create-assistant-store.d.ts.map +1 -1
- package/dist/core/index.d.ts +5 -1
- package/dist/core/index.d.ts.map +1 -1
- package/dist/core/index.js +715 -28
- package/dist/core/llm-chat.d.ts +13 -0
- package/dist/core/llm-chat.d.ts.map +1 -1
- package/dist/core/llm-context-breakdown.d.ts +39 -0
- package/dist/core/llm-context-breakdown.d.ts.map +1 -0
- package/dist/core/llm-settings-storage.d.ts +3 -0
- package/dist/core/llm-settings-storage.d.ts.map +1 -1
- package/dist/core/llm-sse.d.ts +2 -0
- package/dist/core/llm-sse.d.ts.map +1 -1
- package/dist/core/llm-usage.d.ts +52 -0
- package/dist/core/llm-usage.d.ts.map +1 -0
- package/dist/core/types.d.ts +5 -0
- package/dist/core/types.d.ts.map +1 -1
- package/dist/index.js +353 -353
- package/dist/labels.d.ts +27 -1
- package/dist/labels.d.ts.map +1 -1
- package/dist/react/Assistant.d.ts.map +1 -1
- package/dist/react/components/chat/ChatComposer.d.ts +10 -2
- package/dist/react/components/chat/ChatComposer.d.ts.map +1 -1
- package/dist/react/components/chat/ContextUsageStrip.d.ts +14 -0
- package/dist/react/components/chat/ContextUsageStrip.d.ts.map +1 -0
- package/dist/react/components/chat/LlmSettingsStrip.d.ts.map +1 -1
- package/dist/react/components/chat/UsageToolbarButton.d.ts +24 -0
- package/dist/react/components/chat/UsageToolbarButton.d.ts.map +1 -0
- package/dist/react/components/chat/context-usage-utils.d.ts +34 -0
- package/dist/react/components/chat/context-usage-utils.d.ts.map +1 -0
- package/dist/react/index.d.ts +2 -0
- package/dist/react/index.d.ts.map +1 -1
- package/dist/react/types.d.ts +2 -0
- package/dist/react/types.d.ts.map +1 -1
- package/dist/styles.css +216 -5
- package/package.json +1 -1
package/dist/core/index.js
CHANGED
|
@@ -1460,8 +1460,34 @@ var DEFAULT_ASSISTANT_LABELS = {
|
|
|
1460
1460
|
"composer.stop": "Stop",
|
|
1461
1461
|
"composer.hint": "Enter to send · Shift+Enter for new line",
|
|
1462
1462
|
"composer.llmSettings": "LLM settings",
|
|
1463
|
+
"composer.contextUsage": "Context usage",
|
|
1463
1464
|
"composer.generateSuggestions": "Generate suggestions",
|
|
1464
1465
|
"composer.clearConversation": "Clear conversation",
|
|
1466
|
+
"usage.ariaLabel": "Show context usage",
|
|
1467
|
+
"usage.panelTitle": "Context usage",
|
|
1468
|
+
"usage.panelClose": "Close context usage",
|
|
1469
|
+
"usage.fullPercent": "{percent}% full",
|
|
1470
|
+
"usage.tokenBudget": "{used} / {total} tokens",
|
|
1471
|
+
"usage.tooltipTitle": "LLM usage",
|
|
1472
|
+
"usage.context": "Context: {used} / {total} ({percent}%)",
|
|
1473
|
+
"usage.tokensTotal": "Tokens: {count}",
|
|
1474
|
+
"usage.promptCompletion": "Prompt: {prompt} · Completion: {completion}",
|
|
1475
|
+
"usage.cached": "Cached: {count}",
|
|
1476
|
+
"usage.reasoning": "Reasoning: {count}",
|
|
1477
|
+
"usage.speed": "Speed: {rate} tok/s",
|
|
1478
|
+
"usage.duration": "Duration: {duration}",
|
|
1479
|
+
"usage.requests": "Requests this turn: {count}",
|
|
1480
|
+
"usage.model": "Model: {model}",
|
|
1481
|
+
"usage.session": "Session: {total} tokens · {requests} requests",
|
|
1482
|
+
"usage.idle": "No usage yet — send a message to start tracking",
|
|
1483
|
+
"usage.contextLimit": "Context window: {total}",
|
|
1484
|
+
"usage.category.systemPrompt": "System prompt",
|
|
1485
|
+
"usage.category.toolDefinitions": "Tool definitions",
|
|
1486
|
+
"usage.category.conversation": "Conversation",
|
|
1487
|
+
"usage.category.completion": "Completion",
|
|
1488
|
+
"usage.category.cached": "Cached prompt",
|
|
1489
|
+
"usage.category.reasoning": "Reasoning",
|
|
1490
|
+
"usage.estimatedHint": "Category splits are estimated from payload size",
|
|
1465
1491
|
"commandMenu.ariaLabel": "Composer commands",
|
|
1466
1492
|
"commandMenu.header": "Commands",
|
|
1467
1493
|
"commands.clear.description": "Clear the current conversation",
|
|
@@ -1487,7 +1513,7 @@ var DEFAULT_ASSISTANT_LABELS = {
|
|
|
1487
1513
|
"llmSettings.modelList": "Model list",
|
|
1488
1514
|
"llmSettings.baseUrlPlaceholder": "https://api.openai.com/v1",
|
|
1489
1515
|
"llmSettings.apiKeyPlaceholder": "sk-…",
|
|
1490
|
-
"llmSettings.apiKeyConfiguredPlaceholder": "
|
|
1516
|
+
"llmSettings.apiKeyConfiguredPlaceholder": "Leave blank to keep current key",
|
|
1491
1517
|
"llmSettings.modelPlaceholder": "gpt-4o-mini",
|
|
1492
1518
|
"llmSettings.modelListPlaceholder": "Optional — comma-separated",
|
|
1493
1519
|
"llmSettings.connected": "Connected · {model}",
|
|
@@ -2081,6 +2107,365 @@ function autoResolvedInteractiveResult(toolName, args) {
|
|
|
2081
2107
|
message: "A choice was already submitted. Call the action tool immediately."
|
|
2082
2108
|
});
|
|
2083
2109
|
}
|
|
2110
|
+
// src/core/llm-usage.ts
|
|
2111
|
+
/**
 * All-zero token counters. `aggregateUsage` spreads a copy of this object as
 * its starting accumulator instead of using the shared instance directly.
 */
var EMPTY_USAGE = { promptTokens: 0, completionTokens: 0, totalTokens: 0 };
|
|
2116
|
+
/**
 * Built-in context-window sizes (in tokens), as `[modelNameFragment, size]`
 * pairs.
 *
 * ORDER MATTERS: `resolveContextWindow` walks this list top to bottom and
 * returns the size of the first fragment that is a substring of the
 * lower-cased model name. A fragment must therefore appear BEFORE any other
 * fragment that is a substring of it (e.g. "gpt-4o-mini" before "gpt-4o"
 * before "gpt-4"); otherwise the longer entry can never match.
 *
 * "chatgpt-4o" and "sonar-deep-research" previously sat after "gpt-4o" and
 * "sonar" respectively and were unreachable; they are now placed ahead of the
 * entries that shadowed them. Their sizes are unchanged.
 */
var DEFAULT_MODEL_CONTEXT_WINDOWS = [
  // OpenAI GPT family
  ["gpt-5.5-pro", 1050000],
  ["gpt-5.5", 1050000],
  ["gpt-5.4-pro", 1100000],
  ["gpt-5.4", 1100000],
  ["gpt-5.3", 400000],
  ["gpt-5.2-pro", 400000],
  ["gpt-5.2", 400000],
  ["gpt-5.1", 400000],
  ["gpt-5-pro", 400000],
  ["gpt-5-mini", 400000],
  ["gpt-5-nano", 400000],
  ["gpt-5", 400000],
  ["gpt-4.1-nano", 1047576],
  ["gpt-4.1-mini", 1047576],
  ["gpt-4.1", 1047576],
  ["gpt-4o-mini", 128000],
  ["gpt-4o-realtime", 128000],
  ["gpt-4o-audio", 128000],
  // Must precede "gpt-4o", which is a substring of it.
  ["chatgpt-4o", 128000],
  ["gpt-4o", 128000],
  ["gpt-4.5-preview", 128000],
  ["gpt-4.5", 128000],
  ["gpt-4-turbo-preview", 128000],
  ["gpt-4-turbo", 128000],
  ["gpt-4-32k", 32768],
  ["gpt-4-1106", 128000],
  ["gpt-4-0125", 128000],
  ["gpt-4", 8192],
  ["gpt-3.5-turbo-16k", 16385],
  ["gpt-3.5-turbo-1106", 16385],
  ["gpt-3.5-turbo", 16385],
  ["gpt-3.5", 16385],
  // OpenAI o-series / codex
  ["o4-mini", 200000],
  ["o3-deep-research", 200000],
  ["o3-pro", 200000],
  ["o3-mini", 200000],
  ["o3", 200000],
  ["o1-pro", 200000],
  ["o1-preview", 200000],
  ["o1-mini", 128000],
  ["o1", 200000],
  ["codex-mini", 200000],
  // Anthropic Claude
  ["claude-opus-4-8", 1e6],
  ["claude-opus-4-7", 1e6],
  ["claude-opus-4-6", 1e6],
  ["claude-sonnet-4-6", 1e6],
  ["claude-mythos", 1e6],
  ["claude-fable", 1e6],
  ["claude-opus-4-1", 200000],
  ["claude-opus-4", 200000],
  ["claude-sonnet-4-5", 200000],
  ["claude-sonnet-4-1", 200000],
  ["claude-sonnet-4", 200000],
  ["claude-haiku-4-5", 200000],
  ["claude-haiku-4", 200000],
  ["claude-3-7-sonnet", 200000],
  ["claude-3-7", 200000],
  ["claude-3-5-sonnet", 200000],
  ["claude-3-5-haiku", 200000],
  ["claude-3-5", 200000],
  ["claude-3-opus", 200000],
  ["claude-3-sonnet", 200000],
  ["claude-3-haiku", 200000],
  ["claude-3", 200000],
  ["claude-2.1", 200000],
  ["claude-2", 1e5],
  ["claude-instant", 1e5],
  // Google Gemini
  ["gemini-3.1-pro", 1048576],
  ["gemini-3.1-flash", 1048576],
  ["gemini-3.1", 1048576],
  ["gemini-3-pro", 1048576],
  ["gemini-3-flash", 1048576],
  ["gemini-3-deep", 1048576],
  ["gemini-3", 1048576],
  ["gemini-2.5-pro", 1048576],
  ["gemini-2.5-flash-lite", 1048576],
  ["gemini-2.5-flash", 1048576],
  ["gemini-2.5", 1048576],
  ["gemini-2.0-flash-lite", 1048576],
  ["gemini-2.0-flash", 1048576],
  ["gemini-2.0", 1048576],
  ["gemini-1.5-pro", 2097152],
  ["gemini-1.5-flash-8b", 1048576],
  ["gemini-1.5-flash", 1048576],
  ["gemini-1.5", 1048576],
  ["gemini-pro-1.5", 2097152],
  ["gemini-ultra", 32768],
  ["gemini-pro", 32768],
  // DeepSeek
  ["deepseek-r1", 128000],
  ["deepseek-reasoner", 128000],
  ["deepseek-v3.2", 128000],
  ["deepseek-v3.1", 128000],
  ["deepseek-v3", 128000],
  ["deepseek-chat", 64000],
  ["deepseek-coder", 64000],
  ["deepseek", 64000],
  // Meta Llama
  ["llama-4-maverick", 1e6],
  ["llama-4-scout", 1e6],
  ["llama-4", 1e6],
  ["llama-3.3", 128000],
  ["llama-3.2", 128000],
  ["llama-3.1", 128000],
  ["llama-3-70b", 128000],
  ["llama-3-8b", 128000],
  ["llama-3", 128000],
  ["llama-2-70b", 4096],
  ["llama-2-13b", 4096],
  ["llama-2-7b", 4096],
  ["llama-2", 4096],
  ["llama3.3", 128000],
  ["llama3.2", 128000],
  ["llama3.1", 128000],
  ["llama3", 8192],
  ["codellama", 16384],
  // Mistral
  ["mistral-large-3", 256000],
  ["mistral-large-2", 128000],
  ["mistral-large", 128000],
  ["mistral-medium", 32768],
  ["mistral-small", 32768],
  ["mistral-nemo", 128000],
  ["mistral-saba", 32768],
  ["pixtral-large", 128000],
  ["pixtral", 128000],
  ["codestral", 32768],
  ["mistral", 32768],
  ["mixtral-8x22b", 65536],
  ["mixtral-8x7b", 32768],
  ["mixtral", 32768],
  ["ministral", 128000],
  // Qwen
  ["qwen3-235b", 128000],
  ["qwen3-32b", 128000],
  ["qwen3", 128000],
  ["qwen2.5-72b", 128000],
  ["qwen2.5-32b", 128000],
  ["qwen2.5-14b", 128000],
  ["qwen2.5-7b", 128000],
  ["qwen2.5-1m", 1e6],
  ["qwen2.5", 128000],
  ["qwen2-72b", 128000],
  ["qwen2", 128000],
  ["qwen-max", 32768],
  ["qwen-plus", 131072],
  ["qwen-turbo", 131072],
  ["qwen", 32768],
  // Cohere
  ["command-r-plus", 128000],
  ["command-r7b", 128000],
  ["command-r", 128000],
  ["command-light", 4096],
  ["command", 4096],
  ["aya", 8192],
  // xAI Grok
  ["grok-4.1", 1e6],
  ["grok-4", 256000],
  ["grok-3-mini", 131072],
  ["grok-3", 131072],
  ["grok-2-vision", 32768],
  ["grok-2", 131072],
  ["grok-beta", 131072],
  ["grok", 131072],
  // GLM
  ["glm-4.5", 128000],
  ["glm-4-plus", 128000],
  ["glm-4", 128000],
  ["chatglm3", 128000],
  ["chatglm", 32768],
  // Microsoft Phi
  ["phi-4-mini", 128000],
  ["phi-4", 128000],
  ["phi-3.5", 128000],
  ["phi-3-medium", 128000],
  ["phi-3-mini", 128000],
  ["phi-3", 128000],
  // Google Gemma
  ["gemma-3-27b", 128000],
  ["gemma-3", 128000],
  ["gemma-2-27b", 8192],
  ["gemma-2-9b", 8192],
  ["gemma-2", 8192],
  ["gemma", 8192],
  // Moonshot / Kimi
  ["kimi-k2", 128000],
  ["moonshot-v1-128k", 128000],
  ["moonshot-v1-32k", 32768],
  ["moonshot", 128000],
  // Amazon Nova
  ["nova-premier", 1e6],
  ["nova-pro", 300000],
  ["nova-lite", 300000],
  ["nova-micro", 128000],
  // AI21 Jamba
  ["jamba-1.5-large", 256000],
  ["jamba-1.5-mini", 256000],
  ["jamba", 256000],
  // Perplexity Sonar
  ["sonar-pro", 200000],
  ["sonar-reasoning", 127072],
  // Must precede "sonar", which is a substring of it.
  ["sonar-deep-research", 127072],
  ["sonar", 127072],
  // Other families
  ["yi-large", 32768],
  ["yi-34b", 200000],
  ["yi", 32768],
  ["dbrx-instruct", 32768],
  ["dbrx", 32768],
  ["solar-pro", 32768],
  ["solar", 32768],
  ["internvl", 32768],
  ["nvidia-nemotron", 128000],
  ["nemotron", 128000],
  ["reka-core", 128000],
  ["reka-flash", 128000],
  ["reka", 128000],
  ["falcon-180b", 2048],
  ["falcon", 2048],
  ["stablelm", 4096],
  ["wizardlm", 32768],
  ["vicuna", 4096],
  ["openchat", 8192],
  ["nous-hermes", 32768],
  ["hermes-3", 128000],
  ["hermes", 32768]
];
|
|
2330
|
+
/**
 * Returns `value` when it is a finite number, otherwise 0.
 *
 * @param {unknown} value - Candidate value (no coercion is attempted).
 * @returns {number} The finite number, or 0 for anything else.
 */
function readNumber(value) {
  if (typeof value !== "number") {
    return 0;
  }
  return Number.isFinite(value) ? value : 0;
}
|
|
2333
|
+
/**
 * Reads `parent[key][nestedKey]` and returns it only if it is a finite number.
 *
 * @param {unknown} parent - Outer value; must be a non-null object to proceed.
 * @param {string} key - Property on `parent` holding the nested object.
 * @param {string} nestedKey - Property on the nested object to read.
 * @returns {number|undefined} The finite number, or `undefined` when either
 *   level is missing / not an object or the value is not a finite number.
 */
function readNestedNumber(parent, key, nestedKey) {
  const isObjectLike = (candidate) => Boolean(candidate) && typeof candidate === "object";
  if (!isObjectLike(parent)) {
    return undefined;
  }
  const inner = parent[key];
  if (!isObjectLike(inner)) {
    return undefined;
  }
  const candidate = inner[nestedKey];
  if (typeof candidate === "number" && Number.isFinite(candidate)) {
    return candidate;
  }
  return undefined;
}
|
|
2342
|
+
/**
 * Normalizes a raw usage payload with snake_case fields (`prompt_tokens`,
 * `completion_tokens`, `total_tokens`, ...) into camelCase token counters.
 *
 * @param {unknown} raw - The `usage` value taken from a response / stream chunk.
 * @returns {{promptTokens: number, completionTokens: number, totalTokens: number,
 *   cachedTokens?: number, reasoningTokens?: number} | null}
 *   `null` when `raw` is not an object or every counter is zero.
 */
function parseUsage(raw) {
  if (raw === null || typeof raw !== "object") {
    return null;
  }
  const promptTokens = readNumber(raw.prompt_tokens);
  const completionTokens = readNumber(raw.completion_tokens);
  // Fall back to prompt + completion when no non-zero total was reported.
  const reportedTotal = readNumber(raw.total_tokens);
  const totalTokens = reportedTotal ? reportedTotal : promptTokens + completionTokens;
  const nothingReported = promptTokens === 0 && completionTokens === 0 && totalTokens === 0;
  if (nothingReported) {
    return null;
  }
  // Prefer the nested `*_details` field; fall back to the flat field.
  let cachedTokens = readNestedNumber(raw, "prompt_tokens_details", "cached_tokens");
  if (!cachedTokens) {
    cachedTokens = readNumber(raw.cached_tokens);
  }
  let reasoningTokens = readNestedNumber(raw, "completion_tokens_details", "reasoning_tokens");
  if (!reasoningTokens) {
    reasoningTokens = readNumber(raw.reasoning_tokens);
  }
  const parsed = { promptTokens, completionTokens, totalTokens };
  // Optional counters are only attached when non-zero.
  if (cachedTokens) {
    parsed.cachedTokens = cachedTokens;
  }
  if (reasoningTokens) {
    parsed.reasoningTokens = reasoningTokens;
  }
  return parsed;
}
|
|
2362
|
+
/**
 * Sums a list of per-request usage records into one record.
 *
 * @param {Array<{promptTokens: number, completionTokens: number, totalTokens: number,
 *   cachedTokens?: number, reasoningTokens?: number}>} usages
 * @returns {object} Summed counters; `cachedTokens` / `reasoningTokens` are
 *   present only when their running sum is greater than zero. An empty list
 *   yields a fresh copy of `EMPTY_USAGE`.
 */
function aggregateUsage(usages) {
  if (usages.length === 0) {
    return { ...EMPTY_USAGE };
  }
  let totals = { ...EMPTY_USAGE };
  for (const usage of usages) {
    const cachedTokens = (totals.cachedTokens ?? 0) + (usage.cachedTokens ?? 0);
    const reasoningTokens = (totals.reasoningTokens ?? 0) + (usage.reasoningTokens ?? 0);
    const next = {
      promptTokens: totals.promptTokens + usage.promptTokens,
      completionTokens: totals.completionTokens + usage.completionTokens,
      totalTokens: totals.totalTokens + usage.totalTokens
    };
    if (cachedTokens > 0) {
      next.cachedTokens = cachedTokens;
    }
    if (reasoningTokens > 0) {
      next.reasoningTokens = reasoningTokens;
    }
    totals = next;
  }
  return totals;
}
|
|
2377
|
+
/**
 * Resolves the context-window size (in tokens) for a model name.
 *
 * Lookup order:
 *   1. `overrides` — an exact (case-insensitive) key match wins; otherwise the
 *      LONGEST key that is a substring of the model name wins, so a generic
 *      key such as "gpt-4" can no longer shadow a more specific "gpt-4o"
 *      just because it was inserted first. Blank keys are ignored (an empty
 *      string is a substring of every model name).
 *   2. `DEFAULT_MODEL_CONTEXT_WINDOWS` — first fragment contained in the name.
 *
 * @param {string|null|undefined} model - Model identifier; trimmed and
 *   lower-cased before matching. Non-string values resolve to `null`
 *   instead of throwing.
 * @param {Record<string, number>} [overrides] - Caller-supplied sizes keyed by
 *   model name or name fragment.
 * @returns {number|null} The context-window size, or `null` when unknown.
 */
function resolveContextWindow(model, overrides) {
  if (typeof model !== "string") {
    return null;
  }
  const normalized = model.trim().toLowerCase();
  if (!normalized) {
    return null;
  }
  if (overrides) {
    let bestLength = 0;
    let bestValue;
    let found = false;
    for (const [key, value] of Object.entries(overrides)) {
      const fragment = key.trim().toLowerCase();
      if (!fragment) {
        continue;
      }
      if (fragment === normalized) {
        return value;
      }
      // Strictly longer only: among equal-length matches the first one wins.
      if (fragment.length > bestLength && normalized.includes(fragment)) {
        bestLength = fragment.length;
        bestValue = value;
        found = true;
      }
    }
    if (found) {
      return bestValue;
    }
  }
  for (const [prefix, size] of DEFAULT_MODEL_CONTEXT_WINDOWS) {
    if (normalized.includes(prefix)) {
      return size;
    }
  }
  return null;
}
|
|
2394
|
+
/**
 * Computes generation speed in tokens per second, rounded to one decimal.
 *
 * @param {number} completionTokens - Tokens generated.
 * @param {number} durationMs - Elapsed time in milliseconds.
 * @returns {number|null} Tokens per second, or `null` when either input is
 *   non-positive or not a finite number. The finite check matters because
 *   `NaN <= 0` is false, so without it NaN / undefined inputs would produce
 *   a `NaN` rate instead of "no value".
 */
function computeTokensPerSecond(completionTokens, durationMs) {
  if (!Number.isFinite(completionTokens) || !Number.isFinite(durationMs)) {
    return null;
  }
  if (completionTokens <= 0 || durationMs <= 0) {
    return null;
  }
  // x1000 converts per-ms to per-second; the x10 ... /10 pair keeps one decimal.
  return Math.round(completionTokens / durationMs * 1000 * 10) / 10;
}
|
|
2399
|
+
/**
 * Builds the usage summary for one assistant turn from the usage records of
 * every request made during that turn.
 *
 * @param {object} input
 * @param {Array<object>} input.usages - Per-request usage records.
 * @param {string} input.model - Model name, also used to resolve the window.
 * @param {number} input.durationMs - Elapsed time of the turn in milliseconds.
 * @param {Record<string, number>} [input.modelContextWindows] - Size overrides.
 * @param {boolean} [input.streaming] - When truthy, `streaming: true` is added.
 * @returns {object} Summed counters plus model, duration, speed, context
 *   window, percent used and request count.
 *
 * NOTE(review): the percentage is derived from the SUM of `totalTokens` over
 * all requests in the turn — confirm that is the intended measure of context
 * occupancy for multi-request turns.
 */
function buildTurnUsage(input) {
  const totals = aggregateUsage(input.usages);
  const contextWindow = resolveContextWindow(input.model, input.modelContextWindows);
  let contextUsedPercent = null;
  if (contextWindow && totals.totalTokens > 0) {
    // Capped so the value never exceeds 100%.
    contextUsedPercent = Math.min(100, totals.totalTokens / contextWindow * 100);
  }
  const turn = {
    ...totals,
    model: input.model,
    durationMs: Math.max(0, Math.round(input.durationMs)),
    tokensPerSecond: computeTokensPerSecond(totals.completionTokens, input.durationMs),
    contextWindow,
    contextUsedPercent,
    requestCount: input.usages.length
  };
  if (input.streaming) {
    turn.streaming = true;
  }
  return turn;
}
|
|
2414
|
+
/**
 * Returns a new session-usage object with `turn` added to the running totals.
 *
 * @param {{session: object, baselineBreakdown: object|null}|null|undefined} previous
 *   Prior session usage; when absent (or lacking `session`) totals start at zero.
 * @param {{promptTokens: number, completionTokens: number, totalTokens: number,
 *   requestCount: number}} turn - Usage of the turn being appended.
 * @returns {{session: object, currentTurn: object, baselineBreakdown: object|null}}
 *   New object; `previous` is not modified. `baselineBreakdown` is carried
 *   over from `previous`, or `null`.
 */
function appendSessionUsage(previous, turn) {
  const counters = ["promptTokens", "completionTokens", "totalTokens", "requestCount"];
  const prior = previous?.session ?? {
    promptTokens: 0,
    completionTokens: 0,
    totalTokens: 0,
    requestCount: 0
  };
  const session = {};
  for (const counter of counters) {
    session[counter] = prior[counter] + turn[counter];
  }
  const baselineBreakdown = previous?.baselineBreakdown ?? null;
  return { session, currentTurn: turn, baselineBreakdown };
}
|
|
2432
|
+
/**
 * Creates a fresh session-usage object: zeroed totals, no current turn and
 * no baseline breakdown.
 *
 * @returns {{session: {promptTokens: number, completionTokens: number,
 *   totalTokens: number, requestCount: number}, currentTurn: null,
 *   baselineBreakdown: null}} A new object on every call.
 */
function createEmptySessionUsage() {
  const session = { promptTokens: 0, completionTokens: 0, totalTokens: 0, requestCount: 0 };
  return { session, currentTurn: null, baselineBreakdown: null };
}
|
|
2444
|
+
/**
 * Checks whether a value has the shape of a persisted turn-usage record:
 * finite numeric `promptTokens`, `completionTokens`, `totalTokens`,
 * `durationMs` and `requestCount`, plus a string `model`.
 *
 * @param {unknown} value - Value to validate.
 * @returns {boolean} `true` only when every required field passes.
 */
function isStoredLlmTurnUsage(value) {
  if (value === null || typeof value !== "object") {
    return false;
  }
  const isFiniteNumber = (field) => typeof value[field] === "number" && Number.isFinite(value[field]);
  return isFiniteNumber("promptTokens")
    && isFiniteNumber("completionTokens")
    && isFiniteNumber("totalTokens")
    && typeof value.model === "string"
    && isFiniteNumber("durationMs")
    && isFiniteNumber("requestCount");
}
|
|
2450
|
+
/**
 * Recomputes session usage by replaying, in order, the `llmUsage` attached to
 * assistant messages.
 *
 * @param {Iterable<{role: string, llmUsage?: object}>} messages
 * @returns {object|null} Accumulated session usage, or `null` when no
 *   assistant message carries usage.
 */
function rebuildSessionUsageFromMessages(messages) {
  let accumulated = null;
  for (const { role, llmUsage } of messages) {
    const carriesUsage = role === "assistant" && llmUsage;
    if (!carriesUsage) {
      continue;
    }
    accumulated = appendSessionUsage(accumulated, llmUsage);
  }
  return accumulated;
}
|
|
2459
|
+
/**
 * Formats a token count compactly: "999", "1.5k", "128k", "1.0M".
 *
 * The unit thresholds sit where the ROUNDED output would roll over to the
 * next unit, so boundary values no longer render as "10.0k" (9 950–9 999) or
 * "1000k" (999 500–999 999):
 *   - >= 999 500 -> millions, one decimal
 *   - >= 9 950   -> whole thousands
 *   - >= 1 000   -> thousands, one decimal
 *   - otherwise  -> the plain number
 *
 * @param {number} value - Token count.
 * @returns {string} Compact display string.
 */
function formatTokenCount(value) {
  if (value >= 999500) {
    return `${(value / 1e6).toFixed(1)}M`;
  }
  if (value >= 9950) {
    return `${Math.round(value / 1000)}k`;
  }
  if (value >= 1000) {
    return `${(value / 1000).toFixed(1)}k`;
  }
  return String(value);
}
|
|
2468
|
+
|
|
2084
2469
|
// src/core/chat-history.ts
|
|
2085
2470
|
var DEFAULT_HISTORY_KEY = "assistant-chat-history";
|
|
2086
2471
|
var DEFAULT_MAX_STORED = 100;
|
|
@@ -2094,6 +2479,7 @@ function toStored(message) {
|
|
|
2094
2479
|
replySuggestions: message.replySuggestions,
|
|
2095
2480
|
isError: message.isError,
|
|
2096
2481
|
llmSetupRequired: message.llmSetupRequired,
|
|
2482
|
+
llmUsage: message.llmUsage,
|
|
2097
2483
|
timestamp: message.timestamp
|
|
2098
2484
|
};
|
|
2099
2485
|
}
|
|
@@ -2107,6 +2493,7 @@ function fromStored(message) {
|
|
|
2107
2493
|
replySuggestions: isValidReplySuggestions(message.replySuggestions) ? message.replySuggestions : undefined,
|
|
2108
2494
|
isError: message.isError,
|
|
2109
2495
|
llmSetupRequired: message.llmSetupRequired,
|
|
2496
|
+
llmUsage: isStoredLlmTurnUsage(message.llmUsage) ? message.llmUsage : undefined,
|
|
2110
2497
|
timestamp: message.timestamp
|
|
2111
2498
|
};
|
|
2112
2499
|
}
|
|
@@ -2684,10 +3071,144 @@ async function testLlmConnection(settings) {
|
|
|
2684
3071
|
}
|
|
2685
3072
|
}
|
|
2686
3073
|
|
|
3074
|
+
// src/core/llm-context-breakdown.ts
|
|
3075
|
+
/**
 * Rough token estimate for a string: one token per four characters, rounded
 * up, with a minimum of 1 for any non-blank text.
 *
 * @param {string} text - Text to size (the untrimmed length is used).
 * @returns {number} 0 for empty / whitespace-only text, otherwise >= 1.
 */
function estimateTokenCount(text) {
  const isBlank = text.trim().length === 0;
  return isBlank ? 0 : Math.max(1, Math.ceil(text.length / 4));
}
|
|
3080
|
+
/**
 * Estimates the token cost of a tool list from the size of its JSON form.
 *
 * @param {Array<object>} tools - Tool definitions.
 * @returns {number} 0 for an empty list, otherwise the estimate for
 *   `JSON.stringify(tools)`.
 */
function estimateToolsTokens(tools) {
  const hasTools = tools.length !== 0;
  return hasTools ? estimateTokenCount(JSON.stringify(tools)) : 0;
}
|
|
3085
|
+
/**
 * Estimates the token cost of a list of chat messages.
 *
 * Each message contributes a fixed 4 tokens plus estimates for its string
 * `content`, its serialized `tool_calls` and its `tool_call_id`, when present.
 * Non-string `content` adds nothing beyond the fixed 4.
 *
 * @param {Array<{content?: unknown, tool_calls?: Array<object>, tool_call_id?: string}>} messages
 * @returns {number} Estimated total tokens.
 */
function estimateMessagesTokens(messages) {
  const PER_MESSAGE_OVERHEAD = 4;
  let total = 0;
  for (const message of messages) {
    let cost = PER_MESSAGE_OVERHEAD;
    const { content } = message;
    if (typeof content === "string" && content.length > 0) {
      cost += estimateTokenCount(content);
    }
    if (message.tool_calls?.length) {
      cost += estimateTokenCount(JSON.stringify(message.tool_calls));
    }
    if (message.tool_call_id) {
      cost += estimateTokenCount(message.tool_call_id);
    }
    total += cost;
  }
  return total;
}
|
|
3100
|
+
/**
 * Rescales the three estimated prompt parts so that they add up to
 * `targetTotal` while keeping their relative proportions.
 *
 * Uses largest-remainder apportionment: every part is floored, then the
 * leftover tokens go one each to the parts with the largest fractional
 * remainders (ties resolved in the order systemPrompt, toolDefinitions,
 * conversation). Rounding each part independently could make the parts sum
 * to target ± 1 (e.g. 1/1/1 scaled to 100 gave 33+33+33 = 99).
 *
 * @param {{systemPrompt: number, toolDefinitions: number, conversation: number}} parts
 *   Raw estimates (expected to be non-negative).
 * @param {number} targetTotal - Desired sum; rounded to an integer.
 * @returns {{systemPrompt: number, toolDefinitions: number, conversation: number}}
 *   Integer parts summing to the rounded target, or all zeros when the target
 *   or the raw total is not positive.
 */
function scaleParts(parts, targetTotal) {
  const keys = ["systemPrompt", "toolDefinitions", "conversation"];
  const rawTotal = parts.systemPrompt + parts.toolDefinitions + parts.conversation;
  if (targetTotal <= 0 || rawTotal <= 0) {
    return { systemPrompt: 0, toolDefinitions: 0, conversation: 0 };
  }
  const target = Math.round(targetTotal);
  // Multiply before dividing so exact integer shares stay exact.
  const exactShares = keys.map((key) => parts[key] * target / rawTotal);
  const allotted = exactShares.map((share) => Math.floor(share));
  let leftover = target - allotted.reduce((sum, share) => sum + share, 0);
  const byRemainderDesc = keys
    .map((_, index) => index)
    .sort((a, b) => (exactShares[b] - allotted[b]) - (exactShares[a] - allotted[a]));
  for (const index of byRemainderDesc) {
    if (leftover <= 0) {
      break;
    }
    allotted[index] += 1;
    leftover -= 1;
  }
  return {
    systemPrompt: allotted[0],
    toolDefinitions: allotted[1],
    conversation: allotted[2]
  };
}
|
|
3112
|
+
/**
 * Produces raw token estimates for the three prompt components.
 *
 * @param {{systemPrompt: string, tools: Array<object>, messages: Array<object>}} input
 * @returns {{systemPrompt: number, toolDefinitions: number, conversation: number}}
 */
function estimatePromptParts(input) {
  const systemPrompt = estimateTokenCount(input.systemPrompt);
  const toolDefinitions = estimateToolsTokens(input.tools);
  const conversation = estimateMessagesTokens(input.messages);
  return { systemPrompt, toolDefinitions, conversation };
}
|
|
3119
|
+
/**
 * Builds a per-category breakdown of context usage for one request.
 *
 * Prompt categories (system prompt, tool definitions, conversation) are
 * estimated from payload size. When the provider reported `promptTokens`, the
 * estimates are rescaled to `promptTokens - cachedTokens`; otherwise the raw
 * estimates are used. Completion, cached and reasoning segments come from the
 * reported usage. Segments with zero tokens are dropped from the result.
 *
 * @param {object} input
 * @param {string} input.systemPrompt
 * @param {Array<object>} input.tools
 * @param {Array<object>} input.messages
 * @param {object|null|undefined} input.usage - Parsed usage, if any.
 * @param {string} input.model
 * @param {Record<string, number>} [input.modelContextWindows]
 * @returns {{segments: Array<object>, promptTokens: number, completionTokens: number,
 *   totalTokens: number, contextWindow: number|null, contextUsedPercent: number|null}}
 */
function buildContextBreakdown(input) {
  const { usage } = input;
  const estimates = estimatePromptParts({
    systemPrompt: input.systemPrompt,
    tools: input.tools,
    messages: input.messages
  });
  const promptTokens = usage?.promptTokens ?? 0;
  const completionTokens = usage?.completionTokens ?? 0;
  const cachedTokens = usage?.cachedTokens ?? 0;
  const reasoningTokens = usage?.reasoningTokens ?? 0;
  const hasReportedPrompt = promptTokens > 0;
  const noReportedPrompt = promptTokens === 0;
  const scaled = hasReportedPrompt
    ? scaleParts(estimates, Math.max(0, promptTokens - cachedTokens))
    : estimates;

  const segments = [];
  if (scaled.systemPrompt > 0 || noReportedPrompt) {
    segments.push({
      id: "systemPrompt",
      tokens: scaled.systemPrompt,
      estimated: hasReportedPrompt
    });
  }
  if (scaled.toolDefinitions > 0 || (noReportedPrompt && input.tools.length > 0)) {
    segments.push({
      id: "toolDefinitions",
      tokens: scaled.toolDefinitions || estimates.toolDefinitions,
      estimated: true
    });
  }
  if (scaled.conversation > 0 || (noReportedPrompt && input.messages.length > 0)) {
    segments.push({
      id: "conversation",
      tokens: scaled.conversation || estimates.conversation,
      // NOTE(review): true for every non-negative count; kept as written.
      estimated: noReportedPrompt || hasReportedPrompt
    });
  }
  const reportedSegments = [
    ["completion", completionTokens],
    ["cached", cachedTokens],
    ["reasoning", reasoningTokens]
  ];
  for (const [id, tokens] of reportedSegments) {
    if (tokens > 0) {
      segments.push({ id, tokens });
    }
  }

  // Prefer the reported total; otherwise sum what was collected.
  const totalTokens = usage?.totalTokens ?? segments.reduce((sum, segment) => sum + segment.tokens, 0);
  const contextWindow = resolveContextWindow(input.model, input.modelContextWindows);
  let contextUsedPercent = null;
  if (contextWindow && totalTokens > 0) {
    contextUsedPercent = Math.min(100, totalTokens / contextWindow * 100);
  }
  return {
    segments: segments.filter((segment) => segment.tokens > 0),
    promptTokens,
    completionTokens,
    totalTokens,
    contextWindow,
    contextUsedPercent
  };
}
|
|
3173
|
+
/**
 * Merges two context breakdowns into a new one.
 *
 * Segments are matched by `id`: tokens of matching segments are added,
 * unmatched source segments are appended after the target's, and segments
 * with zero tokens are dropped. `totalTokens` is the sum of the merged
 * segments; the context window is taken from `target`, falling back to
 * `source`. Neither input is modified.
 *
 * @param {{segments: Array<{id: string, tokens: number}>, promptTokens: number,
 *   completionTokens: number, contextWindow: number|null}} target
 * @param {{segments: Array<{id: string, tokens: number}>, promptTokens: number,
 *   completionTokens: number, contextWindow: number|null}} source
 * @returns {{segments: Array<object>, promptTokens: number, completionTokens: number,
 *   totalTokens: number, contextWindow: number|null, contextUsedPercent: number|null}}
 */
function mergeBreakdownSegments(target, source) {
  // Copies keep the inputs untouched when token counts are added below.
  const byId = new Map(target.segments.map((segment) => [segment.id, { ...segment }]));
  source.segments.forEach((segment) => {
    const match = byId.get(segment.id);
    if (!match) {
      byId.set(segment.id, { ...segment });
      return;
    }
    match.tokens += segment.tokens;
  });
  const segments = Array.from(byId.values()).filter((segment) => segment.tokens > 0);
  let totalTokens = 0;
  for (const segment of segments) {
    totalTokens += segment.tokens;
  }
  const contextWindow = target.contextWindow ?? source.contextWindow;
  let contextUsedPercent = null;
  if (contextWindow && totalTokens > 0) {
    contextUsedPercent = Math.min(100, totalTokens / contextWindow * 100);
  }
  return {
    segments,
    promptTokens: target.promptTokens + source.promptTokens,
    completionTokens: target.completionTokens + source.completionTokens,
    totalTokens,
    contextWindow,
    contextUsedPercent
  };
}
|
|
3201
|
+
/**
 * Formats an estimated token count with a leading "~".
 *
 * @param {number} value - Estimated token count.
 * @returns {string} "0" for non-positive values, otherwise "~" followed by
 *   the `formatTokenCount` rendering.
 */
function formatApproxTokens(value) {
  const isPositive = value > 0;
  return isPositive ? `~${formatTokenCount(value)}` : "0";
}
|
|
3206
|
+
|
|
2687
3207
|
// src/core/llm-sse.ts
|
|
2688
3208
|
function createStreamParser() {
|
|
2689
3209
|
let content = "";
|
|
2690
3210
|
let model = null;
|
|
3211
|
+
let usage = null;
|
|
2691
3212
|
const toolAcc = new Map;
|
|
2692
3213
|
function toolCallAt(index) {
|
|
2693
3214
|
const existing = toolAcc.get(index);
|
|
@@ -2712,6 +3233,7 @@ function createStreamParser() {
|
|
|
2712
3233
|
content,
|
|
2713
3234
|
toolCalls: finalizeToolCalls(),
|
|
2714
3235
|
model,
|
|
3236
|
+
usage,
|
|
2715
3237
|
done: true
|
|
2716
3238
|
};
|
|
2717
3239
|
}
|
|
@@ -2723,6 +3245,9 @@ function createStreamParser() {
|
|
|
2723
3245
|
}
|
|
2724
3246
|
if (typeof json2.model === "string")
|
|
2725
3247
|
model = json2.model;
|
|
3248
|
+
const parsedUsage = parseUsage(json2.usage);
|
|
3249
|
+
if (parsedUsage)
|
|
3250
|
+
usage = parsedUsage;
|
|
2726
3251
|
const choice = json2.choices?.[0];
|
|
2727
3252
|
const delta = choice?.delta;
|
|
2728
3253
|
if (!delta)
|
|
@@ -2752,6 +3277,7 @@ function createStreamParser() {
|
|
|
2752
3277
|
content,
|
|
2753
3278
|
toolCalls: finalizeToolCalls(),
|
|
2754
3279
|
model,
|
|
3280
|
+
usage,
|
|
2755
3281
|
done: false
|
|
2756
3282
|
};
|
|
2757
3283
|
}
|
|
@@ -2770,6 +3296,7 @@ async function readSseStream(body, onChunk, signal) {
|
|
|
2770
3296
|
content: "",
|
|
2771
3297
|
toolCalls: [],
|
|
2772
3298
|
model: null,
|
|
3299
|
+
usage: null,
|
|
2773
3300
|
done: false
|
|
2774
3301
|
};
|
|
2775
3302
|
const abort = () => {
|
|
@@ -2932,7 +3459,8 @@ async function requestLlmCompletion(input) {
|
|
|
2932
3459
|
return {
|
|
2933
3460
|
content: message.content ?? null,
|
|
2934
3461
|
toolCalls: message.tool_calls ?? [],
|
|
2935
|
-
model: typeof data.model === "string" ? data.model : settings.model
|
|
3462
|
+
model: typeof data.model === "string" ? data.model : settings.model,
|
|
3463
|
+
usage: parseUsage(data.usage)
|
|
2936
3464
|
};
|
|
2937
3465
|
}
|
|
2938
3466
|
async function requestLlmCompletionStream(input, handlers) {
|
|
@@ -2942,7 +3470,11 @@ async function requestLlmCompletionStream(input, handlers) {
|
|
|
2942
3470
|
const res = await fetch(buildCompletionsUrl(settings.baseUrl), {
|
|
2943
3471
|
method: "POST",
|
|
2944
3472
|
headers: buildLlmRequestHeaders(settings.apiKey),
|
|
2945
|
-
body: JSON.stringify({
|
|
3473
|
+
body: JSON.stringify({
|
|
3474
|
+
...payload,
|
|
3475
|
+
stream: true,
|
|
3476
|
+
stream_options: { include_usage: true }
|
|
3477
|
+
}),
|
|
2946
3478
|
signal: input.signal
|
|
2947
3479
|
});
|
|
2948
3480
|
if (!res.ok) {
|
|
@@ -2960,11 +3492,15 @@ async function requestLlmCompletionStream(input, handlers) {
|
|
|
2960
3492
|
if (chunk.contentDelta) {
|
|
2961
3493
|
handlers.onUpdate(chunk.content);
|
|
2962
3494
|
}
|
|
3495
|
+
if (chunk.usage) {
|
|
3496
|
+
handlers.onUsage?.(chunk.usage);
|
|
3497
|
+
}
|
|
2963
3498
|
}, input.signal);
|
|
2964
3499
|
return {
|
|
2965
3500
|
content: final.content || null,
|
|
2966
3501
|
toolCalls: final.toolCalls,
|
|
2967
|
-
model: final.model ?? settings.model
|
|
3502
|
+
model: final.model ?? settings.model,
|
|
3503
|
+
usage: final.usage
|
|
2968
3504
|
};
|
|
2969
3505
|
} catch (error) {
|
|
2970
3506
|
if (input.signal?.aborted || error instanceof DOMException && error.name === "AbortError") {
|
|
@@ -2973,6 +3509,22 @@ async function requestLlmCompletionStream(input, handlers) {
|
|
|
2973
3509
|
throw error;
|
|
2974
3510
|
}
|
|
2975
3511
|
}
|
|
3512
|
+
/**
 * Computes the context breakdown of an empty conversation: the system prompt
 * plus tool definitions only, with no messages and no reported usage.
 *
 * @param {object} input
 * @param {Array<object>} input.tools - Tools offered to the model.
 * @param {string} input.model - Model name used to resolve the context window.
 * @param {Record<string, number>} [input.modelContextWindows] - Size overrides.
 * @returns {Promise<object>} The result of `buildContextBreakdown`.
 */
async function computeBaselineContextBreakdown(input) {
  const { tools, model, modelContextWindows } = input;
  const settings = await resolveAssistantLlmSettings();
  const basePrompt = settings.systemPrompt ?? DEFAULT_SYSTEM_PROMPT;
  const systemPrompt = buildSystemPromptWithTools(basePrompt, tools);
  // Caller-supplied tools first, then the built-in interactive chat tools.
  const clientTools = [...tools.map(toClientTool), ...getChatInteractiveTools()];
  return buildContextBreakdown({
    systemPrompt,
    tools: clientTools,
    messages: [],
    usage: null,
    model,
    modelContextWindows
  });
}
|
|
2976
3528
|
async function runLlmAgent(input) {
|
|
2977
3529
|
const apiMessages = [
|
|
2978
3530
|
...input.history,
|
|
@@ -2987,8 +3539,40 @@ async function runLlmAgent(input) {
|
|
|
2987
3539
|
];
|
|
2988
3540
|
let finalContent = "";
|
|
2989
3541
|
let endedWithPostResponseTool = false;
|
|
3542
|
+
const collectedUsages = [];
|
|
3543
|
+
let lastModel = input.model ?? null;
|
|
3544
|
+
let lastBreakdown = null;
|
|
2990
3545
|
const settings = await resolveAssistantLlmSettings();
|
|
2991
3546
|
const systemPrompt = buildSystemPromptWithTools(settings.systemPrompt ?? DEFAULT_SYSTEM_PROMPT, input.tools);
|
|
3547
|
+
function buildStreamHandlers() {
|
|
3548
|
+
if (!input.stream)
|
|
3549
|
+
return;
|
|
3550
|
+
return {
|
|
3551
|
+
turnId: input.stream.turnId,
|
|
3552
|
+
onUpdate: (content) => {
|
|
3553
|
+
finalContent = content;
|
|
3554
|
+
input.stream?.onUpdate(content);
|
|
3555
|
+
},
|
|
3556
|
+
onUsage: (usage) => {
|
|
3557
|
+
input.stream?.onUsage?.(usage);
|
|
3558
|
+
}
|
|
3559
|
+
};
|
|
3560
|
+
}
|
|
3561
|
+
function recordCompletionUsage(completion, messages) {
|
|
3562
|
+
lastModel = completion.model;
|
|
3563
|
+
if (completion.usage) {
|
|
3564
|
+
collectedUsages.push(completion.usage);
|
|
3565
|
+
input.stream?.onUsage?.(completion.usage);
|
|
3566
|
+
}
|
|
3567
|
+
lastBreakdown = buildContextBreakdown({
|
|
3568
|
+
systemPrompt,
|
|
3569
|
+
tools: clientTools,
|
|
3570
|
+
messages,
|
|
3571
|
+
usage: completion.usage,
|
|
3572
|
+
model: completion.model,
|
|
3573
|
+
modelContextWindows: input.modelContextWindows
|
|
3574
|
+
});
|
|
3575
|
+
}
|
|
2992
3576
|
for (let step = 0;step < MAX_AGENT_STEPS; step++) {
|
|
2993
3577
|
if (input.signal?.aborted) {
|
|
2994
3578
|
throw new ChatAbortedError;
|
|
@@ -2999,14 +3583,9 @@ async function runLlmAgent(input) {
|
|
|
2999
3583
|
model: input.model,
|
|
3000
3584
|
systemPrompt,
|
|
3001
3585
|
signal: input.signal,
|
|
3002
|
-
stream:
|
|
3003
|
-
turnId: input.stream.turnId,
|
|
3004
|
-
onUpdate: (content) => {
|
|
3005
|
-
finalContent = content;
|
|
3006
|
-
input.stream?.onUpdate(content);
|
|
3007
|
-
}
|
|
3008
|
-
} : undefined
|
|
3586
|
+
stream: buildStreamHandlers()
|
|
3009
3587
|
});
|
|
3588
|
+
recordCompletionUsage(completion, apiMessages);
|
|
3010
3589
|
if (completion.toolCalls.length === 0) {
|
|
3011
3590
|
finalContent = completion.content?.trim() || "I couldn't produce a response. Try rephrasing your request.";
|
|
3012
3591
|
input.stream?.onUpdate(finalContent);
|
|
@@ -3104,14 +3683,9 @@ async function runLlmAgent(input) {
|
|
|
3104
3683
|
model: input.model,
|
|
3105
3684
|
systemPrompt,
|
|
3106
3685
|
signal: input.signal,
|
|
3107
|
-
stream:
|
|
3108
|
-
turnId: input.stream.turnId,
|
|
3109
|
-
onUpdate: (content) => {
|
|
3110
|
-
finalContent = content;
|
|
3111
|
-
input.stream?.onUpdate(content);
|
|
3112
|
-
}
|
|
3113
|
-
} : undefined
|
|
3686
|
+
stream: buildStreamHandlers()
|
|
3114
3687
|
});
|
|
3688
|
+
recordCompletionUsage(completion, apiMessages);
|
|
3115
3689
|
finalContent = completion.content?.trim() || "Done — see the trace below for details.";
|
|
3116
3690
|
input.stream?.onUpdate(finalContent);
|
|
3117
3691
|
apiMessages.push({
|
|
@@ -3122,7 +3696,10 @@ async function runLlmAgent(input) {
|
|
|
3122
3696
|
return {
|
|
3123
3697
|
assistantMessages: finalContent ? [{ id: input.stream?.turnId, content: finalContent }] : [],
|
|
3124
3698
|
apiMessages,
|
|
3125
|
-
lastToolResult
|
|
3699
|
+
lastToolResult,
|
|
3700
|
+
usages: collectedUsages,
|
|
3701
|
+
model: lastModel,
|
|
3702
|
+
contextBreakdown: lastBreakdown
|
|
3126
3703
|
};
|
|
3127
3704
|
}
|
|
3128
3705
|
function resolveAssistantContentForApi(message) {
|
|
@@ -3484,11 +4061,34 @@ function createLlmSettingsStorage(storageKey) {
|
|
|
3484
4061
|
}
|
|
3485
4062
|
};
|
|
3486
4063
|
}
|
|
4064
|
+
function maskApiKey(apiKey, visibleHead = 4, visibleTail = 4) {
|
|
4065
|
+
const trimmed = apiKey.trim();
|
|
4066
|
+
if (!trimmed)
|
|
4067
|
+
return "";
|
|
4068
|
+
if (trimmed.length <= visibleHead + visibleTail) {
|
|
4069
|
+
if (trimmed.length <= 2)
|
|
4070
|
+
return "***";
|
|
4071
|
+
return `${trimmed.slice(0, 1)}***${trimmed.slice(-1)}`;
|
|
4072
|
+
}
|
|
4073
|
+
return `${trimmed.slice(0, visibleHead)}***${trimmed.slice(-visibleTail)}`;
|
|
4074
|
+
}
|
|
4075
|
+
function isUnchangedApiKeyInput(input, existingApiKey) {
|
|
4076
|
+
const trimmed = input.trim();
|
|
4077
|
+
if (!trimmed)
|
|
4078
|
+
return true;
|
|
4079
|
+
if (!existingApiKey)
|
|
4080
|
+
return false;
|
|
4081
|
+
return trimmed === maskApiKey(existingApiKey);
|
|
4082
|
+
}
|
|
4083
|
+
function apiKeyFormDisplayValue(apiKey) {
|
|
4084
|
+
return apiKey ? maskApiKey(apiKey) : "";
|
|
4085
|
+
}
|
|
3487
4086
|
function toStoredSettings(values, existingApiKey) {
|
|
3488
4087
|
const apiKeyInput = values.apiKey.trim();
|
|
4088
|
+
const apiKey = isUnchangedApiKeyInput(apiKeyInput, existingApiKey) ? existingApiKey : apiKeyInput.length > 0 ? apiKeyInput : existingApiKey;
|
|
3489
4089
|
return {
|
|
3490
4090
|
baseUrl: values.baseUrl.trim() || DEFAULT_LLM_BASE_URL,
|
|
3491
|
-
apiKey
|
|
4091
|
+
apiKey,
|
|
3492
4092
|
model: values.model.trim() || DEFAULT_LLM_MODEL,
|
|
3493
4093
|
models: parseModelsText(values.modelsText),
|
|
3494
4094
|
systemPrompt: values.systemPrompt
|
|
@@ -3497,7 +4097,7 @@ function toStoredSettings(values, existingApiKey) {
|
|
|
3497
4097
|
function createLlmSettingsFormState(settings, hasStoredApiKey, defaultSystemPrompt = DEFAULT_ASSISTANT_SYSTEM_PROMPT) {
|
|
3498
4098
|
return {
|
|
3499
4099
|
baseUrl: settings.baseUrl,
|
|
3500
|
-
apiKey:
|
|
4100
|
+
apiKey: apiKeyFormDisplayValue(settings.apiKey),
|
|
3501
4101
|
model: settings.model,
|
|
3502
4102
|
modelsText: formatModelsText(settings.models),
|
|
3503
4103
|
systemPrompt: settings.systemPrompt ?? "",
|
|
@@ -3508,7 +4108,7 @@ function createLlmSettingsFormState(settings, hasStoredApiKey, defaultSystemProm
|
|
|
3508
4108
|
function createLlmSettingsFormStateFromStored(stored, defaultSystemPrompt = DEFAULT_ASSISTANT_SYSTEM_PROMPT) {
|
|
3509
4109
|
return {
|
|
3510
4110
|
baseUrl: stored.baseUrl,
|
|
3511
|
-
apiKey:
|
|
4111
|
+
apiKey: apiKeyFormDisplayValue(stored.apiKey),
|
|
3512
4112
|
model: stored.model,
|
|
3513
4113
|
modelsText: formatModelsText(stored.models),
|
|
3514
4114
|
systemPrompt: stored.systemPrompt,
|
|
@@ -3637,17 +4237,47 @@ function createAssistantStore(deps) {
|
|
|
3637
4237
|
llmEnabled: false,
|
|
3638
4238
|
model: initialSelectedModel
|
|
3639
4239
|
});
|
|
4240
|
+
const initialLlmUsage = rebuildSessionUsageFromMessages(messages);
|
|
3640
4241
|
const store = create((set, get) => {
|
|
4242
|
+
async function refreshBaselineUsage(model) {
|
|
4243
|
+
try {
|
|
4244
|
+
const tools = await resolvedDeps.listTools();
|
|
4245
|
+
const baselineBreakdown = await computeBaselineContextBreakdown({
|
|
4246
|
+
tools,
|
|
4247
|
+
model,
|
|
4248
|
+
modelContextWindows: deps.modelContextWindows
|
|
4249
|
+
});
|
|
4250
|
+
set((state) => ({
|
|
4251
|
+
llmUsage: {
|
|
4252
|
+
...state.llmUsage ?? createEmptySessionUsage(),
|
|
4253
|
+
baselineBreakdown
|
|
4254
|
+
}
|
|
4255
|
+
}));
|
|
4256
|
+
} catch {}
|
|
4257
|
+
}
|
|
3641
4258
|
async function runLlmChatTurn(message) {
|
|
3642
4259
|
const tools = await resolvedDeps.listTools();
|
|
3643
4260
|
const storedMessages = get().messages.filter((m) => m.id !== "welcome");
|
|
3644
4261
|
const priorMessages = storedMessages.at(-1)?.role === "user" ? storedMessages.slice(0, -1) : storedMessages;
|
|
3645
4262
|
const llmHistory = buildLlmHistory(priorMessages);
|
|
3646
4263
|
const turnId = crypto.randomUUID();
|
|
4264
|
+
const turnStart = performance.now();
|
|
4265
|
+
const model = get().selectedModel ?? get().llmModel ?? DEFAULT_LLM_MODEL;
|
|
4266
|
+
const liveUsages = [];
|
|
3647
4267
|
set((state) => ({
|
|
3648
|
-
messages: [...state.messages, createTurnMessage(turnId)]
|
|
4268
|
+
messages: [...state.messages, createTurnMessage(turnId)],
|
|
4269
|
+
llmUsage: {
|
|
4270
|
+
...state.llmUsage ?? createEmptySessionUsage(),
|
|
4271
|
+
currentTurn: buildTurnUsage({
|
|
4272
|
+
usages: [],
|
|
4273
|
+
model,
|
|
4274
|
+
durationMs: 0,
|
|
4275
|
+
modelContextWindows: deps.modelContextWindows,
|
|
4276
|
+
streaming: true
|
|
4277
|
+
})
|
|
4278
|
+
}
|
|
3649
4279
|
}));
|
|
3650
|
-
await runLlmAgent({
|
|
4280
|
+
const agentResult = await runLlmAgent({
|
|
3651
4281
|
userMessage: message,
|
|
3652
4282
|
history: llmHistory,
|
|
3653
4283
|
tools,
|
|
@@ -3660,7 +4290,31 @@ function createAssistantStore(deps) {
|
|
|
3660
4290
|
messages: patchTurnMessage(state.messages, turnId, (msg) => ({
|
|
3661
4291
|
...msg,
|
|
3662
4292
|
content
|
|
3663
|
-
}))
|
|
4293
|
+
})),
|
|
4294
|
+
llmUsage: state.llmUsage?.currentTurn ? {
|
|
4295
|
+
...state.llmUsage,
|
|
4296
|
+
currentTurn: {
|
|
4297
|
+
...state.llmUsage.currentTurn,
|
|
4298
|
+
durationMs: Math.round(performance.now() - turnStart),
|
|
4299
|
+
streaming: true
|
|
4300
|
+
}
|
|
4301
|
+
} : state.llmUsage
|
|
4302
|
+
}));
|
|
4303
|
+
},
|
|
4304
|
+
onUsage: (usage) => {
|
|
4305
|
+
liveUsages.push(usage);
|
|
4306
|
+
const partialTurn = buildTurnUsage({
|
|
4307
|
+
usages: liveUsages,
|
|
4308
|
+
model,
|
|
4309
|
+
durationMs: Math.round(performance.now() - turnStart),
|
|
4310
|
+
modelContextWindows: deps.modelContextWindows,
|
|
4311
|
+
streaming: true
|
|
4312
|
+
});
|
|
4313
|
+
set((state) => ({
|
|
4314
|
+
llmUsage: {
|
|
4315
|
+
...state.llmUsage ?? createEmptySessionUsage(),
|
|
4316
|
+
currentTurn: partialTurn
|
|
4317
|
+
}
|
|
3664
4318
|
}));
|
|
3665
4319
|
}
|
|
3666
4320
|
},
|
|
@@ -3699,15 +4353,27 @@ function createAssistantStore(deps) {
|
|
|
3699
4353
|
const result = await resolvedDeps.invokeTool(name, args);
|
|
3700
4354
|
deps.onToolInvoked?.(result);
|
|
3701
4355
|
return result;
|
|
3702
|
-
}
|
|
4356
|
+
},
|
|
4357
|
+
modelContextWindows: deps.modelContextWindows
|
|
3703
4358
|
});
|
|
4359
|
+
const turnUsage = {
|
|
4360
|
+
...buildTurnUsage({
|
|
4361
|
+
usages: agentResult.usages,
|
|
4362
|
+
model: agentResult.model ?? model,
|
|
4363
|
+
durationMs: performance.now() - turnStart,
|
|
4364
|
+
modelContextWindows: deps.modelContextWindows
|
|
4365
|
+
}),
|
|
4366
|
+
contextBreakdown: agentResult.contextBreakdown ?? undefined
|
|
4367
|
+
};
|
|
3704
4368
|
set((state) => ({
|
|
3705
4369
|
messages: state.messages.map((msg) => msg.id === turnId ? {
|
|
3706
4370
|
...msg,
|
|
3707
4371
|
streaming: false,
|
|
4372
|
+
llmUsage: turnUsage,
|
|
3708
4373
|
content: msg.content.trim() || (msg.activity?.length ? "" : "I couldn't produce a response.")
|
|
3709
4374
|
} : msg),
|
|
3710
|
-
chatLoading: false
|
|
4375
|
+
chatLoading: false,
|
|
4376
|
+
llmUsage: appendSessionUsage(state.llmUsage, turnUsage)
|
|
3711
4377
|
}));
|
|
3712
4378
|
}
|
|
3713
4379
|
return {
|
|
@@ -3718,6 +4384,7 @@ function createAssistantStore(deps) {
|
|
|
3718
4384
|
llmModels: [],
|
|
3719
4385
|
llmModelsLoading: false,
|
|
3720
4386
|
selectedModel: initialSelectedModel,
|
|
4387
|
+
llmUsage: initialLlmUsage,
|
|
3721
4388
|
loadLlmStatus: async () => {
|
|
3722
4389
|
set({ llmModelsLoading: true });
|
|
3723
4390
|
try {
|
|
@@ -3736,6 +4403,9 @@ function createAssistantStore(deps) {
|
|
|
3736
4403
|
selectedModel,
|
|
3737
4404
|
messages: patchWelcomeMessage(state.messages, deps.welcomeMessage, selectedModel, status.enabled)
|
|
3738
4405
|
}));
|
|
4406
|
+
if (status.enabled) {
|
|
4407
|
+
refreshBaselineUsage(selectedModel);
|
|
4408
|
+
}
|
|
3739
4409
|
} finally {
|
|
3740
4410
|
set({ llmModelsLoading: false });
|
|
3741
4411
|
}
|
|
@@ -3812,7 +4482,8 @@ function createAssistantStore(deps) {
|
|
|
3812
4482
|
const { llmEnabled, selectedModel } = get();
|
|
3813
4483
|
history.clear();
|
|
3814
4484
|
set({
|
|
3815
|
-
messages: [deps.welcomeMessage({ llmEnabled, model: selectedModel })]
|
|
4485
|
+
messages: [deps.welcomeMessage({ llmEnabled, model: selectedModel })],
|
|
4486
|
+
llmUsage: null
|
|
3816
4487
|
});
|
|
3817
4488
|
},
|
|
3818
4489
|
stopChat: () => {
|
|
@@ -4118,6 +4789,7 @@ export {
|
|
|
4118
4789
|
runAssistantChatCommand,
|
|
4119
4790
|
resolveSelectedModel,
|
|
4120
4791
|
resolveInteractiveToolResult,
|
|
4792
|
+
resolveContextWindow,
|
|
4121
4793
|
resolveAssistantToolHooks,
|
|
4122
4794
|
resolveAssistantStoreDependencies,
|
|
4123
4795
|
resolveAssistantLlmSettings,
|
|
@@ -4125,9 +4797,11 @@ export {
|
|
|
4125
4797
|
resetAssistantLlm,
|
|
4126
4798
|
requestLlmCompletion,
|
|
4127
4799
|
rejectAllInteractiveToolWaiters,
|
|
4800
|
+
rebuildSessionUsageFromMessages,
|
|
4128
4801
|
prepareMarkdown,
|
|
4129
4802
|
persistStoredModelSelection,
|
|
4130
4803
|
peekStoredModel,
|
|
4804
|
+
parseUsage,
|
|
4131
4805
|
parseSuggestedPromptsResponse,
|
|
4132
4806
|
parseModelsText,
|
|
4133
4807
|
parseChatCommand,
|
|
@@ -4136,7 +4810,9 @@ export {
|
|
|
4136
4810
|
normalizeCodeLineEndings,
|
|
4137
4811
|
migrateLegacyModelStorage,
|
|
4138
4812
|
mergeLlmSettings,
|
|
4813
|
+
mergeBreakdownSegments,
|
|
4139
4814
|
listChatCommands,
|
|
4815
|
+
isStoredLlmTurnUsage,
|
|
4140
4816
|
isLocalLlmBaseUrl,
|
|
4141
4817
|
isLlmUnavailableMessage,
|
|
4142
4818
|
isLlmSettingsFormDirty,
|
|
@@ -4146,32 +4822,43 @@ export {
|
|
|
4146
4822
|
getFallbackModels,
|
|
4147
4823
|
getChatCommandSuggestions,
|
|
4148
4824
|
fromLlmToolName,
|
|
4825
|
+
formatTokenCount,
|
|
4149
4826
|
formatModelsText,
|
|
4150
4827
|
formatLabel,
|
|
4151
4828
|
formatJsonIfLarge,
|
|
4829
|
+
formatApproxTokens,
|
|
4152
4830
|
filterChatCommands,
|
|
4153
4831
|
fetchProviderModels,
|
|
4154
4832
|
fetchLlmStatus,
|
|
4833
|
+
estimateTokenCount,
|
|
4834
|
+
estimatePromptParts,
|
|
4155
4835
|
createLlmSettingsStorage,
|
|
4156
4836
|
createLlmSettingsFormStateFromStored,
|
|
4157
4837
|
createLlmSettingsFormState,
|
|
4838
|
+
createEmptySessionUsage,
|
|
4158
4839
|
createDefaultStoredSettings,
|
|
4159
4840
|
createChatHistoryHelpers,
|
|
4160
4841
|
createAssistantToolRegistry,
|
|
4161
4842
|
createAssistantStore,
|
|
4162
4843
|
connectExternalTools,
|
|
4163
4844
|
configureAssistantLlm,
|
|
4845
|
+
computeTokensPerSecond,
|
|
4846
|
+
computeBaselineContextBreakdown,
|
|
4164
4847
|
completionForChatCommand,
|
|
4165
4848
|
clearProviderModelCache,
|
|
4166
4849
|
childrenToText,
|
|
4167
4850
|
chatActivityStepLabel,
|
|
4851
|
+
buildTurnUsage,
|
|
4168
4852
|
buildSystemPromptWithTools,
|
|
4169
4853
|
buildModelsUrl,
|
|
4170
4854
|
buildLlmRequestHeaders,
|
|
4171
4855
|
buildLlmHistory,
|
|
4172
4856
|
buildDefaultLlmSettings,
|
|
4857
|
+
buildContextBreakdown,
|
|
4173
4858
|
buildCompletionsUrl,
|
|
4174
4859
|
assistantToStored,
|
|
4860
|
+
appendSessionUsage,
|
|
4861
|
+
aggregateUsage,
|
|
4175
4862
|
SUGGEST_REPLIES_TOOL,
|
|
4176
4863
|
LlmUpstreamError,
|
|
4177
4864
|
LlmNotConfiguredError,
|