alvin-bot 4.5.0 → 4.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +150 -0
- package/README.md +25 -2
- package/alvin-bot-4.5.1.tgz +0 -0
- package/bin/cli.js +246 -0
- package/dist/handlers/commands.js +461 -63
- package/dist/handlers/message.js +209 -14
- package/dist/i18n.js +470 -13
- package/dist/index.js +44 -5
- package/dist/providers/claude-sdk-provider.js +106 -14
- package/dist/providers/ollama-provider.js +32 -0
- package/dist/providers/openai-compatible.js +10 -1
- package/dist/providers/registry.js +112 -17
- package/dist/providers/types.js +25 -3
- package/dist/services/compaction.js +2 -0
- package/dist/services/cron.js +53 -42
- package/dist/services/heartbeat.js +41 -7
- package/dist/services/language-detect.js +12 -2
- package/dist/services/ollama-manager.js +339 -0
- package/dist/services/personality.js +20 -14
- package/dist/services/session.js +21 -3
- package/dist/services/subagent-delivery.js +111 -0
- package/dist/services/subagents.js +341 -27
- package/dist/services/telegram.js +28 -1
- package/dist/services/updater.js +158 -0
- package/dist/services/usage-tracker.js +11 -4
- package/dist/services/users.js +2 -1
- package/dist/tui/index.js +36 -30
- package/docs/HANDBOOK.md +819 -0
- package/package.json +7 -2
- package/test/claude-sdk-provider.test.ts +69 -0
- package/test/i18n.test.ts +108 -0
- package/test/registry.test.ts +201 -0
- package/test/subagent-delivery.test.ts +169 -0
- package/test/subagents-commands.test.ts +64 -0
- package/test/subagents-config.test.ts +108 -0
- package/test/subagents-depth.test.ts +58 -0
- package/test/subagents-inheritance.test.ts +67 -0
- package/test/subagents-name-resolver.test.ts +122 -0
- package/test/subagents-priority-reject.test.ts +60 -0
- package/test/subagents-shutdown.test.ts +126 -0
- package/test/subagents-toolset.test.ts +51 -0
- package/vitest.config.ts +17 -0
|
@@ -10,7 +10,20 @@ import { query } from "@anthropic-ai/claude-agent-sdk";
|
|
|
10
10
|
import { readFileSync } from "fs";
|
|
11
11
|
import { resolve, dirname } from "path";
|
|
12
12
|
import { fileURLToPath } from "url";
|
|
13
|
+
import { execFile } from "child_process";
|
|
14
|
+
import { promisify } from "util";
|
|
13
15
|
import { findClaudeBinary } from "../find-claude-binary.js";
|
|
16
|
+
const execFileAsync = promisify(execFile);
|
|
17
|
+
/**
 * Reports whether `text` begins with the Claude CLI "Not logged in" message.
 *
 * The match is case-insensitive, tolerates leading whitespace, and requires a
 * word boundary after "in" (so "not logged into …" does not match). The
 * phrase must be at the very start of the text; it is not searched for
 * further in.
 *
 * @param {string | null | undefined} text - Output to inspect.
 * @returns {boolean} `true` when the text starts with the auth-error phrase,
 *   `false` otherwise (including for empty or missing text).
 */
export function isAuthErrorOutput(text) {
    const authErrorPrefix = /^\s*not logged in\b/i;
    return text ? authErrorPrefix.test(text) : false;
}
|
|
14
27
|
const BOT_PROJECT_ROOT = resolve(dirname(fileURLToPath(import.meta.url)), "../..");
|
|
15
28
|
// Load CLAUDE.md once at startup
|
|
16
29
|
let botClaudeMd = "";
|
|
@@ -26,6 +39,11 @@ const CHECKPOINT_TOOL_THRESHOLD = 15;
|
|
|
26
39
|
const CHECKPOINT_MSG_THRESHOLD = 10;
|
|
27
40
|
export class ClaudeSDKProvider {
|
|
28
41
|
config;
|
|
42
|
+
// Cache the availability check: execFile on every user message would block
|
|
43
|
+
// the bot for ~0-5s each time. A 60s cache is safe — the CLI binary does
|
|
44
|
+
// not disappear mid-session.
|
|
45
|
+
availabilityCache = null;
|
|
46
|
+
static AVAILABILITY_CACHE_MS = 60_000;
|
|
29
47
|
constructor(config) {
|
|
30
48
|
this.config = {
|
|
31
49
|
type: "claude-sdk",
|
|
@@ -46,9 +64,23 @@ export class ClaudeSDKProvider {
|
|
|
46
64
|
let prompt = options.prompt;
|
|
47
65
|
const sessionState = options._sessionState;
|
|
48
66
|
if (sessionState) {
|
|
49
|
-
|
|
67
|
+
// Checkpoint reminder injection with COOLDOWN.
|
|
68
|
+
//
|
|
69
|
+
// Old behaviour: once either threshold was crossed, the hint got
|
|
70
|
+
// prepended to EVERY subsequent turn's prompt. That forced Claude
|
|
71
|
+
// to detour through memory-file reads/writes on every single turn,
|
|
72
|
+
// which bloated turn latency in long sessions and was a major
|
|
73
|
+
// contributor to the 5-minute hard timeout firing.
|
|
74
|
+
//
|
|
75
|
+
// New behaviour: inject only every CHECKPOINT_REMINDER_EVERY turns
|
|
76
|
+
// after the threshold is reached. At messageCount 10 → injected,
|
|
77
|
+
// 11/12/13/14 → skipped, 15 → injected again, etc. 80% reduction
|
|
78
|
+
// in per-turn overhead while still giving Claude periodic reminders.
|
|
79
|
+
const CHECKPOINT_REMINDER_EVERY = 5;
|
|
80
|
+
const overThreshold = sessionState.toolUseCount >= CHECKPOINT_TOOL_THRESHOLD ||
|
|
50
81
|
sessionState.messageCount >= CHECKPOINT_MSG_THRESHOLD;
|
|
51
|
-
|
|
82
|
+
const onCooldownBeat = sessionState.messageCount % CHECKPOINT_REMINDER_EVERY === 0;
|
|
83
|
+
if (overThreshold && onCooldownBeat) {
|
|
52
84
|
prompt = `[CHECKPOINT] Du hast bereits ${sessionState.toolUseCount} Tool-Aufrufe und ${sessionState.messageCount} Nachrichten in dieser Session. Schreibe jetzt einen Checkpoint in deine Memory-Datei (docs/memory/YYYY-MM-DD.md) bevor du diese Anfrage bearbeitest.\n\n${prompt}`;
|
|
53
85
|
}
|
|
54
86
|
}
|
|
@@ -56,15 +88,26 @@ export class ClaudeSDKProvider {
|
|
|
56
88
|
const systemPrompt = options.systemPrompt
|
|
57
89
|
? `${options.systemPrompt}\n\n${botClaudeMd}`
|
|
58
90
|
: botClaudeMd;
|
|
91
|
+
// Build a real AbortController the SDK can call .abort() on.
|
|
92
|
+
// The previous implementation cast a plain {signal} object to AbortController,
|
|
93
|
+
// which broke SDK-internal cancellation and left orphan subprocesses.
|
|
94
|
+
let internalAbortController;
|
|
95
|
+
if (options.abortSignal) {
|
|
96
|
+
internalAbortController = new AbortController();
|
|
97
|
+
if (options.abortSignal.aborted) {
|
|
98
|
+
internalAbortController.abort();
|
|
99
|
+
}
|
|
100
|
+
else {
|
|
101
|
+
options.abortSignal.addEventListener("abort", () => internalAbortController?.abort(), { once: true });
|
|
102
|
+
}
|
|
103
|
+
}
|
|
59
104
|
try {
|
|
60
105
|
const claudePath = findClaudeBinary();
|
|
61
106
|
const q = query({
|
|
62
107
|
prompt,
|
|
63
108
|
options: {
|
|
64
109
|
cwd: options.workingDir || process.cwd(),
|
|
65
|
-
abortController:
|
|
66
|
-
? { signal: options.abortSignal }
|
|
67
|
-
: undefined,
|
|
110
|
+
abortController: internalAbortController,
|
|
68
111
|
resume: options.sessionId ?? undefined,
|
|
69
112
|
pathToClaudeCodeExecutable: claudePath,
|
|
70
113
|
permissionMode: "bypassPermissions",
|
|
@@ -76,7 +119,7 @@ export class ClaudeSDKProvider {
|
|
|
76
119
|
"WebSearch", "WebFetch", "Task",
|
|
77
120
|
],
|
|
78
121
|
systemPrompt,
|
|
79
|
-
effort: (options.effort || "
|
|
122
|
+
effort: (options.effort || "medium"),
|
|
80
123
|
maxTurns: 50,
|
|
81
124
|
betas: ["context-1m-2025-08-07"],
|
|
82
125
|
},
|
|
@@ -97,6 +140,17 @@ export class ClaudeSDKProvider {
|
|
|
97
140
|
if (assistantMsg.message?.content) {
|
|
98
141
|
for (const block of assistantMsg.message.content) {
|
|
99
142
|
if ("text" in block && block.text) {
|
|
143
|
+
// Guard against "Not logged in" leaking as assistant text.
|
|
144
|
+
// If the very first text chunk matches the CLI auth-error
|
|
145
|
+
// pattern, surface it as an error chunk instead of rendering
|
|
146
|
+
// it as a normal response.
|
|
147
|
+
if (!accumulatedText && isAuthErrorOutput(block.text)) {
|
|
148
|
+
yield {
|
|
149
|
+
type: "error",
|
|
150
|
+
error: "Claude CLI is not logged in. Run `claude login` on this machine.",
|
|
151
|
+
};
|
|
152
|
+
return;
|
|
153
|
+
}
|
|
100
154
|
accumulatedText += block.text;
|
|
101
155
|
yield {
|
|
102
156
|
type: "text",
|
|
@@ -107,9 +161,25 @@ export class ClaudeSDKProvider {
|
|
|
107
161
|
}
|
|
108
162
|
if ("name" in block) {
|
|
109
163
|
localToolUseCount++;
|
|
164
|
+
// Serialise the tool input (parameters) so the message
|
|
165
|
+
// handler can surface detail for specific tools — most
|
|
166
|
+
// importantly the "Task" tool where `input.description`
|
|
167
|
+
// describes what sub-task Claude is delegating.
|
|
168
|
+
let toolInputStr;
|
|
169
|
+
if ("input" in block && block.input !== undefined) {
|
|
170
|
+
try {
|
|
171
|
+
const raw = JSON.stringify(block.input);
|
|
172
|
+
// cap at 500 chars to keep status lines manageable
|
|
173
|
+
toolInputStr = raw.length > 500 ? raw.slice(0, 500) + "…" : raw;
|
|
174
|
+
}
|
|
175
|
+
catch {
|
|
176
|
+
// unserializable — skip
|
|
177
|
+
}
|
|
178
|
+
}
|
|
110
179
|
yield {
|
|
111
180
|
type: "tool_use",
|
|
112
181
|
toolName: block.name,
|
|
182
|
+
toolInput: toolInputStr,
|
|
113
183
|
sessionId: capturedSessionId,
|
|
114
184
|
};
|
|
115
185
|
}
|
|
@@ -148,19 +218,41 @@ export class ClaudeSDKProvider {
|
|
|
148
218
|
}
|
|
149
219
|
}
|
|
150
220
|
async isAvailable() {
|
|
151
|
-
//
|
|
152
|
-
//
|
|
153
|
-
//
|
|
221
|
+
// Cached availability check. The previous implementation called execSync
|
|
222
|
+
// on every user message, blocking the Node event loop for up to 5s per
|
|
223
|
+
// query. We now use async execFile and cache the result for 60s.
|
|
224
|
+
const now = Date.now();
|
|
225
|
+
if (this.availabilityCache && this.availabilityCache.expiresAt > now) {
|
|
226
|
+
return this.availabilityCache.result;
|
|
227
|
+
}
|
|
228
|
+
const cache = (result) => {
|
|
229
|
+
this.availabilityCache = {
|
|
230
|
+
result,
|
|
231
|
+
expiresAt: now + ClaudeSDKProvider.AVAILABILITY_CACHE_MS,
|
|
232
|
+
};
|
|
233
|
+
return result;
|
|
234
|
+
};
|
|
154
235
|
try {
|
|
155
236
|
const claudePath = findClaudeBinary();
|
|
156
237
|
if (!claudePath)
|
|
157
|
-
return false;
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
238
|
+
return cache(false);
|
|
239
|
+
// Step 1: binary exists?
|
|
240
|
+
// Async execFile doesn't block the event loop. 5s timeout kills
|
|
241
|
+
// runaway probes without hanging the bot.
|
|
242
|
+
await execFileAsync(claudePath, ["--version"], { timeout: 5000 });
|
|
243
|
+
// Step 2: actually authenticated? The Claude Agent SDK shares the
|
|
244
|
+
// same OAuth token as the CLI — if `claude -p` says "Not logged in",
|
|
245
|
+
// the SDK will fail too. Probe with a trivial -p call and surface
|
|
246
|
+
// the failure before the registry hands a request to a broken
|
|
247
|
+
// provider.
|
|
248
|
+
const { stdout } = await execFileAsync(claudePath, ["-p", "ping", "--output-format", "text"], { timeout: 10000 });
|
|
249
|
+
if (isAuthErrorOutput(stdout)) {
|
|
250
|
+
return cache(false);
|
|
251
|
+
}
|
|
252
|
+
return cache(true);
|
|
161
253
|
}
|
|
162
254
|
catch {
|
|
163
|
-
return false;
|
|
255
|
+
return cache(false);
|
|
164
256
|
}
|
|
165
257
|
}
|
|
166
258
|
getInfo() {
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Ollama Provider — OpenAI-compatible chat-completions with an on-demand
|
|
3
|
+
* daemon lifecycle.
|
|
4
|
+
*
|
|
5
|
+
* Inherits all the request/response handling (streaming, tool-calling,
|
|
6
|
+
* rate-limit extraction, vision, …) from OpenAICompatibleProvider. Only
|
|
7
|
+
* adds the `lifecycle` field so the rest of the bot (heartbeat, /model
|
|
8
|
+
* switch, /status, shutdown) can manage the local daemon generically
|
|
9
|
+
* without any hardcoded "ollama" string-matching.
|
|
10
|
+
*
|
|
11
|
+
* When the architecture needs another local runner (LM Studio, llama.cpp,
|
|
12
|
+
* vLLM, Jan.ai, …), the pattern is the same: subclass
|
|
13
|
+
* OpenAICompatibleProvider, assign a `lifecycle` with its own
|
|
14
|
+
* ensureRunning/ensureStopped/isRunning/isBotManaged implementation.
|
|
15
|
+
*/
|
|
16
|
+
import { OpenAICompatibleProvider } from "./openai-compatible.js";
|
|
17
|
+
import { ensureRunning as managerEnsureRunning, ensureStopped as managerEnsureStopped, isDaemonRunning as managerIsDaemonRunning, isBotManaged as managerIsBotManaged, } from "../services/ollama-manager.js";
|
|
18
|
+
/**
 * OpenAI-compatible provider that additionally exposes a `lifecycle` object
 * delegating to the ollama-manager service functions.
 */
export class OllamaProvider extends OpenAICompatibleProvider {
    /** Daemon lifecycle hooks: ensureRunning / ensureStopped / isRunning / isBotManaged. */
    lifecycle;
    /**
     * @param config Provider configuration; forwarded unchanged to the base
     *   class. `config.model` is read once here and passed to the manager's
     *   ensureRunning on every call.
     */
    constructor(config) {
        super(config);
        // Snapshot the model name now so the hooks below never touch
        // this.config when they are invoked later.
        const { model } = config;
        this.lifecycle = {
            ensureRunning() {
                return managerEnsureRunning(model);
            },
            ensureStopped() {
                return managerEnsureStopped();
            },
            isRunning() {
                return managerIsDaemonRunning();
            },
            isBotManaged() {
                return managerIsBotManaged();
            },
        };
    }
}
|
|
@@ -286,10 +286,19 @@ export class OpenAICompatibleProvider {
|
|
|
286
286
|
}
|
|
287
287
|
getInfo() {
|
|
288
288
|
const tools = this.supportsToolUse() ? " 🔧" : "";
|
|
289
|
+
// Local runners (Ollama, LM Studio, …) don't use API keys. Report their
|
|
290
|
+
// status based on whether the local endpoint is reachable at startup,
|
|
291
|
+
// not based on the missing apiKey field which is semantically irrelevant
|
|
292
|
+
// for loopback endpoints.
|
|
293
|
+
const isLocal = this.config.baseUrl?.includes("localhost")
|
|
294
|
+
|| this.config.baseUrl?.includes("127.0.0.1");
|
|
295
|
+
const status = isLocal
|
|
296
|
+
? "💤 on-demand (local)"
|
|
297
|
+
: (this.config.apiKey ? "✅ configured" : "❌ no API key");
|
|
289
298
|
return {
|
|
290
299
|
name: this.config.name + tools,
|
|
291
300
|
model: this.config.model,
|
|
292
|
-
status
|
|
301
|
+
status,
|
|
293
302
|
};
|
|
294
303
|
}
|
|
295
304
|
// ── Rate Limit Extraction ───────────────────────────────────────────────
|
|
@@ -9,7 +9,20 @@
|
|
|
9
9
|
import { ClaudeSDKProvider } from "./claude-sdk-provider.js";
|
|
10
10
|
import { CodexCLIProvider } from "./codex-cli-provider.js";
|
|
11
11
|
import { OpenAICompatibleProvider } from "./openai-compatible.js";
|
|
12
|
+
import { OllamaProvider } from "./ollama-provider.js";
|
|
12
13
|
import { PROVIDER_PRESETS } from "./types.js";
|
|
14
|
+
import { t } from "../i18n.js";
|
|
15
|
+
/**
 * Identify an Ollama endpoint by its baseUrl rather than by a hardcoded
 * provider key. This lets users define aliases (e.g. `my-ollama`,
 * `ollama-local`) in FALLBACK_PROVIDERS or custom-models.json and still
 * get the on-demand lifecycle behaviour automatically.
 *
 * An endpoint counts as Ollama when it targets `localhost` or `127.0.0.1`
 * on port 11434. Absolute http(s) URLs are parsed so that only the real
 * host and port are compared: a mention of `localhost:11434` inside a path
 * or query string, or as the tail of another hostname, no longer matches,
 * and host casing is ignored. Values that are not absolute http(s) URLs
 * (e.g. a scheme-less `localhost:11434/v1`) fall back to a case-insensitive
 * substring check.
 *
 * @param {string | null | undefined} baseUrl - Configured provider base URL.
 * @returns {boolean} `true` when the URL points at the local Ollama port.
 */
function isOllamaEndpoint(baseUrl) {
    if (typeof baseUrl !== "string" || baseUrl === "")
        return false;
    try {
        const parsed = new URL(baseUrl);
        if (parsed.protocol === "http:" || parsed.protocol === "https:") {
            // URL lower-cases the hostname for http(s), so a plain compare is enough.
            const isLoopbackHost = parsed.hostname === "localhost" || parsed.hostname === "127.0.0.1";
            return isLoopbackHost && parsed.port === "11434";
        }
    }
    catch {
        // Not parseable as an absolute URL — use the substring check below.
    }
    // Scheme-less values such as "localhost:11434/v1" parse with a bogus
    // scheme (or not at all), so keep the original substring behaviour here.
    const lowered = baseUrl.toLowerCase();
    return lowered.includes("localhost:11434") || lowered.includes("127.0.0.1:11434");
}
|
|
13
26
|
export class ProviderRegistry {
|
|
14
27
|
providers = new Map();
|
|
15
28
|
primaryKey;
|
|
@@ -80,6 +93,21 @@ export class ProviderRegistry {
|
|
|
80
93
|
/**
|
|
81
94
|
* Query with automatic fallback.
|
|
82
95
|
* Tries the active provider first, then fallbacks in order.
|
|
96
|
+
*
|
|
97
|
+
* Two invariants beyond the obvious chain-walk:
|
|
98
|
+
*
|
|
99
|
+
* 1. Lifecycle-managed providers (local runners like Ollama) get booted
|
|
100
|
+
* on-demand if they're not already running. Without this, a
|
|
101
|
+
* mid-session Claude failure would silently skip Ollama because its
|
|
102
|
+
* daemon isn't awake yet — the heartbeat's 5-minute cadence can't
|
|
103
|
+
* react fast enough to save an in-flight user request.
|
|
104
|
+
*
|
|
105
|
+
* 2. If the active provider has already emitted text to the user and
|
|
106
|
+
* then errors out mid-stream, we do NOT silently failover to the
|
|
107
|
+
* next provider. Chaining a second model underneath a half-finished
|
|
108
|
+
* Claude response is more confusing than surfacing a clear error
|
|
109
|
+
* and asking the user to retry. The failover is only silent when
|
|
110
|
+
* the failing provider hadn't committed any visible text yet.
|
|
83
111
|
*/
|
|
84
112
|
async *queryWithFallback(options) {
|
|
85
113
|
const chain = [this.activeKey, ...this.fallbackKeys.filter(k => k !== this.activeKey)];
|
|
@@ -88,35 +116,97 @@ export class ProviderRegistry {
|
|
|
88
116
|
const provider = this.providers.get(key);
|
|
89
117
|
if (!provider)
|
|
90
118
|
continue;
|
|
91
|
-
// Check availability
|
|
92
|
-
|
|
119
|
+
// Check availability. For lifecycle-managed providers (Ollama et al.)
|
|
120
|
+
// that are currently asleep, actively try to boot them before giving up.
|
|
121
|
+
let available = await provider.isAvailable().catch(() => false);
|
|
122
|
+
if (!available && provider.lifecycle) {
|
|
123
|
+
console.log(`Provider "${key}" asleep — booting on-demand…`);
|
|
124
|
+
const booted = await provider.lifecycle.ensureRunning().catch(() => false);
|
|
125
|
+
if (booted) {
|
|
126
|
+
available = await provider.isAvailable().catch(() => false);
|
|
127
|
+
}
|
|
128
|
+
}
|
|
93
129
|
if (!available) {
|
|
94
130
|
console.log(`Provider "${key}" not available, trying next...`);
|
|
95
131
|
errors.push({ key, error: "not available (check auth/config)" });
|
|
96
132
|
continue;
|
|
97
133
|
}
|
|
134
|
+
// ─── Query with silent retry for transient mid-stream aborts ─────
|
|
135
|
+
// Anthropic occasionally drops streams (network hiccup, server-side
|
|
136
|
+
// flap, rate-limit blip). Rather than surfacing the error on the
|
|
137
|
+
// first failure, we retry the SAME provider once with a short delay.
|
|
138
|
+
// Only mid-stream abort-shaped errors trigger the retry — pre-stream
|
|
139
|
+
// failures and user cancels go straight to the fallback / error path.
|
|
140
|
+
const MAX_ATTEMPTS = 2;
|
|
141
|
+
const RETRY_DELAY_MS = 2_000;
|
|
142
|
+
let attempts = 0;
|
|
98
143
|
let hadError = false;
|
|
99
144
|
let lastError = "";
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
145
|
+
let hadVisibleText = false;
|
|
146
|
+
while (attempts < MAX_ATTEMPTS) {
|
|
147
|
+
attempts++;
|
|
148
|
+
hadError = false;
|
|
149
|
+
lastError = "";
|
|
150
|
+
hadVisibleText = false;
|
|
151
|
+
try {
|
|
152
|
+
for await (const chunk of provider.query(options)) {
|
|
153
|
+
if (chunk.type === "error") {
|
|
154
|
+
hadError = true;
|
|
155
|
+
lastError = chunk.error || "Unknown error";
|
|
156
|
+
break;
|
|
157
|
+
}
|
|
158
|
+
if (chunk.type === "text" && chunk.text && chunk.text.length > 0) {
|
|
159
|
+
hadVisibleText = true;
|
|
160
|
+
}
|
|
161
|
+
yield chunk;
|
|
162
|
+
if (chunk.type === "done")
|
|
163
|
+
return;
|
|
106
164
|
}
|
|
107
|
-
yield chunk;
|
|
108
|
-
if (chunk.type === "done")
|
|
109
|
-
return;
|
|
110
165
|
}
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
166
|
+
catch (err) {
|
|
167
|
+
hadError = true;
|
|
168
|
+
lastError = err instanceof Error ? err.message : String(err);
|
|
169
|
+
}
|
|
170
|
+
if (!hadError) {
|
|
171
|
+
// Loop ended naturally without a done — unusual, fall through.
|
|
172
|
+
break;
|
|
173
|
+
}
|
|
174
|
+
// Retry eligibility:
|
|
175
|
+
// - mid-stream (had visible text before error)
|
|
176
|
+
// - not a user-initiated cancel (abortSignal is externally fired)
|
|
177
|
+
// - error looks transient (contains "abort")
|
|
178
|
+
// - still have attempts left
|
|
179
|
+
const isUserAbort = options.abortSignal?.aborted === true;
|
|
180
|
+
const isTransientLooking = lastError.toLowerCase().includes("abort");
|
|
181
|
+
const shouldRetry = hadVisibleText
|
|
182
|
+
&& attempts < MAX_ATTEMPTS
|
|
183
|
+
&& !isUserAbort
|
|
184
|
+
&& isTransientLooking;
|
|
185
|
+
if (!shouldRetry)
|
|
186
|
+
break;
|
|
187
|
+
console.log(`Provider "${key}" mid-stream abort (attempt ${attempts}/${MAX_ATTEMPTS}) — retrying in ${RETRY_DELAY_MS}ms: ${lastError}`);
|
|
188
|
+
await new Promise(resolve => setTimeout(resolve, RETRY_DELAY_MS));
|
|
189
|
+
// If the user cancelled during the delay, bail before the next attempt.
|
|
190
|
+
if (options.abortSignal?.aborted === true)
|
|
191
|
+
break;
|
|
115
192
|
}
|
|
116
193
|
if (hadError) {
|
|
117
|
-
console.log(`Provider "${key}" failed: ${lastError}. Trying next
|
|
194
|
+
console.log(`Provider "${key}" failed: ${lastError}. ${hadVisibleText ? "Mid-stream — surfacing error." : "Trying next..."}`);
|
|
118
195
|
errors.push({ key, error: lastError });
|
|
119
|
-
//
|
|
196
|
+
// Mid-stream failure: the user already has partial text on screen.
|
|
197
|
+
// Yield a terminal error instead of switching to a different model
|
|
198
|
+
// that would write a second, unrelated response underneath.
|
|
199
|
+
if (hadVisibleText) {
|
|
200
|
+
yield {
|
|
201
|
+
type: "error",
|
|
202
|
+
error: t("bot.error.midStream", options.locale, {
|
|
203
|
+
name: provider.getInfo().name,
|
|
204
|
+
detail: lastError,
|
|
205
|
+
}),
|
|
206
|
+
};
|
|
207
|
+
return;
|
|
208
|
+
}
|
|
209
|
+
// Pre-stream failure: safe to silently switch to the next provider.
|
|
120
210
|
const nextIdx = chain.indexOf(key) + 1;
|
|
121
211
|
if (nextIdx < chain.length) {
|
|
122
212
|
const nextProvider = this.providers.get(chain[nextIdx]);
|
|
@@ -155,6 +245,11 @@ export class ProviderRegistry {
|
|
|
155
245
|
case "codex-cli":
|
|
156
246
|
return new CodexCLIProvider(config);
|
|
157
247
|
case "openai-compatible":
|
|
248
|
+
// Local runners that happen to speak the OpenAI-compat protocol
|
|
249
|
+
// get a subclass that layers on-demand lifecycle management.
|
|
250
|
+
if (isOllamaEndpoint(config.baseUrl)) {
|
|
251
|
+
return new OllamaProvider(config);
|
|
252
|
+
}
|
|
158
253
|
return new OpenAICompatibleProvider(config);
|
|
159
254
|
default:
|
|
160
255
|
throw new Error(`Unknown provider type: ${config.type}`);
|
package/dist/providers/types.js
CHANGED
|
@@ -14,14 +14,16 @@ export const PROVIDER_PRESETS = {
|
|
|
14
14
|
supportsTools: true,
|
|
15
15
|
supportsVision: false,
|
|
16
16
|
supportsStreaming: true,
|
|
17
|
+
contextWindow: 400_000,
|
|
17
18
|
},
|
|
18
|
-
// Anthropic (via Agent SDK — full tool use)
|
|
19
|
+
// Anthropic (via Agent SDK — full tool use, 1M-context beta enabled)
|
|
19
20
|
"claude-sdk": {
|
|
20
21
|
type: "claude-sdk",
|
|
21
22
|
name: "Claude (Agent SDK)",
|
|
22
23
|
supportsTools: true,
|
|
23
24
|
supportsVision: true,
|
|
24
25
|
supportsStreaming: true,
|
|
26
|
+
contextWindow: 1_000_000,
|
|
25
27
|
},
|
|
26
28
|
// Anthropic API (via OpenAI-compatible endpoint — no Agent SDK needed)
|
|
27
29
|
"claude-opus": {
|
|
@@ -32,6 +34,7 @@ export const PROVIDER_PRESETS = {
|
|
|
32
34
|
supportsVision: true,
|
|
33
35
|
supportsStreaming: true,
|
|
34
36
|
supportsTools: true,
|
|
37
|
+
contextWindow: 200_000,
|
|
35
38
|
},
|
|
36
39
|
"claude-sonnet": {
|
|
37
40
|
type: "openai-compatible",
|
|
@@ -41,6 +44,7 @@ export const PROVIDER_PRESETS = {
|
|
|
41
44
|
supportsVision: true,
|
|
42
45
|
supportsStreaming: true,
|
|
43
46
|
supportsTools: true,
|
|
47
|
+
contextWindow: 200_000,
|
|
44
48
|
},
|
|
45
49
|
"claude-haiku": {
|
|
46
50
|
type: "openai-compatible",
|
|
@@ -50,6 +54,7 @@ export const PROVIDER_PRESETS = {
|
|
|
50
54
|
supportsVision: true,
|
|
51
55
|
supportsStreaming: true,
|
|
52
56
|
supportsTools: true,
|
|
57
|
+
contextWindow: 200_000,
|
|
53
58
|
},
|
|
54
59
|
// Groq (fast inference, free tier, supports function calling)
|
|
55
60
|
"groq": {
|
|
@@ -60,6 +65,7 @@ export const PROVIDER_PRESETS = {
|
|
|
60
65
|
supportsVision: false,
|
|
61
66
|
supportsStreaming: true,
|
|
62
67
|
supportsTools: true,
|
|
68
|
+
contextWindow: 128_000,
|
|
63
69
|
},
|
|
64
70
|
// OpenAI (supports function calling)
|
|
65
71
|
"gpt-4o": {
|
|
@@ -70,6 +76,7 @@ export const PROVIDER_PRESETS = {
|
|
|
70
76
|
supportsVision: true,
|
|
71
77
|
supportsStreaming: true,
|
|
72
78
|
supportsTools: true,
|
|
79
|
+
contextWindow: 128_000,
|
|
73
80
|
},
|
|
74
81
|
"gpt-4o-mini": {
|
|
75
82
|
type: "openai-compatible",
|
|
@@ -79,6 +86,7 @@ export const PROVIDER_PRESETS = {
|
|
|
79
86
|
supportsVision: true,
|
|
80
87
|
supportsStreaming: true,
|
|
81
88
|
supportsTools: true,
|
|
89
|
+
contextWindow: 128_000,
|
|
82
90
|
},
|
|
83
91
|
// Google Gemini (via OpenAI-compatible endpoint, supports function calling)
|
|
84
92
|
"gemini-2.5-pro": {
|
|
@@ -89,6 +97,7 @@ export const PROVIDER_PRESETS = {
|
|
|
89
97
|
supportsVision: true,
|
|
90
98
|
supportsStreaming: true,
|
|
91
99
|
supportsTools: true,
|
|
100
|
+
contextWindow: 1_000_000,
|
|
92
101
|
},
|
|
93
102
|
"gemini-2.5-flash": {
|
|
94
103
|
type: "openai-compatible",
|
|
@@ -98,6 +107,7 @@ export const PROVIDER_PRESETS = {
|
|
|
98
107
|
supportsVision: true,
|
|
99
108
|
supportsStreaming: true,
|
|
100
109
|
supportsTools: true,
|
|
110
|
+
contextWindow: 1_000_000,
|
|
101
111
|
},
|
|
102
112
|
"gemini-3-pro": {
|
|
103
113
|
type: "openai-compatible",
|
|
@@ -107,6 +117,7 @@ export const PROVIDER_PRESETS = {
|
|
|
107
117
|
supportsVision: true,
|
|
108
118
|
supportsStreaming: true,
|
|
109
119
|
supportsTools: true,
|
|
120
|
+
contextWindow: 2_000_000,
|
|
110
121
|
},
|
|
111
122
|
"gemini-3-flash": {
|
|
112
123
|
type: "openai-compatible",
|
|
@@ -116,6 +127,7 @@ export const PROVIDER_PRESETS = {
|
|
|
116
127
|
supportsVision: true,
|
|
117
128
|
supportsStreaming: true,
|
|
118
129
|
supportsTools: true,
|
|
130
|
+
contextWindow: 1_000_000,
|
|
119
131
|
},
|
|
120
132
|
// OpenAI newer models
|
|
121
133
|
"gpt-4.1": {
|
|
@@ -126,6 +138,7 @@ export const PROVIDER_PRESETS = {
|
|
|
126
138
|
supportsVision: true,
|
|
127
139
|
supportsStreaming: true,
|
|
128
140
|
supportsTools: true,
|
|
141
|
+
contextWindow: 1_000_000,
|
|
129
142
|
},
|
|
130
143
|
"gpt-4.1-mini": {
|
|
131
144
|
type: "openai-compatible",
|
|
@@ -135,6 +148,7 @@ export const PROVIDER_PRESETS = {
|
|
|
135
148
|
supportsVision: true,
|
|
136
149
|
supportsStreaming: true,
|
|
137
150
|
supportsTools: true,
|
|
151
|
+
contextWindow: 1_000_000,
|
|
138
152
|
},
|
|
139
153
|
"o3-mini": {
|
|
140
154
|
type: "openai-compatible",
|
|
@@ -144,6 +158,7 @@ export const PROVIDER_PRESETS = {
|
|
|
144
158
|
supportsVision: false,
|
|
145
159
|
supportsStreaming: true,
|
|
146
160
|
supportsTools: true,
|
|
161
|
+
contextWindow: 200_000,
|
|
147
162
|
},
|
|
148
163
|
// Groq additional models
|
|
149
164
|
"groq-llama-3.1-8b": {
|
|
@@ -154,6 +169,7 @@ export const PROVIDER_PRESETS = {
|
|
|
154
169
|
supportsVision: false,
|
|
155
170
|
supportsStreaming: true,
|
|
156
171
|
supportsTools: true,
|
|
172
|
+
contextWindow: 128_000,
|
|
157
173
|
},
|
|
158
174
|
"groq-mixtral": {
|
|
159
175
|
type: "openai-compatible",
|
|
@@ -163,6 +179,7 @@ export const PROVIDER_PRESETS = {
|
|
|
163
179
|
supportsVision: false,
|
|
164
180
|
supportsStreaming: true,
|
|
165
181
|
supportsTools: true,
|
|
182
|
+
contextWindow: 32_768,
|
|
166
183
|
},
|
|
167
184
|
// NVIDIA NIM (150+ free models)
|
|
168
185
|
"nvidia-llama-3.3-70b": {
|
|
@@ -173,6 +190,7 @@ export const PROVIDER_PRESETS = {
|
|
|
173
190
|
supportsVision: false,
|
|
174
191
|
supportsStreaming: true,
|
|
175
192
|
supportsTools: true,
|
|
193
|
+
contextWindow: 128_000,
|
|
176
194
|
},
|
|
177
195
|
"nvidia-kimi-k2.5": {
|
|
178
196
|
type: "openai-compatible",
|
|
@@ -182,8 +200,9 @@ export const PROVIDER_PRESETS = {
|
|
|
182
200
|
supportsVision: true,
|
|
183
201
|
supportsStreaming: true,
|
|
184
202
|
supportsTools: true,
|
|
203
|
+
contextWindow: 200_000,
|
|
185
204
|
},
|
|
186
|
-
// Ollama (local models)
|
|
205
|
+
// Ollama (local models) — Gemma 4 E4B has an 8k context window
|
|
187
206
|
"ollama": {
|
|
188
207
|
type: "openai-compatible",
|
|
189
208
|
name: "Gemma 4 E4B (Ollama)",
|
|
@@ -191,8 +210,10 @@ export const PROVIDER_PRESETS = {
|
|
|
191
210
|
baseUrl: "http://localhost:11434/v1",
|
|
192
211
|
supportsVision: true,
|
|
193
212
|
supportsStreaming: true,
|
|
213
|
+
contextWindow: 8_192,
|
|
194
214
|
},
|
|
195
|
-
// OpenRouter (any model, one API, supports function calling)
|
|
215
|
+
// OpenRouter (any model, one API, supports function calling).
|
|
216
|
+
// Context window varies by model — default 200k is a middle-ground guess.
|
|
196
217
|
"openrouter": {
|
|
197
218
|
type: "openai-compatible",
|
|
198
219
|
name: "OpenRouter",
|
|
@@ -201,5 +222,6 @@ export const PROVIDER_PRESETS = {
|
|
|
201
222
|
supportsVision: true,
|
|
202
223
|
supportsStreaming: true,
|
|
203
224
|
supportsTools: true,
|
|
225
|
+
contextWindow: 200_000,
|
|
204
226
|
},
|
|
205
227
|
};
|
|
@@ -96,6 +96,8 @@ export async function compactSession(session) {
|
|
|
96
96
|
session.history = [summaryMessage, ...fallbackKeep];
|
|
97
97
|
}
|
|
98
98
|
const summaryTokens = Math.ceil(summaryMessage.content.length / 4); // rough estimate
|
|
99
|
+
// Track how many compactions this session has seen, for /status telemetry
|
|
100
|
+
session.compactionCount = (session.compactionCount || 0) + 1;
|
|
99
101
|
return {
|
|
100
102
|
removedEntries,
|
|
101
103
|
summaryTokens,
|