alvin-bot 4.5.0 → 4.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +150 -0
- package/README.md +25 -2
- package/alvin-bot-4.5.1.tgz +0 -0
- package/bin/cli.js +246 -0
- package/dist/handlers/commands.js +461 -63
- package/dist/handlers/message.js +209 -14
- package/dist/i18n.js +470 -13
- package/dist/index.js +44 -5
- package/dist/providers/claude-sdk-provider.js +106 -14
- package/dist/providers/ollama-provider.js +32 -0
- package/dist/providers/openai-compatible.js +10 -1
- package/dist/providers/registry.js +112 -17
- package/dist/providers/types.js +25 -3
- package/dist/services/compaction.js +2 -0
- package/dist/services/cron.js +53 -42
- package/dist/services/heartbeat.js +41 -7
- package/dist/services/language-detect.js +12 -2
- package/dist/services/ollama-manager.js +339 -0
- package/dist/services/personality.js +20 -14
- package/dist/services/session.js +21 -3
- package/dist/services/subagent-delivery.js +111 -0
- package/dist/services/subagents.js +341 -27
- package/dist/services/telegram.js +28 -1
- package/dist/services/updater.js +158 -0
- package/dist/services/usage-tracker.js +11 -4
- package/dist/services/users.js +2 -1
- package/dist/tui/index.js +36 -30
- package/docs/HANDBOOK.md +819 -0
- package/package.json +7 -2
- package/test/claude-sdk-provider.test.ts +69 -0
- package/test/i18n.test.ts +108 -0
- package/test/registry.test.ts +201 -0
- package/test/subagent-delivery.test.ts +169 -0
- package/test/subagents-commands.test.ts +64 -0
- package/test/subagents-config.test.ts +108 -0
- package/test/subagents-depth.test.ts +58 -0
- package/test/subagents-inheritance.test.ts +67 -0
- package/test/subagents-name-resolver.test.ts +122 -0
- package/test/subagents-priority-reject.test.ts +60 -0
- package/test/subagents-shutdown.test.ts +126 -0
- package/test/subagents-toolset.test.ts +51 -0
- package/vitest.config.ts +17 -0
package/dist/handlers/message.js
CHANGED
|
@@ -13,6 +13,87 @@ import { shouldCompact, compactSession } from "../services/compaction.js";
|
|
|
13
13
|
import { emit } from "../services/hooks.js";
|
|
14
14
|
import { trackUsage } from "../services/usage-tracker.js";
|
|
15
15
|
import { emitUserMessage as broadcastUserMessage, emitResponseStart as broadcastResponseStart, emitResponseDelta as broadcastResponseDelta, emitResponseDone as broadcastResponseDone, } from "../services/broadcast.js";
|
|
16
|
+
import { t } from "../i18n.js";
|
|
17
|
+
/**
 * Stuck-only timeout — NO absolute cap.
 *
 * Alvin is designed to work as long as it needs to, including overnight
 * on multi-hour tasks. The ONLY condition under which we abort a running
 * query is when Claude produces no chunks at all for STUCK_TIMEOUT_MINUTES
 * — that's a genuine hang, not legitimate work. Every text chunk and
 * tool_use chunk resets this timer, so an actively-progressing task will
 * never be cut off regardless of total duration.
 *
 * Previous design had an additional 30-minute absolute cap that violated
 * this "work as long as needed" character. Removed entirely — only the
 * stuck detector remains.
 *
 * Configurable via ALVIN_STUCK_TIMEOUT_MINUTES env var. Default 10 minutes,
 * which is generous for normal work (Claude typically streams chunks every
 * few seconds) but still catches real deadlocks quickly.
 *
 * The env value is validated: anything that is not a finite number yielding
 * a delay of at least 1 ms (unset, empty, non-numeric, zero, negative,
 * Infinity) falls back to the default, and oversized values are clamped.
 * Without this, Node.js coerces an out-of-range timer delay to 1 ms, which
 * would make the stuck detector abort every query almost immediately.
 */
const DEFAULT_STUCK_TIMEOUT_MINUTES = 10;
/** Largest whole number of minutes whose millisecond value still fits the
 * signed 32-bit delay accepted by setTimeout (2147483647 ms). */
const MAX_STUCK_TIMEOUT_MINUTES = Math.floor(2147483647 / (60 * 1000));
/**
 * Turn the raw ALVIN_STUCK_TIMEOUT_MINUTES value into a usable minute count.
 *
 * @param {string | undefined} raw - Raw environment variable value.
 * @returns {number} Minutes in the range (0, MAX_STUCK_TIMEOUT_MINUTES];
 *   the default when `raw` is missing or unusable.
 */
function resolveStuckTimeoutMinutes(raw) {
    const minutes = Number(raw);
    // Rejects NaN, ±Infinity, zero, negatives and sub-millisecond values.
    if (!Number.isFinite(minutes) || minutes * 60 * 1000 < 1) {
        return DEFAULT_STUCK_TIMEOUT_MINUTES;
    }
    return Math.min(minutes, MAX_STUCK_TIMEOUT_MINUTES);
}
const STUCK_TIMEOUT_MINUTES = resolveStuckTimeoutMinutes(process.env.ALVIN_STUCK_TIMEOUT_MINUTES);
const STUCK_TIMEOUT_MS = STUCK_TIMEOUT_MINUTES * 60 * 1000;
|
|
37
|
+
/** Checkpoint reminder thresholds — kept in sync with
|
|
38
|
+
* src/providers/claude-sdk-provider.ts (where the actual hint injection
|
|
39
|
+
* happens). We mirror the check here so the session telemetry knows
|
|
40
|
+
* when the SDK provider would have injected a reminder. */
|
|
41
|
+
// handleMessage counts a checkpoint hint on an SDK turn once
// session.toolUseCount is >= this value.
const CHECKPOINT_TOOL_THRESHOLD = 15;
|
|
42
|
+
// handleMessage counts a checkpoint hint on an SDK turn once
// session.messageCount is >= this value.
const CHECKPOINT_MSG_THRESHOLD = 10;
|
|
43
|
+
/** Maximum characters in the bridge-message preamble that gets prepended
 * to the first post-recovery SDK query. Oldest gap-turns get truncated. */
const BRIDGE_MAX_CHARS = 2500;
/** Maximum characters per individual message in the bridge preamble. */
const BRIDGE_MSG_MAX_CHARS = 500;
/**
 * Build a "catch-up" preamble summarising turns that happened while the
 * SDK was not the active provider (i.e., during a failover to Ollama or
 * a manual /model switch). This gets prepended to the first post-recovery
 * prompt so the SDK sees what its alter-ego did.
 *
 * @param {Array<{role: string, content: unknown}>} fallbackTurns - Turns to
 *   summarise, oldest first. Anything that is not a non-empty array yields "".
 * @returns {string} The preamble, ending with a marker that introduces the
 *   new user message, or "" when there is nothing to bridge.
 */
function buildBridgeMessage(fallbackTurns) {
    if (!Array.isArray(fallbackTurns) || fallbackTurns.length === 0) {
        return "";
    }
    const SEPARATOR = "\n\n";
    const renderTurn = (m) => {
        const label = m?.role === "user" ? "User" : "Assistant (Fallback)";
        const raw = m?.content;
        // Tolerate missing / non-string content instead of throwing on `.length`.
        const text = typeof raw === "string" ? raw : raw == null ? "" : String(raw);
        if (text.length <= BRIDGE_MSG_MAX_CHARS) {
            return `${label}: ${text}`;
        }
        let cut = BRIDGE_MSG_MAX_CHARS;
        // Never cut between the two halves of a surrogate pair: if the last
        // kept code unit is a high surrogate, drop it as well.
        const lastUnit = text.charCodeAt(cut - 1);
        if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
            cut -= 1;
        }
        return `${label}: ${text.slice(0, cut)}…`;
    };
    const rendered = fallbackTurns.map(renderTurn);
    // Track the joined length arithmetically so trimming is linear rather than
    // re-joining the whole array after every dropped turn.
    let bodyLength = rendered.reduce((sum, line) => sum + line.length, 0) +
        SEPARATOR.length * (rendered.length - 1);
    let truncatedOldest = 0;
    // Drop oldest turns while over budget, but always keep the newest two.
    while (bodyLength > BRIDGE_MAX_CHARS && rendered.length - truncatedOldest > 2) {
        bodyLength -= rendered[truncatedOldest].length + SEPARATOR.length;
        truncatedOldest++;
    }
    const body = rendered.slice(truncatedOldest).join(SEPARATOR);
    const omittedNote = truncatedOldest > 0
        ? `[…${truncatedOldest} older turn(s) omitted…]\n\n`
        : "";
    // `count` is the total number of gap turns, including any omitted ones.
    const count = fallbackTurns.length;
    return (`[Context: While you (Claude) were briefly not the active provider, ` +
        `the following ${count} message(s) were exchanged with a fallback model. ` +
        `Catching you up:\n\n` +
        omittedNote +
        body +
        `\n\n--- New message from user: ---]\n\n`);
}
|
|
84
|
+
/** Tool name → emoji. Used to render a status line while Alvin is running
 * tools, so users see real progress instead of an endless typing indicator.
 *
 * Built on a null prototype so that a lookup by an arbitrary tool name can
 * only ever hit the entries listed here — names such as "constructor" or
 * "toString" resolve to undefined instead of an inherited function, letting
 * the caller's `|| fallback` apply. Frozen because it is a shared constant. */
const TOOL_ICONS = Object.freeze(Object.assign(Object.create(null), {
    Read: "📖",
    Write: "📝",
    Edit: "✏️",
    Bash: "⚡",
    Glob: "🔍",
    Grep: "🔎",
    WebSearch: "🌐",
    WebFetch: "📡",
    Task: "🤖",
}));
|
|
16
97
|
/** React to a message with an emoji. Silently fails if reactions aren't supported. */
|
|
17
98
|
async function react(ctx, emoji) {
|
|
18
99
|
try {
|
|
@@ -77,9 +158,25 @@ export async function handleMessage(ctx) {
|
|
|
77
158
|
session.abortController = new AbortController();
|
|
78
159
|
const streamer = new TelegramStreamer(ctx.chat.id, ctx.api, ctx.message?.message_id);
|
|
79
160
|
let finalText = "";
|
|
161
|
+
let timedOut = false;
|
|
80
162
|
const typingInterval = setInterval(() => {
|
|
81
163
|
ctx.api.sendChatAction(ctx.chat.id, "typing").catch(() => { });
|
|
82
164
|
}, 4000);
|
|
165
|
+
// Stuck-only timer: fires if NO chunks arrive for STUCK_TIMEOUT_MS.
|
|
166
|
+
// Reset on every chunk so any actively-progressing task runs indefinitely.
|
|
167
|
+
// No absolute cap — Alvin is allowed to work as long as needed.
|
|
168
|
+
let stuckTimer = null;
|
|
169
|
+
const resetStuckTimer = () => {
|
|
170
|
+
if (stuckTimer)
|
|
171
|
+
clearTimeout(stuckTimer);
|
|
172
|
+
stuckTimer = setTimeout(() => {
|
|
173
|
+
if (session.abortController && !session.abortController.signal.aborted) {
|
|
174
|
+
timedOut = true;
|
|
175
|
+
session.abortController.abort();
|
|
176
|
+
}
|
|
177
|
+
}, STUCK_TIMEOUT_MS);
|
|
178
|
+
};
|
|
179
|
+
resetStuckTimer();
|
|
83
180
|
try {
|
|
84
181
|
// React with 🤔 to show we're thinking
|
|
85
182
|
await react(ctx, "🤔");
|
|
@@ -126,32 +223,69 @@ export async function handleMessage(ctx) {
|
|
|
126
223
|
const systemPrompt = (isSDK
|
|
127
224
|
? buildSystemPrompt(isSDK, session.language, chatIdStr)
|
|
128
225
|
: await buildSmartSystemPrompt(isSDK, session.language, text, chatIdStr)) + skillContext;
|
|
226
|
+
// Track the user turn in history regardless of provider type. This keeps
|
|
227
|
+
// the fallback path (Ollama etc.) aware of what was said on SDK turns.
|
|
228
|
+
addToHistory(userId, { role: "user", content: text });
|
|
229
|
+
// Checkpoint telemetry: mirror the SDK provider's threshold check here
|
|
230
|
+
// so session.checkpointHintsInjected reflects reality. The provider
|
|
231
|
+
// evaluates the exact same condition at query time — if it's true,
|
|
232
|
+
// it prepends a [CHECKPOINT] reminder to the prompt.
|
|
233
|
+
if (isSDK) {
|
|
234
|
+
const wouldInjectCheckpoint = session.toolUseCount >= CHECKPOINT_TOOL_THRESHOLD ||
|
|
235
|
+
session.messageCount >= CHECKPOINT_MSG_THRESHOLD;
|
|
236
|
+
if (wouldInjectCheckpoint) {
|
|
237
|
+
session.checkpointHintsInjected++;
|
|
238
|
+
}
|
|
239
|
+
}
|
|
240
|
+
// B2 Bridge-Message: if SDK is active but there are non-SDK turns since
|
|
241
|
+
// the last SDK turn, prepend a catch-up preamble so the SDK sees what
|
|
242
|
+
// happened during the failover. We defensively clamp the index against
|
|
243
|
+
// history bounds in case compaction shrank the array under our feet.
|
|
244
|
+
let bridgedPrompt = text;
|
|
245
|
+
if (isSDK) {
|
|
246
|
+
const anchor = Math.min(session.lastSdkHistoryIndex, session.history.length - 1);
|
|
247
|
+
const gapStart = Math.max(0, anchor + 1);
|
|
248
|
+
// gapEnd excludes the user message we just added (history.length - 1).
|
|
249
|
+
const gapEnd = session.history.length - 1;
|
|
250
|
+
if (gapEnd > gapStart) {
|
|
251
|
+
const gapTurns = session.history.slice(gapStart, gapEnd);
|
|
252
|
+
const bridge = buildBridgeMessage(gapTurns);
|
|
253
|
+
if (bridge) {
|
|
254
|
+
bridgedPrompt = bridge + text;
|
|
255
|
+
console.log(`[bridge] SDK recovery: injecting ${gapTurns.length} fallback turn(s) into prompt`);
|
|
256
|
+
}
|
|
257
|
+
}
|
|
258
|
+
}
|
|
129
259
|
const queryOpts = {
|
|
130
|
-
prompt:
|
|
260
|
+
prompt: bridgedPrompt,
|
|
131
261
|
systemPrompt,
|
|
132
262
|
workingDir: session.workingDir,
|
|
133
263
|
effort: session.effort,
|
|
134
264
|
abortSignal: session.abortController.signal,
|
|
265
|
+
// User's UI locale — registry uses it to localize failure messages.
|
|
266
|
+
locale: session.language,
|
|
135
267
|
// SDK-specific
|
|
136
268
|
sessionId: isSDK ? session.sessionId : null,
|
|
137
|
-
//
|
|
138
|
-
|
|
269
|
+
// Unified history: SDK ignores it (uses filesystem-resume instead),
|
|
270
|
+
// non-SDK providers use it for context. Keeping it populated for both
|
|
271
|
+
// means a failover from SDK → Ollama keeps the conversation context.
|
|
272
|
+
history: session.history,
|
|
139
273
|
// SDK checkpoint tracking
|
|
140
274
|
_sessionState: isSDK ? {
|
|
141
275
|
messageCount: session.messageCount,
|
|
142
276
|
toolUseCount: session.toolUseCount,
|
|
143
277
|
} : undefined,
|
|
144
278
|
};
|
|
145
|
-
// Add user message to history (for non-SDK providers)
|
|
146
|
-
if (!isSDK) {
|
|
147
|
-
addToHistory(userId, { role: "user", content: text });
|
|
148
|
-
}
|
|
149
279
|
// Stream response from provider (with fallback)
|
|
150
280
|
let lastBroadcastLen = 0;
|
|
151
281
|
for await (const chunk of registry.queryWithFallback(queryOpts)) {
|
|
282
|
+
// Any chunk is progress — reset the stuck timer.
|
|
283
|
+
resetStuckTimer();
|
|
152
284
|
switch (chunk.type) {
|
|
153
285
|
case "text":
|
|
154
286
|
finalText = chunk.text || "";
|
|
287
|
+
// Clear any tool-use status line — real content is flowing now.
|
|
288
|
+
streamer.setStatus(null);
|
|
155
289
|
await streamer.update(finalText);
|
|
156
290
|
// Emit the new delta for observers — accumulated text minus what
|
|
157
291
|
// we already broadcast.
|
|
@@ -168,9 +302,42 @@ export async function handleMessage(ctx) {
|
|
|
168
302
|
}
|
|
169
303
|
break;
|
|
170
304
|
case "tool_use":
|
|
171
|
-
//
|
|
305
|
+
// Surface the active tool so users see real progress instead of
|
|
306
|
+
// an endless typing indicator. The streamer renders this as a
|
|
307
|
+
// dim italic footer under any accumulated text.
|
|
172
308
|
if (chunk.toolName) {
|
|
173
309
|
session.toolUseCount++;
|
|
310
|
+
const icon = TOOL_ICONS[chunk.toolName] || "🔧";
|
|
311
|
+
// Special treatment for Claude's SDK-internal Task tool:
|
|
312
|
+
// track how many sub-tasks Claude delegated and surface the
|
|
313
|
+
// task description in the status line so the user sees WHAT
|
|
314
|
+
// is being delegated, not just "Task…".
|
|
315
|
+
if (chunk.toolName === "Task") {
|
|
316
|
+
session.sdkSubTaskCount++;
|
|
317
|
+
let label = "Task";
|
|
318
|
+
if (chunk.toolInput) {
|
|
319
|
+
try {
|
|
320
|
+
const parsed = JSON.parse(chunk.toolInput);
|
|
321
|
+
if (parsed.description) {
|
|
322
|
+
// Trim long descriptions so the status line stays readable
|
|
323
|
+
const desc = parsed.description.length > 80
|
|
324
|
+
? parsed.description.slice(0, 80) + "…"
|
|
325
|
+
: parsed.description;
|
|
326
|
+
label = `Task: ${desc}`;
|
|
327
|
+
}
|
|
328
|
+
else if (parsed.subagent_type) {
|
|
329
|
+
label = `Task (${parsed.subagent_type})`;
|
|
330
|
+
}
|
|
331
|
+
}
|
|
332
|
+
catch {
|
|
333
|
+
// not JSON — keep generic label
|
|
334
|
+
}
|
|
335
|
+
}
|
|
336
|
+
streamer.setStatus(`${icon} ${label}…`);
|
|
337
|
+
}
|
|
338
|
+
else {
|
|
339
|
+
streamer.setStatus(`${icon} ${chunk.toolName}…`);
|
|
340
|
+
}
|
|
174
341
|
}
|
|
175
342
|
break;
|
|
176
343
|
case "done":
|
|
@@ -178,6 +345,13 @@ export async function handleMessage(ctx) {
|
|
|
178
345
|
session.sessionId = chunk.sessionId;
|
|
179
346
|
if (chunk.costUsd)
|
|
180
347
|
session.totalCost += chunk.costUsd;
|
|
348
|
+
// Track the input tokens this turn used — this approximates the
|
|
349
|
+
// current context window usage since the model receives the full
|
|
350
|
+
// conversation context on every turn. Used for the Context:X/Y
|
|
351
|
+
// progress meter in /status.
|
|
352
|
+
if (typeof chunk.inputTokens === "number" && chunk.inputTokens > 0) {
|
|
353
|
+
session.lastTurnInputTokens = chunk.inputTokens;
|
|
354
|
+
}
|
|
181
355
|
trackProviderUsage(userId, registry.getActiveKey(), chunk.costUsd || 0, chunk.inputTokens, chunk.outputTokens);
|
|
182
356
|
trackUsage(registry.getActiveKey(), chunk.inputTokens || 0, chunk.outputTokens || 0, chunk.costUsd || 0);
|
|
183
357
|
session.lastActivity = Date.now();
|
|
@@ -186,7 +360,16 @@ export async function handleMessage(ctx) {
|
|
|
186
360
|
await ctx.reply(`⚡ _${chunk.failedProvider} unavailable — switching to ${chunk.providerName}_`, { parse_mode: "Markdown" });
|
|
187
361
|
break;
|
|
188
362
|
case "error":
|
|
189
|
-
|
|
363
|
+
// If our stuck-timer fired, the abort travels up as a registry
|
|
364
|
+
// mid-stream error chunk. Prefer the explicit stuck message over
|
|
365
|
+
// the generic one so the user understands this was a real hang,
|
|
366
|
+
// not a random error.
|
|
367
|
+
if (timedOut) {
|
|
368
|
+
await ctx.reply(t("bot.error.timeoutStuck", session.language, { min: STUCK_TIMEOUT_MINUTES }));
|
|
369
|
+
}
|
|
370
|
+
else {
|
|
371
|
+
await ctx.reply(`${t("bot.error.prefix", session.language)} ${chunk.error}`);
|
|
372
|
+
}
|
|
190
373
|
break;
|
|
191
374
|
}
|
|
192
375
|
}
|
|
@@ -203,9 +386,15 @@ export async function handleMessage(ctx) {
|
|
|
203
386
|
});
|
|
204
387
|
// Clear thinking reaction (replace with nothing — message was answered)
|
|
205
388
|
await react(ctx, "👍");
|
|
206
|
-
//
|
|
207
|
-
|
|
389
|
+
// Track the assistant turn in history regardless of provider type
|
|
390
|
+
// (unified history for seamless failover between SDK and Ollama).
|
|
391
|
+
if (finalText) {
|
|
208
392
|
addToHistory(userId, { role: "assistant", content: finalText });
|
|
393
|
+
// Advance the B2 bridge anchor to the assistant turn we just added,
|
|
394
|
+
// so the next SDK turn only bridges turns that happened AFTER this one.
|
|
395
|
+
if (isSDK) {
|
|
396
|
+
session.lastSdkHistoryIndex = session.history.length - 1;
|
|
397
|
+
}
|
|
209
398
|
}
|
|
210
399
|
// Voice reply if enabled
|
|
211
400
|
if (session.voiceReply && finalText.trim()) {
|
|
@@ -222,15 +411,21 @@ export async function handleMessage(ctx) {
|
|
|
222
411
|
}
|
|
223
412
|
catch (err) {
|
|
224
413
|
const errorMsg = err instanceof Error ? err.message : String(err);
|
|
414
|
+
const lang = session.language;
|
|
225
415
|
await react(ctx, "👎");
|
|
226
|
-
if (
|
|
227
|
-
await ctx.reply("
|
|
416
|
+
if (timedOut) {
|
|
417
|
+
await ctx.reply(t("bot.error.timeoutStuck", lang, { min: STUCK_TIMEOUT_MINUTES }));
|
|
418
|
+
}
|
|
419
|
+
else if (errorMsg.includes("abort")) {
|
|
420
|
+
await ctx.reply(t("bot.error.requestCancelled", lang));
|
|
228
421
|
}
|
|
229
422
|
else {
|
|
230
|
-
await ctx.reply(
|
|
423
|
+
await ctx.reply(`${t("bot.error.prefix", lang)} ${errorMsg}`);
|
|
231
424
|
}
|
|
232
425
|
}
|
|
233
426
|
finally {
|
|
427
|
+
if (stuckTimer)
|
|
428
|
+
clearTimeout(stuckTimer);
|
|
234
429
|
clearInterval(typingInterval);
|
|
235
430
|
session.isProcessing = false;
|
|
236
431
|
session.abortController = null;
|