alvin-bot 4.5.0 → 4.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. package/CHANGELOG.md +150 -0
  2. package/README.md +25 -2
  3. package/alvin-bot-4.5.1.tgz +0 -0
  4. package/bin/cli.js +246 -0
  5. package/dist/handlers/commands.js +461 -63
  6. package/dist/handlers/message.js +209 -14
  7. package/dist/i18n.js +470 -13
  8. package/dist/index.js +44 -5
  9. package/dist/providers/claude-sdk-provider.js +106 -14
  10. package/dist/providers/ollama-provider.js +32 -0
  11. package/dist/providers/openai-compatible.js +10 -1
  12. package/dist/providers/registry.js +112 -17
  13. package/dist/providers/types.js +25 -3
  14. package/dist/services/compaction.js +2 -0
  15. package/dist/services/cron.js +53 -42
  16. package/dist/services/heartbeat.js +41 -7
  17. package/dist/services/language-detect.js +12 -2
  18. package/dist/services/ollama-manager.js +339 -0
  19. package/dist/services/personality.js +20 -14
  20. package/dist/services/session.js +21 -3
  21. package/dist/services/subagent-delivery.js +111 -0
  22. package/dist/services/subagents.js +341 -27
  23. package/dist/services/telegram.js +28 -1
  24. package/dist/services/updater.js +158 -0
  25. package/dist/services/usage-tracker.js +11 -4
  26. package/dist/services/users.js +2 -1
  27. package/dist/tui/index.js +36 -30
  28. package/docs/HANDBOOK.md +819 -0
  29. package/package.json +7 -2
  30. package/test/claude-sdk-provider.test.ts +69 -0
  31. package/test/i18n.test.ts +108 -0
  32. package/test/registry.test.ts +201 -0
  33. package/test/subagent-delivery.test.ts +169 -0
  34. package/test/subagents-commands.test.ts +64 -0
  35. package/test/subagents-config.test.ts +108 -0
  36. package/test/subagents-depth.test.ts +58 -0
  37. package/test/subagents-inheritance.test.ts +67 -0
  38. package/test/subagents-name-resolver.test.ts +122 -0
  39. package/test/subagents-priority-reject.test.ts +60 -0
  40. package/test/subagents-shutdown.test.ts +126 -0
  41. package/test/subagents-toolset.test.ts +51 -0
  42. package/vitest.config.ts +17 -0
@@ -13,6 +13,87 @@ import { shouldCompact, compactSession } from "../services/compaction.js";
13
13
  import { emit } from "../services/hooks.js";
14
14
  import { trackUsage } from "../services/usage-tracker.js";
15
15
  import { emitUserMessage as broadcastUserMessage, emitResponseStart as broadcastResponseStart, emitResponseDelta as broadcastResponseDelta, emitResponseDone as broadcastResponseDone, } from "../services/broadcast.js";
16
+ import { t } from "../i18n.js";
17
/**
 * Stuck-only timeout — NO absolute cap.
 *
 * Alvin is designed to work as long as it needs to, including overnight
 * on multi-hour tasks. The ONLY condition under which we abort a running
 * query is when Claude produces no chunks at all for STUCK_TIMEOUT_MINUTES
 * — that's a genuine hang, not legitimate work. Every text chunk and
 * tool_use chunk resets this timer, so an actively-progressing task will
 * never be cut off regardless of total duration.
 *
 * Previous design had an additional 30-minute absolute cap that violated
 * this "work as long as needed" character. Removed entirely — only the
 * stuck detector remains.
 *
 * Configurable via ALVIN_STUCK_TIMEOUT_MINUTES env var. Default 10 minutes,
 * which is generous for normal work (Claude typically streams chunks every
 * few seconds) but still catches real deadlocks quickly.
 */
const DEFAULT_STUCK_TIMEOUT_MINUTES = 10;
/** Largest delay (ms) a Node.js timer accepts. Anything above it — and
 * anything below 1 — is coerced to 1 ms, which would abort every query
 * almost immediately instead of "practically never". */
const MAX_TIMER_DELAY_MS = 2147483647;
/**
 * Turn the raw env value into a usable number of minutes.
 *
 * @param {string | undefined} raw - Raw ALVIN_STUCK_TIMEOUT_MINUTES value.
 * @returns {number} The configured minutes; the default when the value is
 *   missing, non-numeric, zero or negative; clamped so the derived
 *   millisecond delay never exceeds the timer limit.
 */
function resolveStuckTimeoutMinutes(raw) {
    const minutes = Number(raw);
    if (Number.isNaN(minutes) || minutes <= 0) {
        return DEFAULT_STUCK_TIMEOUT_MINUTES;
    }
    // Whole minutes only at the upper bound so minutes * 60000 stays in range.
    const maxMinutes = Math.floor(MAX_TIMER_DELAY_MS / (60 * 1000));
    return Math.min(minutes, maxMinutes);
}
const STUCK_TIMEOUT_MINUTES = resolveStuckTimeoutMinutes(process.env.ALVIN_STUCK_TIMEOUT_MINUTES);
const STUCK_TIMEOUT_MS = STUCK_TIMEOUT_MINUTES * 60 * 1000;
/** Checkpoint reminder thresholds — kept in sync with
 * src/providers/claude-sdk-provider.ts (where the actual hint injection
 * happens). We mirror the check here so the session telemetry knows
 * when the SDK provider would have injected a reminder. */
const CHECKPOINT_TOOL_THRESHOLD = 15;
const CHECKPOINT_MSG_THRESHOLD = 10;
43
/** Maximum characters in the bridge-message preamble that gets prepended
 * to the first post-recovery SDK query. Oldest gap-turns get truncated. */
const BRIDGE_MAX_CHARS = 2500;
/** Maximum characters per individual message in the bridge preamble. */
const BRIDGE_MSG_MAX_CHARS = 500;
/**
 * Build a "catch-up" preamble summarising turns that happened while the
 * SDK was not the active provider (i.e., during a failover to Ollama or
 * a manual /model switch). This gets prepended to the first post-recovery
 * prompt so the SDK sees what its alter-ego did.
 *
 * @param {Array<{role: string, content: unknown}>} fallbackTurns - History
 *   entries exchanged with the fallback model, oldest first.
 * @returns {string} The preamble text, or "" when there is nothing to bridge
 *   (empty, missing or non-array input).
 */
function buildBridgeMessage(fallbackTurns) {
    if (!Array.isArray(fallbackTurns) || fallbackTurns.length === 0) {
        return "";
    }
    // Cap one message at BRIDGE_MSG_MAX_CHARS without cutting a surrogate
    // pair in half — a lone high surrogate would corrupt the prompt text.
    const clip = (text) => {
        if (text.length <= BRIDGE_MSG_MAX_CHARS) {
            return text;
        }
        let end = BRIDGE_MSG_MAX_CHARS;
        const lastUnit = text.charCodeAt(end - 1);
        if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
            end -= 1;
        }
        return `${text.slice(0, end)}…`;
    };
    const renderTurn = (m) => {
        const label = m.role === "user" ? "User" : "Assistant (Fallback)";
        // History content is expected to be a string; tolerate anything else
        // instead of throwing on `.length` of null/undefined.
        const raw = m.content;
        const content = typeof raw === "string" ? raw : raw == null ? "" : String(raw);
        return `${label}: ${clip(content)}`;
    };
    // Start with all turns rendered, then trim from the oldest if we exceed
    // budget. At least two turns are always kept.
    const lines = fallbackTurns.map(renderTurn);
    let body = lines.join("\n\n");
    let truncatedOldest = 0;
    while (body.length > BRIDGE_MAX_CHARS && lines.length > 2) {
        lines.shift();
        truncatedOldest++;
        body = lines.join("\n\n");
    }
    const omittedNote = truncatedOldest > 0
        ? `[…${truncatedOldest} older turn(s) omitted…]\n\n`
        : "";
    const count = fallbackTurns.length;
    return (`[Context: While you (Claude) were briefly not the active provider, ` +
        `the following ${count} message(s) were exchanged with a fallback model. ` +
        `Catching you up:\n\n` +
        omittedNote +
        body +
        `\n\n--- New message from user: ---]\n\n`);
}
84
/** Tool name → emoji. Used to render a status line while Alvin is running
 * tools, so users see real progress instead of an endless typing indicator.
 *
 * Built on a null-prototype object because it is indexed with arbitrary
 * tool names: on a plain object literal a name like "constructor" or
 * "toString" would resolve to an inherited function instead of falling
 * through to the caller's default icon. Frozen since it is a shared
 * constant. */
const TOOL_ICONS = Object.freeze(Object.assign(Object.create(null), {
    Read: "📖",
    Write: "📝",
    Edit: "✏️",
    Bash: "⚡",
    Glob: "🔍",
    Grep: "🔎",
    WebSearch: "🌐",
    WebFetch: "📡",
    Task: "🤖",
}));
16
97
  /** React to a message with an emoji. Silently fails if reactions aren't supported. */
17
98
  async function react(ctx, emoji) {
18
99
  try {
@@ -77,9 +158,25 @@ export async function handleMessage(ctx) {
77
158
  session.abortController = new AbortController();
78
159
  const streamer = new TelegramStreamer(ctx.chat.id, ctx.api, ctx.message?.message_id);
79
160
  let finalText = "";
161
+ let timedOut = false;
80
162
  const typingInterval = setInterval(() => {
81
163
  ctx.api.sendChatAction(ctx.chat.id, "typing").catch(() => { });
82
164
  }, 4000);
165
+ // Stuck-only timer: fires if NO chunks arrive for STUCK_TIMEOUT_MS.
166
+ // Reset on every chunk so any actively-progressing task runs indefinitely.
167
+ // No absolute cap — Alvin is allowed to work as long as needed.
168
+ let stuckTimer = null;
169
+ const resetStuckTimer = () => {
170
+ if (stuckTimer)
171
+ clearTimeout(stuckTimer);
172
+ stuckTimer = setTimeout(() => {
173
+ if (session.abortController && !session.abortController.signal.aborted) {
174
+ timedOut = true;
175
+ session.abortController.abort();
176
+ }
177
+ }, STUCK_TIMEOUT_MS);
178
+ };
179
+ resetStuckTimer();
83
180
  try {
84
181
  // React with 🤔 to show we're thinking
85
182
  await react(ctx, "🤔");
@@ -126,32 +223,69 @@ export async function handleMessage(ctx) {
126
223
  const systemPrompt = (isSDK
127
224
  ? buildSystemPrompt(isSDK, session.language, chatIdStr)
128
225
  : await buildSmartSystemPrompt(isSDK, session.language, text, chatIdStr)) + skillContext;
226
+ // Track the user turn in history regardless of provider type. This keeps
227
+ // the fallback path (Ollama etc.) aware of what was said on SDK turns.
228
+ addToHistory(userId, { role: "user", content: text });
229
+ // Checkpoint telemetry: mirror the SDK provider's threshold check here
230
+ // so session.checkpointHintsInjected reflects reality. The provider
231
+ // evaluates the exact same condition at query time — if it's true,
232
+ // it prepends a [CHECKPOINT] reminder to the prompt.
233
+ if (isSDK) {
234
+ const wouldInjectCheckpoint = session.toolUseCount >= CHECKPOINT_TOOL_THRESHOLD ||
235
+ session.messageCount >= CHECKPOINT_MSG_THRESHOLD;
236
+ if (wouldInjectCheckpoint) {
237
+ session.checkpointHintsInjected++;
238
+ }
239
+ }
240
+ // B2 Bridge-Message: if SDK is active but there are non-SDK turns since
241
+ // the last SDK turn, prepend a catch-up preamble so the SDK sees what
242
+ // happened during the failover. We defensively clamp the index against
243
+ // history bounds in case compaction shrank the array under our feet.
244
+ let bridgedPrompt = text;
245
+ if (isSDK) {
246
+ const anchor = Math.min(session.lastSdkHistoryIndex, session.history.length - 1);
247
+ const gapStart = Math.max(0, anchor + 1);
248
+ // gapEnd excludes the user message we just added (history.length - 1).
249
+ const gapEnd = session.history.length - 1;
250
+ if (gapEnd > gapStart) {
251
+ const gapTurns = session.history.slice(gapStart, gapEnd);
252
+ const bridge = buildBridgeMessage(gapTurns);
253
+ if (bridge) {
254
+ bridgedPrompt = bridge + text;
255
+ console.log(`[bridge] SDK recovery: injecting ${gapTurns.length} fallback turn(s) into prompt`);
256
+ }
257
+ }
258
+ }
129
259
  const queryOpts = {
130
- prompt: text,
260
+ prompt: bridgedPrompt,
131
261
  systemPrompt,
132
262
  workingDir: session.workingDir,
133
263
  effort: session.effort,
134
264
  abortSignal: session.abortController.signal,
265
+ // User's UI locale — registry uses it to localize failure messages.
266
+ locale: session.language,
135
267
  // SDK-specific
136
268
  sessionId: isSDK ? session.sessionId : null,
137
- // Non-SDK: include conversation history
138
- history: !isSDK ? session.history : undefined,
269
+ // Unified history: SDK ignores it (uses filesystem-resume instead),
270
+ // non-SDK providers use it for context. Keeping it populated for both
271
+ // means a failover from SDK → Ollama keeps the conversation context.
272
+ history: session.history,
139
273
  // SDK checkpoint tracking
140
274
  _sessionState: isSDK ? {
141
275
  messageCount: session.messageCount,
142
276
  toolUseCount: session.toolUseCount,
143
277
  } : undefined,
144
278
  };
145
- // Add user message to history (for non-SDK providers)
146
- if (!isSDK) {
147
- addToHistory(userId, { role: "user", content: text });
148
- }
149
279
  // Stream response from provider (with fallback)
150
280
  let lastBroadcastLen = 0;
151
281
  for await (const chunk of registry.queryWithFallback(queryOpts)) {
282
+ // Any chunk is progress — reset the stuck timer.
283
+ resetStuckTimer();
152
284
  switch (chunk.type) {
153
285
  case "text":
154
286
  finalText = chunk.text || "";
287
+ // Clear any tool-use status line — real content is flowing now.
288
+ streamer.setStatus(null);
155
289
  await streamer.update(finalText);
156
290
  // Emit the new delta for observers — accumulated text minus what
157
291
  // we already broadcast.
@@ -168,9 +302,42 @@ export async function handleMessage(ctx) {
168
302
  }
169
303
  break;
170
304
  case "tool_use":
171
- // Could show tool activity indicator
305
+ // Surface the active tool so users see real progress instead of
306
+ // an endless typing indicator. The streamer renders this as a
307
+ // dim italic footer under any accumulated text.
172
308
  if (chunk.toolName) {
173
309
  session.toolUseCount++;
310
+ const icon = TOOL_ICONS[chunk.toolName] || "🔧";
311
+ // Special treatment for Claude's SDK-internal Task tool:
312
+ // track how many sub-tasks Claude delegated and surface the
313
+ // task description in the status line so the user sees WHAT
314
+ // is being delegated, not just "Task…".
315
+ if (chunk.toolName === "Task") {
316
+ session.sdkSubTaskCount++;
317
+ let label = "Task";
318
+ if (chunk.toolInput) {
319
+ try {
320
+ const parsed = JSON.parse(chunk.toolInput);
321
+ if (parsed.description) {
322
+ // Trim long descriptions so the status line stays readable
323
+ const desc = parsed.description.length > 80
324
+ ? parsed.description.slice(0, 80) + "…"
325
+ : parsed.description;
326
+ label = `Task: ${desc}`;
327
+ }
328
+ else if (parsed.subagent_type) {
329
+ label = `Task (${parsed.subagent_type})`;
330
+ }
331
+ }
332
+ catch {
333
+ // not JSON — keep generic label
334
+ }
335
+ }
336
+ streamer.setStatus(`${icon} ${label}…`);
337
+ }
338
+ else {
339
+ streamer.setStatus(`${icon} ${chunk.toolName}…`);
340
+ }
174
341
  }
175
342
  break;
176
343
  case "done":
@@ -178,6 +345,13 @@ export async function handleMessage(ctx) {
178
345
  session.sessionId = chunk.sessionId;
179
346
  if (chunk.costUsd)
180
347
  session.totalCost += chunk.costUsd;
348
+ // Track the input tokens this turn used — this approximates the
349
+ // current context window usage since the model receives the full
350
+ // conversation context on every turn. Used for the Context:X/Y
351
+ // progress meter in /status.
352
+ if (typeof chunk.inputTokens === "number" && chunk.inputTokens > 0) {
353
+ session.lastTurnInputTokens = chunk.inputTokens;
354
+ }
181
355
  trackProviderUsage(userId, registry.getActiveKey(), chunk.costUsd || 0, chunk.inputTokens, chunk.outputTokens);
182
356
  trackUsage(registry.getActiveKey(), chunk.inputTokens || 0, chunk.outputTokens || 0, chunk.costUsd || 0);
183
357
  session.lastActivity = Date.now();
@@ -186,7 +360,16 @@ export async function handleMessage(ctx) {
186
360
  await ctx.reply(`⚡ _${chunk.failedProvider} unavailable — switching to ${chunk.providerName}_`, { parse_mode: "Markdown" });
187
361
  break;
188
362
  case "error":
189
- await ctx.reply(`Error: ${chunk.error}`);
363
+ // If our stuck-timer fired, the abort travels up as a registry
364
+ // mid-stream error chunk. Prefer the explicit stuck message over
365
+ // the generic one so the user understands this was a real hang,
366
+ // not a random error.
367
+ if (timedOut) {
368
+ await ctx.reply(t("bot.error.timeoutStuck", session.language, { min: STUCK_TIMEOUT_MINUTES }));
369
+ }
370
+ else {
371
+ await ctx.reply(`${t("bot.error.prefix", session.language)} ${chunk.error}`);
372
+ }
190
373
  break;
191
374
  }
192
375
  }
@@ -203,9 +386,15 @@ export async function handleMessage(ctx) {
203
386
  });
204
387
  // Clear thinking reaction (replace with nothing — message was answered)
205
388
  await react(ctx, "👍");
206
- // Add assistant response to history (for non-SDK providers)
207
- if (!isSDK && finalText) {
389
+ // Track the assistant turn in history regardless of provider type
390
+ // (unified history for seamless failover between SDK and Ollama).
391
+ if (finalText) {
208
392
  addToHistory(userId, { role: "assistant", content: finalText });
393
+ // Advance the B2 bridge anchor to the assistant turn we just added,
394
+ // so the next SDK turn only bridges turns that happened AFTER this one.
395
+ if (isSDK) {
396
+ session.lastSdkHistoryIndex = session.history.length - 1;
397
+ }
209
398
  }
210
399
  // Voice reply if enabled
211
400
  if (session.voiceReply && finalText.trim()) {
@@ -222,15 +411,21 @@ export async function handleMessage(ctx) {
222
411
  }
223
412
  catch (err) {
224
413
  const errorMsg = err instanceof Error ? err.message : String(err);
414
+ const lang = session.language;
225
415
  await react(ctx, "👎");
226
- if (errorMsg.includes("abort")) {
227
- await ctx.reply("Anfrage abgebrochen.");
416
+ if (timedOut) {
417
+ await ctx.reply(t("bot.error.timeoutStuck", lang, { min: STUCK_TIMEOUT_MINUTES }));
418
+ }
419
+ else if (errorMsg.includes("abort")) {
420
+ await ctx.reply(t("bot.error.requestCancelled", lang));
228
421
  }
229
422
  else {
230
- await ctx.reply(`Error: ${errorMsg}`);
423
+ await ctx.reply(`${t("bot.error.prefix", lang)} ${errorMsg}`);
231
424
  }
232
425
  }
233
426
  finally {
427
+ if (stuckTimer)
428
+ clearTimeout(stuckTimer);
234
429
  clearInterval(typingInterval);
235
430
  session.isProcessing = false;
236
431
  session.abortController = null;