alvin-bot 5.4.0 → 5.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +25 -0
- package/dist/handlers/commands.js +10 -2
- package/dist/handlers/message.js +74 -16
- package/dist/i18n.js +6 -0
- package/dist/index.js +7 -1
- package/dist/services/async-agent-watcher.js +53 -6
- package/dist/services/subagent-delivery.js +31 -8
- package/dist/services/telegram.js +9 -0
- package/dist/services/trends.js +202 -2
- package/package.json +1 -1
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,31 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to Alvin Bot are documented here.
|
|
4
4
|
|
|
5
|
+
## [5.5.0] — 2026-05-18
|
|
6
|
+
|
|
7
|
+
### The ⛔ Stop button now responds instantly — and honestly
|
|
8
|
+
|
|
9
|
+
Stopping a task is now crisp and truthful. The moment a task finishes,
|
|
10
|
+
the Stop button disappears, so you're never tapping a control for
|
|
11
|
+
something that's already done. And the feedback always matches reality:
|
|
12
|
+
if you tap Stop while Alvin is genuinely working, it stops and says so;
|
|
13
|
+
if the task had already completed, Alvin tells you that plainly instead
|
|
14
|
+
of implying it cut something short. If you hit Stop in that brief moment
|
|
15
|
+
while an answer is being prepared, that answer is now held back — "I
|
|
16
|
+
stopped it" means nothing more arrives. Anything Alvin had already
|
|
17
|
+
shown you stays exactly as it was.
|
|
18
|
+
|
|
19
|
+
### Fewer false alerts — smarter health monitoring
|
|
20
|
+
|
|
21
|
+
Alvin's self-monitoring got a lot more trustworthy. A planned restart
|
|
22
|
+
or an update is no longer mistaken for a problem, and the daily health
|
|
23
|
+
summary only raises a flag when there's real evidence something is
|
|
24
|
+
actually wrong — so the alerts you do get are ones worth reading.
|
|
25
|
+
Routine background housekeeping no longer shows up as noise.
|
|
26
|
+
|
|
27
|
+
As always, this shipped after a full multi-pass review and a
|
|
28
|
+
fresh-install + stress verification on a clean separate machine.
|
|
29
|
+
|
|
5
30
|
## [5.4.0] — 2026-05-18
|
|
6
31
|
|
|
7
32
|
### Smoother background tasks — and Alvin always tells you the truth
|
|
@@ -1946,11 +1946,19 @@ export function registerCommands(bot) {
|
|
|
1946
1946
|
const sessionKey = ctx.match[1];
|
|
1947
1947
|
const session = getSession(sessionKey);
|
|
1948
1948
|
const lang = session.language;
|
|
1949
|
-
|
|
1949
|
+
// A1 — Capture isProcessing BEFORE requestStop (which sets it false)
|
|
1950
|
+
// so we can show the right toast: "stopped" vs "already finished".
|
|
1951
|
+
const wasProcessing = session.isProcessing;
|
|
1952
|
+
if (wasProcessing) {
|
|
1950
1953
|
requestStop(session, "soft", buildStopDeps(session));
|
|
1951
1954
|
}
|
|
1955
|
+
// A1 — Honest toast: if the turn had already finished when the button was
|
|
1956
|
+
// tapped, don't claim "stopped" — tell the user it was already done.
|
|
1957
|
+
const toastKey = wasProcessing
|
|
1958
|
+
? "bot.cancel.stoppedToast"
|
|
1959
|
+
: "bot.cancel.alreadyDone";
|
|
1952
1960
|
try {
|
|
1953
|
-
await ctx.answerCallbackQuery({ text: t(
|
|
1961
|
+
await ctx.answerCallbackQuery({ text: t(toastKey, lang) });
|
|
1954
1962
|
}
|
|
1955
1963
|
catch { /* harmless grammy race */ }
|
|
1956
1964
|
try {
|
package/dist/handlers/message.js
CHANGED
|
@@ -122,6 +122,37 @@ const TOOL_ICONS = {
|
|
|
122
122
|
WebFetch: "📡",
|
|
123
123
|
Task: "🤖",
|
|
124
124
|
};
|
|
125
|
+
// ── A3 — stop-suppress-undelivered pure predicate ────────────────────────────
|
|
126
|
+
/**
|
|
127
|
+
* Determine whether the final answer send should be suppressed because a stop
|
|
128
|
+
* was requested and no visible text has yet been delivered to the user.
|
|
129
|
+
*
|
|
130
|
+
* This closes the gap behind "I clicked Stop but it answered anyway": the
|
|
131
|
+
* Claude SDK delivers short answers atomically, so the for-await loop parks
|
|
132
|
+
* on IPC the whole time, and the complete answer arrives as one block. By the
|
|
133
|
+
* time the consumer bail fires at the top of the loop, the answer is computed
|
|
134
|
+
* and about to be sent. This guard is the only stoppable moment for atomic
|
|
135
|
+
* answers.
|
|
136
|
+
*
|
|
137
|
+
* HARD CONSTRAINT — no-retract invariant: if ANY visible text has already
|
|
138
|
+
* been streamed/committed to the user (visibleTextAlreadySent=true), the
|
|
139
|
+
* predicate returns false regardless of stop state. Partial output that
|
|
140
|
+
* already reached the user is NEVER retracted. The consumer bail in the
|
|
141
|
+
* for-await loop already handles mid-stream stops; this guard only acts on
|
|
142
|
+
* the final commit step.
|
|
143
|
+
*
|
|
144
|
+
* Truth table:
|
|
145
|
+
* stopRequested=truthy + visibleTextAlreadySent=false → true (suppress)
|
|
146
|
+
* stopRequested=truthy + visibleTextAlreadySent=true → false (no-retract)
|
|
147
|
+
* stopRequested=falsy + * → false (normal)
|
|
148
|
+
*/
|
|
149
|
+
export function shouldSuppressFinalSend(args) {
|
|
150
|
+
if (!args.stopRequested)
|
|
151
|
+
return false;
|
|
152
|
+
if (args.visibleTextAlreadySent)
|
|
153
|
+
return false;
|
|
154
|
+
return true;
|
|
155
|
+
}
|
|
125
156
|
// ── v5.2 live steering — pure routing helper ─────────────────────────────────
|
|
126
157
|
/**
|
|
127
158
|
* Decide how a mid-task message (arriving while `session.isProcessing`) should
|
|
@@ -785,19 +816,45 @@ export async function handleMessage(ctx) {
|
|
|
785
816
|
/* harmless — notice is best-effort */
|
|
786
817
|
}
|
|
787
818
|
}
|
|
788
|
-
// v5.1 stop: user stopped this query — do NOT finalize partial output
|
|
789
|
-
// as a successful answer, no 👍, no history commit. The stop trigger
|
|
790
|
-
// (/cancel | /stopall | ⛔ button) already acknowledged to the user.
|
|
791
|
-
// The `finally` still runs (clears isProcessing/_qHandle/_stopRequested
|
|
792
|
-
// + typing indicator).
|
|
793
|
-
if (session._stopRequested) {
|
|
794
|
-
return;
|
|
795
|
-
}
|
|
796
819
|
if (bypassAborted) {
|
|
797
820
|
// v4.12.3 — Bypass path took over; don't finalize, don't react 👍.
|
|
798
821
|
// Just clean up and return. The finally block still fires.
|
|
799
822
|
return;
|
|
800
823
|
}
|
|
824
|
+
// A3 — Suppress-or-finalize gate for stopped turns.
|
|
825
|
+
//
|
|
826
|
+
// shouldSuppressFinalSend is the SINGLE gate controlling whether finalize runs:
|
|
827
|
+
//
|
|
828
|
+
// stop + no visible text (suppress=true):
|
|
829
|
+
// Skip finalize and all side-effects. Nothing reached the user — correct.
|
|
830
|
+
// The stop trigger (/cancel | /stopall | ⛔) already acknowledged this.
|
|
831
|
+
// The `finally` still runs (clears isProcessing/_qHandle/_stopRequested
|
|
832
|
+
// + typing indicator).
|
|
833
|
+
//
|
|
834
|
+
// stop + visible text already sent (suppress=false, _stopRequested truthy):
|
|
835
|
+
// The no-retract invariant applies — partial output already shown must not
|
|
836
|
+
// be left visually unfinished. Run streamer.finalize to flush the throttle
|
|
837
|
+
// timer and drop the status line, then return BEFORE the completed-answer
|
|
838
|
+
// side-effects (👍 / broadcastResponseDone / addToHistory). A stopped turn
|
|
839
|
+
// is NOT a successfully completed turn.
|
|
840
|
+
//
|
|
841
|
+
// no stop (suppress=false, _stopRequested falsy):
|
|
842
|
+
// Normal path — fall through to finalize + all side-effects.
|
|
843
|
+
if (shouldSuppressFinalSend({
|
|
844
|
+
stopRequested: session._stopRequested,
|
|
845
|
+
visibleTextAlreadySent: streamer.hasSentText,
|
|
846
|
+
})) {
|
|
847
|
+
// Branch A: stop + no visible text → suppress entirely.
|
|
848
|
+
return;
|
|
849
|
+
}
|
|
850
|
+
if (session._stopRequested && streamer.hasSentText) {
|
|
851
|
+
// Branch B: stop + visible text already sent → finalize the partial cleanly
|
|
852
|
+
// (flushes throttle timer, clears status line) but do NOT emit the
|
|
853
|
+
// completed-answer signals or commit to history.
|
|
854
|
+
await streamer.finalize(finalText);
|
|
855
|
+
return;
|
|
856
|
+
}
|
|
857
|
+
// Branch C: normal (no stop) — fall through.
|
|
801
858
|
await streamer.finalize(finalText);
|
|
802
859
|
emit("message:sent", { userId, text: finalText, platform: "telegram" });
|
|
803
860
|
// v4.5.0: tell observers the response is complete.
|
|
@@ -874,6 +931,15 @@ export async function handleMessage(ctx) {
|
|
|
874
931
|
// but if a new turn started and re-populated _qHandle via onQueryHandle we
|
|
875
932
|
// must NOT null it here — that would break Cycle-1 stop teeth for the new turn.
|
|
876
933
|
if (session._turnId === _thisTurnId) {
|
|
934
|
+
// A2 — Remove the ⛔ Stop control message as the FIRST action when the
|
|
935
|
+
// turn ends, so the stale button disappears before any post-turn work.
|
|
936
|
+
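// Best-effort: if it was already deleted or the bot lacks permission, ignore. NOTE(review): this now runs only when session._turnId === _thisTurnId; the removed v5.1 code deleted unconditionally after the guard, so a superseded turn may leave its ⛔ message behind — confirm intended.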
|
|
937
|
+
if (stopMsgId !== null) {
|
|
938
|
+
try {
|
|
939
|
+
await ctx.api.deleteMessage(ctx.chat.id, stopMsgId);
|
|
940
|
+
}
|
|
941
|
+
catch { /* harmless grammy race */ }
|
|
942
|
+
}
|
|
877
943
|
session.isProcessing = false;
|
|
878
944
|
session.abortController = null;
|
|
879
945
|
// v5.2 — Close and clear the SteerChannel; reset per-turn ack flag.
|
|
@@ -887,14 +953,6 @@ export async function handleMessage(ctx) {
|
|
|
887
953
|
session._stopRequested = null; // safe: token matches → no newer turn has set this
|
|
888
954
|
session._turnId = null;
|
|
889
955
|
}
|
|
890
|
-
// v5.1 — Remove the ⛔ Stop control message (sent at processing start).
|
|
891
|
-
// Best-effort: if it was already deleted or the bot lacks permission, ignore.
|
|
892
|
-
if (stopMsgId !== null) {
|
|
893
|
-
try {
|
|
894
|
-
await ctx.api.deleteMessage(ctx.chat.id, stopMsgId);
|
|
895
|
-
}
|
|
896
|
-
catch { /* harmless grammy race */ }
|
|
897
|
-
}
|
|
898
956
|
// Check for queued messages — they'll be prepended to the next real message
|
|
899
957
|
// Queue stays in session and gets consumed on next handleMessage call
|
|
900
958
|
}
|
package/dist/i18n.js
CHANGED
|
@@ -378,6 +378,12 @@ const strings = {
|
|
|
378
378
|
es: "⛔ Detenido",
|
|
379
379
|
fr: "⛔ Arrêté",
|
|
380
380
|
},
|
|
381
|
+
"bot.cancel.alreadyDone": {
|
|
382
|
+
en: "Nothing running — that already finished.",
|
|
383
|
+
de: "Nichts läuft — das war schon fertig.",
|
|
384
|
+
es: "Nada en curso — eso ya terminó.",
|
|
385
|
+
fr: "Rien en cours — c'était déjà terminé.",
|
|
386
|
+
},
|
|
381
387
|
// /model
|
|
382
388
|
"bot.model.chooseHeader": {
|
|
383
389
|
en: "🤖 *Choose model:*",
|
package/dist/index.js
CHANGED
|
@@ -187,7 +187,7 @@ import { loadSkills } from "./services/skills.js";
|
|
|
187
187
|
import { loadHooks } from "./services/hooks.js";
|
|
188
188
|
import { registerShutdownHandler } from "./services/restart.js";
|
|
189
189
|
import { cancelAllSubAgents } from "./services/subagents.js";
|
|
190
|
-
import { startWatchdog, stopWatchdog, checkCrashLoopBrake } from "./services/watchdog.js";
|
|
190
|
+
import { startWatchdog, stopWatchdog, checkCrashLoopBrake, markExpectedRestart } from "./services/watchdog.js";
|
|
191
191
|
import { getRegistry } from "./engine.js";
|
|
192
192
|
import { scanAssets } from "./services/asset-index.js";
|
|
193
193
|
// Scan asset directory and generate INDEX.json + INDEX.md
|
|
@@ -383,6 +383,12 @@ const shutdown = async () => {
|
|
|
383
383
|
return;
|
|
384
384
|
isShuttingDown = true;
|
|
385
385
|
console.log("Graceful shutdown initiated...");
|
|
386
|
+
// Mark the imminent exit as an intentional restart so the next boot's
|
|
387
|
+
// decideBrakeAction does not count it as a crash. This covers launchctl
|
|
388
|
+
// unload/load (SIGTERM from launchd) in addition to /restart and /update
|
|
389
|
+
// which call markExpectedRestart() themselves before process.exit(0).
|
|
390
|
+
// Must run before stopWatchdog() (which just clears timers, not the beacon).
|
|
391
|
+
markExpectedRestart();
|
|
386
392
|
// E2: shutdown-notification — await the async cancellation so running
|
|
387
393
|
// agents can post a cancellation message to Telegram before the bot
|
|
388
394
|
// stops. Capped at 5s internally so a hang can't block shutdown.
|
|
@@ -27,6 +27,25 @@ import { dirname } from "path";
|
|
|
27
27
|
import { parseOutputFileStatus } from "./async-agent-parser.js";
|
|
28
28
|
import { ASYNC_AGENTS_STATE_FILE } from "../paths.js";
|
|
29
29
|
import { getAllSessions } from "./session.js";
|
|
30
|
+
/**
|
|
31
|
+
* B3 — Detect a permanent "target chat does not exist" delivery failure
|
|
32
|
+
* (Telegram 400 "Bad Request: chat not found"), e.g. the stale chat_id:1
|
|
33
|
+
* test agent. Such an agent must be abandoned, not retried forever.
|
|
34
|
+
*
|
|
35
|
+
* Kept as a local predicate (mirrors isChatNotFoundError in
|
|
36
|
+
* subagent-delivery.ts) so the watcher does NOT take a new hard
|
|
37
|
+
* dependency on a fresh subagent-delivery export — many test suites mock
|
|
38
|
+
* that module with only deliverSubAgentResult, and a destructured import
|
|
39
|
+
* of a non-mocked symbol would throw. Matched narrowly on the
|
|
40
|
+
* chat-not-found signature only.
|
|
41
|
+
*/
|
|
42
|
+
function isChatNotFoundError(err) {
|
|
43
|
+
if (!err || typeof err !== "object")
|
|
44
|
+
return false;
|
|
45
|
+
const e = err;
|
|
46
|
+
const haystack = `${e.message ?? ""} ${e.description ?? ""}`;
|
|
47
|
+
return /chat not found/i.test(haystack);
|
|
48
|
+
}
|
|
30
49
|
/** How often the polling loop runs against each pending agent. */
|
|
31
50
|
const POLL_INTERVAL_MS = 15_000;
|
|
32
51
|
/** Hard ceiling per agent — 12h. After this, give up and deliver
|
|
@@ -199,22 +218,38 @@ export async function pollOnce() {
|
|
|
199
218
|
const now = Date.now();
|
|
200
219
|
const toRemove = [];
|
|
201
220
|
const missingFileFailureMs = getMissingFileFailureMs();
|
|
221
|
+
// B3 — when a delivery attempt proves the target chat is permanently
|
|
222
|
+
// invalid ("chat not found", e.g. the stale chat_id:1 test agent),
|
|
223
|
+
// abandon the agent so the watcher never retries it. Without this, a
|
|
224
|
+
// pending agent with an invalid target spams stderr on every poll
|
|
225
|
+
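// cycle (inflating errors_24h) and lingers until the 12h giveUpAt. NOTE(review): each call site below also pushes entry.agentId to toRemove unconditionally right after this helper, so the includes() guard cannot prevent a duplicate entry — confirm the consumer of toRemove tolerates duplicates.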
|
|
226
|
+
const abandonIfInvalidTarget = (entry, outcome) => {
|
|
227
|
+
if (!outcome.chatNotFound)
|
|
228
|
+
return;
|
|
229
|
+
if (!toRemove.includes(entry.agentId))
|
|
230
|
+
toRemove.push(entry.agentId);
|
|
231
|
+
console.warn(`[async-watcher] abandoning agent ${entry.agentId} — delivery target ` +
|
|
232
|
+
`chat ${String(entry.chatId)} not found (invalid/stale); will not retry`);
|
|
233
|
+
};
|
|
202
234
|
for (const entry of pending.values()) {
|
|
203
235
|
entry.lastCheckedAt = now;
|
|
204
236
|
// Timeout check first — if the agent is past its giveUpAt, give up
|
|
205
237
|
// regardless of whether the file shows progress.
|
|
206
238
|
if (now >= entry.giveUpAt) {
|
|
207
|
-
await deliverAsFailure(entry, "timeout", "Agent ran longer than 12h — giving up");
|
|
239
|
+
const outcome = await deliverAsFailure(entry, "timeout", "Agent ran longer than 12h — giving up");
|
|
240
|
+
abandonIfInvalidTarget(entry, outcome);
|
|
208
241
|
toRemove.push(entry.agentId);
|
|
209
242
|
continue;
|
|
210
243
|
}
|
|
211
244
|
const status = await parseOutputFileStatus(entry.outputFile);
|
|
212
245
|
if (status.state === "completed") {
|
|
213
|
-
await deliverAsCompleted(entry, status.output, status.tokensUsed);
|
|
246
|
+
const outcome = await deliverAsCompleted(entry, status.output, status.tokensUsed);
|
|
247
|
+
abandonIfInvalidTarget(entry, outcome);
|
|
214
248
|
toRemove.push(entry.agentId);
|
|
215
249
|
}
|
|
216
250
|
else if (status.state === "failed") {
|
|
217
|
-
await deliverAsFailure(entry, "error", status.error);
|
|
251
|
+
const outcome = await deliverAsFailure(entry, "error", status.error);
|
|
252
|
+
abandonIfInvalidTarget(entry, outcome);
|
|
218
253
|
toRemove.push(entry.agentId);
|
|
219
254
|
}
|
|
220
255
|
else if (status.state === "missing" &&
|
|
@@ -222,7 +257,8 @@ export async function pollOnce() {
|
|
|
222
257
|
// v4.14.2 — Zombie guard: the subprocess never created its
|
|
223
258
|
// output file within `missingFileFailureMs` (default 10 min).
|
|
224
259
|
// Declare failed instead of polling until the 12h giveUpAt.
|
|
225
|
-
await deliverAsFailure(entry, "error", `Dispatched subprocess never wrote its output file (${Math.round((now - entry.startedAt) / 60_000)}m after start). Likely crashed before initializing, or the file was removed externally.`);
|
|
260
|
+
const outcome = await deliverAsFailure(entry, "error", `Dispatched subprocess never wrote its output file (${Math.round((now - entry.startedAt) / 60_000)}m after start). Likely crashed before initializing, or the file was removed externally.`);
|
|
261
|
+
abandonIfInvalidTarget(entry, outcome);
|
|
226
262
|
toRemove.push(entry.agentId);
|
|
227
263
|
}
|
|
228
264
|
// running / missing-but-young → keep polling next cycle
|
|
@@ -254,13 +290,20 @@ async function deliverAsCompleted(entry, output, tokensUsed) {
|
|
|
254
290
|
tokensUsed: tokensUsed ?? { input: 0, output: 0 },
|
|
255
291
|
duration: Date.now() - entry.startedAt,
|
|
256
292
|
};
|
|
293
|
+
let chatNotFound = false;
|
|
257
294
|
try {
|
|
258
|
-
await deliverSubAgentResult(info, result);
|
|
295
|
+
const outcome = await deliverSubAgentResult(info, result);
|
|
296
|
+
chatNotFound = !!outcome?.chatNotFound;
|
|
259
297
|
}
|
|
260
298
|
catch (err) {
|
|
261
299
|
console.error(`[async-watcher] delivery failed for ${entry.agentId}:`, err);
|
|
300
|
+
// deliverSubAgentResult normally swallows send errors and reports
|
|
301
|
+
// chatNotFound via its return value; if it ever throws, still detect
|
|
302
|
+
// the permanent invalid-target case here.
|
|
303
|
+
chatNotFound = isChatNotFoundError(err);
|
|
262
304
|
}
|
|
263
305
|
decrementPendingCount(entry.sessionKey);
|
|
306
|
+
return { chatNotFound };
|
|
264
307
|
}
|
|
265
308
|
async function deliverAsFailure(entry, status, error) {
|
|
266
309
|
const { deliverSubAgentResult } = await import("./subagent-delivery.js");
|
|
@@ -283,13 +326,17 @@ async function deliverAsFailure(entry, status, error) {
|
|
|
283
326
|
duration: Date.now() - entry.startedAt,
|
|
284
327
|
error,
|
|
285
328
|
};
|
|
329
|
+
let chatNotFound = false;
|
|
286
330
|
try {
|
|
287
|
-
await deliverSubAgentResult(info, result);
|
|
331
|
+
const outcome = await deliverSubAgentResult(info, result);
|
|
332
|
+
chatNotFound = !!outcome?.chatNotFound;
|
|
288
333
|
}
|
|
289
334
|
catch (err) {
|
|
290
335
|
console.error(`[async-watcher] failure delivery failed for ${entry.agentId}:`, err);
|
|
336
|
+
chatNotFound = isChatNotFoundError(err);
|
|
291
337
|
}
|
|
292
338
|
decrementPendingCount(entry.sessionKey);
|
|
339
|
+
return { chatNotFound };
|
|
293
340
|
}
|
|
294
341
|
// ── Test helpers ──────────────────────────────────────────────────
|
|
295
342
|
/**
|
|
@@ -24,6 +24,22 @@ function isTelegramParseError(err) {
|
|
|
24
24
|
const haystack = `${e.message ?? ""} ${e.description ?? ""}`;
|
|
25
25
|
return /can't parse entities|can't find end of the entity/i.test(haystack);
|
|
26
26
|
}
|
|
27
|
+
/**
|
|
28
|
+
* B3 — A Telegram send rejected because the TARGET CHAT DOES NOT EXIST
|
|
29
|
+
* (HTTP 400 "Bad Request: chat not found"). This is a permanent,
|
|
30
|
+
* non-recoverable condition: the chat id is invalid (e.g. the stale
|
|
31
|
+
* chat_id:1 test agent), so every retry will fail identically and just
|
|
32
|
+
* spam stderr. Distinct from transient failures (network, rate-limit)
|
|
33
|
+
* which ARE worth retrying. Matched narrowly on the chat-not-found
|
|
34
|
+
* signature only — never on generic Bad Request.
|
|
35
|
+
*/
|
|
36
|
+
export function isChatNotFoundError(err) {
|
|
37
|
+
if (!err || typeof err !== "object")
|
|
38
|
+
return false;
|
|
39
|
+
const e = err;
|
|
40
|
+
const haystack = `${e.message ?? ""} ${e.description ?? ""}`;
|
|
41
|
+
return /chat not found/i.test(haystack);
|
|
42
|
+
}
|
|
27
43
|
/**
|
|
28
44
|
* Send a Markdown message with an automatic plain-text retry on parse
|
|
29
45
|
* errors. Any other error propagates to the caller's outer catch.
|
|
@@ -251,28 +267,29 @@ export function createLiveStream(chatId, agentName) {
|
|
|
251
267
|
* - "slack" / "discord" / "whatsapp" → delivery-registry lookup
|
|
252
268
|
*/
|
|
253
269
|
export async function deliverSubAgentResult(info, result, opts = {}) {
|
|
270
|
+
const OK = { chatNotFound: false };
|
|
254
271
|
// Implicit spawns: the Task-tool bridge in the main stream has already
|
|
255
272
|
// surfaced the output; extra delivery would be duplication.
|
|
256
273
|
if (info.source === "implicit")
|
|
257
|
-
return;
|
|
274
|
+
return OK;
|
|
258
275
|
const effective = opts.visibility ?? getVisibility();
|
|
259
276
|
if (effective === "silent")
|
|
260
|
-
return;
|
|
277
|
+
return OK;
|
|
261
278
|
if (!info.parentChatId) {
|
|
262
279
|
console.warn(`[subagent-delivery] missing parentChatId for ${info.name} (source=${info.source})`);
|
|
263
|
-
return;
|
|
280
|
+
return OK;
|
|
264
281
|
}
|
|
265
282
|
// v4.14 — Platform routing. Telegram is the default path (unchanged).
|
|
266
283
|
const platform = info.platform ?? "telegram";
|
|
267
284
|
if (platform !== "telegram") {
|
|
268
285
|
await deliverViaRegistry(platform, info, result);
|
|
269
|
-
return;
|
|
286
|
+
return OK;
|
|
270
287
|
}
|
|
271
288
|
// ── Telegram path (v4.12.x behavior, unchanged) ──────────────────
|
|
272
289
|
const api = getBotApi();
|
|
273
290
|
if (!api) {
|
|
274
291
|
console.warn(`[subagent-delivery] no bot api available for ${info.name}`);
|
|
275
|
-
return;
|
|
292
|
+
return OK;
|
|
276
293
|
}
|
|
277
294
|
// Telegram's chatId is always a number at runtime; defensive cast.
|
|
278
295
|
const tgChatId = typeof info.parentChatId === "number"
|
|
@@ -280,7 +297,7 @@ export async function deliverSubAgentResult(info, result, opts = {}) {
|
|
|
280
297
|
: Number(info.parentChatId);
|
|
281
298
|
if (!Number.isFinite(tgChatId)) {
|
|
282
299
|
console.warn(`[subagent-delivery] invalid telegram chatId for ${info.name}`);
|
|
283
|
-
return;
|
|
300
|
+
return OK;
|
|
284
301
|
}
|
|
285
302
|
const banner = buildBanner(info, result);
|
|
286
303
|
const body = result.output?.trim() || `(empty output)`;
|
|
@@ -297,12 +314,12 @@ export async function deliverSubAgentResult(info, result, opts = {}) {
|
|
|
297
314
|
console.error(`[subagent-delivery] file upload failed:`, err);
|
|
298
315
|
await api.sendMessage(tgChatId, body.slice(0, MAX_TG_CHUNK));
|
|
299
316
|
}
|
|
300
|
-
return;
|
|
317
|
+
return OK;
|
|
301
318
|
}
|
|
302
319
|
// Case 2: fits in a single message → banner + body joined
|
|
303
320
|
if (body.length + banner.length + 2 <= MAX_TG_CHUNK) {
|
|
304
321
|
await sendWithMarkdownFallback(api, tgChatId, `${banner}\n\n${body}`);
|
|
305
|
-
return;
|
|
322
|
+
return OK;
|
|
306
323
|
}
|
|
307
324
|
// Case 3: medium output → banner as its own message, body chunked
|
|
308
325
|
await sendWithMarkdownFallback(api, tgChatId, banner);
|
|
@@ -311,9 +328,15 @@ export async function deliverSubAgentResult(info, result, opts = {}) {
|
|
|
311
328
|
// arbitrary chunk boundaries would be inconsistent anyway.
|
|
312
329
|
await api.sendMessage(tgChatId, body.slice(i, i + MAX_TG_CHUNK));
|
|
313
330
|
}
|
|
331
|
+
return OK;
|
|
314
332
|
}
|
|
315
333
|
catch (err) {
|
|
316
334
|
console.error(`[subagent-delivery] send failed for ${info.name}:`, err);
|
|
335
|
+
// B3 — report a permanent invalid-target failure so the watcher can
|
|
336
|
+
// abandon this agent instead of retrying it forever. Any other error
|
|
337
|
+
// (network, rate-limit, parse) is NOT reported as chatNotFound, so the
|
|
338
|
+
// agent's normal retry/timeout lifecycle is unchanged.
|
|
339
|
+
return { chatNotFound: isChatNotFoundError(err) };
|
|
317
340
|
}
|
|
318
341
|
}
|
|
319
342
|
/**
|
|
@@ -17,6 +17,15 @@ export class TelegramStreamer {
|
|
|
17
17
|
this.api = api;
|
|
18
18
|
this.replyTo = replyToMessageId;
|
|
19
19
|
}
|
|
20
|
+
/**
|
|
21
|
+
* True when at least one message has been sent to the user (i.e. messageId
|
|
22
|
+
* is set). Used by the A3 suppress-undelivered guard in message.ts to
|
|
23
|
+
* determine whether visible text has already reached the user — if so, the
|
|
24
|
+
* no-retract invariant prevents suppressing the final send.
|
|
25
|
+
*/
|
|
26
|
+
get hasSentText() {
|
|
27
|
+
return this.messageId !== null;
|
|
28
|
+
}
|
|
20
29
|
/**
|
|
21
30
|
* Set a transient status line (e.g. "📖 Read file.html…") that gets
|
|
22
31
|
* appended to the current accumulated text. Passing null clears it.
|
package/dist/services/trends.js
CHANGED
|
@@ -33,12 +33,81 @@
|
|
|
33
33
|
* ALVIN_TRENDS_INTERVAL_HOURS=24 → snapshot cadence
|
|
34
34
|
* ALVIN_TRENDS_AI_AFTER_DAYS=7 → days of data before AI analysis kicks in
|
|
35
35
|
*/
|
|
36
|
-
import { appendFileSync, existsSync, readFileSync, mkdirSync } from "fs";
|
|
36
|
+
import { appendFileSync, existsSync, readFileSync, writeFileSync, mkdirSync } from "fs";
|
|
37
37
|
import { join, dirname } from "path";
|
|
38
38
|
import { homedir } from "os";
|
|
39
39
|
import { BOT_VERSION } from "../version.js";
|
|
40
40
|
import { emitCritical } from "./critical-notify.js";
|
|
41
41
|
const TRENDS_PATH = join(homedir(), ".alvin-bot", "state", "trends.jsonl");
|
|
42
|
+
/**
|
|
43
|
+
* B2 — peak-uptime high-water mark. The trends collector takes its FIRST
|
|
44
|
+
* snapshot ~60s after every boot (startTrendsCollector schedules it at
|
|
45
|
+
* 60_000ms). takeSnapshot() records uptime_s = process.uptime(), so the
|
|
46
|
+
* first post-restart sample is structurally ≈ 62s. With deliberate
|
|
47
|
+
* restarts (/update, launchctl reload) those ~62s samples dominate
|
|
48
|
+
* trends.jsonl, so the 30-day AI pass perpetually concludes "restart
|
|
49
|
+
* loop, never lives past ~62s" even when the process has actually been
|
|
50
|
+
* continuously up for hours by the time the daily snapshot fires.
|
|
51
|
+
*
|
|
52
|
+
* Fix: persist the MAXIMUM real uptime this bot has ever observed (across
|
|
53
|
+
* process generations) and record it on every snapshot as uptime_peak_s.
|
|
54
|
+
* The peak only ever derives from process.uptime() — it is never
|
|
55
|
+
* fabricated or extrapolated. The anomaly evaluation then keys on the
|
|
56
|
+
* peak (hasRepresentativeUptime), so a process that genuinely lived for
|
|
57
|
+
* hours is not flagged as a ~62s loop, while a genuine fast-restart loop
|
|
58
|
+
* (peak never climbs past the startup transient) still fires.
|
|
59
|
+
*
|
|
60
|
+
* Stored in the state/ directory (beside trends.jsonl only in the default layout — TRENDS_PATH itself ignores ALVIN_DATA_DIR), honoring ALVIN_DATA_DIR so tests
|
|
61
|
+
* and non-default installs work. Survives restarts by design — that is
|
|
62
|
+
* the whole point of a high-water mark.
|
|
63
|
+
*/
|
|
64
|
+
function trendsStateDir() {
|
|
65
|
+
const base = process.env.ALVIN_DATA_DIR || join(homedir(), ".alvin-bot");
|
|
66
|
+
return join(base, "state");
|
|
67
|
+
}
|
|
68
|
+
function uptimePeakPath() {
|
|
69
|
+
return join(trendsStateDir(), "uptime-peak.json");
|
|
70
|
+
}
|
|
71
|
+
/**
|
|
72
|
+
* The startup transient: takeSnapshot's first sample is taken ~60s after
|
|
73
|
+
* boot, so any uptime at/under this is indistinguishable from "just
|
|
74
|
+
* restarted". An uptime ABOVE this proves the process actually lived past
|
|
75
|
+
* the post-restart sampling window. 600s (10 min) is comfortably above
|
|
76
|
+
* the 60s first-sample delay + scheduling jitter and far below the 24h
|
|
77
|
+
* cron cadence, so a healthy bot trivially clears it while a real
|
|
78
|
+
* crash-loop (exits within seconds/a couple minutes) never does.
|
|
79
|
+
*/
|
|
80
|
+
export const STARTUP_TRANSIENT_S = 600;
|
|
81
|
+
/**
|
|
82
|
+
* Read the persisted peak uptime, fold in the CURRENT real uptime, persist
|
|
83
|
+
* the (possibly larger) high-water mark, and return it. Pure w.r.t. time
|
|
84
|
+
* sources: the only uptime input is process.uptime() — nothing invented.
|
|
85
|
+
* Disk failures degrade gracefully to the current real uptime.
|
|
86
|
+
*/
|
|
87
|
+
function bumpAndReadUptimePeak() {
|
|
88
|
+
const currentReal = Math.round(process.uptime());
|
|
89
|
+
let stored = 0;
|
|
90
|
+
try {
|
|
91
|
+
const raw = readFileSync(uptimePeakPath(), "utf-8");
|
|
92
|
+
const parsed = JSON.parse(raw);
|
|
93
|
+
if (typeof parsed.peak_s === "number" && Number.isFinite(parsed.peak_s) && parsed.peak_s > 0) {
|
|
94
|
+
stored = parsed.peak_s;
|
|
95
|
+
}
|
|
96
|
+
}
|
|
97
|
+
catch {
|
|
98
|
+
// No file yet / unreadable — start the high-water mark from the
|
|
99
|
+
// current real uptime. Not an error.
|
|
100
|
+
}
|
|
101
|
+
const peak = Math.max(stored, currentReal);
|
|
102
|
+
try {
|
|
103
|
+
mkdirSync(trendsStateDir(), { recursive: true });
|
|
104
|
+
writeFileSync(uptimePeakPath(), JSON.stringify({ peak_s: peak }), "utf-8");
|
|
105
|
+
}
|
|
106
|
+
catch {
|
|
107
|
+
// Disk full / permissions — non-fatal; we still return the in-memory peak.
|
|
108
|
+
}
|
|
109
|
+
return peak;
|
|
110
|
+
}
|
|
42
111
|
const DEFAULT_INTERVAL_HOURS = 24;
|
|
43
112
|
const DEFAULT_AI_THRESHOLD_DAYS = 7;
|
|
44
113
|
const MAX_RETAIN_DAYS = 90;
|
|
@@ -54,6 +123,18 @@ const MAX_RETAIN_DAYS = 90;
|
|
|
54
123
|
* (a successful, expected fallback — not an error)
|
|
55
124
|
* - critical-notify's own delivery-outcome line, kept on stderr on
|
|
56
125
|
* purpose so it stays visible even in brake/crash context
|
|
126
|
+
* - B3: subagent-delivery's "send failed … chat not found" line for a
|
|
127
|
+
* stale/test async-agent whose delivery target chat no longer exists
|
|
128
|
+
* (e.g. the recurring chat_id:1 test agent). This is benign noise,
|
|
129
|
+
* not a real fault: the target chat is invalid, the watcher now
|
|
130
|
+
* abandons such agents (see async-agent-watcher.ts), and counting it
|
|
131
|
+
* made errors_24h creep upward indefinitely on every poll cycle.
|
|
132
|
+
* The match is DELIBERATELY narrow — it requires BOTH the
|
|
133
|
+
* `[subagent-delivery] send failed` prefix AND a `chat not found`
|
|
134
|
+
* cause on the same line. A subagent-delivery failure for ANY other
|
|
135
|
+
* reason (network, rate-limit, parse) is still counted, and a
|
|
136
|
+
* `chat not found` from ANY OTHER subsystem (a real misconfigured
|
|
137
|
+
* target) is still counted.
|
|
57
138
|
*
|
|
58
139
|
* Counting those turned this very monitor into a false-alarm generator:
|
|
59
140
|
* it flagged its OWN log lines plus every release's restart churn, so
|
|
@@ -65,7 +146,7 @@ const MAX_RETAIN_DAYS = 90;
|
|
|
65
146
|
* any, get added here in one place instead of being chased across the
|
|
66
147
|
* codebase.
|
|
67
148
|
*/
|
|
68
|
-
export const ERR_LOG_PATTERN = /^(?!.*(?:\[critical-notify\]|\[subagent-delivery\] Markdown parse failed)).+/;
|
|
149
|
+
export const ERR_LOG_PATTERN = /^(?!.*(?:\[critical-notify\]|\[subagent-delivery\] Markdown parse failed|\[subagent-delivery\] send failed.*chat not found)).+/;
|
|
69
150
|
let trendsTimer = null;
|
|
70
151
|
function isDisabled() {
|
|
71
152
|
return (process.env.ALVIN_DISABLE_TRENDS === "true" ||
|
|
@@ -134,6 +215,7 @@ function takeSnapshot(activeProvider) {
|
|
|
134
215
|
return {
|
|
135
216
|
ts: new Date().toISOString(),
|
|
136
217
|
uptime_s: Math.round(process.uptime()),
|
|
218
|
+
uptime_peak_s: bumpAndReadUptimePeak(),
|
|
137
219
|
rss_mb: Math.round(mem.rss / 1024 / 1024),
|
|
138
220
|
heap_mb: Math.round(mem.heapUsed / 1024 / 1024),
|
|
139
221
|
crashes_24h: readWatchdogCrashes24h(),
|
|
@@ -195,6 +277,92 @@ SUGGESTION: <one shell command OR observation for the operator>
|
|
|
195
277
|
--- LAST {N} DAYS OF SNAPSHOTS ---
|
|
196
278
|
{SNAPSHOTS}
|
|
197
279
|
--- END ---`;
|
|
280
|
+
/**
 * Reports whether any snapshot in `snaps` recorded a real crash.
 *
 * A snapshot counts as crash evidence only when its `crashes_24h` field
 * is a number strictly greater than zero. Snapshots where the field is
 * missing, non-numeric, NaN, zero or negative are ignored.
 *
 * Background (B1 fix): deliberate restarts (SIGTERM / launchctl reload /
 * /restart / /update) write the expectedRestart beacon flag and are NOT
 * counted in dailyCrashCount. A result of `false` therefore means the
 * bot was only ever restarted intentionally — there is no evidence of a
 * real crash.
 *
 * Pure function, exported for unit testing.
 *
 * @param {Array<object>} snaps - Snapshot records to inspect.
 * @returns {boolean} `true` if at least one snapshot has `crashes_24h > 0`.
 */
export function hasRealCrashEvidence(snaps) {
    const recordedCrash = (snapshot) => {
        const { crashes_24h: crashCount } = snapshot;
        if (typeof crashCount !== "number") {
            return false;
        }
        return crashCount > 0;
    };
    return snaps.some(recordedCrash);
}
|
|
295
|
+
/**
 * B2 — Reports whether AT LEAST ONE snapshot proves the bot process
 * genuinely outlived the startup transient (i.e. it is NOT stuck in a
 * ~62s restart loop).
 *
 * Why the raw uptime is not enough: the first snapshot of every boot is
 * structurally taken ~60s after start, so its `uptime_s` always reads
 * ≈ 62 no matter how long the process goes on to run. `uptime_peak_s`
 * is the high-water mark of REAL process.uptime() carried across
 * process generations, so one snapshot whose peak exceeds
 * STARTUP_TRANSIENT_S is hard evidence of a representative lifetime.
 *
 * Value used per snapshot, in order of preference:
 *   1. `uptime_peak_s` when it is a finite number;
 *   2. otherwise `uptime_s` when it is a finite number (legacy pre-B2
 *      lines have no peak, so a legacy 24h cron snapshot still counts
 *      as representative on its own);
 *   3. otherwise 0.
 *
 * A genuine fast-restart loop never lets the peak climb past the
 * transient, so this returns false for it and the WARN still fires.
 *
 * Pure function, exported for unit testing.
 *
 * @param {Array<object>} snaps - Snapshot records to inspect.
 * @returns {boolean} `true` if any snapshot's chosen uptime value is
 *   strictly greater than STARTUP_TRANSIENT_S.
 */
export function hasRepresentativeUptime(snaps) {
    const isFiniteNumber = (value) => typeof value === "number" && Number.isFinite(value);
    return snaps.some((snapshot) => {
        let observedUptime = 0;
        if (isFiniteNumber(snapshot.uptime_peak_s)) {
            observedUptime = snapshot.uptime_peak_s;
        }
        else if (isFiniteNumber(snapshot.uptime_s)) {
            observedUptime = snapshot.uptime_s;
        }
        return observedUptime > STARTUP_TRANSIENT_S;
    });
}
|
|
324
|
+
/**
 * B2/B4 — Pure decision: should a crash/restart WARN be suppressed?
 *
 * Mirrors the two gates dailyTask applies inline, in the same
 * precedence (B2 before B4). It exists as a standalone pure function so
 * that the gate COMPOSITION — not just each helper in isolation — can
 * be unit tested; the interaction is where the
 * real-crash-loop-after-a-healthy-period regression lives.
 *
 * Possible results:
 *
 *  - "none": the WARN is real and must be emitted. Returned when the
 *    anomaly is not a crash/restart pattern at all, or when there IS
 *    real crash evidence (crashes_24h > 0 in some snapshot).
 *  - "representative-uptime" (B2): a deliberate-restart / sampling
 *    artifact — ~62s uptimes were seen, but a snapshot peak proves the
 *    process lived past the startup transient. Only reachable without
 *    real crash evidence: a genuine crash loop after a healthy period
 *    still carries the persisted high peak, so without that guard B2
 *    would permanently and silently swallow it.
 *  - "no-crash-evidence" (B4): crash/restart pattern, but crashes_24h
 *    is 0 everywhere (deliberate-restart-only, not a real crash loop).
 *
 * Pure function, exported for unit testing.
 *
 * @param {boolean} isCrashRestartPattern - Whether the flagged anomaly
 *   looks like a crash/restart/uptime pattern.
 * @param {Array<object>} snaps - Snapshot records to evaluate.
 * @returns {"none"|"representative-uptime"|"no-crash-evidence"} The
 *   suppression reason, or "none" when the WARN must fire.
 */
export function evaluateCrashRestartSuppression(isCrashRestartPattern, snaps) {
    // Neither gate applies to anomalies that are not crash/restart shaped.
    if (!isCrashRestartPattern) {
        return "none";
    }
    // Real crash evidence overrides both gates: a true crash loop must
    // surface even when the persisted uptime high-water mark still reads
    // representative.
    if (hasRealCrashEvidence(snaps)) {
        return "none";
    }
    // No real crash from here on. B2 takes precedence over B4.
    return hasRepresentativeUptime(snaps)
        ? "representative-uptime"
        : "no-crash-evidence";
}
|
|
362
|
+
/**
 * Test-only hook: takes a snapshot without writing to trends.jsonl.
 *
 * Forwards `activeProvider` unchanged to takeSnapshot and returns
 * whatever it produces.
 *
 * @param {*} activeProvider - Passed straight through to takeSnapshot.
 * @returns {*} The snapshot returned by takeSnapshot.
 */
export function __takeSnapshotForTest(activeProvider) {
    const snapshot = takeSnapshot(activeProvider);
    return snapshot;
}
|
|
198
366
|
function parseTrendResponse(text) {
|
|
199
367
|
if (/^ANOMALY:\s*NONE/im.test(text)) {
|
|
200
368
|
return {
|
|
@@ -296,6 +464,38 @@ async function dailyTask(registry) {
|
|
|
296
464
|
console.log(`📊 Trends AI: no anomaly detected`);
|
|
297
465
|
return;
|
|
298
466
|
}
|
|
467
|
+
const recentSnaps = readSnapshots(30);
|
|
468
|
+
const isCrashRestartPattern = /crash|restart|loop|uptime/i.test(result.description);
|
|
469
|
+
// B2 gate: suppress an "uptime stuck at ~62s / restart loop" WARN when
|
|
470
|
+
// the snapshots PROVE the process actually lived past the startup
|
|
471
|
+
// transient. The first per-boot snapshot is structurally sampled ~60s
|
|
472
|
+
// after boot, so raw uptime_s reads ≈62 even for a perfectly healthy
|
|
473
|
+
// bot that has been up for hours by the time the daily snapshot fires.
|
|
474
|
+
// uptime_peak_s is the high-water mark of real process.uptime() across
|
|
475
|
+
// process generations: if ANY snapshot's peak exceeds the transient,
|
|
476
|
+
// the "~62s loop" conclusion is factually false. A genuine fast-restart
|
|
477
|
+
// loop never lets the peak climb, so it is NOT suppressed here.
|
|
478
|
+
if (isCrashRestartPattern && !hasRealCrashEvidence(recentSnaps) && hasRepresentativeUptime(recentSnaps)) {
|
|
479
|
+
console.log(`📊 Trends AI: suppressed WARN "${result.description}" — ` +
|
|
480
|
+
`uptime/restart pattern flagged but at least one snapshot shows a ` +
|
|
481
|
+
`representative peak uptime (>${STARTUP_TRANSIENT_S}s); the process ` +
|
|
482
|
+
`did live well past the post-restart sampling window, not a ~62s loop`);
|
|
483
|
+
return;
|
|
484
|
+
}
|
|
485
|
+
// B4 gate: suppress WARN when the AI flags a crash/restart-loop pattern
|
|
486
|
+
// but the historical snapshots contain ZERO real crash evidence
|
|
487
|
+
// (crashes_24h === 0 across the board). This happens when the bot was
|
|
488
|
+
// restarted deliberately (launchctl reload / /update / /restart) — those
|
|
489
|
+
// produce low uptimes that the AI reads as "restart loop", but the
|
|
490
|
+
// crash counter stays at 0 because markExpectedRestart() was written
|
|
491
|
+
// on each clean shutdown. A real crash loop WILL have crashes_24h > 0
|
|
492
|
+
// in at least one snapshot and will still fire the WARN.
|
|
493
|
+
if (isCrashRestartPattern && !hasRealCrashEvidence(recentSnaps)) {
|
|
494
|
+
console.log(`📊 Trends AI: suppressed WARN "${result.description}" — ` +
|
|
495
|
+
`crash/restart pattern detected but crashes_24h=0 across all snapshots ` +
|
|
496
|
+
`(deliberate-restart-only, not a real crash loop)`);
|
|
497
|
+
return;
|
|
498
|
+
}
|
|
299
499
|
console.log(`📊 Trends AI: ANOMALY (${result.severity}) — ${result.description}`);
|
|
300
500
|
emitCritical({
|
|
301
501
|
category: "custom",
|