alvin-bot 5.3.0 → 5.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +100 -0
- package/CHANGELOG.md +68 -3
- package/README.md +2 -0
- package/alvin-bot.config.example.json +1 -1
- package/dist/config.js +7 -4
- package/dist/handlers/commands.js +10 -2
- package/dist/handlers/document.js +8 -1
- package/dist/handlers/message.js +173 -30
- package/dist/i18n.js +21 -0
- package/dist/index.js +19 -1
- package/dist/init-data-dir.js +17 -0
- package/dist/middleware/auth.js +19 -1
- package/dist/providers/tool-executor.js +29 -4
- package/dist/services/async-agent-watcher.js +105 -14
- package/dist/services/browser-manager.js +11 -9
- package/dist/services/browser-webfetch.js +47 -13
- package/dist/services/cron-scheduling.js +79 -19
- package/dist/services/cron.js +205 -16
- package/dist/services/delivery-queue.js +19 -0
- package/dist/services/embeddings/index.js +2 -5
- package/dist/services/env-file.js +4 -0
- package/dist/services/personality.js +40 -37
- package/dist/services/session-persistence.js +21 -3
- package/dist/services/session.js +3 -0
- package/dist/services/ssrf-guard.js +162 -0
- package/dist/services/steer-channel.js +7 -2
- package/dist/services/subagent-delivery.js +31 -8
- package/dist/services/telegram.js +9 -0
- package/dist/services/trends.js +202 -2
- package/dist/services/voice.js +0 -3
- package/dist/web/server.js +155 -5
- package/package.json +8 -7
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* SSRF Guard — rejects requests to private/internal network destinations.
|
|
3
|
+
*
|
|
4
|
+
* Blocks:
|
|
5
|
+
* - Non-http(s) schemes (file://, ftp://, etc.)
|
|
6
|
+
* - IPv4 loopback (127.0.0.0/8), link-local (169.254.0.0/16 — incl. cloud
|
|
7
|
+
* metadata endpoint 169.254.169.254), RFC-1918 private ranges
|
|
8
|
+
* (10/8, 172.16/12, 192.168/16), and the "this network" block 0.0.0.0/8
|
|
9
|
+
* - IPv6 loopback (::1), ULA (fc00::/7), link-local (fe80::/10)
|
|
10
|
+
*
|
|
11
|
+
* Opt-out: set ALLOW_PRIVATE_FETCH=1 to disable blocking (for local dev /
|
|
12
|
+
* self-hosted setups). Default = blocked.
|
|
13
|
+
*
|
|
14
|
+
* No new runtime dependencies — uses Node's built-in `dns` and `net` modules.
|
|
15
|
+
*/
|
|
16
|
+
import dns from "dns";
|
|
17
|
+
import net from "net";
|
|
18
|
+
/**
 * Error raised when the SSRF guard refuses a URL.
 *
 * The message has the form `SSRF blocked: <reason> (url: <url>)`, and the
 * refused URL is also kept on the instance as `url` so callers can report it
 * without parsing the message.
 */
export class SsrfBlockedError extends Error {
    url;
    /**
     * @param {string} url - The URL that was refused.
     * @param {string} reason - Human-readable explanation of the refusal.
     */
    constructor(url, reason) {
        const message = `SSRF blocked: ${reason} (url: ${url})`;
        super(message);
        Object.assign(this, { name: "SsrfBlockedError", url });
    }
}
|
|
26
|
+
/**
 * Return true if the given dotted-decimal IPv4 address string falls into a
 * range that must never be fetched on behalf of a user (private, loopback,
 * link-local, or otherwise non-public).
 *
 * Ranges blocked:
 *   0.0.0.0/8       — "this" network
 *   10.0.0.0/8      — RFC-1918 class A
 *   100.64.0.0/10   — RFC-6598 shared address space (carrier-grade NAT; some
 *                     cloud providers serve instance metadata from this
 *                     range, e.g. 100.100.100.200)
 *   127.0.0.0/8     — loopback
 *   169.254.0.0/16  — link-local (incl. IMDS 169.254.169.254)
 *   172.16.0.0/12   — RFC-1918 class B (172.16–172.31)
 *   192.168.0.0/16  — RFC-1918 class C
 *   198.18.0.0/15   — RFC-2544 benchmarking networks
 *   224.0.0.0/3     — multicast (224/4), reserved (240/4) and the limited
 *                     broadcast address 255.255.255.255
 *
 * @param {string} ip - Candidate IPv4 address in dotted-decimal notation.
 * @returns {boolean} true when the address lies in a blocked range; false
 *   for public addresses and for input that is not four 0–255 components.
 */
function isPrivateIPv4(ip) {
    const parts = ip.split(".").map(Number);
    // Number.isInteger rejects NaN and Infinity without the coercion of the
    // global isNaN.
    if (parts.length !== 4 || parts.some((p) => !Number.isInteger(p) || p < 0 || p > 255)) {
        return false; // not a valid IPv4 — let DNS resolve decide
    }
    const [a, b] = parts;
    if (a === 0 || a === 10 || a === 127)
        return true; // 0.0.0.0/8, 10.0.0.0/8, 127.0.0.0/8
    if (a === 100 && b >= 64 && b <= 127)
        return true; // 100.64.0.0/10 shared address space
    if (a === 169 && b === 254)
        return true; // 169.254.0.0/16 link-local
    if (a === 172 && b >= 16 && b <= 31)
        return true; // 172.16.0.0/12
    if (a === 192 && b === 168)
        return true; // 192.168.0.0/16
    if (a === 198 && (b === 18 || b === 19))
        return true; // 198.18.0.0/15 benchmarking
    // 224.0.0.0/3: multicast, reserved and broadcast are never valid
    // unicast fetch targets.
    return a >= 224;
}
|
|
58
|
+
/**
 * Expand a textual IPv6 address (lower-case, no brackets, no zone id) into
 * its eight 16-bit groups. Supports `::` compression and a dotted-quad IPv4
 * tail. Returns null when the text is not a well-formed IPv6 address.
 */
function parseIPv6Groups(address) {
    let text = address;
    // Rewrite an embedded dotted-quad tail ("::ffff:127.0.0.1") as two hex
    // groups so the rest of the parser only deals with hex.
    const lastColon = text.lastIndexOf(":");
    const tail = text.slice(lastColon + 1);
    if (tail.includes(".")) {
        const octets = tail.split(".");
        if (octets.length !== 4 || octets.some((o) => !/^\d{1,3}$/.test(o) || Number(o) > 255)) {
            return null;
        }
        const [o1, o2, o3, o4] = octets.map(Number);
        const high = ((o1 << 8) | o2).toString(16);
        const low = ((o3 << 8) | o4).toString(16);
        text = `${text.slice(0, lastColon + 1)}${high}:${low}`;
    }
    const halves = text.split("::");
    if (halves.length > 2)
        return null; // "::" may appear at most once
    const compressed = halves.length === 2;
    const head = halves[0] ? halves[0].split(":") : [];
    const rest = compressed && halves[1] ? halves[1].split(":") : [];
    const missing = 8 - head.length - rest.length;
    if (compressed ? missing < 1 : missing !== 0)
        return null;
    const hex = [...head, ...Array(compressed ? missing : 0).fill("0"), ...rest];
    const groups = hex.map((g) => (/^[0-9a-f]{1,4}$/.test(g) ? Number.parseInt(g, 16) : Number.NaN));
    return groups.some(Number.isNaN) ? null : groups;
}
/**
 * Return true if the given IPv6 address string falls into a blocked range.
 *
 * Ranges blocked:
 *   ::              — unspecified address
 *   ::1             — loopback (in any spelling, e.g. 0:0:0:0:0:0:0:1)
 *   fc00::/7        — ULA (fc00:: – fdff::)
 *   fe80::/10       — link-local
 *   ::ffff:a.b.c.d  — IPv4-mapped addresses whose embedded IPv4 address is
 *                     itself blocked by isPrivateIPv4 (e.g. ::ffff:127.0.0.1,
 *                     also when written in hex as ::ffff:7f00:1)
 *
 * @param {string} ip - IPv6 address; surrounding brackets and a `%zone`
 *   suffix are tolerated.
 * @returns {boolean} true when the address is blocked; false for public
 *   addresses and for text that is not a well-formed IPv6 address.
 */
function isPrivateIPv6(ip) {
    // Work on the whole address, not just its first group: an IPv4-mapped
    // loopback address starts with zero groups and would otherwise look
    // public.
    const bare = ip.toLowerCase().replace(/^\[|\]$/g, "").split("%")[0];
    const groups = parseIPv6Groups(bare);
    if (groups === null)
        return false;
    const leadingZeros = (count) => groups.slice(0, count).every((g) => g === 0);
    // Unspecified (::) and loopback (::1)
    if (leadingZeros(7) && (groups[7] === 0 || groups[7] === 1))
        return true;
    const first = groups[0];
    // fc00::/7 — binary prefix 1111110x, i.e. first group 0xfc00–0xfdff
    if (first >= 0xfc00 && first <= 0xfdff)
        return true;
    // fe80::/10 — first group 0xfe80–0xfebf
    if (first >= 0xfe80 && first <= 0xfebf)
        return true;
    // IPv4-mapped (::ffff:0:0/96): classify by the embedded IPv4 address.
    if (leadingZeros(5) && groups[5] === 0xffff) {
        const embedded = `${groups[6] >> 8}.${groups[6] & 0xff}.${groups[7] >> 8}.${groups[7] & 0xff}`;
        return isPrivateIPv4(embedded);
    }
    return false;
}
|
|
85
|
+
/**
 * Classify a raw IP literal (IPv4 or IPv6, optionally wrapped in `[...]`)
 * as private/internal or not.
 *
 * @param {string} ip - Candidate IP literal.
 * @returns {boolean} The verdict of the family-specific classifier, or
 *   false when the text is not an IP address at all.
 */
function isPrivateIP(ip) {
    const candidate = ip.replace(/^\[|\]$/g, ""); // drop IPv6 brackets
    // net.isIP yields 4, 6, or 0 (not an IP).
    switch (net.isIP(candidate)) {
        case 4:
            return isPrivateIPv4(candidate);
        case 6:
            return isPrivateIPv6(candidate);
        default:
            return false;
    }
}
|
|
96
|
+
/**
 * Vet the host part of a URL.
 *
 * - A literal IP is classified immediately, without DNS.
 * - `localhost` / `*.localhost` (with or without a trailing root dot) is
 *   rejected without DNS.
 * - Any other name is resolved with `dns.promises.lookup(..., { all: true })`
 *   and every returned address, IPv4 and IPv6, is classified. `lookup` is
 *   used rather than `resolve` because `resolve` returns A records only by
 *   default and bypasses the hosts file, so it can disagree with the
 *   address an HTTP client ends up connecting to.
 *
 * Fail-open on resolver errors: a lookup failure (ENOTFOUND, timeout, …) is
 * swallowed on purpose so a temporary resolver blip does not block every
 * fetch; the fetch itself is left to fail naturally.
 *
 * NOTE(review): this is a pre-flight check. The caller's later connection
 * performs its own resolution, so a DNS answer that changes between the two
 * (DNS rebinding) is not covered here.
 *
 * @param {string} hostname - Host as produced by `URL#hostname` (IPv6
 *   literals may still carry brackets).
 * @param {string} url - Original URL, used only for error reporting.
 * @returns {Promise<void>} Resolves when no blocked address was found.
 * @throws {SsrfBlockedError} When the host is, or resolves to, a blocked
 *   address.
 */
async function checkHost(hostname, url) {
    const bare = hostname.replace(/^\[|\]$/g, ""); // strip IPv6 brackets for net.isIP
    // Literal IP — no DNS needed
    if (net.isIP(bare)) {
        if (isPrivateIP(bare)) {
            throw new SsrfBlockedError(url, `destination ${bare} is a private/loopback/link-local address`);
        }
        return;
    }
    // Named hostname — catch obvious loopback names without DNS. A trailing
    // root dot ("localhost.") names the same host and must not slip through.
    const lower = bare.toLowerCase().replace(/\.$/, "");
    if (lower === "localhost" || lower.endsWith(".localhost")) {
        throw new SsrfBlockedError(url, `destination hostname '${bare}' resolves to loopback`);
    }
    // DNS resolution — check every returned address of both families
    let records;
    try {
        records = await dns.promises.lookup(bare, { all: true });
    }
    catch {
        // DNS error (ENOTFOUND, etc.) — deliberate fail-open; let the actual
        // fetch fail naturally.
        return;
    }
    for (const { address } of records) {
        if (isPrivateIP(address)) {
            throw new SsrfBlockedError(url, `destination hostname '${bare}' resolves to private address ${address}`);
        }
    }
}
|
|
136
|
+
/**
 * Gatekeeper for outbound fetches: resolves when `url` may be fetched and
 * rejects with SsrfBlockedError when it may not.
 *
 * Order of checks:
 *   1. Operator opt-out — ALLOW_PRIVATE_FETCH=1 skips every check.
 *   2. The URL must parse.
 *   3. The scheme must be http or https.
 *   4. The host is vetted by checkHost (literal IP or DNS-resolved).
 *
 * @param {string} url - URL about to be fetched.
 * @returns {Promise<void>} Resolves when the URL is considered safe.
 * @throws {SsrfBlockedError} When the URL is invalid, uses a disallowed
 *   scheme, or its host is blocked.
 */
export async function assertSsrfSafe(url) {
    // Trusted local / self-hosted environments can turn the guard off.
    const optedOut = process.env.ALLOW_PRIVATE_FETCH === "1";
    if (optedOut) {
        return;
    }
    let target;
    try {
        target = new URL(url);
    }
    catch {
        throw new SsrfBlockedError(url, "invalid URL");
    }
    const { protocol, hostname } = target; // protocol includes trailing ':'
    if (!["http:", "https:"].includes(protocol)) {
        throw new SsrfBlockedError(url, `scheme '${protocol}' is not allowed (only http/https)`);
    }
    await checkHost(hostname, url);
}
|
|
@@ -7,17 +7,22 @@ export class SteerChannel {
|
|
|
7
7
|
constructor(cap = DEFAULT_CAP) {
|
|
8
8
|
this.cap = cap;
|
|
9
9
|
}
|
|
10
|
+
/** Push a message into the channel.
|
|
11
|
+
* Returns true if the message was accepted, false if it was dropped
|
|
12
|
+
* (channel closed or buffer cap reached). Callers must check the
|
|
13
|
+
* return value to decide whether to send a 📨 ack or a bufferFull notice. */
|
|
10
14
|
push(text) {
|
|
11
15
|
if (this.closed)
|
|
12
|
-
return;
|
|
16
|
+
return false;
|
|
13
17
|
if (this.buf.length >= this.cap) {
|
|
14
18
|
console.warn(`[steer-channel] cap ${this.cap} reached — dropping steer message`);
|
|
15
|
-
return;
|
|
19
|
+
return false;
|
|
16
20
|
}
|
|
17
21
|
this.buf.push({ type: "user", message: { role: "user", content: text }, parent_tool_use_id: null });
|
|
18
22
|
const r = this.resolveNext;
|
|
19
23
|
this.resolveNext = null;
|
|
20
24
|
r?.();
|
|
25
|
+
return true;
|
|
21
26
|
}
|
|
22
27
|
close() {
|
|
23
28
|
if (this.closed)
|
|
@@ -24,6 +24,22 @@ function isTelegramParseError(err) {
|
|
|
24
24
|
const haystack = `${e.message ?? ""} ${e.description ?? ""}`;
|
|
25
25
|
return /can't parse entities|can't find end of the entity/i.test(haystack);
|
|
26
26
|
}
|
|
27
|
+
/**
 * B3 — Detect a Telegram send that was rejected because the target chat
 * does not exist (HTTP 400 "Bad Request: chat not found").
 *
 * Such a failure is permanent: the chat id is invalid, so every retry
 * fails the same way. Transient failures (network, rate-limit) are a
 * different matter and are intentionally NOT matched — the test is the
 * narrow "chat not found" signature, never a generic Bad Request.
 *
 * @param {unknown} err - Whatever was thrown by the send call.
 * @returns {boolean} true when `err` is an object whose `message` or
 *   `description` contains "chat not found" (case-insensitive).
 */
export function isChatNotFoundError(err) {
    const isObject = Boolean(err) && typeof err === "object";
    if (!isObject) {
        return false;
    }
    const { message, description } = err;
    const text = [message ?? "", description ?? ""].join(" ");
    return /chat not found/i.test(text);
}
|
|
27
43
|
/**
|
|
28
44
|
* Send a Markdown message with an automatic plain-text retry on parse
|
|
29
45
|
* errors. Any other error propagates to the caller's outer catch.
|
|
@@ -251,28 +267,29 @@ export function createLiveStream(chatId, agentName) {
|
|
|
251
267
|
* - "slack" / "discord" / "whatsapp" → delivery-registry lookup
|
|
252
268
|
*/
|
|
253
269
|
export async function deliverSubAgentResult(info, result, opts = {}) {
|
|
270
|
+
const OK = { chatNotFound: false };
|
|
254
271
|
// Implicit spawns: the Task-tool bridge in the main stream has already
|
|
255
272
|
// surfaced the output; extra delivery would be duplication.
|
|
256
273
|
if (info.source === "implicit")
|
|
257
|
-
return;
|
|
274
|
+
return OK;
|
|
258
275
|
const effective = opts.visibility ?? getVisibility();
|
|
259
276
|
if (effective === "silent")
|
|
260
|
-
return;
|
|
277
|
+
return OK;
|
|
261
278
|
if (!info.parentChatId) {
|
|
262
279
|
console.warn(`[subagent-delivery] missing parentChatId for ${info.name} (source=${info.source})`);
|
|
263
|
-
return;
|
|
280
|
+
return OK;
|
|
264
281
|
}
|
|
265
282
|
// v4.14 — Platform routing. Telegram is the default path (unchanged).
|
|
266
283
|
const platform = info.platform ?? "telegram";
|
|
267
284
|
if (platform !== "telegram") {
|
|
268
285
|
await deliverViaRegistry(platform, info, result);
|
|
269
|
-
return;
|
|
286
|
+
return OK;
|
|
270
287
|
}
|
|
271
288
|
// ── Telegram path (v4.12.x behavior, unchanged) ──────────────────
|
|
272
289
|
const api = getBotApi();
|
|
273
290
|
if (!api) {
|
|
274
291
|
console.warn(`[subagent-delivery] no bot api available for ${info.name}`);
|
|
275
|
-
return;
|
|
292
|
+
return OK;
|
|
276
293
|
}
|
|
277
294
|
// Telegram's chatId is always a number at runtime; defensive cast.
|
|
278
295
|
const tgChatId = typeof info.parentChatId === "number"
|
|
@@ -280,7 +297,7 @@ export async function deliverSubAgentResult(info, result, opts = {}) {
|
|
|
280
297
|
: Number(info.parentChatId);
|
|
281
298
|
if (!Number.isFinite(tgChatId)) {
|
|
282
299
|
console.warn(`[subagent-delivery] invalid telegram chatId for ${info.name}`);
|
|
283
|
-
return;
|
|
300
|
+
return OK;
|
|
284
301
|
}
|
|
285
302
|
const banner = buildBanner(info, result);
|
|
286
303
|
const body = result.output?.trim() || `(empty output)`;
|
|
@@ -297,12 +314,12 @@ export async function deliverSubAgentResult(info, result, opts = {}) {
|
|
|
297
314
|
console.error(`[subagent-delivery] file upload failed:`, err);
|
|
298
315
|
await api.sendMessage(tgChatId, body.slice(0, MAX_TG_CHUNK));
|
|
299
316
|
}
|
|
300
|
-
return;
|
|
317
|
+
return OK;
|
|
301
318
|
}
|
|
302
319
|
// Case 2: fits in a single message → banner + body joined
|
|
303
320
|
if (body.length + banner.length + 2 <= MAX_TG_CHUNK) {
|
|
304
321
|
await sendWithMarkdownFallback(api, tgChatId, `${banner}\n\n${body}`);
|
|
305
|
-
return;
|
|
322
|
+
return OK;
|
|
306
323
|
}
|
|
307
324
|
// Case 3: medium output → banner as its own message, body chunked
|
|
308
325
|
await sendWithMarkdownFallback(api, tgChatId, banner);
|
|
@@ -311,9 +328,15 @@ export async function deliverSubAgentResult(info, result, opts = {}) {
|
|
|
311
328
|
// arbitrary chunk boundaries would be inconsistent anyway.
|
|
312
329
|
await api.sendMessage(tgChatId, body.slice(i, i + MAX_TG_CHUNK));
|
|
313
330
|
}
|
|
331
|
+
return OK;
|
|
314
332
|
}
|
|
315
333
|
catch (err) {
|
|
316
334
|
console.error(`[subagent-delivery] send failed for ${info.name}:`, err);
|
|
335
|
+
// B3 — report a permanent invalid-target failure so the watcher can
|
|
336
|
+
// abandon this agent instead of retrying it forever. Any other error
|
|
337
|
+
// (network, rate-limit, parse) is NOT reported as chatNotFound, so the
|
|
338
|
+
// agent's normal retry/timeout lifecycle is unchanged.
|
|
339
|
+
return { chatNotFound: isChatNotFoundError(err) };
|
|
317
340
|
}
|
|
318
341
|
}
|
|
319
342
|
/**
|
|
@@ -17,6 +17,15 @@ export class TelegramStreamer {
|
|
|
17
17
|
this.api = api;
|
|
18
18
|
this.replyTo = replyToMessageId;
|
|
19
19
|
}
|
|
20
|
+
/**
|
|
21
|
+
* True when at least one message has been sent to the user (i.e. messageId
|
|
22
|
+
* is set). Used by the A3 suppress-undelivered guard in message.ts to
|
|
23
|
+
* determine whether visible text has already reached the user — if so, the
|
|
24
|
+
* no-retract invariant prevents suppressing the final send.
|
|
25
|
+
*/
|
|
26
|
+
get hasSentText() {
|
|
27
|
+
return this.messageId !== null;
|
|
28
|
+
}
|
|
20
29
|
/**
|
|
21
30
|
* Set a transient status line (e.g. "📖 Read file.html…") that gets
|
|
22
31
|
* appended to the current accumulated text. Passing null clears it.
|
package/dist/services/trends.js
CHANGED
|
@@ -33,12 +33,81 @@
|
|
|
33
33
|
* ALVIN_TRENDS_INTERVAL_HOURS=24 → snapshot cadence
|
|
34
34
|
* ALVIN_TRENDS_AI_AFTER_DAYS=7 → days of data before AI analysis kicks in
|
|
35
35
|
*/
|
|
36
|
-
import { appendFileSync, existsSync, readFileSync, mkdirSync } from "fs";
|
|
36
|
+
import { appendFileSync, existsSync, readFileSync, writeFileSync, mkdirSync } from "fs";
|
|
37
37
|
import { join, dirname } from "path";
|
|
38
38
|
import { homedir } from "os";
|
|
39
39
|
import { BOT_VERSION } from "../version.js";
|
|
40
40
|
import { emitCritical } from "./critical-notify.js";
|
|
41
41
|
const TRENDS_PATH = join(homedir(), ".alvin-bot", "state", "trends.jsonl");
|
|
42
|
+
/**
|
|
43
|
+
* B2 — peak-uptime high-water mark. The trends collector takes its FIRST
|
|
44
|
+
* snapshot ~60s after every boot (startTrendsCollector schedules it at
|
|
45
|
+
* 60_000ms). takeSnapshot() records uptime_s = process.uptime(), so the
|
|
46
|
+
* first post-restart sample is structurally ≈ 62s. With deliberate
|
|
47
|
+
* restarts (/update, launchctl reload) those ~62s samples dominate
|
|
48
|
+
* trends.jsonl, so the 30-day AI pass perpetually concludes "restart
|
|
49
|
+
* loop, never lives past ~62s" even when the process has actually been
|
|
50
|
+
* continuously up for hours by the time the daily snapshot fires.
|
|
51
|
+
*
|
|
52
|
+
* Fix: persist the MAXIMUM real uptime this bot has ever observed (across
|
|
53
|
+
* process generations) and record it on every snapshot as uptime_peak_s.
|
|
54
|
+
* The peak only ever derives from process.uptime() — it is never
|
|
55
|
+
* fabricated or extrapolated. The anomaly evaluation then keys on the
|
|
56
|
+
* peak (hasRepresentativeUptime), so a process that genuinely lived for
|
|
57
|
+
* hours is not flagged as a ~62s loop, while a genuine fast-restart loop
|
|
58
|
+
* (peak never climbs past the startup transient) still fires.
|
|
59
|
+
*
|
|
60
|
+
* Stored under <data dir>/state/ (next to trends.jsonl in a default install), honoring ALVIN_DATA_DIR so tests
|
|
61
|
+
* and non-default installs work. Survives restarts by design — that is
|
|
62
|
+
* the whole point of a high-water mark.
|
|
63
|
+
*/
|
|
64
|
+
/**
 * Directory that holds the trends state files.
 *
 * @returns {string} `<ALVIN_DATA_DIR>/state` when that variable is set to a
 *   non-empty value, otherwise `~/.alvin-bot/state`.
 */
function trendsStateDir() {
    const override = process.env.ALVIN_DATA_DIR;
    // An empty override counts as "not set" and falls back to the default.
    const dataRoot = override ? override : join(homedir(), ".alvin-bot");
    return join(dataRoot, "state");
}
|
|
68
|
+
/**
 * Location of the persisted uptime high-water mark.
 *
 * @returns {string} Path of `uptime-peak.json` inside the trends state dir.
 */
function uptimePeakPath() {
    const stateDir = trendsStateDir();
    return join(stateDir, "uptime-peak.json");
}
|
|
71
|
+
/**
|
|
72
|
+
* The startup transient: takeSnapshot's first sample is taken ~60s after
|
|
73
|
+
* boot, so any uptime at/under this is indistinguishable from "just
|
|
74
|
+
* restarted". An uptime ABOVE this proves the process actually lived past
|
|
75
|
+
* the post-restart sampling window. 600s (10 min) is comfortably above
|
|
76
|
+
* the 60s first-sample delay + scheduling jitter and far below the 24h
|
|
77
|
+
* cron cadence, so a healthy bot trivially clears it while a real
|
|
78
|
+
* crash-loop (exits within seconds/a couple minutes) never does.
|
|
79
|
+
*/
|
|
80
|
+
export const STARTUP_TRANSIENT_S = 600;
|
|
81
|
+
/**
 * Fold the current process uptime into the persisted high-water mark and
 * return the result.
 *
 * Steps: read `peak_s` from the peak file (anything missing, unreadable or
 * not a positive finite number is ignored), take the larger of that and
 * the rounded `process.uptime()`, write it back, and return it. The only
 * time source is `process.uptime()`.
 *
 * Both disk operations are best-effort: a failed read falls back to the
 * current uptime, and a failed write still returns the in-memory value.
 *
 * @returns {number} The high-water mark in seconds.
 */
function bumpAndReadUptimePeak() {
    const uptimeNow = Math.round(process.uptime());
    let highWater = uptimeNow;
    try {
        const { peak_s: persisted } = JSON.parse(readFileSync(uptimePeakPath(), "utf-8"));
        // Number.isFinite is false for every non-number, so it doubles as
        // the type check.
        if (Number.isFinite(persisted) && persisted > 0) {
            highWater = Math.max(persisted, uptimeNow);
        }
    }
    catch {
        // No file yet / unreadable / not JSON — not an error; the mark
        // starts from the current real uptime.
    }
    try {
        mkdirSync(trendsStateDir(), { recursive: true });
        const payload = JSON.stringify({ peak_s: highWater });
        writeFileSync(uptimePeakPath(), payload, "utf-8");
    }
    catch {
        // Disk full / permissions — non-fatal; the in-memory peak is still
        // returned.
    }
    return highWater;
}
|
|
42
111
|
const DEFAULT_INTERVAL_HOURS = 24;
|
|
43
112
|
const DEFAULT_AI_THRESHOLD_DAYS = 7;
|
|
44
113
|
const MAX_RETAIN_DAYS = 90;
|
|
@@ -54,6 +123,18 @@ const MAX_RETAIN_DAYS = 90;
|
|
|
54
123
|
* (a successful, expected fallback — not an error)
|
|
55
124
|
* - critical-notify's own delivery-outcome line, kept on stderr on
|
|
56
125
|
* purpose so it stays visible even in brake/crash context
|
|
126
|
+
* - B3: subagent-delivery's "send failed … chat not found" line for a
|
|
127
|
+
* stale/test async-agent whose delivery target chat no longer exists
|
|
128
|
+
* (e.g. the recurring chat_id:1 test agent). This is benign noise,
|
|
129
|
+
* not a real fault: the target chat is invalid, the watcher now
|
|
130
|
+
* abandons such agents (see async-agent-watcher.ts), and counting it
|
|
131
|
+
* made errors_24h creep upward indefinitely on every poll cycle.
|
|
132
|
+
* The match is DELIBERATELY narrow — it requires BOTH the
|
|
133
|
+
* `[subagent-delivery] send failed` prefix AND a `chat not found`
|
|
134
|
+
* cause on the same line. A subagent-delivery failure for ANY other
|
|
135
|
+
* reason (network, rate-limit, parse) is still counted, and a
|
|
136
|
+
* `chat not found` from ANY OTHER subsystem (a real misconfigured
|
|
137
|
+
* target) is still counted.
|
|
57
138
|
*
|
|
58
139
|
* Counting those turned this very monitor into a false-alarm generator:
|
|
59
140
|
* it flagged its OWN log lines plus every release's restart churn, so
|
|
@@ -65,7 +146,7 @@ const MAX_RETAIN_DAYS = 90;
|
|
|
65
146
|
* any, get added here in one place instead of being chased across the
|
|
66
147
|
* codebase.
|
|
67
148
|
*/
|
|
68
|
-
export const ERR_LOG_PATTERN = /^(?!.*(?:\[critical-notify\]|\[subagent-delivery\] Markdown parse failed)).+/;
|
|
149
|
+
export const ERR_LOG_PATTERN = /^(?!.*(?:\[critical-notify\]|\[subagent-delivery\] Markdown parse failed|\[subagent-delivery\] send failed.*chat not found)).+/;
|
|
69
150
|
let trendsTimer = null;
|
|
70
151
|
function isDisabled() {
|
|
71
152
|
return (process.env.ALVIN_DISABLE_TRENDS === "true" ||
|
|
@@ -134,6 +215,7 @@ function takeSnapshot(activeProvider) {
|
|
|
134
215
|
return {
|
|
135
216
|
ts: new Date().toISOString(),
|
|
136
217
|
uptime_s: Math.round(process.uptime()),
|
|
218
|
+
uptime_peak_s: bumpAndReadUptimePeak(),
|
|
137
219
|
rss_mb: Math.round(mem.rss / 1024 / 1024),
|
|
138
220
|
heap_mb: Math.round(mem.heapUsed / 1024 / 1024),
|
|
139
221
|
crashes_24h: readWatchdogCrashes24h(),
|
|
@@ -195,6 +277,92 @@ SUGGESTION: <one shell command OR observation for the operator>
|
|
|
195
277
|
--- LAST {N} DAYS OF SNAPSHOTS ---
|
|
196
278
|
{SNAPSHOTS}
|
|
197
279
|
--- END ---`;
|
|
280
|
+
/**
 * Does any snapshot record a real crash?
 *
 * A snapshot counts as evidence when its `crashes_24h` is a number greater
 * than zero. After the B1 fix, deliberate restarts (SIGTERM / launchctl
 * reload / /restart / /update) write the expectedRestart beacon flag and
 * are not counted in dailyCrashCount, so `crashes_24h === 0` everywhere
 * means the bot was only ever restarted intentionally.
 *
 * Pure function, exported for unit testing.
 *
 * @param {Array<object>} snaps - Trend snapshots to inspect.
 * @returns {boolean} true when at least one snapshot has crashes_24h > 0.
 */
export function hasRealCrashEvidence(snaps) {
    const recordedCrash = ({ crashes_24h: count }) => typeof count === "number" && count > 0;
    return snaps.some(recordedCrash);
}
|
|
295
|
+
/**
 * B2 — Does any snapshot prove that the bot process lived past the startup
 * transient (i.e. that it is NOT stuck in a ~62s restart loop)?
 *
 * Per snapshot, the value compared against STARTUP_TRANSIENT_S is:
 *   1. `uptime_peak_s`, when it is a finite number — the high-water mark of
 *      real process.uptime() carried across process generations;
 *   2. otherwise `uptime_s`, when it is a finite number — the fallback for
 *      legacy pre-B2 lines that carry no peak;
 *   3. otherwise 0.
 *
 * The comparison is strict: a value equal to the transient does not count.
 * A genuine fast-restart loop never lets the peak climb past the
 * transient, so this returns false for it and the WARN still fires.
 *
 * Pure function, exported for unit testing.
 *
 * @param {Array<object>} snaps - Trend snapshots to inspect.
 * @returns {boolean} true when at least one snapshot clears the transient.
 */
export function hasRepresentativeUptime(snaps) {
    const finiteNumber = (value) => typeof value === "number" && Number.isFinite(value);
    const effectiveUptime = (snap) => {
        if (finiteNumber(snap.uptime_peak_s)) {
            return snap.uptime_peak_s;
        }
        if (finiteNumber(snap.uptime_s)) {
            return snap.uptime_s;
        }
        return 0;
    };
    return snaps.some((snap) => effectiveUptime(snap) > STARTUP_TRANSIENT_S);
}
|
|
324
|
+
/**
 * B2/B4 — Decide whether a crash/restart WARN should be suppressed.
 *
 * Pure function, exported so that the COMPOSITION of the two gates (not
 * just each helper in isolation) can be unit tested.
 *
 * Decision table:
 *   - not a crash/restart pattern            → "none"
 *   - real crash evidence (crashes_24h > 0)  → "none" — the WARN is real.
 *     This holds even when the persisted uptime peak still reads
 *     representative after an earlier healthy period; otherwise B2 would
 *     silently swallow a genuine crash loop.
 *   - no crash evidence, representative peak → "representative-uptime"
 *     (B2: deliberate-restart / sampling artifact)
 *   - no crash evidence, no representative   → "no-crash-evidence"
 *     (B4: deliberate-restart-only, not a real crash loop)
 *
 * @param {boolean} isCrashRestartPattern - Whether the flagged anomaly
 *   looks like a crash/restart/uptime pattern.
 * @param {Array<object>} snaps - Recent trend snapshots.
 * @returns {"none" | "representative-uptime" | "no-crash-evidence"} The
 *   suppression reason, or "none" when the WARN must fire.
 */
export function evaluateCrashRestartSuppression(isCrashRestartPattern, snaps) {
    if (!isCrashRestartPattern) {
        return "none";
    }
    // Real crash evidence overrides both suppression gates.
    if (hasRealCrashEvidence(snaps)) {
        return "none";
    }
    // No real crash: B2 takes precedence over B4.
    return hasRepresentativeUptime(snaps) ? "representative-uptime" : "no-crash-evidence";
}
|
|
362
|
+
/**
 * Test-only hook: produce a snapshot via takeSnapshot without writing it
 * to trends.jsonl.
 *
 * @param {*} activeProvider - Forwarded unchanged to takeSnapshot.
 * @returns {*} Whatever takeSnapshot returns.
 */
export function __takeSnapshotForTest(activeProvider) {
    const snapshot = takeSnapshot(activeProvider);
    return snapshot;
}
|
|
198
366
|
function parseTrendResponse(text) {
|
|
199
367
|
if (/^ANOMALY:\s*NONE/im.test(text)) {
|
|
200
368
|
return {
|
|
@@ -296,6 +464,38 @@ async function dailyTask(registry) {
|
|
|
296
464
|
console.log(`📊 Trends AI: no anomaly detected`);
|
|
297
465
|
return;
|
|
298
466
|
}
|
|
467
|
+
const recentSnaps = readSnapshots(30);
|
|
468
|
+
const isCrashRestartPattern = /crash|restart|loop|uptime/i.test(result.description);
|
|
469
|
+
// B2 gate: suppress an "uptime stuck at ~62s / restart loop" WARN when
|
|
470
|
+
// the snapshots PROVE the process actually lived past the startup
|
|
471
|
+
// transient. The first per-boot snapshot is structurally sampled ~60s
|
|
472
|
+
// after boot, so raw uptime_s reads ≈62 even for a perfectly healthy
|
|
473
|
+
// bot that has been up for hours by the time the daily snapshot fires.
|
|
474
|
+
// uptime_peak_s is the high-water mark of real process.uptime() across
|
|
475
|
+
// process generations: if ANY snapshot's peak exceeds the transient,
|
|
476
|
+
// the "~62s loop" conclusion is factually false. A genuine fast-restart
|
|
477
|
+
// loop never lets the peak climb, so it is NOT suppressed here.
|
|
478
|
+
if (isCrashRestartPattern && !hasRealCrashEvidence(recentSnaps) && hasRepresentativeUptime(recentSnaps)) {
|
|
479
|
+
console.log(`📊 Trends AI: suppressed WARN "${result.description}" — ` +
|
|
480
|
+
`uptime/restart pattern flagged but at least one snapshot shows a ` +
|
|
481
|
+
`representative peak uptime (>${STARTUP_TRANSIENT_S}s); the process ` +
|
|
482
|
+
`did live well past the post-restart sampling window, not a ~62s loop`);
|
|
483
|
+
return;
|
|
484
|
+
}
|
|
485
|
+
// B4 gate: suppress WARN when the AI flags a crash/restart-loop pattern
|
|
486
|
+
// but the historical snapshots contain ZERO real crash evidence
|
|
487
|
+
// (crashes_24h === 0 across the board). This happens when the bot was
|
|
488
|
+
// restarted deliberately (launchctl reload / /update / /restart) — those
|
|
489
|
+
// produce low uptimes that the AI reads as "restart loop", but the
|
|
490
|
+
// crash counter stays at 0 because markExpectedRestart() was written
|
|
491
|
+
// on each clean shutdown. A real crash loop WILL have crashes_24h > 0
|
|
492
|
+
// in at least one snapshot and will still fire the WARN.
|
|
493
|
+
if (isCrashRestartPattern && !hasRealCrashEvidence(recentSnaps)) {
|
|
494
|
+
console.log(`📊 Trends AI: suppressed WARN "${result.description}" — ` +
|
|
495
|
+
`crash/restart pattern detected but crashes_24h=0 across all snapshots ` +
|
|
496
|
+
`(deliberate-restart-only, not a real crash loop)`);
|
|
497
|
+
return;
|
|
498
|
+
}
|
|
299
499
|
console.log(`📊 Trends AI: ANOMALY (${result.severity}) — ${result.description}`);
|
|
300
500
|
emitCritical({
|
|
301
501
|
category: "custom",
|
package/dist/services/voice.js
CHANGED
|
@@ -21,9 +21,6 @@ export async function transcribeAudio(audioPath) {
|
|
|
21
21
|
const bodyEnd = Buffer.from(`\r\n--${boundary}\r\n` +
|
|
22
22
|
`Content-Disposition: form-data; name="model"\r\n\r\n` +
|
|
23
23
|
`whisper-large-v3-turbo\r\n` +
|
|
24
|
-
`--${boundary}\r\n` +
|
|
25
|
-
`Content-Disposition: form-data; name="language"\r\n\r\n` +
|
|
26
|
-
`de\r\n` +
|
|
27
24
|
`--${boundary}--\r\n`, "utf-8");
|
|
28
25
|
const fullBody = Buffer.concat([bodyStart, fileBuffer, bodyEnd]);
|
|
29
26
|
return new Promise((resolve, reject) => {
|