alvin-bot 5.3.0 → 5.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -40,11 +40,8 @@ function loadSqlite() {
40
40
  }
41
41
  catch (err) {
42
42
  sqliteLoadError = err instanceof Error ? err : new Error(String(err));
43
- console.warn("⚠️ better-sqlite3 native binary unavailable — embeddings disabled. " +
44
- "Bot continues without semantic memory search. Fix: rebuild deps with " +
45
- "`cd $(npm root -g)/alvin-bot && npm rebuild better-sqlite3` or reinstall " +
46
- "alvin-bot. Underlying error: " +
47
- sqliteLoadError.message);
43
+ console.log("ℹ️ Semantic memory (better-sqlite3) unavailable — using keyword search (FTS5). " +
44
+ "Reinstall or run `npm rebuild better-sqlite3` to enable.");
48
45
  return null;
49
46
  }
50
47
  }
@@ -26,6 +26,10 @@ export function readEnv() {
26
26
  }
27
27
  /** Upsert a key=value pair in the env file, preserving all other lines. */
28
28
  export function writeEnvVar(key, value) {
29
+ // M6 (centralized): reject values containing newline characters — they allow
30
+ // injecting extra .env lines (e.g. value="good\nEVIL=injected").
31
+ if (/[\n\r]/.test(value))
32
+ throw new Error("env value must not contain newline characters");
29
33
  let content = fs.existsSync(ENV_FILE) ? fs.readFileSync(ENV_FILE, "utf-8") : "";
30
34
  const regex = new RegExp(`^${key}=.*$`, "m");
31
35
  if (regex.test(content)) {
@@ -35,7 +35,7 @@ try {
35
35
  soulContent = readFileSync(SOUL_FILE, "utf-8");
36
36
  }
37
37
  catch {
38
- console.warn("SOUL.md not found — using default personality");
38
+ console.log("ℹ️ soul.md not found — using built-in default personality. Create ~/.alvin-bot/soul.md to customize.");
39
39
  }
40
40
  loadStandingOrders();
41
41
  /** Base system prompt — adapts to user language */
@@ -49,9 +49,14 @@ const SDK_ADDON = `When you run commands or edit files, briefly explain what you
49
49
  /**
50
50
  * Stage 1 of Fix #17 — async sub-agents.
51
51
  *
52
- * Tells Claude to use the SDK's `run_in_background` flag for long-running
53
- * Agent tool calls so the main Telegram session doesn't stay locked for
54
- * 10+ minutes while sub-agents crawl the web, run audits, or build reports.
52
+ * Tells Claude that `mcp__alvin__dispatch_agent` is the ONLY sanctioned
53
+ * path for long-running work on chat platforms. The built-in Task/Agent
54
+ * tool is explicitly disallowed for anything that takes more than ~30 s
55
+ * because it blocks the session: isProcessing stays true, the typing
56
+ * indicator keeps firing, and the user cannot send a new message until
57
+ * the sub-agent finishes. Path A (mcp__alvin__dispatch_agent) spawns a
58
+ * truly detached subprocess and returns in milliseconds so the session
59
+ * is freed immediately.
55
60
  *
56
61
  * Only injected into the prompt when isSDK === true (non-SDK providers
57
62
  * have no Agent tool). The bot's async-agent-watcher (Stage 2) picks up
@@ -63,34 +68,29 @@ const SDK_ADDON = `When you run commands or edit files, briefly explain what you
63
68
  */
64
69
  const BACKGROUND_SUBAGENT_HINT = `## ⚠️ CRITICAL: Background Sub-Agents on Telegram/WhatsApp/Slack
65
70
 
66
- **THE RULE (v4.13 update)**: For ANY long-running work (research,
67
- audits, multi-step analysis, >30 seconds), you MUST prefer the
68
- \`mcp__alvin__dispatch_agent\` tool over the built-in \`Task\` tool.
71
+ **THE RULE**: For ANY long-running work (research, audits, multi-step
72
+ analysis, >30 seconds), you MUST use \`mcp__alvin__dispatch_agent\`.
73
+ The built-in \`Task\`/\`Agent\` tool is BANNED for long work on chat
74
+ platforms (Telegram/WhatsApp/Slack/Discord). Do not use it for anything
75
+ that takes more than ~30 seconds.
69
76
 
70
- **Why alvin_dispatch over Task(run_in_background: true)?**
77
+ **Why mcp__alvin__dispatch_agent is the ONLY background path:**
71
78
 
72
- The built-in Task tool (even with \`run_in_background: true\`) keeps
73
- the sub-agent tied to THIS session's CLI subprocess. If the user sends
74
- me a new message mid-work, aborting this session would kill the
75
- sub-agent half-way through and leave garbage partial output. Alvin
76
- learned this the hard way (v4.12.1–v4.12.3 all got this wrong).
79
+ The built-in Task/Agent tool even with \`run_in_background: true\`
80
+ keeps the sub-agent tied to THIS session's CLI subprocess. The session
81
+ stays blocked: \`isProcessing\` remains true, the typing indicator
82
+ keeps firing, and the user's next message is queued until the
83
+ synchronous Task finishes. There is no way to free the session while a
84
+ built-in Task is running — that is a hard platform constraint.
77
85
 
78
86
  \`mcp__alvin__dispatch_agent\` spawns a COMPLETELY INDEPENDENT
79
87
  \`claude -p\` subprocess with its own PID, own process group. It
80
- survives ANY abort of this session. The user can interrupt me freely
81
- and keep chatting while the sub-agent finishes cleanly. The bot
82
- delivers the result as a separate message when ready.
88
+ returns in milliseconds, the session is freed immediately, and the user
89
+ can keep chatting. The bot delivers the result as a separate message
90
+ when ready.
83
91
 
84
92
  **THIS IS THE SINGLE MOST IMPORTANT RULE FOR USER RESPONSIVENESS.**
85
93
 
86
- **Why it matters**: During a synchronous Agent tool call the parent
87
- session has no way to know the sub-agent is still working. It appears
88
- hung. After a long silence the session can be aborted, destroying the
89
- work. Using \`run_in_background: true\` solves this: the tool returns
90
- an \`agentId\` + \`outputFile\` path IMMEDIATELY, your turn ends in
91
- seconds, the user can keep chatting with me, and the bot automatically
92
- delivers the sub-agent's final result as a separate message when ready.
93
-
94
94
  **Decision tree** (apply every time you consider any sub-agent tool):
95
95
 
96
96
  Does the task involve ANY of the following?
@@ -103,16 +103,11 @@ delivers the sub-agent's final result as a separate message when ready.
103
103
  • Crawling, scraping, or fetching multiple resources
104
104
  • Research across multiple sources or domains
105
105
 
106
- YES → use \`mcp__alvin__dispatch_agent\` (truly detached, preferred)
106
+ YES → use \`mcp__alvin__dispatch_agent\` (the ONLY sanctioned path)
107
107
  NO → foreground is fine (single quick sub-query under 30s, answer
108
108
  yourself if possible)
109
109
 
110
- NOTE: The built-in Task tool with run_in_background: true still works
111
- but is now deprecated on Telegram/Slack/Discord/WhatsApp because it
112
- ties sub-agent lifetime to this session. Only use Task directly when
113
- you explicitly need the sub-agent's result IN THIS SAME TURN (rare).
114
-
115
- **Examples where you MUST use \`run_in_background: true\`:**
110
+ **Examples where you MUST use \`mcp__alvin__dispatch_agent\`:**
116
111
  - ANY audit (SEO, security, code quality, performance, accessibility, GEO)
117
112
  - Research visiting more than 1-2 web pages
118
113
  - Code reviews on more than a single file
@@ -122,12 +117,12 @@ you explicitly need the sub-agent's result IN THIS SAME TURN (rare).
122
117
  - Long data-processing jobs
123
118
  - Anything involving the word "analyze", "audit", "review", "scan", "research"
124
119
 
125
- **Examples where foreground is fine:**
120
+ **Examples where foreground is fine (no sub-agent needed):**
126
121
  - "Read this file and summarize it" (single file, <10s)
127
122
  - "What's 2+2?" (no sub-agent needed — answer yourself)
128
123
  - "Check if package.json has foo" (one quick tool call)
129
124
 
130
- **After launching a background agent (either tool), you MUST:**
125
+ **After calling \`mcp__alvin__dispatch_agent\` you MUST:**
131
126
  1. Tell the user in ONE short sentence what you kicked off.
132
127
  Example: "Starting SEO audit for example.com in the background —
133
128
  I'll send the report when it's done."
@@ -135,6 +130,14 @@ you explicitly need the sub-agent's result IN THIS SAME TURN (rare).
135
130
  3. The bot will deliver the result as a separate message when ready.
136
131
  You don't need to poll the outputFile proactively.
137
132
 
133
+ Only say "running in the background — you can keep chatting" if you
134
+ actually called \`mcp__alvin__dispatch_agent\` in this turn. If the
135
+ task ran inline (foreground tool calls, no dispatch), it blocked the
136
+ session and the user could NOT chat during it — do NOT claim otherwise.
137
+ If a task was too long for foreground but you didn't dispatch it, tell
138
+ the user truthfully: "That ran inline and took a while. Your messages
139
+ were queued until it finished."
140
+
138
141
  **For PARALLEL dispatch** (e.g. user says "research X and Y in parallel"):
139
142
  Call \`mcp__alvin__dispatch_agent\` multiple times in the SAME assistant
140
143
  turn, once per sub-task. Each returns its own agentId immediately. Your
@@ -145,10 +148,10 @@ If the user asks "is it done yet?" before the bot delivers the result,
145
148
  you MAY read the agent's \`outputFile\` (from the original tool result)
146
149
  using the Read tool to peek at progress — but don't block on it.
147
150
 
148
- **Never** call the Agent/Task tool without \`run_in_background: true\`
149
- for anything you're not 100% sure completes in under 30 seconds. The
150
- cost of unnecessary background mode is zero. The cost of blocking the
151
- Telegram user for 20 minutes on a synchronous call is very high.`;
151
+ **Never** call the built-in Agent/Task tool for anything you're not
152
+ 100% sure completes in under 30 seconds. The cost of dispatch is zero.
153
+ The cost of blocking the chat user for 20 minutes on a synchronous
154
+ inline Task is very high.`;
152
155
  /**
153
156
  * Self-Awareness Core — Dynamic introspection block.
154
157
  *
@@ -142,6 +142,12 @@ export function loadPersistedSessions() {
142
142
  if (!raw_parsed || typeof raw_parsed !== "object")
143
143
  return 0;
144
144
  // v4.12.0 — Detect envelope format vs legacy v4.11.0 flat format
145
+ // M4: Validate the top-level shape before trusting any field.
146
+ if (Array.isArray(raw_parsed)) {
147
+ // An array at root is not a valid sessions file
148
+ console.warn("⚠️ session-persistence: sessions file contains an array at root, starting fresh");
149
+ return 0;
150
+ }
145
151
  let parsed;
146
152
  let tgWorkspaces = {};
147
153
  if (raw_parsed &&
@@ -149,11 +155,22 @@ export function loadPersistedSessions() {
149
155
  "version" in raw_parsed &&
150
156
  "sessions" in raw_parsed) {
151
157
  const env = raw_parsed;
152
- parsed = env.sessions ?? {};
153
- tgWorkspaces = env.telegramWorkspaces ?? {};
158
+ // M4: sessions field must be a non-null object; degrade gracefully if tampered
159
+ if (!env.sessions || typeof env.sessions !== "object" || Array.isArray(env.sessions)) {
160
+ console.warn("⚠️ session-persistence: 'sessions' field is not an object, starting fresh");
161
+ return 0;
162
+ }
163
+ parsed = env.sessions;
164
+ tgWorkspaces = (env.telegramWorkspaces && typeof env.telegramWorkspaces === "object" && !Array.isArray(env.telegramWorkspaces))
165
+ ? env.telegramWorkspaces
166
+ : {};
154
167
  }
155
168
  else {
156
- // Legacy flat format (v4.11.0)
169
+ // Legacy flat format (v4.11.0) — must also be an object
170
+ if (!raw_parsed || typeof raw_parsed !== "object") {
171
+ console.warn("⚠️ session-persistence: sessions file is not a valid object, starting fresh");
172
+ return 0;
173
+ }
157
174
  parsed = raw_parsed;
158
175
  }
159
176
  // Rehydrate Telegram workspace map
@@ -184,6 +201,7 @@ export function loadPersistedSessions() {
184
201
  _qHandle: null,
185
202
  _steerChannel: null,
186
203
  _steerAckSentThisTurn: false,
204
+ _turnId: null,
187
205
  lastActivity: persisted.lastActivity ?? Date.now(),
188
206
  startedAt: persisted.startedAt ?? Date.now(),
189
207
  totalCost: persisted.totalCost ?? 0,
@@ -84,6 +84,7 @@ export function getSession(key) {
84
84
  _qHandle: null,
85
85
  _steerChannel: null,
86
86
  _steerAckSentThisTurn: false,
87
+ _turnId: null,
87
88
  lastActivity: Date.now(),
88
89
  startedAt: Date.now(),
89
90
  totalCost: 0,
@@ -120,6 +121,8 @@ export function getSession(key) {
120
121
  session._steerChannel = null;
121
122
  if (session._steerAckSentThisTurn === undefined)
122
123
  session._steerAckSentThisTurn = false;
124
+ if (session._turnId === undefined)
125
+ session._turnId = null;
123
126
  }
124
127
  return session;
125
128
  }
@@ -0,0 +1,162 @@
1
+ /**
2
+ * SSRF Guard — rejects requests to private/internal network destinations.
3
+ *
4
+ * Blocks:
5
+ * - Non-http(s) schemes (file://, ftp://, etc.)
6
+ * - IPv4 loopback (127.0.0.0/8), link-local (169.254.0.0/16 — incl. cloud
7
+ * metadata endpoint 169.254.169.254), RFC-1918 private ranges
8
+ * (10/8, 172.16/12, 192.168/16), and the catch-all 0.0.0.0
9
+ * - IPv6 loopback (::1), ULA (fc00::/7), link-local (fe80::/10)
10
+ *
11
+ * Opt-out: set ALLOW_PRIVATE_FETCH=1 to disable blocking (for local dev /
12
+ * self-hosted setups). Default = blocked.
13
+ *
14
+ * No new runtime dependencies — uses Node's built-in `dns` and `net` modules.
15
+ */
16
+ import dns from "dns";
17
+ import net from "net";
18
/**
 * Error thrown when a URL is rejected by the SSRF guard.
 *
 * The message has the form `SSRF blocked: <reason> (url: <url>)`. The rejected
 * URL and the bare reason are also exposed as properties so callers can log or
 * branch on them without parsing the message text.
 */
export class SsrfBlockedError extends Error {
    /** The URL that was rejected, exactly as passed to the constructor. */
    url;
    /** Explanation of why the URL was rejected (without the message prefix). */
    reason;
    /**
     * @param {string} url - The URL that was rejected.
     * @param {string} reason - Why the URL was rejected.
     */
    constructor(url, reason) {
        super(`SSRF blocked: ${reason} (url: ${url})`);
        this.name = "SsrfBlockedError";
        this.url = url;
        this.reason = reason;
    }
}
26
/**
 * Return true if the given dotted-quad IPv4 string falls into a range that is
 * not part of the public internet and must not be reached by a server-side
 * fetch.
 *
 * Ranges blocked:
 *   0.0.0.0/8        — "this" network
 *   10.0.0.0/8       — RFC 1918 private
 *   100.64.0.0/10    — RFC 6598 shared address space (carrier-grade NAT; also
 *                      hosts some cloud metadata endpoints, e.g. 100.100.100.200)
 *   127.0.0.0/8      — loopback
 *   169.254.0.0/16   — link-local (incl. IMDS 169.254.169.254)
 *   172.16.0.0/12    — RFC 1918 private (172.16–172.31)
 *   192.0.0.0/24     — IETF protocol assignments
 *   192.168.0.0/16   — RFC 1918 private
 *   198.18.0.0/15    — network benchmarking
 *   224.0.0.0/3      — multicast, reserved space and limited broadcast
 *
 * @param {string} ip - Candidate address in dotted-decimal form.
 * @returns {boolean} true when the address is in a blocked range; false when
 *   it is public or when the string is not four octets in 0–255.
 */
function isPrivateIPv4(ip) {
    const octets = ip.split(".").map(Number);
    const isOctet = (n) => Number.isInteger(n) && n >= 0 && n <= 255;
    if (octets.length !== 4 || !octets.every(isOctet)) {
        return false; // not a valid IPv4 — let DNS resolve decide
    }
    const [a, b, c] = octets;
    if (a === 0 || a === 10 || a === 127) {
        return true; // 0/8, 10/8, 127/8
    }
    if (a === 100 && b >= 64 && b <= 127) {
        return true; // 100.64.0.0/10 shared address space
    }
    if (a === 169 && b === 254) {
        return true; // 169.254.0.0/16 link-local
    }
    if (a === 172 && b >= 16 && b <= 31) {
        return true; // 172.16.0.0/12
    }
    if (a === 192 && b === 0 && c === 0) {
        return true; // 192.0.0.0/24 protocol assignments
    }
    if (a === 192 && b === 168) {
        return true; // 192.168.0.0/16
    }
    if (a === 198 && (b === 18 || b === 19)) {
        return true; // 198.18.0.0/15 benchmarking
    }
    // 224.0.0.0/4 multicast + 240.0.0.0/4 reserved + 255.255.255.255 broadcast
    return a >= 224;
}
58
/**
 * Expand a textual IPv6 address into its eight 16-bit groups.
 *
 * Handles `::` compression, a trailing dotted-quad IPv4 part
 * (`::ffff:127.0.0.1`) and a `%zone` suffix.
 *
 * @param {string} address - Lower-case IPv6 text without brackets.
 * @returns {number[] | null} Eight integers in 0–0xffff, or null when the
 *   text cannot be parsed as an IPv6 address.
 */
function expandIPv6Groups(address) {
    let text = address;
    const zoneAt = text.indexOf("%");
    if (zoneAt !== -1) {
        text = text.slice(0, zoneAt); // the zone id plays no part in classification
    }
    // Rewrite a dotted-quad tail as two hex groups so everything below is uniform.
    const lastColon = text.lastIndexOf(":");
    const tail = text.slice(lastColon + 1);
    if (tail.includes(".")) {
        const octets = tail.split(".").map(Number);
        const valid = octets.length === 4 &&
            octets.every((o) => Number.isInteger(o) && o >= 0 && o <= 255);
        if (!valid) {
            return null;
        }
        const high = ((octets[0] << 8) | octets[1]).toString(16);
        const low = ((octets[2] << 8) | octets[3]).toString(16);
        text = `${text.slice(0, lastColon + 1)}${high}:${low}`;
    }
    const halves = text.split("::");
    if (halves.length > 2) {
        return null; // "::" may appear at most once
    }
    const isCompressed = halves.length === 2;
    const head = halves[0] ? halves[0].split(":") : [];
    const rest = halves[1] ? halves[1].split(":") : [];
    const missing = 8 - head.length - rest.length;
    if (isCompressed ? missing < 1 : missing !== 0) {
        return null;
    }
    const hexGroup = /^[0-9a-f]{1,4}$/;
    const groups = [...head, ...Array(missing).fill("0"), ...rest]
        .map((g) => (hexGroup.test(g) ? Number.parseInt(g, 16) : Number.NaN));
    return groups.some(Number.isNaN) ? null : groups;
}
/**
 * Return true if the given IPv6 address string falls into a blocked range.
 *
 * Ranges blocked:
 *   ::               — unspecified address
 *   ::1              — loopback (in any spelling, e.g. 0:0:0:0:0:0:0:1)
 *   ::ffff:a.b.c.d   — IPv4-mapped, when the embedded IPv4 address is private
 *                      (also in hex form, e.g. ::ffff:7f00:1)
 *   fc00::/7         — unique local addresses (fc00:: – fdff::)
 *   fe80::/10        — link-local (fe80:: – febf::)
 *
 * @param {string} ip - IPv6 address, with or without surrounding brackets.
 * @returns {boolean} true when the address is in a blocked range.
 */
function isPrivateIPv6(ip) {
    const normalized = ip.toLowerCase().replace(/^\[|\]$/g, "");
    const groups = expandIPv6Groups(normalized);
    // If the text cannot be fully parsed, still apply the prefix checks to the
    // first group so unusual spellings are never treated more leniently.
    const first = groups !== null
        ? groups[0]
        : Number.parseInt(normalized.split(":")[0] || "0", 16);
    if (first >= 0xfc00 && first <= 0xfdff) {
        return true; // fc00::/7 unique local
    }
    if (first >= 0xfe80 && first <= 0xfebf) {
        return true; // fe80::/10 link-local
    }
    if (groups === null) {
        return false;
    }
    const leadingZeros = (count) => groups.slice(0, count).every((g) => g === 0);
    if (leadingZeros(7) && groups[7] <= 1) {
        return true; // :: (unspecified) or ::1 (loopback)
    }
    if (leadingZeros(5) && groups[5] === 0xffff) {
        // IPv4-mapped: the last 32 bits are an IPv4 address, so the IPv4 rules
        // decide. Without this, ::ffff:127.0.0.1 would bypass the guard.
        const [high, low] = [groups[6], groups[7]];
        return isPrivateIPv4(`${high >> 8}.${high & 0xff}.${low >> 8}.${low & 0xff}`);
    }
    return false;
}
85
/**
 * Classify a literal IP address (IPv4 or IPv6) as private/internal or not.
 *
 * Surrounding square brackets, as used for IPv6 hosts in URLs, are removed
 * before the address family is detected.
 *
 * @param {string} ip - Candidate IP address text.
 * @returns {boolean} The result of the family-specific check, or false when
 *   the text is not an IP address at all.
 */
function isPrivateIP(ip) {
    const candidate = ip.replace(/^\[|\]$/g, "");
    // net.isIP reports the address family: 4, 6, or 0 for "not an IP".
    switch (net.isIP(candidate)) {
        case 4:
            return isPrivateIPv4(candidate);
        case 6:
            return isPrivateIPv6(candidate);
        default:
            return false;
    }
}
96
/**
 * Check the hostname from a URL. A literal IP is classified immediately; a
 * name is resolved and every returned address is checked.
 *
 * Resolution uses `dns.promises.lookup(..., { all: true })`, i.e. the operating
 * system resolver. Unlike `dns.resolve` (A records only, DNS protocol only) it
 * returns both IPv4 and IPv6 addresses and honours the hosts file, so a name
 * that maps to a private address through AAAA records or a hosts entry is
 * caught as well.
 *
 * Fail-open on resolver errors: a lookup failure (ENOTFOUND, resolver blip)
 * resolves without throwing so a temporary DNS problem does not block the bot.
 *
 * NOTE(review): this is a point-in-time check. Presumably the caller resolves
 * the name again when it performs the actual request, so DNS rebinding between
 * the check and the fetch is not covered here — confirm against the caller.
 *
 * @param {string} hostname - `URL.hostname` value (IPv6 literals may be bracketed).
 * @param {string} url - Original URL, used only for the error message.
 * @returns {Promise<void>} Resolves when no blocked destination was found.
 * @throws {SsrfBlockedError} When the host is, or resolves to, a private,
 *   loopback or link-local address.
 */
async function checkHost(hostname, url) {
    const bare = hostname.replace(/^\[|\]$/g, ""); // strip IPv6 brackets for net.isIP
    // Literal IP — no DNS needed
    if (net.isIP(bare)) {
        if (isPrivateIP(bare)) {
            throw new SsrfBlockedError(url, `destination ${bare} is a private/loopback/link-local address`);
        }
        return;
    }
    // Obvious loopback names are rejected without a lookup. A single trailing
    // dot only marks the name as fully qualified, so "localhost." is the same host.
    const name = bare.toLowerCase().replace(/\.$/, "");
    if (name === "localhost" || name.endsWith(".localhost")) {
        throw new SsrfBlockedError(url, `destination hostname '${bare}' resolves to loopback`);
    }
    let records;
    try {
        records = await dns.promises.lookup(bare, { all: true });
    }
    catch {
        // Deliberate fail-open: let the actual fetch fail naturally.
        return;
    }
    for (const { address } of records) {
        if (isPrivateIP(address)) {
            throw new SsrfBlockedError(url, `destination hostname '${bare}' resolves to private address ${address}`);
        }
    }
}
136
/**
 * Assert that `url` is safe to fetch (not SSRF-risky).
 *
 * Checks, in order:
 *   1. The string must parse as a URL.
 *   2. The scheme must be http or https.
 *   3. Unless ALLOW_PRIVATE_FETCH=1 is set, the host must not be, or resolve
 *      to, a private/loopback/link-local address.
 *
 * The ALLOW_PRIVATE_FETCH opt-out only lifts the private-destination check.
 * URL and scheme validation always run, so enabling the opt-out for a local
 * or self-hosted setup does not also admit `file://` or malformed URLs.
 *
 * @param {string} url - The URL about to be fetched.
 * @returns {Promise<void>} Resolves when the URL passed every check.
 * @throws {SsrfBlockedError} When the URL is invalid, uses a disallowed
 *   scheme, or targets a blocked destination.
 */
export async function assertSsrfSafe(url) {
    let parsed;
    try {
        parsed = new URL(url);
    }
    catch {
        throw new SsrfBlockedError(url, "invalid URL");
    }
    const scheme = parsed.protocol; // includes trailing ':'
    if (scheme !== "http:" && scheme !== "https:") {
        throw new SsrfBlockedError(url, `scheme '${parsed.protocol}' is not allowed (only http/https)`);
    }
    // Opt-out for trusted local / self-hosted environments: skip only the
    // destination check, after the scheme has been validated.
    if (process.env.ALLOW_PRIVATE_FETCH === "1") {
        return;
    }
    await checkHost(parsed.hostname, url);
}
@@ -7,17 +7,22 @@ export class SteerChannel {
7
7
  constructor(cap = DEFAULT_CAP) {
8
8
  this.cap = cap;
9
9
  }
10
+ /** Push a message into the channel.
11
+ * Returns true if the message was accepted, false if it was dropped
12
+ * (channel closed or buffer cap reached). Callers must check the
13
+ * return value to decide whether to send a 📨 ack or a bufferFull notice. */
10
14
  push(text) {
11
15
  if (this.closed)
12
- return;
16
+ return false;
13
17
  if (this.buf.length >= this.cap) {
14
18
  console.warn(`[steer-channel] cap ${this.cap} reached — dropping steer message`);
15
- return;
19
+ return false;
16
20
  }
17
21
  this.buf.push({ type: "user", message: { role: "user", content: text }, parent_tool_use_id: null });
18
22
  const r = this.resolveNext;
19
23
  this.resolveNext = null;
20
24
  r?.();
25
+ return true;
21
26
  }
22
27
  close() {
23
28
  if (this.closed)
@@ -21,9 +21,6 @@ export async function transcribeAudio(audioPath) {
21
21
  const bodyEnd = Buffer.from(`\r\n--${boundary}\r\n` +
22
22
  `Content-Disposition: form-data; name="model"\r\n\r\n` +
23
23
  `whisper-large-v3-turbo\r\n` +
24
- `--${boundary}\r\n` +
25
- `Content-Disposition: form-data; name="language"\r\n\r\n` +
26
- `de\r\n` +
27
24
  `--${boundary}--\r\n`, "utf-8");
28
25
  const fullBody = Buffer.concat([bodyStart, fileBuffer, bodyEnd]);
29
26
  return new Promise((resolve, reject) => {