alvin-bot 5.2.0 → 5.4.0

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -9,8 +9,10 @@
9
9
  */
10
10
  import { execSync } from "child_process";
11
11
  import fs from "fs";
12
- import { resolve } from "path";
12
+ import os from "os";
13
+ import { resolve, join as pathJoin } from "path";
13
14
  import { isSelfRestartCommand, scheduleGracefulRestart } from "../services/restart.js";
15
+ import { checkExecAllowed } from "../services/exec-guard.js";
14
16
  // ── Tool Definitions (OpenAI function calling format) ───────────────────────
15
17
  export const AGENT_TOOLS = [
16
18
  {
@@ -227,7 +229,18 @@ function executeShell(command, cwd) {
227
229
  scheduleGracefulRestart();
228
230
  return { name: "run_shell", result: "Bot restart scheduled. Grammy will commit the Telegram offset before exiting." };
229
231
  }
230
- // Security: block obviously dangerous commands
232
+ // Exec-guard: enforce EXEC_SECURITY on this non-SDK provider path.
233
+ // checkExecAllowed reads config.execSecurity (deny → reject all;
234
+ // allowlist → reject metachars + non-allowlisted bins; full → pass).
235
+ const guardResult = checkExecAllowed(command);
236
+ if (!guardResult.allowed) {
237
+ return {
238
+ name: "run_shell",
239
+ result: `Command not allowed: ${guardResult.reason ?? "exec execution denied"}`,
240
+ error: true,
241
+ };
242
+ }
243
+ // Security: block obviously dangerous commands (belt-and-suspenders)
231
244
  const blocked = ["rm -rf /", "mkfs", "dd if=/dev/zero", "> /dev/sda"];
232
245
  if (blocked.some(b => command.includes(b))) {
233
246
  return { name: "run_shell", result: "Command blocked for safety.", error: true };
@@ -395,9 +408,21 @@ function executeListDirectory(dirPath, recursive, cwd) {
395
408
  }
396
409
  }
397
410
  function executePython(code, cwd) {
411
+ // Exec-guard: enforce EXEC_SECURITY before writing or executing anything.
412
+ // Use "python3" as the representative binary — deny blocks all execution;
413
+ // allowlist allows python3 (it is in SAFE_BINS) unless globally denied.
414
+ const guardResult = checkExecAllowed("python3");
415
+ if (!guardResult.allowed) {
416
+ return {
417
+ name: "python_execute",
418
+ result: `Python execution not allowed: ${guardResult.reason ?? "exec execution denied"}`,
419
+ error: true,
420
+ };
421
+ }
398
422
  try {
399
- // Write code to temp file to avoid shell escaping issues
400
- const tmpFile = `/tmp/alvin-bot-py-${Date.now()}.py`;
423
+ // Write code to temp file to avoid shell escaping issues.
424
+ // os.tmpdir() is cross-platform (works on Windows/Linux/macOS).
425
+ const tmpFile = pathJoin(os.tmpdir(), `alvin-bot-py-${Date.now()}.py`);
401
426
  fs.writeFileSync(tmpFile, code);
402
427
  try {
403
428
  const output = execSync(`python3 "${tmpFile}"`, {
@@ -62,6 +62,13 @@ function getMissingFileFailureMs() {
62
62
  const pending = new Map();
63
63
  let pollTimer = null;
64
64
  let started = false;
65
+ /**
66
+ * C-M2 — Set of agent IDs registered in THIS boot (not loaded from disk).
67
+ * Only in-memory-registered agents have a pid we can safely attribute to
68
+ * our own subprocess — disk-loaded pids may have been reused by the OS
69
+ * after a restart. We never kill a disk-loaded pid; only pids in this set.
70
+ */
71
+ const thisBootAgentIds = new Set();
65
72
  /**
66
73
  * Hard cap on the pending-agents map. Without this, a bot that runs many
67
74
  * async agents but sees some fail to write their outputFile would see
@@ -135,6 +142,9 @@ export function registerPendingAgent(input) {
135
142
  };
136
143
  enforcePendingCap();
137
144
  pending.set(input.agentId, entry);
145
+ // C-M2: mark this agent as registered in the current boot.
146
+ // Only this-boot agents have pids we can safely attribute to our own subprocess.
147
+ thisBootAgentIds.add(input.agentId);
138
148
  saveToDisk();
139
149
  }
140
150
  /**
@@ -295,11 +305,32 @@ async function deliverAsFailure(entry, status, error) {
295
305
  *
296
306
  * Never throws — all per-entry errors are swallowed.
297
307
  */
298
- export function killSessionDetachedAgents(session, killFn = (p) => {
308
+ /**
309
+ * C-M1 — Compute the signal target for a detached subprocess pid.
310
+ *
311
+ * Since agents are spawned `detached:true` they become process-group
312
+ * leaders. `claude -p` typically forks further (sub-agents), leaving
313
+ * grandchildren in the same group. Signalling only the group-leader PID
314
+ * lets those grandchildren survive. Instead, we signal the entire group
315
+ * by negating the pid (POSIX: kill(-pgid, sig) = signal the group).
316
+ *
317
+ * Windows does not support negative-pid group signals; on win32 we fall
318
+ * back to the positive pid (signals the leader only). A full win32 group-
319
+ * kill would require `taskkill /T /PID` — that can be layered later if
320
+ * Windows support becomes important.
321
+ *
322
+ * The injectable `killFn` always receives the already-transformed value
323
+ * (negative on POSIX, positive on win32) so tests can assert the correct
324
+ * target without needing platform-specific logic in test code.
325
+ */
326
+ function resolveKillTarget(pid) {
327
+ return process.platform !== "win32" ? -pid : pid;
328
+ }
329
+ export function killSessionDetachedAgents(session, killFn = (target) => {
299
330
  try {
300
- process.kill(p, "SIGTERM");
331
+ process.kill(target, "SIGTERM");
301
332
  }
302
- catch { /* already gone */ }
333
+ catch { /* already gone — ESRCH is fine */ }
303
334
  }) {
304
335
  // Use session.sessionKey — the real canonical key stamped by getSession().
305
336
  // Before v5.1.x this field did not exist on UserSession, causing a silent
@@ -310,12 +341,24 @@ export function killSessionDetachedAgents(session, killFn = (p) => {
310
341
  for (const entry of pending.values()) {
311
342
  if (entry.sessionKey !== key)
312
343
  continue;
313
- if (typeof entry.pid === "number") {
314
- try {
315
- killFn(entry.pid);
316
- }
317
- catch { /* best-effort */ }
344
+ if (typeof entry.pid !== "number")
345
+ continue;
346
+ // C-M2: only kill pids that are attributable to our own subprocess.
347
+ // Pids loaded from disk on a previous boot may have been reused by
348
+ // the OS for an unrelated process. We guard by only killing agents
349
+ // registered in THIS boot (thisBootAgentIds). Disk-loaded entries
350
+ // (those not in the set) are skipped — their subprocess may have
351
+ // already exited and the pid may point at an innocent process.
352
+ if (!thisBootAgentIds.has(entry.agentId)) {
353
+ console.log(`[async-watcher] skipping kill for disk-loaded agent ${entry.agentId} ` +
354
+ `(pid=${entry.pid}) — cannot safely attribute pid after restart`);
355
+ continue;
356
+ }
357
+ // C-M1: pass the group-kill target (negative pid on POSIX) to killFn.
358
+ try {
359
+ killFn(resolveKillTarget(entry.pid));
318
360
  }
361
+ catch { /* best-effort */ }
319
362
  }
320
363
  }
321
364
  /**
@@ -345,6 +388,7 @@ export function cancelPendingForSession(sessionKey) {
345
388
  /** Test-only: drop in-memory state. Doesn't touch disk. */
346
389
  export function __resetForTest() {
347
390
  pending.clear();
391
+ thisBootAgentIds.clear();
348
392
  if (pollTimer)
349
393
  clearInterval(pollTimer);
350
394
  pollTimer = null;
@@ -10,7 +10,7 @@
10
10
  * If a strategy is unavailable, we automatically cascade to the next one
11
11
  * and log a warning so failures are visible, not silent.
12
12
  */
13
- import { execSync, spawn } from "child_process";
13
+ import { execSync, execFileSync, spawn } from "child_process";
14
14
  import http from "http";
15
15
  import fs from "fs";
16
16
  import { config } from "../config.js";
@@ -22,7 +22,7 @@ const CDP_PORT = 9222;
22
22
  const EXEC_TIMEOUT = 60_000; // 60s for page loads via shell
23
23
  // ── Logging ──────────────────────────────────────────────────────────
24
24
  function log(msg) {
25
- console.warn(`[browser-manager] ${msg}`);
25
+ console.log(`[browser-manager] ${msg}`);
26
26
  }
27
27
  // ── Availability Checks ──────────────────────────────────────────────
28
28
  function isGatewayScriptPresent() {
@@ -170,9 +170,11 @@ export async function resolveStrategy(preferred) {
170
170
  }
171
171
  return "cli";
172
172
  }
173
- function execHub(args) {
173
+ function execHub(argv) {
174
174
  try {
175
- const result = execSync(`"${HUB_BROWSER_SH}" ${args}`, {
175
+ // H3: use execFileSync with discrete argv array — no shell interpolation,
176
+ // so attacker-controlled URLs cannot inject shell metacharacters.
177
+ const result = execFileSync(HUB_BROWSER_SH, argv, {
176
178
  stdio: "pipe",
177
179
  timeout: EXEC_TIMEOUT,
178
180
  env: { ...process.env, PATH: process.env.PATH },
@@ -310,7 +312,7 @@ async function navigateOne(strategy, url) {
310
312
  case "cdp": {
311
313
  // Try hub CDP first
312
314
  if (isHubBrowserAvailable()) {
313
- const result = execHub(`cdp goto "${url}"`);
315
+ const result = execHub(["cdp", "goto", url]);
314
316
  if (result && !result.error) {
315
317
  return { title: result.title || "", url: result.url || url };
316
318
  }
@@ -329,7 +331,7 @@ async function navigateOne(strategy, url) {
329
331
  log(`Direct CDP failed: ${err.message}`);
330
332
  // Last resort: try stealth
331
333
  if (isHubBrowserAvailable()) {
332
- const stealthResult = execHub(`stealth "${url}"`);
334
+ const stealthResult = execHub(["stealth", url]);
333
335
  if (stealthResult) {
334
336
  return { title: stealthResult.title || "", url: stealthResult.url || url };
335
337
  }
@@ -338,7 +340,7 @@ async function navigateOne(strategy, url) {
338
340
  }
339
341
  }
340
342
  case "hub-stealth": {
341
- const result = execHub(`stealth "${url}"`);
343
+ const result = execHub(["stealth", url]);
342
344
  if (result && !result.error) {
343
345
  return { title: result.title || "", url: result.url || url };
344
346
  }
@@ -369,7 +371,7 @@ export async function screenshot(url, options = {}) {
369
371
  case "cdp": {
370
372
  if (isHubBrowserAvailable()) {
371
373
  const tmpName = `shot_${Date.now()}.png`;
372
- const result = execHub(`cdp shot "${url}" ${tmpName}`);
374
+ const result = execHub(["cdp", "shot", url, tmpName]);
373
375
  if (result?.screenshot)
374
376
  return result.screenshot;
375
377
  }
@@ -378,7 +380,7 @@ export async function screenshot(url, options = {}) {
378
380
  }
379
381
  case "hub-stealth": {
380
382
  const tmpName = `shot_${Date.now()}.png`;
381
- const result = execHub(`stealth "${url}" --screenshot=${tmpName}`);
383
+ const result = execHub(["stealth", url, `--screenshot=${tmpName}`]);
382
384
  if (result?.screenshot)
383
385
  return result.screenshot;
384
386
  // Fallback
@@ -11,8 +11,18 @@
11
11
  * See browser-manager.ts for the full cascade; this module is the
12
12
  * leaf-level primitive with no dependencies on that file so both can
13
13
  * be unit-tested in isolation.
14
+ *
15
+ * SSRF hardening (M1): assertSsrfSafe() is called before every fetch hop to
16
+ * reject loopback / link-local / RFC-1918 / metadata / non-http(s)
17
+ * destinations. Redirects are followed manually (redirect:"manual") so every
18
+ * hop's Location header is re-validated before following — a public host that
19
+ * returns 302 → 169.254.169.254 is therefore blocked. Redirects are capped at
20
+ * 10 hops; an operator who needs redirect-to-internal can set
21
+ * ALLOW_PRIVATE_FETCH=1.
14
22
  */
23
+ import { assertSsrfSafe, SsrfBlockedError } from "./ssrf-guard.js";
15
24
  const DEFAULT_TIMEOUT_MS = 15_000;
25
+ const MAX_REDIRECTS = 10;
16
26
  const DEFAULT_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 14_0) AppleWebKit/605.1.15 " +
17
27
  "(KHTML, like Gecko) Version/17.0 Safari/605.1.15 AlvinBot/webfetch";
18
28
  export class WebfetchFailed extends Error {
@@ -53,24 +63,48 @@ export function parseTitle(html) {
53
63
  return decodeEntities(inner);
54
64
  }
55
65
  export async function webfetchNavigate(url, options = {}) {
66
+ // M1: SSRF guard — reject private/internal destinations before fetching.
67
+ // SsrfBlockedError is intentionally not wrapped in WebfetchFailed so
68
+ // callers can distinguish "blocked by policy" from "server error".
69
+ // We validate EVERY redirect hop manually (redirect:"manual") so a
70
+ // public host cannot 302 us into an internal address.
71
+ await assertSsrfSafe(url);
56
72
  const timeoutMs = options.timeoutMs ?? DEFAULT_TIMEOUT_MS;
57
73
  const controller = new AbortController();
58
74
  const timer = setTimeout(() => controller.abort(), timeoutMs);
59
75
  try {
76
+ let currentUrl = url;
60
77
  let response;
61
- try {
62
- response = await fetch(url, {
63
- method: "GET",
64
- headers: {
65
- "User-Agent": options.userAgent ?? DEFAULT_USER_AGENT,
66
- Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
67
- },
68
- redirect: "follow",
69
- signal: controller.signal,
70
- });
71
- }
72
- catch (err) {
73
- throw new WebfetchFailed(url, err.message, { cause: err });
78
+ for (let hop = 0;; hop++) {
79
+ try {
80
+ response = await fetch(currentUrl, {
81
+ method: "GET",
82
+ headers: {
83
+ "User-Agent": options.userAgent ?? DEFAULT_USER_AGENT,
84
+ Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
85
+ },
86
+ redirect: "manual",
87
+ signal: controller.signal,
88
+ });
89
+ }
90
+ catch (err) {
91
+ throw new WebfetchFailed(url, err.message, { cause: err });
92
+ }
93
+ // Not a redirect — we have the final response
94
+ if (response.status < 300 || response.status >= 400)
95
+ break;
96
+ const loc = response.headers.get("location");
97
+ if (!loc)
98
+ break; // no Location header — treat as final response
99
+ if (hop >= MAX_REDIRECTS) {
100
+ throw new SsrfBlockedError(url, `too many redirects (> ${MAX_REDIRECTS})`);
101
+ }
102
+ const next = new URL(loc, currentUrl).href;
103
+ // Re-validate each redirect target before following — closes the
104
+ // post-redirect SSRF bypass where fetch would silently follow a
105
+ // 302 pointing at 169.254.169.254 / loopback / RFC-1918.
106
+ await assertSsrfSafe(next);
107
+ currentUrl = next;
74
108
  }
75
109
  if (!response.ok) {
76
110
  throw new WebfetchFailed(url, `HTTP ${response.status}`, { status: response.status });
@@ -29,34 +29,94 @@ function parseInterval(input) {
29
29
  };
30
30
  return value * (mult[unit] || 60_000);
31
31
  }
32
- function parseField(expr, min, max) {
33
- if (expr === "*")
34
- return Array.from({ length: max - min + 1 }, (_, i) => i + min);
35
- if (expr.includes("/")) {
36
- const [, step] = expr.split("/");
37
- const s = parseInt(step);
38
- return Array.from({ length: max - min + 1 }, (_, i) => i + min).filter((v) => v % s === 0);
32
+ /**
33
+ * Parse a single cron field token (no commas — commas are handled by parseField).
34
+ * Supports: `*`, `a`, `a-b`, `a/s`, `a-b/s`, `*\/s`.
35
+ * Returns an array of valid integers in [min,max], or null if the token is invalid/garbage.
36
+ */
37
+ function parseFieldToken(token, min, max) {
38
+ const fullRange = () => Array.from({ length: max - min + 1 }, (_, i) => i + min);
39
+ if (token.includes("/")) {
40
+ const slashIdx = token.indexOf("/");
41
+ const basePart = token.slice(0, slashIdx);
42
+ const stepPart = token.slice(slashIdx + 1);
43
+ const step = parseInt(stepPart, 10);
44
+ if (!Number.isFinite(step) || step <= 0)
45
+ return null;
46
+ let base;
47
+ if (basePart === "*") {
48
+ base = fullRange();
49
+ }
50
+ else if (basePart.includes("-")) {
51
+ const [aPart, bPart] = basePart.split("-");
52
+ const a = parseInt(aPart, 10);
53
+ const b = parseInt(bPart, 10);
54
+ if (!Number.isFinite(a) || !Number.isFinite(b) || a > b || a < min || b > max)
55
+ return null;
56
+ base = Array.from({ length: b - a + 1 }, (_, i) => i + a);
57
+ }
58
+ else {
59
+ const a = parseInt(basePart, 10);
60
+ if (!Number.isFinite(a) || a < min || a > max)
61
+ return null;
62
+ base = [a];
63
+ }
64
+ // Filter by step aligned to base start
65
+ const baseStart = base[0];
66
+ return base.filter((v) => (v - baseStart) % step === 0);
39
67
  }
40
- if (expr.includes(","))
41
- return expr.split(",").map(Number);
42
- if (expr.includes("-")) {
43
- const [a, b] = expr.split("-").map(Number);
68
+ if (token === "*")
69
+ return fullRange();
70
+ if (token.includes("-")) {
71
+ const parts = token.split("-");
72
+ if (parts.length !== 2)
73
+ return null;
74
+ const a = parseInt(parts[0], 10);
75
+ const b = parseInt(parts[1], 10);
76
+ if (!Number.isFinite(a) || !Number.isFinite(b) || a > b || a < min || b > max)
77
+ return null;
44
78
  return Array.from({ length: b - a + 1 }, (_, i) => i + a);
45
79
  }
46
- return [parseInt(expr)];
80
+ const v = parseInt(token, 10);
81
+ if (!Number.isFinite(v) || v < min || v > max)
82
+ return null;
83
+ return [v];
84
+ }
85
+ /**
86
+ * Parse a cron field expression (may contain commas) into a sorted array of valid integers.
87
+ * Supports comma-separated combinations of: `*`, `a`, `a-b`, `a/s`, `a-b/s`, `*\/s`.
88
+ * Returns null if any token is invalid/garbage (signals an invalid schedule).
89
+ */
90
+ function parseField(expr, min, max) {
91
+ // Split on commas; filter empty strings (handles "1,,3" gracefully — skip empty)
92
+ const tokens = expr.split(",").filter((t) => t.length > 0);
93
+ if (tokens.length === 0)
94
+ return null;
95
+ const result = new Set();
96
+ for (const token of tokens) {
97
+ const vals = parseFieldToken(token, min, max);
98
+ if (vals === null)
99
+ return null; // propagate invalid token as parse failure
100
+ for (const v of vals)
101
+ result.add(v);
102
+ }
103
+ const arr = [...result].sort((a, b) => a - b);
104
+ return arr.length > 0 ? arr : null;
47
105
  }
48
106
  function parseCronFields(expression) {
49
107
  const parts = expression.trim().split(/\s+/);
50
108
  if (parts.length !== 5)
51
109
  return null;
52
110
  const [minExpr, hourExpr, dayExpr, monthExpr, weekdayExpr] = parts;
53
- return {
54
- minutes: parseField(minExpr, 0, 59),
55
- hours: parseField(hourExpr, 0, 23),
56
- days: parseField(dayExpr, 1, 31),
57
- months: parseField(monthExpr, 1, 12),
58
- weekdays: parseField(weekdayExpr, 0, 6),
59
- };
111
+ const minutes = parseField(minExpr, 0, 59);
112
+ const hours = parseField(hourExpr, 0, 23);
113
+ const days = parseField(dayExpr, 1, 31);
114
+ const months = parseField(monthExpr, 1, 12);
115
+ const weekdays = parseField(weekdayExpr, 0, 6);
116
+ // Any field returning null means the expression is invalid → reject it
117
+ if (!minutes || !hours || !days || !months || !weekdays)
118
+ return null;
119
+ return { minutes, hours, days, months, weekdays };
60
120
  }
61
121
  function nextCronRun(expression, after) {
62
122
  const fields = parseCronFields(expression);