alvin-bot 5.3.0 → 5.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +100 -0
- package/CHANGELOG.md +68 -3
- package/README.md +2 -0
- package/alvin-bot.config.example.json +1 -1
- package/dist/config.js +7 -4
- package/dist/handlers/commands.js +10 -2
- package/dist/handlers/document.js +8 -1
- package/dist/handlers/message.js +173 -30
- package/dist/i18n.js +21 -0
- package/dist/index.js +19 -1
- package/dist/init-data-dir.js +17 -0
- package/dist/middleware/auth.js +19 -1
- package/dist/providers/tool-executor.js +29 -4
- package/dist/services/async-agent-watcher.js +105 -14
- package/dist/services/browser-manager.js +11 -9
- package/dist/services/browser-webfetch.js +47 -13
- package/dist/services/cron-scheduling.js +79 -19
- package/dist/services/cron.js +205 -16
- package/dist/services/delivery-queue.js +19 -0
- package/dist/services/embeddings/index.js +2 -5
- package/dist/services/env-file.js +4 -0
- package/dist/services/personality.js +40 -37
- package/dist/services/session-persistence.js +21 -3
- package/dist/services/session.js +3 -0
- package/dist/services/ssrf-guard.js +162 -0
- package/dist/services/steer-channel.js +7 -2
- package/dist/services/subagent-delivery.js +31 -8
- package/dist/services/telegram.js +9 -0
- package/dist/services/trends.js +202 -2
- package/dist/services/voice.js +0 -3
- package/dist/web/server.js +155 -5
- package/package.json +8 -7
package/dist/init-data-dir.js
CHANGED
|
@@ -9,6 +9,12 @@ import { DATA_DIR, MEMORY_DIR, USERS_DIR, RUNTIME_DIR, WHATSAPP_AUTH, BACKUP_DIR
|
|
|
9
9
|
/**
|
|
10
10
|
* Create the directory structure only (no file seeding).
|
|
11
11
|
* Must run BEFORE migration so directories exist for copying.
|
|
12
|
+
*
|
|
13
|
+
* M5: DATA_DIR is created with mode 0700 (owner-only traverse) so that
|
|
14
|
+
* even before the per-file chmod audit runs, any file written by the bot
|
|
15
|
+
* is not accessible by other users on multi-user systems. On Windows,
|
|
16
|
+
* chmod is a no-op — we skip it silently to avoid alarming log output,
|
|
17
|
+
* mirroring how the file-permissions audit handles win32.
|
|
12
18
|
*/
|
|
13
19
|
export function ensureDataDirs() {
|
|
14
20
|
const dirs = [
|
|
@@ -27,6 +33,17 @@ export function ensureDataDirs() {
|
|
|
27
33
|
fs.mkdirSync(dir, { recursive: true });
|
|
28
34
|
}
|
|
29
35
|
}
|
|
36
|
+
// M5: Ensure the DATA_DIR itself is 0700 (owner-only). New dirs are
|
|
37
|
+
// created without an explicit mode above (inherits umask), so we chmod
|
|
38
|
+
// after creation. Windows doesn't support POSIX modes — skip silently.
|
|
39
|
+
if (process.platform !== "win32") {
|
|
40
|
+
try {
|
|
41
|
+
fs.chmodSync(DATA_DIR, 0o700);
|
|
42
|
+
}
|
|
43
|
+
catch {
|
|
44
|
+
// Best-effort — some network filesystems may not support chmod
|
|
45
|
+
}
|
|
46
|
+
}
|
|
30
47
|
}
|
|
31
48
|
/**
|
|
32
49
|
* Seed default files for a fresh install (only if they don't exist yet).
|
package/dist/middleware/auth.js
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import fs from "fs";
|
|
2
|
+
import crypto from "crypto";
|
|
2
3
|
import { InlineKeyboard } from "grammy";
|
|
3
4
|
import { config } from "../config.js";
|
|
4
5
|
import { APPROVED_USERS_FILE } from "../paths.js";
|
|
@@ -43,7 +44,7 @@ export function isApprovedUser(userId) {
|
|
|
43
44
|
const MAX_PENDING = 3;
|
|
44
45
|
const pendingPairings = new Map(); // code → pairing
|
|
45
46
|
/**
 * Produce a fresh pairing code as a decimal string.
 *
 * The number is drawn with crypto.randomInt, whose upper bound is exclusive,
 * so the result is always in 100000..999999 (exactly six digits, never a
 * leading zero).
 *
 * @returns {string} six-digit pairing code
 */
function generateCode() {
    const pairingNumber = crypto.randomInt(100000, 1000000);
    return `${pairingNumber}`;
}
|
|
48
49
|
function cleanExpired() {
|
|
49
50
|
const now = Date.now();
|
|
@@ -211,5 +212,22 @@ export async function authMiddleware(ctx, next) {
|
|
|
211
212
|
return;
|
|
212
213
|
}
|
|
213
214
|
// ── Callback queries (inline keyboards) ─────────
|
|
215
|
+
// Only allowedUsers may trigger admin action callbacks (approve/deny).
|
|
216
|
+
// Other callbacks (e.g. pairing-mode approved users) continue through.
|
|
217
|
+
if (userId && config.allowedUsers.includes(userId)) {
|
|
218
|
+
await next();
|
|
219
|
+
return;
|
|
220
|
+
}
|
|
221
|
+
// Unknown users: silently drop admin-action callbacks to prevent
|
|
222
|
+
// approval forgery / self-approval. Non-admin callbacks from pairing-
|
|
223
|
+
// approved users in "pairing" mode are also gated here intentionally;
|
|
224
|
+
// the approve flow is an admin-only action.
|
|
225
|
+
const callbackData = ctx.callbackQuery?.data || "";
|
|
226
|
+
const isAdminCallback = /^(pair|access|wa):(approve|deny|block):/.test(callbackData);
|
|
227
|
+
if (isAdminCallback) {
|
|
228
|
+
// Silently drop — no answer (grammy will time-out the spinner client-side)
|
|
229
|
+
return;
|
|
230
|
+
}
|
|
231
|
+
// Non-admin callbacks from unknown users: pass through (e.g. inline mode)
|
|
214
232
|
await next();
|
|
215
233
|
}
|
|
@@ -9,8 +9,10 @@
|
|
|
9
9
|
*/
|
|
10
10
|
import { execSync } from "child_process";
|
|
11
11
|
import fs from "fs";
|
|
12
|
-
import
|
|
12
|
+
import os from "os";
|
|
13
|
+
import { resolve, join as pathJoin } from "path";
|
|
13
14
|
import { isSelfRestartCommand, scheduleGracefulRestart } from "../services/restart.js";
|
|
15
|
+
import { checkExecAllowed } from "../services/exec-guard.js";
|
|
14
16
|
// ── Tool Definitions (OpenAI function calling format) ───────────────────────
|
|
15
17
|
export const AGENT_TOOLS = [
|
|
16
18
|
{
|
|
@@ -227,7 +229,18 @@ function executeShell(command, cwd) {
|
|
|
227
229
|
scheduleGracefulRestart();
|
|
228
230
|
return { name: "run_shell", result: "Bot restart scheduled. Grammy will commit the Telegram offset before exiting." };
|
|
229
231
|
}
|
|
230
|
-
//
|
|
232
|
+
// Exec-guard: enforce EXEC_SECURITY on this non-SDK provider path.
|
|
233
|
+
// checkExecAllowed reads config.execSecurity (deny → reject all;
|
|
234
|
+
// allowlist → reject metachars + non-allowlisted bins; full → pass).
|
|
235
|
+
const guardResult = checkExecAllowed(command);
|
|
236
|
+
if (!guardResult.allowed) {
|
|
237
|
+
return {
|
|
238
|
+
name: "run_shell",
|
|
239
|
+
result: `Command not allowed: ${guardResult.reason ?? "exec execution denied"}`,
|
|
240
|
+
error: true,
|
|
241
|
+
};
|
|
242
|
+
}
|
|
243
|
+
// Security: block obviously dangerous commands (belt-and-suspenders)
|
|
231
244
|
const blocked = ["rm -rf /", "mkfs", "dd if=/dev/zero", "> /dev/sda"];
|
|
232
245
|
if (blocked.some(b => command.includes(b))) {
|
|
233
246
|
return { name: "run_shell", result: "Command blocked for safety.", error: true };
|
|
@@ -395,9 +408,21 @@ function executeListDirectory(dirPath, recursive, cwd) {
|
|
|
395
408
|
}
|
|
396
409
|
}
|
|
397
410
|
function executePython(code, cwd) {
|
|
411
|
+
// Exec-guard: enforce EXEC_SECURITY before writing or executing anything.
|
|
412
|
+
// Use "python3" as the representative binary — deny blocks all execution;
|
|
413
|
+
// allowlist allows python3 (it is in SAFE_BINS) unless globally denied.
|
|
414
|
+
const guardResult = checkExecAllowed("python3");
|
|
415
|
+
if (!guardResult.allowed) {
|
|
416
|
+
return {
|
|
417
|
+
name: "python_execute",
|
|
418
|
+
result: `Python execution not allowed: ${guardResult.reason ?? "exec execution denied"}`,
|
|
419
|
+
error: true,
|
|
420
|
+
};
|
|
421
|
+
}
|
|
398
422
|
try {
|
|
399
|
-
// Write code to temp file to avoid shell escaping issues
|
|
400
|
-
|
|
423
|
+
// Write code to temp file to avoid shell escaping issues.
|
|
424
|
+
// os.tmpdir() is cross-platform (works on Windows/Linux/macOS).
|
|
425
|
+
const tmpFile = pathJoin(os.tmpdir(), `alvin-bot-py-${Date.now()}.py`);
|
|
401
426
|
fs.writeFileSync(tmpFile, code);
|
|
402
427
|
try {
|
|
403
428
|
const output = execSync(`python3 "${tmpFile}"`, {
|
|
@@ -27,6 +27,25 @@ import { dirname } from "path";
|
|
|
27
27
|
import { parseOutputFileStatus } from "./async-agent-parser.js";
|
|
28
28
|
import { ASYNC_AGENTS_STATE_FILE } from "../paths.js";
|
|
29
29
|
import { getAllSessions } from "./session.js";
|
|
30
|
+
/**
 * B3 — Recognise a permanent "target chat does not exist" delivery failure
 * (Telegram 400 "Bad Request: chat not found"), e.g. the stale chat_id:1
 * test agent. An agent hitting this must be abandoned rather than retried
 * forever.
 *
 * This predicate is deliberately local (it mirrors isChatNotFoundError in
 * subagent-delivery.ts): the watcher must NOT gain a new hard dependency on
 * a fresh subagent-delivery export, because many test suites mock that
 * module with only deliverSubAgentResult and a destructured import of a
 * non-mocked symbol would throw.
 *
 * Matching is intentionally narrow: only the chat-not-found signature, looked
 * up case-insensitively in the error's `message` and `description` fields.
 *
 * @param {unknown} err - value caught from (or returned by) a delivery attempt
 * @returns {boolean} true only for non-null objects carrying the signature
 */
function isChatNotFoundError(err) {
    // Primitives (including strings), null and undefined never match.
    if (err === null || typeof err !== "object") {
        return false;
    }
    const { message, description } = err;
    const searchable = [message ?? "", description ?? ""].join(" ");
    return /chat not found/i.test(searchable);
}
|
|
30
49
|
/** How often the polling loop runs against each pending agent. */
|
|
31
50
|
const POLL_INTERVAL_MS = 15_000;
|
|
32
51
|
/** Hard ceiling per agent — 12h. After this, give up and deliver
|
|
@@ -62,6 +81,13 @@ function getMissingFileFailureMs() {
|
|
|
62
81
|
const pending = new Map();
|
|
63
82
|
let pollTimer = null;
|
|
64
83
|
let started = false;
|
|
84
|
+
/**
|
|
85
|
+
* C-M2 — Set of agent IDs registered in THIS boot (not loaded from disk).
|
|
86
|
+
* Only in-memory-registered agents have a pid we can safely attribute to
|
|
87
|
+
* our own subprocess — disk-loaded pids may have been reused by the OS
|
|
88
|
+
* after a restart. We never kill a disk-loaded pid; only pids in this set.
|
|
89
|
+
*/
|
|
90
|
+
const thisBootAgentIds = new Set();
|
|
65
91
|
/**
|
|
66
92
|
* Hard cap on the pending-agents map. Without this, a bot that runs many
|
|
67
93
|
* async agents but sees some fail to write their outputFile would see
|
|
@@ -135,6 +161,9 @@ export function registerPendingAgent(input) {
|
|
|
135
161
|
};
|
|
136
162
|
enforcePendingCap();
|
|
137
163
|
pending.set(input.agentId, entry);
|
|
164
|
+
// C-M2: mark this agent as registered in the current boot.
|
|
165
|
+
// Only this-boot agents have pids we can safely attribute to our own subprocess.
|
|
166
|
+
thisBootAgentIds.add(input.agentId);
|
|
138
167
|
saveToDisk();
|
|
139
168
|
}
|
|
140
169
|
/**
|
|
@@ -189,22 +218,38 @@ export async function pollOnce() {
|
|
|
189
218
|
const now = Date.now();
|
|
190
219
|
const toRemove = [];
|
|
191
220
|
const missingFileFailureMs = getMissingFileFailureMs();
|
|
221
|
+
// B3 — when a delivery attempt proves the target chat is permanently
|
|
222
|
+
// invalid ("chat not found", e.g. the stale chat_id:1 test agent),
|
|
223
|
+
// abandon the agent so the watcher never retries it. Without this, a
|
|
224
|
+
// pending agent with an invalid target spams stderr on every poll
|
|
225
|
+
// cycle (inflating errors_24h) and lingers until the 12h giveUpAt.
|
|
226
|
+
const abandonIfInvalidTarget = (entry, outcome) => {
|
|
227
|
+
if (!outcome.chatNotFound)
|
|
228
|
+
return;
|
|
229
|
+
if (!toRemove.includes(entry.agentId))
|
|
230
|
+
toRemove.push(entry.agentId);
|
|
231
|
+
console.warn(`[async-watcher] abandoning agent ${entry.agentId} — delivery target ` +
|
|
232
|
+
`chat ${String(entry.chatId)} not found (invalid/stale); will not retry`);
|
|
233
|
+
};
|
|
192
234
|
for (const entry of pending.values()) {
|
|
193
235
|
entry.lastCheckedAt = now;
|
|
194
236
|
// Timeout check first — if the agent is past its giveUpAt, give up
|
|
195
237
|
// regardless of whether the file shows progress.
|
|
196
238
|
if (now >= entry.giveUpAt) {
|
|
197
|
-
await deliverAsFailure(entry, "timeout", "Agent ran longer than 12h — giving up");
|
|
239
|
+
const outcome = await deliverAsFailure(entry, "timeout", "Agent ran longer than 12h — giving up");
|
|
240
|
+
abandonIfInvalidTarget(entry, outcome);
|
|
198
241
|
toRemove.push(entry.agentId);
|
|
199
242
|
continue;
|
|
200
243
|
}
|
|
201
244
|
const status = await parseOutputFileStatus(entry.outputFile);
|
|
202
245
|
if (status.state === "completed") {
|
|
203
|
-
await deliverAsCompleted(entry, status.output, status.tokensUsed);
|
|
246
|
+
const outcome = await deliverAsCompleted(entry, status.output, status.tokensUsed);
|
|
247
|
+
abandonIfInvalidTarget(entry, outcome);
|
|
204
248
|
toRemove.push(entry.agentId);
|
|
205
249
|
}
|
|
206
250
|
else if (status.state === "failed") {
|
|
207
|
-
await deliverAsFailure(entry, "error", status.error);
|
|
251
|
+
const outcome = await deliverAsFailure(entry, "error", status.error);
|
|
252
|
+
abandonIfInvalidTarget(entry, outcome);
|
|
208
253
|
toRemove.push(entry.agentId);
|
|
209
254
|
}
|
|
210
255
|
else if (status.state === "missing" &&
|
|
@@ -212,7 +257,8 @@ export async function pollOnce() {
|
|
|
212
257
|
// v4.14.2 — Zombie guard: the subprocess never created its
|
|
213
258
|
// output file within `missingFileFailureMs` (default 10 min).
|
|
214
259
|
// Declare failed instead of polling until the 12h giveUpAt.
|
|
215
|
-
await deliverAsFailure(entry, "error", `Dispatched subprocess never wrote its output file (${Math.round((now - entry.startedAt) / 60_000)}m after start). Likely crashed before initializing, or the file was removed externally.`);
|
|
260
|
+
const outcome = await deliverAsFailure(entry, "error", `Dispatched subprocess never wrote its output file (${Math.round((now - entry.startedAt) / 60_000)}m after start). Likely crashed before initializing, or the file was removed externally.`);
|
|
261
|
+
abandonIfInvalidTarget(entry, outcome);
|
|
216
262
|
toRemove.push(entry.agentId);
|
|
217
263
|
}
|
|
218
264
|
// running / missing-but-young → keep polling next cycle
|
|
@@ -244,13 +290,20 @@ async function deliverAsCompleted(entry, output, tokensUsed) {
|
|
|
244
290
|
tokensUsed: tokensUsed ?? { input: 0, output: 0 },
|
|
245
291
|
duration: Date.now() - entry.startedAt,
|
|
246
292
|
};
|
|
293
|
+
let chatNotFound = false;
|
|
247
294
|
try {
|
|
248
|
-
await deliverSubAgentResult(info, result);
|
|
295
|
+
const outcome = await deliverSubAgentResult(info, result);
|
|
296
|
+
chatNotFound = !!outcome?.chatNotFound;
|
|
249
297
|
}
|
|
250
298
|
catch (err) {
|
|
251
299
|
console.error(`[async-watcher] delivery failed for ${entry.agentId}:`, err);
|
|
300
|
+
// deliverSubAgentResult normally swallows send errors and reports
|
|
301
|
+
// chatNotFound via its return value; if it ever throws, still detect
|
|
302
|
+
// the permanent invalid-target case here.
|
|
303
|
+
chatNotFound = isChatNotFoundError(err);
|
|
252
304
|
}
|
|
253
305
|
decrementPendingCount(entry.sessionKey);
|
|
306
|
+
return { chatNotFound };
|
|
254
307
|
}
|
|
255
308
|
async function deliverAsFailure(entry, status, error) {
|
|
256
309
|
const { deliverSubAgentResult } = await import("./subagent-delivery.js");
|
|
@@ -273,13 +326,17 @@ async function deliverAsFailure(entry, status, error) {
|
|
|
273
326
|
duration: Date.now() - entry.startedAt,
|
|
274
327
|
error,
|
|
275
328
|
};
|
|
329
|
+
let chatNotFound = false;
|
|
276
330
|
try {
|
|
277
|
-
await deliverSubAgentResult(info, result);
|
|
331
|
+
const outcome = await deliverSubAgentResult(info, result);
|
|
332
|
+
chatNotFound = !!outcome?.chatNotFound;
|
|
278
333
|
}
|
|
279
334
|
catch (err) {
|
|
280
335
|
console.error(`[async-watcher] failure delivery failed for ${entry.agentId}:`, err);
|
|
336
|
+
chatNotFound = isChatNotFoundError(err);
|
|
281
337
|
}
|
|
282
338
|
decrementPendingCount(entry.sessionKey);
|
|
339
|
+
return { chatNotFound };
|
|
283
340
|
}
|
|
284
341
|
// ── Test helpers ──────────────────────────────────────────────────
|
|
285
342
|
/**
|
|
@@ -295,11 +352,32 @@ async function deliverAsFailure(entry, status, error) {
|
|
|
295
352
|
*
|
|
296
353
|
* Never throws — all per-entry errors are swallowed.
|
|
297
354
|
*/
|
|
298
|
-
|
|
355
|
+
/**
 * C-M1 — Compute the signal target for a detached subprocess pid.
 *
 * Since agents are spawned `detached:true` they become process-group
 * leaders. `claude -p` typically forks further (sub-agents), leaving
 * grandchildren in the same group. Signalling only the group-leader PID
 * lets those grandchildren survive. Instead, we signal the entire group
 * by negating the pid (POSIX: kill(-pgid, sig) = signal the group).
 *
 * Windows does not support negative-pid group signals; on win32 we fall
 * back to the positive pid (signals the leader only). A full win32 group-
 * kill would require `taskkill /T /PID` — that can be layered later if
 * Windows support becomes important.
 *
 * The injectable `killFn` always receives the already-transformed value
 * (negative on POSIX, positive on win32) so tests can assert the correct
 * target without needing platform-specific logic in test code.
 *
 * Safety guard: the pid must be an integer greater than 1. Negating 0 yields
 * a target that POSIX kill() interprets as "the caller's own process group",
 * and negating 1 yields -1, which broadcasts the signal to every process the
 * user is allowed to signal. NaN and fractional values are rejected for the
 * same reason — they can never name a subprocess we spawned.
 *
 * @param {number} pid - pid of a detached subprocess (group leader)
 * @returns {number} `-pid` on POSIX platforms, `pid` on win32
 * @throws {RangeError} if `pid` is not an integer greater than 1
 */
function resolveKillTarget(pid) {
    if (!Number.isInteger(pid) || pid <= 1) {
        throw new RangeError(`refusing to signal unsafe pid ${String(pid)}`);
    }
    return process.platform !== "win32" ? -pid : pid;
}
|
|
376
|
+
export function killSessionDetachedAgents(session, killFn = (target) => {
|
|
299
377
|
try {
|
|
300
|
-
process.kill(
|
|
378
|
+
process.kill(target, "SIGTERM");
|
|
301
379
|
}
|
|
302
|
-
catch { /* already gone */ }
|
|
380
|
+
catch { /* already gone — ESRCH is fine */ }
|
|
303
381
|
}) {
|
|
304
382
|
// Use session.sessionKey — the real canonical key stamped by getSession().
|
|
305
383
|
// Before v5.1.x this field did not exist on UserSession, causing a silent
|
|
@@ -310,12 +388,24 @@ export function killSessionDetachedAgents(session, killFn = (p) => {
|
|
|
310
388
|
for (const entry of pending.values()) {
|
|
311
389
|
if (entry.sessionKey !== key)
|
|
312
390
|
continue;
|
|
313
|
-
if (typeof entry.pid
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
391
|
+
if (typeof entry.pid !== "number")
|
|
392
|
+
continue;
|
|
393
|
+
// C-M2: only kill pids that are attributable to our own subprocess.
|
|
394
|
+
// Pids loaded from disk on a previous boot may have been reused by
|
|
395
|
+
// the OS for an unrelated process. We guard by only killing agents
|
|
396
|
+
// registered in THIS boot (thisBootAgentIds). Disk-loaded entries
|
|
397
|
+
// (those not in the set) are skipped — their subprocess may have
|
|
398
|
+
// already exited and the pid may point at an innocent process.
|
|
399
|
+
if (!thisBootAgentIds.has(entry.agentId)) {
|
|
400
|
+
console.log(`[async-watcher] skipping kill for disk-loaded agent ${entry.agentId} ` +
|
|
401
|
+
`(pid=${entry.pid}) — cannot safely attribute pid after restart`);
|
|
402
|
+
continue;
|
|
403
|
+
}
|
|
404
|
+
// C-M1: pass the group-kill target (negative pid on POSIX) to killFn.
|
|
405
|
+
try {
|
|
406
|
+
killFn(resolveKillTarget(entry.pid));
|
|
318
407
|
}
|
|
408
|
+
catch { /* best-effort */ }
|
|
319
409
|
}
|
|
320
410
|
}
|
|
321
411
|
/**
|
|
@@ -345,6 +435,7 @@ export function cancelPendingForSession(sessionKey) {
|
|
|
345
435
|
/** Test-only: drop in-memory state. Doesn't touch disk. */
|
|
346
436
|
export function __resetForTest() {
|
|
347
437
|
pending.clear();
|
|
438
|
+
thisBootAgentIds.clear();
|
|
348
439
|
if (pollTimer)
|
|
349
440
|
clearInterval(pollTimer);
|
|
350
441
|
pollTimer = null;
|
|
@@ -10,7 +10,7 @@
|
|
|
10
10
|
* If a strategy is unavailable, we automatically cascade to the next one
|
|
11
11
|
* and log a warning so failures are visible, not silent.
|
|
12
12
|
*/
|
|
13
|
-
import { execSync, spawn } from "child_process";
|
|
13
|
+
import { execSync, execFileSync, spawn } from "child_process";
|
|
14
14
|
import http from "http";
|
|
15
15
|
import fs from "fs";
|
|
16
16
|
import { config } from "../config.js";
|
|
@@ -22,7 +22,7 @@ const CDP_PORT = 9222;
|
|
|
22
22
|
const EXEC_TIMEOUT = 60_000; // 60s for page loads via shell
|
|
23
23
|
// ── Logging ──────────────────────────────────────────────────────────
|
|
24
24
|
/**
 * Write a diagnostic line to stdout, tagged with this module's name so
 * browser-manager output can be told apart from other services.
 *
 * @param {string} msg - text to log (interpolated into the tagged line)
 */
function log(msg) {
    const tagged = `[browser-manager] ${msg}`;
    console.log(tagged);
}
|
|
27
27
|
// ── Availability Checks ──────────────────────────────────────────────
|
|
28
28
|
function isGatewayScriptPresent() {
|
|
@@ -170,9 +170,11 @@ export async function resolveStrategy(preferred) {
|
|
|
170
170
|
}
|
|
171
171
|
return "cli";
|
|
172
172
|
}
|
|
173
|
-
function execHub(
|
|
173
|
+
function execHub(argv) {
|
|
174
174
|
try {
|
|
175
|
-
|
|
175
|
+
// H3: use execFileSync with discrete argv array — no shell interpolation,
|
|
176
|
+
// so attacker-controlled URLs cannot inject shell metacharacters.
|
|
177
|
+
const result = execFileSync(HUB_BROWSER_SH, argv, {
|
|
176
178
|
stdio: "pipe",
|
|
177
179
|
timeout: EXEC_TIMEOUT,
|
|
178
180
|
env: { ...process.env, PATH: process.env.PATH },
|
|
@@ -310,7 +312,7 @@ async function navigateOne(strategy, url) {
|
|
|
310
312
|
case "cdp": {
|
|
311
313
|
// Try hub CDP first
|
|
312
314
|
if (isHubBrowserAvailable()) {
|
|
313
|
-
const result = execHub(
|
|
315
|
+
const result = execHub(["cdp", "goto", url]);
|
|
314
316
|
if (result && !result.error) {
|
|
315
317
|
return { title: result.title || "", url: result.url || url };
|
|
316
318
|
}
|
|
@@ -329,7 +331,7 @@ async function navigateOne(strategy, url) {
|
|
|
329
331
|
log(`Direct CDP failed: ${err.message}`);
|
|
330
332
|
// Last resort: try stealth
|
|
331
333
|
if (isHubBrowserAvailable()) {
|
|
332
|
-
const stealthResult = execHub(
|
|
334
|
+
const stealthResult = execHub(["stealth", url]);
|
|
333
335
|
if (stealthResult) {
|
|
334
336
|
return { title: stealthResult.title || "", url: stealthResult.url || url };
|
|
335
337
|
}
|
|
@@ -338,7 +340,7 @@ async function navigateOne(strategy, url) {
|
|
|
338
340
|
}
|
|
339
341
|
}
|
|
340
342
|
case "hub-stealth": {
|
|
341
|
-
const result = execHub(
|
|
343
|
+
const result = execHub(["stealth", url]);
|
|
342
344
|
if (result && !result.error) {
|
|
343
345
|
return { title: result.title || "", url: result.url || url };
|
|
344
346
|
}
|
|
@@ -369,7 +371,7 @@ export async function screenshot(url, options = {}) {
|
|
|
369
371
|
case "cdp": {
|
|
370
372
|
if (isHubBrowserAvailable()) {
|
|
371
373
|
const tmpName = `shot_${Date.now()}.png`;
|
|
372
|
-
const result = execHub(
|
|
374
|
+
const result = execHub(["cdp", "shot", url, tmpName]);
|
|
373
375
|
if (result?.screenshot)
|
|
374
376
|
return result.screenshot;
|
|
375
377
|
}
|
|
@@ -378,7 +380,7 @@ export async function screenshot(url, options = {}) {
|
|
|
378
380
|
}
|
|
379
381
|
case "hub-stealth": {
|
|
380
382
|
const tmpName = `shot_${Date.now()}.png`;
|
|
381
|
-
const result = execHub(
|
|
383
|
+
const result = execHub(["stealth", url, `--screenshot=${tmpName}`]);
|
|
382
384
|
if (result?.screenshot)
|
|
383
385
|
return result.screenshot;
|
|
384
386
|
// Fallback
|
|
@@ -11,8 +11,18 @@
|
|
|
11
11
|
* See browser-manager.ts for the full cascade; this module is the
|
|
12
12
|
* leaf-level primitive with no dependencies on that file so both can
|
|
13
13
|
* be unit-tested in isolation.
|
|
14
|
+
*
|
|
15
|
+
* SSRF hardening (M1): assertSsrfSafe() is called before every fetch hop to
|
|
16
|
+
* reject loopback / link-local / RFC-1918 / metadata / non-http(s)
|
|
17
|
+
* destinations. Redirects are followed manually (redirect:"manual") so every
|
|
18
|
+
* hop's Location header is re-validated before following — a public host that
|
|
19
|
+
* returns 302 → 169.254.169.254 is therefore blocked. Redirects are capped at
|
|
20
|
+
* 10 hops; an operator who needs redirect-to-internal can set
|
|
21
|
+
* ALLOW_PRIVATE_FETCH=1.
|
|
14
22
|
*/
|
|
23
|
+
import { assertSsrfSafe, SsrfBlockedError } from "./ssrf-guard.js";
|
|
15
24
|
const DEFAULT_TIMEOUT_MS = 15_000;
|
|
25
|
+
const MAX_REDIRECTS = 10;
|
|
16
26
|
const DEFAULT_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 14_0) AppleWebKit/605.1.15 " +
|
|
17
27
|
"(KHTML, like Gecko) Version/17.0 Safari/605.1.15 AlvinBot/webfetch";
|
|
18
28
|
export class WebfetchFailed extends Error {
|
|
@@ -53,24 +63,48 @@ export function parseTitle(html) {
|
|
|
53
63
|
return decodeEntities(inner);
|
|
54
64
|
}
|
|
55
65
|
export async function webfetchNavigate(url, options = {}) {
|
|
66
|
+
// M1: SSRF guard — reject private/internal destinations before fetching.
|
|
67
|
+
// SsrfBlockedError is intentionally not wrapped in WebfetchFailed so
|
|
68
|
+
// callers can distinguish "blocked by policy" from "server error".
|
|
69
|
+
// We validate EVERY redirect hop manually (redirect:"manual") so a
|
|
70
|
+
// public host cannot 302 us into an internal address.
|
|
71
|
+
await assertSsrfSafe(url);
|
|
56
72
|
const timeoutMs = options.timeoutMs ?? DEFAULT_TIMEOUT_MS;
|
|
57
73
|
const controller = new AbortController();
|
|
58
74
|
const timer = setTimeout(() => controller.abort(), timeoutMs);
|
|
59
75
|
try {
|
|
76
|
+
let currentUrl = url;
|
|
60
77
|
let response;
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
78
|
+
for (let hop = 0;; hop++) {
|
|
79
|
+
try {
|
|
80
|
+
response = await fetch(currentUrl, {
|
|
81
|
+
method: "GET",
|
|
82
|
+
headers: {
|
|
83
|
+
"User-Agent": options.userAgent ?? DEFAULT_USER_AGENT,
|
|
84
|
+
Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
|
85
|
+
},
|
|
86
|
+
redirect: "manual",
|
|
87
|
+
signal: controller.signal,
|
|
88
|
+
});
|
|
89
|
+
}
|
|
90
|
+
catch (err) {
|
|
91
|
+
throw new WebfetchFailed(url, err.message, { cause: err });
|
|
92
|
+
}
|
|
93
|
+
// Not a redirect — we have the final response
|
|
94
|
+
if (response.status < 300 || response.status >= 400)
|
|
95
|
+
break;
|
|
96
|
+
const loc = response.headers.get("location");
|
|
97
|
+
if (!loc)
|
|
98
|
+
break; // no Location header — treat as final response
|
|
99
|
+
if (hop >= MAX_REDIRECTS) {
|
|
100
|
+
throw new SsrfBlockedError(url, `too many redirects (> ${MAX_REDIRECTS})`);
|
|
101
|
+
}
|
|
102
|
+
const next = new URL(loc, currentUrl).href;
|
|
103
|
+
// Re-validate each redirect target before following — closes the
|
|
104
|
+
// post-redirect SSRF bypass where fetch would silently follow a
|
|
105
|
+
// 302 pointing at 169.254.169.254 / loopback / RFC-1918.
|
|
106
|
+
await assertSsrfSafe(next);
|
|
107
|
+
currentUrl = next;
|
|
74
108
|
}
|
|
75
109
|
if (!response.ok) {
|
|
76
110
|
throw new WebfetchFailed(url, `HTTP ${response.status}`, { status: response.status });
|
|
@@ -29,34 +29,94 @@ function parseInterval(input) {
|
|
|
29
29
|
};
|
|
30
30
|
return value * (mult[unit] || 60_000);
|
|
31
31
|
}
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
32
|
+
/**
 * Parse a single cron field token (no commas — commas are handled by parseField).
 * Supports: `*`, `a`, `a-b`, `a/s`, `a-b/s`, `*\/s`.
 *
 * Every numeric component must be a plain run of decimal digits. Bare
 * parseInt() is too lenient for validation: it accepts trailing garbage
 * ("5abc" → 5, "2x" → 2), which would let malformed schedules through as if
 * they were valid. Ranges must have exactly two endpoints in both the plain
 * and the stepped form ("1-5-9" and "1-5-9/2" are rejected alike).
 *
 * Step values are aligned to the first value of the base, so `10-20/5`
 * yields 10, 15, 20 and `*\/15` over 0..59 yields 0, 15, 30, 45.
 *
 * NOTE(review): a single-value base with a step (`a/s`) resolves to just
 * `[a]` here; some cron dialects read it as `a-max/s`. Behaviour is kept
 * as-is — confirm which is intended.
 *
 * @param {string} token - one comma-free field token
 * @param {number} min - smallest legal value for the field (inclusive)
 * @param {number} max - largest legal value for the field (inclusive)
 * @returns {number[] | null} ascending values in [min, max], or null if the
 *   token is malformed or out of range
 */
function parseFieldToken(token, min, max) {
    // Strict non-negative integer parse; null for anything but pure digits.
    const toInt = (text) => {
        if (!/^\d+$/.test(text))
            return null;
        const value = Number.parseInt(text, 10);
        return Number.isSafeInteger(value) ? value : null;
    };
    const span = (from, to) => Array.from({ length: to - from + 1 }, (_, i) => i + from);
    // Resolve `*`, `a-b` or `a` to an ascending list, or null when the part
    // is malformed or falls outside [min, max]. Shared by the plain and the
    // stepped form so both apply identical validation.
    const expand = (part) => {
        if (part === "*")
            return span(min, max);
        const pieces = part.split("-");
        if (pieces.length > 2)
            return null;
        const lo = toInt(pieces[0]);
        const hi = pieces.length === 2 ? toInt(pieces[1]) : lo;
        if (lo === null || hi === null || lo > hi || lo < min || hi > max)
            return null;
        return span(lo, hi);
    };
    const slashIdx = token.indexOf("/");
    if (slashIdx === -1)
        return expand(token);
    // Everything after the first "/" must be a positive integer step; a
    // second "/" therefore fails the digit check.
    const step = toInt(token.slice(slashIdx + 1));
    if (step === null || step === 0)
        return null;
    const base = expand(token.slice(0, slashIdx));
    if (base === null)
        return null;
    // Filter by step aligned to base start
    const baseStart = base[0];
    return base.filter((v) => (v - baseStart) % step === 0);
}
|
|
85
|
+
/**
 * Parse a cron field expression (may contain commas) into a sorted array of valid integers.
 * Supports comma-separated combinations of: `*`, `a`, `a-b`, `a-b/s`, `*\/s`.
 *
 * Empty segments between commas are skipped, so "1,,3" is read as "1,3".
 * Duplicate values produced by overlapping tokens are collapsed.
 *
 * @param {string} expr - one whitespace-free cron field
 * @param {number} min - smallest legal value for the field (inclusive)
 * @param {number} max - largest legal value for the field (inclusive)
 * @returns {number[] | null} ascending unique values, or null when the field
 *   has no non-empty token or any token fails to parse
 */
function parseField(expr, min, max) {
    const collected = new Set();
    let sawToken = false;
    for (const piece of expr.split(",")) {
        if (piece === "")
            continue; // tolerate stray commas
        sawToken = true;
        const values = parseFieldToken(piece, min, max);
        if (values === null)
            return null; // one bad token invalidates the whole field
        values.forEach((value) => collected.add(value));
    }
    if (!sawToken)
        return null;
    const ordered = Array.from(collected).sort((x, y) => x - y);
    return ordered.length === 0 ? null : ordered;
}
|
|
48
106
|
/**
 * Split a five-field cron expression and expand every field into its list
 * of matching values.
 *
 * Field order and bounds: minute 0-59, hour 0-23, day-of-month 1-31,
 * month 1-12, weekday 0-6.
 *
 * @param {string} expression - cron expression; fields separated by whitespace
 * @returns {{minutes: number[], hours: number[], days: number[], months: number[], weekdays: number[]} | null}
 *   expanded fields, or null when the expression does not have exactly five
 *   fields or any field is invalid
 */
function parseCronFields(expression) {
    const fieldExprs = expression.trim().split(/\s+/);
    if (fieldExprs.length !== 5)
        return null;
    // [min, max] per position, in cron field order.
    const bounds = [
        [0, 59],
        [0, 23],
        [1, 31],
        [1, 12],
        [0, 6],
    ];
    const expanded = fieldExprs.map((fieldExpr, idx) => parseField(fieldExpr, bounds[idx][0], bounds[idx][1]));
    // Any field returning null means the expression is invalid → reject it
    if (expanded.some((values) => !values))
        return null;
    const [minutes, hours, days, months, weekdays] = expanded;
    return { minutes, hours, days, months, weekdays };
}
|
|
61
121
|
function nextCronRun(expression, after) {
|
|
62
122
|
const fields = parseCronFields(expression);
|