alvin-bot 5.2.0 → 5.4.0
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- package/.env.example +100 -0
- package/CHANGELOG.md +76 -3
- package/README.md +2 -0
- package/alvin-bot.config.example.json +1 -1
- package/dist/config.js +15 -4
- package/dist/handlers/document.js +8 -1
- package/dist/handlers/message.js +165 -7
- package/dist/i18n.js +22 -0
- package/dist/index.js +12 -0
- package/dist/init-data-dir.js +17 -0
- package/dist/middleware/auth.js +19 -1
- package/dist/providers/claude-sdk-provider.js +3 -1
- package/dist/providers/tool-executor.js +29 -4
- package/dist/services/async-agent-watcher.js +52 -8
- package/dist/services/browser-manager.js +11 -9
- package/dist/services/browser-webfetch.js +47 -13
- package/dist/services/cron-scheduling.js +79 -19
- package/dist/services/cron.js +205 -16
- package/dist/services/delivery-queue.js +19 -0
- package/dist/services/embeddings/index.js +2 -5
- package/dist/services/env-file.js +4 -0
- package/dist/services/personality.js +40 -37
- package/dist/services/session-persistence.js +23 -3
- package/dist/services/session.js +9 -0
- package/dist/services/ssrf-guard.js +162 -0
- package/dist/services/steer-channel.js +46 -0
- package/dist/services/voice.js +0 -3
- package/dist/web/server.js +155 -5
- package/package.json +8 -7
package/.env.example
CHANGED
|
@@ -41,3 +41,103 @@ WEB_PORT=3100
|
|
|
41
41
|
|
|
42
42
|
# === Custom Chrome (for WhatsApp, if not auto-detected) ===
|
|
43
43
|
# CHROME_PATH=/usr/bin/google-chrome
|
|
44
|
+
|
|
45
|
+
# ===================================================================
|
|
46
|
+
# OPTIONAL — Security & Auth
|
|
47
|
+
# ===================================================================
|
|
48
|
+
|
|
49
|
+
# Auth mode for new users trying to talk to the bot.
|
|
50
|
+
# allowlist (default) — only ALLOWED_USERS can use the bot
|
|
51
|
+
# pairing — new users get a 6-digit pairing code; owner approves
|
|
52
|
+
# open — anyone can chat (for public bots)
|
|
53
|
+
# AUTH_MODE=allowlist
|
|
54
|
+
|
|
55
|
+
# Session isolation (how context is scoped):
|
|
56
|
+
# per-user (default) — each user gets their own session
|
|
57
|
+
# per-channel — everyone in the same channel shares a session
|
|
58
|
+
# per-channel-peer — per (channel, user) pair
|
|
59
|
+
# SESSION_MODE=per-user
|
|
60
|
+
|
|
61
|
+
# ===================================================================
|
|
62
|
+
# OPTIONAL — Text-to-Speech (TTS)
|
|
63
|
+
# ===================================================================
|
|
64
|
+
|
|
65
|
+
# TTS backend: "edge" (free, default) or "elevenlabs" (paid, higher quality)
|
|
66
|
+
# TTS_PROVIDER=edge
|
|
67
|
+
|
|
68
|
+
# ElevenLabs — set all three to use ElevenLabs TTS
|
|
69
|
+
# ELEVENLABS_API_KEY=
|
|
70
|
+
# ELEVENLABS_VOICE_ID=iP95p4xoKVk53GoZ742B
|
|
71
|
+
# ELEVENLABS_MODEL_ID=eleven_v3
|
|
72
|
+
|
|
73
|
+
# ===================================================================
|
|
74
|
+
# OPTIONAL — Webhooks
|
|
75
|
+
# ===================================================================
|
|
76
|
+
|
|
77
|
+
# Enable inbound webhook endpoint (POST /api/webhook) for external triggers
|
|
78
|
+
# WEBHOOK_ENABLED=false
|
|
79
|
+
# WEBHOOK_TOKEN=change-me-to-a-random-secret
|
|
80
|
+
|
|
81
|
+
# ===================================================================
|
|
82
|
+
# OPTIONAL — Sub-Agents & Compaction
|
|
83
|
+
# ===================================================================
|
|
84
|
+
|
|
85
|
+
# Maximum number of sub-agents that can run in parallel (default: 4)
|
|
86
|
+
# MAX_SUBAGENTS=4
|
|
87
|
+
|
|
88
|
+
# Sub-agent hard timeout in ms. -1 = unlimited (default: -1)
|
|
89
|
+
# SUBAGENT_TIMEOUT=-1
|
|
90
|
+
|
|
91
|
+
# Context compaction threshold in tokens (default: 80000)
|
|
92
|
+
# COMPACTION_THRESHOLD=80000
|
|
93
|
+
|
|
94
|
+
# ===================================================================
|
|
95
|
+
# OPTIONAL — Browser Automation
|
|
96
|
+
# ===================================================================
|
|
97
|
+
|
|
98
|
+
# Connect to an existing Chrome DevTools Protocol endpoint instead of
|
|
99
|
+
# launching a new browser instance.
|
|
100
|
+
# CDP_URL=ws://localhost:9222
|
|
101
|
+
|
|
102
|
+
# Port for the optional browser HTTP gateway (default: 3800)
|
|
103
|
+
# BROWSE_SERVER_PORT=3800
|
|
104
|
+
|
|
105
|
+
# ===================================================================
|
|
106
|
+
# OPTIONAL — Data Directory
|
|
107
|
+
# ===================================================================
|
|
108
|
+
|
|
109
|
+
# Override where alvin-bot stores its data (default: ~/.alvin-bot)
|
|
110
|
+
# ALVIN_DATA_DIR=/custom/path/to/data
|
|
111
|
+
|
|
112
|
+
# Live steering — inject follow-up instructions mid-generation (default: on; set to false or 0 to disable)
|
|
113
|
+
# STEERING_ENABLED=true
|
|
114
|
+
|
|
115
|
+
# ===================================================================
|
|
116
|
+
# POWER / OWNER OPT-INS — unlock full capability
|
|
117
|
+
#
|
|
118
|
+
# These are safe-by-default for unconfigured installs. As the owner
|
|
119
|
+
# you can opt in to the full power mode for each feature.
|
|
120
|
+
# ===================================================================
|
|
121
|
+
|
|
122
|
+
# Shell & Python execution security:
|
|
123
|
+
# allowlist (default) — only a curated set of safe binaries (ls, cat, git,
|
|
124
|
+
# python3, node, etc.) can be executed by the bot
|
|
125
|
+
# full — unrestricted shell/Python — full agent power mode; set this
|
|
126
|
+
# when you want the bot to run arbitrary commands on your machine
|
|
127
|
+
# deny — block all exec/python tool calls (read-only agent)
|
|
128
|
+
# EXEC_SECURITY=allowlist
|
|
129
|
+
|
|
130
|
+
# Web UI host binding:
|
|
131
|
+
# 127.0.0.1 (default) — loopback only, not reachable from LAN or internet
|
|
132
|
+
# 0.0.0.0 — listen on all interfaces (expose to LAN/VPS/remote)
|
|
133
|
+
# If you set WEB_HOST=0.0.0.0 (or any non-loopback address), also set
|
|
134
|
+
# WEB_PASSWORD to protect the UI:
|
|
135
|
+
# WEB_HOST=127.0.0.1
|
|
136
|
+
# WEB_PASSWORD=your-strong-password
|
|
137
|
+
|
|
138
|
+
# Allow the bot to fetch localhost / LAN / internal URLs (SSRF guard):
|
|
139
|
+
# unset or 0 (default) — private IPs and loopback are blocked to prevent
|
|
140
|
+
# SSRF attacks from untrusted prompt content
|
|
141
|
+
# 1 — allow private/loopback fetches, so the bot can reach your local services, dev
|
|
142
|
+
# servers, and internal APIs (owner workflow on your own machine)
|
|
143
|
+
# ALLOW_PRIVATE_FETCH=0
|
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,79 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to Alvin Bot are documented here.
|
|
4
4
|
|
|
5
|
+
## [5.4.0] — 2026-05-18
|
|
6
|
+
|
|
7
|
+
### Smoother background tasks — and Alvin always tells you the truth
|
|
8
|
+
|
|
9
|
+
When you ask Alvin to go off and do something longer — research, a
|
|
10
|
+
multi-step job — it now reliably hands control straight back to you so
|
|
11
|
+
you can keep chatting while it works, then delivers the result as its
|
|
12
|
+
own message. And if a task does need to run inline for a moment,
|
|
13
|
+
Alvin says so honestly instead of implying you're free when you're
|
|
14
|
+
not. Talking to Alvin now feels exactly like working with a colleague
|
|
15
|
+
who's already on it: you're never left waiting or guessing.
|
|
16
|
+
|
|
17
|
+
### Safer out of the box — with your full power one setting away
|
|
18
|
+
|
|
19
|
+
Alvin now ships with sensible, safe defaults so a fresh install is
|
|
20
|
+
solid for everyone, including people who just want to try it quickly.
|
|
21
|
+
Nothing about Alvin's capabilities has been taken away: if you want
|
|
22
|
+
the full, unrestricted superadmin experience it's a single documented
|
|
23
|
+
setting — your machine, your rules, your call. The new `.env.example`
|
|
24
|
+
spells out every option, including the "power" switches, in plain
|
|
25
|
+
language. You stay completely in control.
|
|
26
|
+
|
|
27
|
+
### Reliability & robustness across the board
|
|
28
|
+
|
|
29
|
+
A broad pass to make Alvin steadier on long-running setups: no more
|
|
30
|
+
duplicate messages under load, cleaner interplay between stopping,
|
|
31
|
+
steering and background work, more accurate scheduling for custom
|
|
32
|
+
cron expressions, and tighter handling of edge cases throughout.
|
|
33
|
+
Verified end-to-end with a stress test on a clean separate machine.
|
|
34
|
+
|
|
35
|
+
### A leaner, tidier install
|
|
36
|
+
|
|
37
|
+
Roughly 20 MB lighter to install, a calmer first-run experience
|
|
38
|
+
(optional features that aren't configured no longer look like
|
|
39
|
+
errors), better behavior on Windows and for non-German voice notes,
|
|
40
|
+
and a zero-config friendly default so a minimal setup just works.
|
|
41
|
+
|
|
42
|
+
As always, this shipped only after a full multi-pass review and a
|
|
43
|
+
fresh-install + stress verification on a clean second machine.
|
|
44
|
+
|
|
45
|
+
## [5.3.0] — 2026-05-18
|
|
46
|
+
|
|
47
|
+
### Talk to Alvin while it's working — no more interrupting yourself
|
|
48
|
+
|
|
49
|
+
Until now, a message you sent while Alvin was busy had only two
|
|
50
|
+
outcomes: it waited in line until the current task finished, or it
|
|
51
|
+
threw the task away and started over. Now there's a third, much
|
|
52
|
+
better one. Drop a quick *"btw, also check the other folder"* or
|
|
53
|
+
*"actually, use the live data not the test data"* mid-task and Alvin
|
|
54
|
+
takes it in **while keeping everything it has already done** — exactly
|
|
55
|
+
like leaning over to a colleague who's already working and adding one
|
|
56
|
+
more thing. No restart, no lost progress.
|
|
57
|
+
|
|
58
|
+
### A quiet 📨 so you know it landed
|
|
59
|
+
|
|
60
|
+
When your mid-task note is picked up, Alvin reacts with a 📨 on your
|
|
61
|
+
message and, the first time per task, adds one short line so you know
|
|
62
|
+
it was taken on board without derailing what it's doing. After that
|
|
63
|
+
it's just the reaction — no chatter, no spam while it works.
|
|
64
|
+
|
|
65
|
+
### Stop still always wins
|
|
66
|
+
|
|
67
|
+
Steering never overrides stopping. The ⛔ Stop button, `/cancel` and
|
|
68
|
+
`/stopall` behave exactly as before and always take precedence — a
|
|
69
|
+
mid-task note can never bring back a task you've stopped. If you'd
|
|
70
|
+
rather keep the old "queue it until done" behaviour, you can switch
|
|
71
|
+
steering off with a single setting; it's on by default for everyone.
|
|
72
|
+
|
|
73
|
+
Live steering works with the Claude engine; with other AI providers
|
|
74
|
+
your message safely falls back to the previous queue behaviour, so
|
|
75
|
+
nothing breaks. As always, this shipped only after a full end-to-end
|
|
76
|
+
verification and a stress test on a clean separate machine.
|
|
77
|
+
|
|
5
78
|
## [5.2.0] — 2026-05-17
|
|
6
79
|
|
|
7
80
|
### Stop now actually means stop — instantly
|
|
@@ -396,8 +469,8 @@ A maintainer's local Mac that had been running alvin-bot under PM2 *before* the
|
|
|
396
469
|
```bash
|
|
397
470
|
pm2 delete polyseus # any other PM2 entries
|
|
398
471
|
pm2 save --force # empty dump
|
|
399
|
-
launchctl unload ~/Library/LaunchAgents/pm2.
|
|
400
|
-
rm -f ~/Library/LaunchAgents/pm2.
|
|
472
|
+
launchctl unload ~/Library/LaunchAgents/pm2.youruser.plist 2>/dev/null
|
|
473
|
+
rm -f ~/Library/LaunchAgents/pm2.youruser.plist
|
|
401
474
|
pm2 kill
|
|
402
475
|
npm uninstall -g pm2
|
|
403
476
|
rm -rf ~/.pm2
|
|
@@ -2299,7 +2372,7 @@ Example:
|
|
|
2299
2372
|
🤖 Alvin Bot v4.8.3
|
|
2300
2373
|
Node v25.9.0 · darwin/arm64
|
|
2301
2374
|
|
|
2302
|
-
📁 Data dir:
|
|
2375
|
+
📁 Data dir: ~/.alvin-bot
|
|
2303
2376
|
.env: ✅ present
|
|
2304
2377
|
Provider: claude-sdk
|
|
2305
2378
|
|
package/README.md
CHANGED
|
@@ -62,6 +62,8 @@ That's it. The setup wizard validates everything:
|
|
|
62
62
|
|
|
63
63
|
**Requires:** Node.js 18+ ([nodejs.org](https://nodejs.org)) · Telegram bot token ([@BotFather](https://t.me/BotFather)) · Your Telegram user ID ([@userinfobot](https://t.me/userinfobot))
|
|
64
64
|
|
|
65
|
+
> **Native build note:** Alvin Bot uses `better-sqlite3` for indexed memory. Prebuilt binaries are included for common macOS and Linux environments so most installs need nothing extra. If your platform doesn't have a prebuilt binary and the optional native compilation is skipped, the bot still runs — semantic memory falls back gracefully to keyword search. A C++ toolchain (Xcode Command Line Tools on macOS, `build-essential` on Ubuntu) and Python 3 are only needed if you hit a build-from-source fallback.
|
|
66
|
+
|
|
65
67
|
Free AI providers available — no credit card needed. **Privacy-first?** Pick the 🔒 **Offline — Gemma 4 E4B** option in setup for a fully local LLM via Ollama (macOS/Linux: automated install; Windows: manual).
|
|
66
68
|
|
|
67
69
|
### 🔐 A note on permission prompts
|
package/dist/config.js
CHANGED
|
@@ -26,8 +26,10 @@ export const config = {
|
|
|
26
26
|
// Agent
|
|
27
27
|
defaultWorkingDir: process.env.WORKING_DIR || os.homedir(),
|
|
28
28
|
maxBudgetUsd: Number(process.env.MAX_BUDGET_USD) || 5.0,
|
|
29
|
-
// Model provider (primary)
|
|
30
|
-
|
|
29
|
+
// Model provider (primary). Default is "groq" — works on a fresh install
|
|
30
|
+
// with only BOT_TOKEN + GROQ_API_KEY. Set PRIMARY_PROVIDER=claude-sdk to
|
|
31
|
+
// use the Claude SDK (requires `claude login` / Claude Max subscription).
|
|
32
|
+
primaryProvider: process.env.PRIMARY_PROVIDER || "groq",
|
|
31
33
|
fallbackProviders: (process.env.FALLBACK_PROVIDERS || "")
|
|
32
34
|
.split(",")
|
|
33
35
|
.map(s => s.trim())
|
|
@@ -80,6 +82,15 @@ export const config = {
|
|
|
80
82
|
// Browser
|
|
81
83
|
cdpUrl: process.env.CDP_URL || "",
|
|
82
84
|
browseServerPort: Number(process.env.BROWSE_SERVER_PORT) || 3800,
|
|
83
|
-
// Exec Security
|
|
84
|
-
|
|
85
|
+
// Exec Security — default is "allowlist" (safe). Set EXEC_SECURITY=full to
|
|
86
|
+
// allow shell pipelines, metacharacters, and arbitrary binaries (opt-in).
|
|
87
|
+
execSecurity: (process.env.EXEC_SECURITY || "allowlist"),
|
|
85
88
|
};
|
|
89
|
+
/**
|
|
90
|
+
* Feature flag: btw live-steering. Default ON — only "false" or "0" disables.
|
|
91
|
+
* Re-reads process.env each call so tests can override without module reloads.
|
|
92
|
+
*/
|
|
93
|
+
export function isSteeringEnabled() {
|
|
94
|
+
const v = process.env.STEERING_ENABLED;
|
|
95
|
+
return v !== "false" && v !== "0";
|
|
96
|
+
}
|
package/dist/handlers/document.js
CHANGED
|
@@ -74,7 +74,14 @@ export async function handleDocument(ctx) {
|
|
|
74
74
|
// Download the file
|
|
75
75
|
const file = await ctx.api.getFile(doc.file_id);
|
|
76
76
|
const fileUrl = `https://api.telegram.org/file/bot${config.botToken}/${file.file_path}`;
|
|
77
|
-
|
|
77
|
+
// H2: strip any path components from the attacker-controlled file_name
|
|
78
|
+
// to prevent writing outside TEMP_DIR (e.g. file_name="../../../x").
|
|
79
|
+
const safeFilename = path.basename(filename);
|
|
80
|
+
const localPath = path.join(TEMP_DIR, `doc_${Date.now()}_${safeFilename}`);
|
|
81
|
+
// Containment assertion: resolved path must stay inside TEMP_DIR.
|
|
82
|
+
if (!path.resolve(localPath).startsWith(path.resolve(TEMP_DIR))) {
|
|
83
|
+
throw new Error("File path containment violation");
|
|
84
|
+
}
|
|
78
85
|
await downloadFile(fileUrl, localPath);
|
|
79
86
|
const caption = ctx.message?.caption || "";
|
|
80
87
|
const userInstruction = caption || `Analysiere diese Datei: ${filename}`;
|
package/dist/handlers/message.js
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { InputFile, InlineKeyboard } from "grammy";
|
|
2
2
|
import fs from "fs";
|
|
3
|
+
import crypto from "crypto";
|
|
3
4
|
import { getSession, addToHistory, trackProviderUsage, buildSessionKey, getTelegramWorkspace, markSessionDirty } from "../services/session.js";
|
|
4
5
|
import { resolveWorkspaceOrDefault, getWorkspace } from "../services/workspaces.js";
|
|
5
6
|
import { TelegramStreamer } from "../services/telegram.js";
|
|
@@ -19,6 +20,8 @@ import { isHarmlessTelegramError } from "../util/telegram-error-filter.js";
|
|
|
19
20
|
import { handleToolResultChunk } from "./async-agent-chunk-handler.js";
|
|
20
21
|
import { createStuckTimer } from "./stuck-timer.js";
|
|
21
22
|
import { shouldBypassQueue, shouldBypassSdkResume, waitUntilProcessingFalse, } from "./background-bypass.js";
|
|
23
|
+
import { SteerChannel } from "../services/steer-channel.js";
|
|
24
|
+
import { isSteeringEnabled } from "../config.js";
|
|
22
25
|
/**
|
|
23
26
|
* Stuck-only timeout — NO absolute cap.
|
|
24
27
|
*
|
|
@@ -119,6 +122,53 @@ const TOOL_ICONS = {
|
|
|
119
122
|
WebFetch: "📡",
|
|
120
123
|
Task: "🤖",
|
|
121
124
|
};
|
|
125
|
+
// ── v5.2 live steering — pure routing helper ─────────────────────────────────
|
|
126
|
+
/**
|
|
127
|
+
* Decide how a mid-task message (arriving while `session.isProcessing`) should
|
|
128
|
+
* be handled. Evaluated in the `if (session.isProcessing)` guard before any
|
|
129
|
+
* side-effects, so the caller can branch cleanly.
|
|
130
|
+
*
|
|
131
|
+
* Decision priority:
|
|
132
|
+
* 1. "bypass" — background-agent bypass path (pre-existing Cycle-1 logic)
|
|
133
|
+
* 2. "steer" — push into live SteerChannel (claude-sdk + steering on + channel open + live SDK query handle)
|
|
134
|
+
* 3. "queue" — normal queue behavior (all other cases)
|
|
135
|
+
*
|
|
136
|
+
* Defensive: if `isProcessing` is false the helper is being called incorrectly;
|
|
137
|
+
* it returns "queue" so the caller falls through to existing behavior.
|
|
138
|
+
*/
|
|
139
|
+
export function decideMidTaskRouting(args) {
|
|
140
|
+
if (!args.isProcessing)
|
|
141
|
+
return "queue";
|
|
142
|
+
if (args.shouldBypass)
|
|
143
|
+
return "bypass";
|
|
144
|
+
if (args.providerIsClaudeSdk && args.steeringEnabled && args.hasSteerChannel && args.hasLiveSdkQuery)
|
|
145
|
+
return "steer";
|
|
146
|
+
return "queue";
|
|
147
|
+
}
|
|
148
|
+
// ── Cycle-3 P0 — background honesty guard ────────────────────────────────────
|
|
149
|
+
/**
|
|
150
|
+
* Detect when the bot falsely promised "running in the background — you can
|
|
151
|
+
* keep chatting" but actually ran a sync Task/Agent that blocked the session.
|
|
152
|
+
*
|
|
153
|
+
* Returns true when all of the following hold:
|
|
154
|
+
* 1. A Task/Agent chunk arrived WITHOUT `run_in_background: true` (i.e. the
|
|
155
|
+
* stuck-timer entered sync mode — `taskChunkSeenWithoutRunInBackground`).
|
|
156
|
+
* 2. No real background detach happened this turn:
|
|
157
|
+
* • `mcp__alvin__dispatch_agent` was NOT called (`dispatchAgentFired=false`)
|
|
158
|
+
* • `pendingBackgroundCount` did NOT increase (`pendingBackgroundDelta <= 0`)
|
|
159
|
+
*
|
|
160
|
+
* Exported so it can be unit-tested without a grammy Context mock.
|
|
161
|
+
*/
|
|
162
|
+
export function detectUndetachedBackgroundClaim(args) {
|
|
163
|
+
if (!args.taskChunkSeenWithoutRunInBackground)
|
|
164
|
+
return false;
|
|
165
|
+
// Dead in production wiring (always false there — PATH A is detected via pendingBackgroundDelta); kept for explicit unit-test truth-table coverage.
|
|
166
|
+
if (args.dispatchAgentFired)
|
|
167
|
+
return false;
|
|
168
|
+
if (args.pendingBackgroundDelta > 0)
|
|
169
|
+
return false;
|
|
170
|
+
return true;
|
|
171
|
+
}
|
|
122
172
|
/** React to a message with an emoji. Silently fails if reactions aren't supported. */
|
|
123
173
|
async function react(ctx, emoji) {
|
|
124
174
|
try {
|
|
@@ -172,11 +222,23 @@ export async function handleMessage(ctx) {
|
|
|
172
222
|
// the new message gets processed immediately. The background task
|
|
173
223
|
// itself continues in its detached subprocess; the async-agent watcher
|
|
174
224
|
// delivers the result via subagent-delivery.ts when ready.
|
|
175
|
-
|
|
225
|
+
//
|
|
226
|
+
// v5.2 — decideMidTaskRouting unifies bypass / steer / queue in one place.
|
|
227
|
+
const _midTaskBypass = shouldBypassQueue({
|
|
176
228
|
isProcessing: session.isProcessing,
|
|
177
229
|
pendingBackgroundCount: session.pendingBackgroundCount,
|
|
178
230
|
abortController: session.abortController,
|
|
179
|
-
})
|
|
231
|
+
});
|
|
232
|
+
const _midTaskProviderIsSdk = getRegistry().getActive().config.type === "claude-sdk";
|
|
233
|
+
const _midTaskRoute = decideMidTaskRouting({
|
|
234
|
+
isProcessing: true,
|
|
235
|
+
providerIsClaudeSdk: _midTaskProviderIsSdk,
|
|
236
|
+
steeringEnabled: isSteeringEnabled(),
|
|
237
|
+
hasSteerChannel: !!session._steerChannel,
|
|
238
|
+
hasLiveSdkQuery: !!session._qHandle, // C-H3: require a live SDK query handle
|
|
239
|
+
shouldBypass: _midTaskBypass,
|
|
240
|
+
});
|
|
241
|
+
if (_midTaskRoute === "bypass") {
|
|
180
242
|
console.log(`[v4.12.3 bypass] aborting blocked query for ${sessionKey} — ` +
|
|
181
243
|
`${session.pendingBackgroundCount} background agent(s) pending`);
|
|
182
244
|
// Mark the abort as a bypass so the old handler's error branch
|
|
@@ -194,6 +256,35 @@ export async function handleMessage(ctx) {
|
|
|
194
256
|
await waitUntilProcessingFalse(session, 5000);
|
|
195
257
|
// Fall through to start a fresh query below.
|
|
196
258
|
}
|
|
259
|
+
else if (_midTaskRoute === "steer") {
|
|
260
|
+
// v5.2 — btw live steering: push mid-task message into the open
|
|
261
|
+
// SteerChannel so the running claude-sdk query picks it up as a
|
|
262
|
+
// streaming-input user message. No abort, no queue.
|
|
263
|
+
// C-L2: push() returns boolean — only 📨/ack when accepted; reply bufferFull otherwise.
|
|
264
|
+
const steerAccepted = session._steerChannel.push(text);
|
|
265
|
+
if (steerAccepted) {
|
|
266
|
+
await react(ctx, "📨");
|
|
267
|
+
if (!session._steerAckSentThisTurn) {
|
|
268
|
+
try {
|
|
269
|
+
await ctx.reply(t("bot.steer.ack", session.language));
|
|
270
|
+
}
|
|
271
|
+
catch {
|
|
272
|
+
/* harmless grammy race */
|
|
273
|
+
}
|
|
274
|
+
session._steerAckSentThisTurn = true;
|
|
275
|
+
}
|
|
276
|
+
}
|
|
277
|
+
else {
|
|
278
|
+
// Buffer full or channel closed — tell the user honestly
|
|
279
|
+
try {
|
|
280
|
+
await ctx.reply(t("bot.steer.bufferFull", session.language));
|
|
281
|
+
}
|
|
282
|
+
catch {
|
|
283
|
+
/* harmless grammy race */
|
|
284
|
+
}
|
|
285
|
+
}
|
|
286
|
+
return;
|
|
287
|
+
}
|
|
197
288
|
else {
|
|
198
289
|
// Normal queue behavior. v4.12.3 — emit a text reply in addition
|
|
199
290
|
// to the reaction so the user actually sees that their message
|
|
@@ -221,6 +312,13 @@ export async function handleMessage(ctx) {
|
|
|
221
312
|
}
|
|
222
313
|
session.isProcessing = true;
|
|
223
314
|
session.abortController = new AbortController();
|
|
315
|
+
// C-H2 — Stamp a per-turn identity token so the finally block can detect
|
|
316
|
+
// whether a NEW turn has already started before it runs. If requestStop
|
|
317
|
+
// fires mid-turn and allows a new message to start a fresh turn (with its
|
|
318
|
+
// own new abortController + _steerChannel), the old turn's finally sees the
|
|
319
|
+
// token mismatch and skips the clobber — preserving the new turn's state.
|
|
320
|
+
const _thisTurnId = crypto.randomUUID();
|
|
321
|
+
session._turnId = _thisTurnId;
|
|
224
322
|
// v4.12.3 — Clear any stale bypass flag from a previous aborted turn.
|
|
225
323
|
// The flag is set by the bypass path right before it calls abort(),
|
|
226
324
|
// read by the OLD handler's error path, and cleared here by the NEW
|
|
@@ -459,6 +557,19 @@ export async function handleMessage(ctx) {
|
|
|
459
557
|
// v5.1 — Store the SDK query handle so requestStop() can interrupt it.
|
|
460
558
|
onQueryHandle: (q) => { session._qHandle = q; },
|
|
461
559
|
};
|
|
560
|
+
// v5.2 — btw live steering: seed SteerChannel at turn start so mid-task
|
|
561
|
+
// user messages can be pushed in while this query is running. Only for
|
|
562
|
+
// claude-sdk (the only provider that supports streaming-input prompts).
|
|
563
|
+
// The initial bridged prompt is pushed first so the channel sequence is:
|
|
564
|
+
// [bridgedPrompt, <any mid-task messages>, <close on finally>]
|
|
565
|
+
// queryOpts.steerChannel is set so the provider uses the channel as the
|
|
566
|
+
// prompt source. queryOpts.prompt is kept as-is for non-SDK fallback paths
|
|
567
|
+
// (providers that don't support steerChannel ignore it and use prompt).
|
|
568
|
+
if (isSDK && isSteeringEnabled()) {
|
|
569
|
+
session._steerChannel = new SteerChannel();
|
|
570
|
+
session._steerChannel.push(bridgedPrompt);
|
|
571
|
+
queryOpts.steerChannel = session._steerChannel;
|
|
572
|
+
}
|
|
462
573
|
// Stream response from provider (with fallback)
|
|
463
574
|
let lastBroadcastLen = 0;
|
|
464
575
|
// Captured during tool_use chunks; consumed by tool_result chunks so
|
|
@@ -472,6 +583,13 @@ export async function handleMessage(ctx) {
|
|
|
472
583
|
// (the empty-stream capturedSessionId) and the next turn loops again.
|
|
473
584
|
// This is the second half of the empty-stream-loop fix.
|
|
474
585
|
let sessionResetInStream = false;
|
|
586
|
+
// Cycle-3 P0 — background honesty guard tracking.
|
|
587
|
+
// `syncTaskSeenWithoutRunInBackground`: lifted from the stuckTimer.enterSync
|
|
588
|
+
// site below — true once a Task/Agent chunk arrives with no runInBackground.
|
|
589
|
+
// `pendingBackgroundCountAtTurnStart`: snapshot before the stream so we can
|
|
590
|
+
// compute the delta at turn end (dispatch_agent increments this counter).
|
|
591
|
+
let syncTaskSeenWithoutRunInBackground = false;
|
|
592
|
+
const pendingBackgroundCountAtTurnStart = session.pendingBackgroundCount ?? 0;
|
|
475
593
|
for await (const chunk of registry.queryWithFallback(queryOpts, workspace.provider)) {
|
|
476
594
|
// v5.1 — Bail as soon as requestStop() marks the session. The registry's
|
|
477
595
|
// outer loop already guards against new provider attempts; this guard
|
|
@@ -488,6 +606,8 @@ export async function handleMessage(ctx) {
|
|
|
488
606
|
chunk.toolUseId &&
|
|
489
607
|
chunk.runInBackground !== true) {
|
|
490
608
|
stuckTimer.enterSync(chunk.toolUseId);
|
|
609
|
+
// Cycle-3 P0 — lift the signal for honesty guard (same condition)
|
|
610
|
+
syncTaskSeenWithoutRunInBackground = true;
|
|
491
611
|
}
|
|
492
612
|
else if (chunk.type === "tool_result" && chunk.toolUseId) {
|
|
493
613
|
// Any tool_result may match a pending sync entry. Set.delete is
|
|
@@ -644,6 +764,27 @@ export async function handleMessage(ctx) {
|
|
|
644
764
|
break;
|
|
645
765
|
}
|
|
646
766
|
}
|
|
767
|
+
// Cycle-3 P0 — background honesty guard.
|
|
768
|
+
// If the turn ran a sync Task/Agent (blocking) and no real detach happened
|
|
769
|
+
// (no dispatch_agent, no pendingBackgroundCount increase), append one
|
|
770
|
+
// truthful notice so the user is never left with a false async promise.
|
|
771
|
+
// This fires only on "normal" turn endings — bypass-abort and user-stop
|
|
772
|
+
// are handled below and don't need the notice (neither promises async).
|
|
773
|
+
if (!bypassAborted &&
|
|
774
|
+
!timedOut &&
|
|
775
|
+
!session._stopRequested &&
|
|
776
|
+
detectUndetachedBackgroundClaim({
|
|
777
|
+
taskChunkSeenWithoutRunInBackground: syncTaskSeenWithoutRunInBackground,
|
|
778
|
+
dispatchAgentFired: false, // used purely via pendingBackgroundDelta below
|
|
779
|
+
pendingBackgroundDelta: (session.pendingBackgroundCount ?? 0) - pendingBackgroundCountAtTurnStart,
|
|
780
|
+
})) {
|
|
781
|
+
try {
|
|
782
|
+
await ctx.reply(t("bot.background.syncNotice", session.language));
|
|
783
|
+
}
|
|
784
|
+
catch {
|
|
785
|
+
/* harmless — notice is best-effort */
|
|
786
|
+
}
|
|
787
|
+
}
|
|
647
788
|
// v5.1 stop: user stopped this query — do NOT finalize partial output
|
|
648
789
|
// as a successful answer, no 👍, no history commit. The stop trigger
|
|
649
790
|
// (/cancel | /stopall | ⛔ button) already acknowledged to the user.
|
|
@@ -724,11 +865,28 @@ export async function handleMessage(ctx) {
|
|
|
724
865
|
finally {
|
|
725
866
|
stuckTimer.cancel();
|
|
726
867
|
clearInterval(typingInterval);
|
|
727
|
-
|
|
728
|
-
session.
|
|
729
|
-
//
|
|
730
|
-
|
|
731
|
-
|
|
868
|
+
// C-H2 — Single-writer guard: only reset lifecycle fields if this turn's
|
|
869
|
+
// token still matches the session's current token. If requestStop fired
|
|
870
|
+
// mid-turn and a NEW turn has already started (and stamped a new _turnId),
|
|
871
|
+
// then _turnId !== _thisTurnId and we SKIP the reset — the new turn owns
|
|
872
|
+
// these fields. _qHandle and _stopRequested are included in the gate:
|
|
873
|
+
// requestStop already nulled _qHandle before returning (after interruptQuery),
|
|
874
|
+
// but if a new turn started and re-populated _qHandle via onQueryHandle we
|
|
875
|
+
// must NOT null it here — that would break Cycle-1 stop teeth for the new turn.
|
|
876
|
+
if (session._turnId === _thisTurnId) {
|
|
877
|
+
session.isProcessing = false;
|
|
878
|
+
session.abortController = null;
|
|
879
|
+
// v5.2 — Close and clear the SteerChannel; reset per-turn ack flag.
|
|
880
|
+
try {
|
|
881
|
+
session._steerChannel?.close();
|
|
882
|
+
}
|
|
883
|
+
catch { /* ignore */ }
|
|
884
|
+
session._steerChannel = null;
|
|
885
|
+
session._steerAckSentThisTurn = false;
|
|
886
|
+
session._qHandle = null; // safe: token matches → no newer turn owns this
|
|
887
|
+
session._stopRequested = null; // safe: token matches → no newer turn has set this
|
|
888
|
+
session._turnId = null;
|
|
889
|
+
}
|
|
732
890
|
// v5.1 — Remove the ⛔ Stop control message (sent at processing start).
|
|
733
891
|
// Best-effort: if it was already deleted or the bot lacks permission, ignore.
|
|
734
892
|
if (stopMsgId !== null) {
|
package/dist/i18n.js
CHANGED
|
@@ -331,6 +331,28 @@ const strings = {
|
|
|
331
331
|
es: "(externo, activo)",
|
|
332
332
|
fr: "(externe, en cours)",
|
|
333
333
|
},
|
|
334
|
+
// background honesty notice — emitted when a sync Task blocked the turn
|
|
335
|
+
// (Cycle-3 P0 fix: don't falsely promise "you can keep chatting")
|
|
336
|
+
"bot.background.syncNotice": {
|
|
337
|
+
en: "ℹ️ That ran inline and took a while — I couldn't take new messages until it finished.",
|
|
338
|
+
de: "ℹ️ Das lief inline und hat eine Weile gedauert — ich konnte währenddessen keine neuen Nachrichten entgegennehmen.",
|
|
339
|
+
es: "ℹ️ Eso se ejecutó en línea y tardó un rato — no pude recibir nuevos mensajes hasta que terminó.",
|
|
340
|
+
fr: "ℹ️ Cela s'est exécuté en ligne et a pris un moment — je ne pouvais pas recevoir de nouveaux messages tant que ce n'était pas terminé.",
|
|
341
|
+
},
|
|
342
|
+
// live steering ack (Task 4 — btw feature)
|
|
343
|
+
"bot.steer.ack": {
|
|
344
|
+
en: "📨 Noted — Alvin will factor that in without restarting.",
|
|
345
|
+
de: "📨 Mitgenommen — Alvin berücksichtigt das, ohne abzubrechen.",
|
|
346
|
+
es: "📨 Anotado — Alvin lo tendrá en cuenta sin reiniciar.",
|
|
347
|
+
fr: "📨 Noté — Alvin en tiendra compte sans redémarrer.",
|
|
348
|
+
},
|
|
349
|
+
// C-L2: steer buffer full — honest reply when the steer cap is reached
|
|
350
|
+
"bot.steer.bufferFull": {
|
|
351
|
+
en: "⚠️ Steer buffer full — this message wasn't queued. Alvin is still running; try again in a moment.",
|
|
352
|
+
de: "⚠️ Steer-Puffer voll — diese Nachricht wurde nicht übernommen. Alvin läuft noch; versuch es gleich nochmal.",
|
|
353
|
+
es: "⚠️ Búfer de dirección lleno — este mensaje no se añadió. Alvin sigue en marcha; inténtalo de nuevo en un momento.",
|
|
354
|
+
fr: "⚠️ Tampon de direction plein — ce message n'a pas été pris en compte. Alvin tourne toujours ; réessaie dans un instant.",
|
|
355
|
+
},
|
|
334
356
|
// /cancel
|
|
335
357
|
"bot.cancel.cancelling": {
|
|
336
358
|
en: "Cancelling request…",
|
package/dist/index.js
CHANGED
|
@@ -81,6 +81,18 @@ import { MEMORY_DIR as SEC_MEM_DIR, DATA_DIR as SEC_DATA_DIR } from "./paths.js"
|
|
|
81
81
|
console.warn(` ${r.path}: ${r.error}`);
|
|
82
82
|
}
|
|
83
83
|
}
|
|
84
|
+
// M5: Ensure DATA_DIR itself is 0700 (owner-only traverse). ensureDataDirs()
|
|
85
|
+
// above handles new installs; this belt-and-suspenders catches the case where
|
|
86
|
+
// the dir was created by a pre-M5 version with 0755 and the bot is restarting.
|
|
87
|
+
if (process.platform !== "win32") {
|
|
88
|
+
try {
|
|
89
|
+
const { chmodSync } = await import("fs");
|
|
90
|
+
chmodSync(SEC_DATA_DIR, 0o700);
|
|
91
|
+
}
|
|
92
|
+
catch {
|
|
93
|
+
// Best-effort — network filesystems may not support chmod
|
|
94
|
+
}
|
|
95
|
+
}
|
|
84
96
|
}
|
|
85
97
|
// 4. Crash-loop brake check — if we've crashed N times in a short window,
|
|
86
98
|
// refuse to start, write an alert file, and unload our LaunchAgent so
|
package/dist/init-data-dir.js
CHANGED
|
@@ -9,6 +9,12 @@ import { DATA_DIR, MEMORY_DIR, USERS_DIR, RUNTIME_DIR, WHATSAPP_AUTH, BACKUP_DIR
|
|
|
9
9
|
/**
|
|
10
10
|
* Create the directory structure only (no file seeding).
|
|
11
11
|
* Must run BEFORE migration so directories exist for copying.
|
|
12
|
+
*
|
|
13
|
+
* M5: DATA_DIR is chmod'ed to 0700 (owner-only access) right after creation so that
|
|
14
|
+
* even before the per-file chmod audit runs, any file written by the bot
|
|
15
|
+
* is not accessible by other users on multi-user systems. On Windows,
|
|
16
|
+
* chmod is a no-op — we skip it silently to avoid alarming log output,
|
|
17
|
+
* mirroring how the file-permissions audit handles win32.
|
|
12
18
|
*/
|
|
13
19
|
export function ensureDataDirs() {
|
|
14
20
|
const dirs = [
|
|
@@ -27,6 +33,17 @@ export function ensureDataDirs() {
|
|
|
27
33
|
fs.mkdirSync(dir, { recursive: true });
|
|
28
34
|
}
|
|
29
35
|
}
|
|
36
|
+
// M5: Ensure the DATA_DIR itself is 0700 (owner-only). New dirs are
|
|
37
|
+
// created without an explicit mode above (inherits umask), so we chmod
|
|
38
|
+
// after creation. Windows doesn't support POSIX modes — skip silently.
|
|
39
|
+
if (process.platform !== "win32") {
|
|
40
|
+
try {
|
|
41
|
+
fs.chmodSync(DATA_DIR, 0o700);
|
|
42
|
+
}
|
|
43
|
+
catch {
|
|
44
|
+
// Best-effort — some network filesystems may not support chmod
|
|
45
|
+
}
|
|
46
|
+
}
|
|
30
47
|
}
|
|
31
48
|
/**
|
|
32
49
|
* Seed default files for a fresh install (only if they don't exist yet).
|
package/dist/middleware/auth.js
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import fs from "fs";
|
|
2
|
+
import crypto from "crypto";
|
|
2
3
|
import { InlineKeyboard } from "grammy";
|
|
3
4
|
import { config } from "../config.js";
|
|
4
5
|
import { APPROVED_USERS_FILE } from "../paths.js";
|
|
@@ -43,7 +44,7 @@ export function isApprovedUser(userId) {
|
|
|
43
44
|
const MAX_PENDING = 3;
|
|
44
45
|
const pendingPairings = new Map(); // code → pairing
|
|
45
46
|
function generateCode() {
|
|
46
|
-
return String(
|
|
47
|
+
return String(crypto.randomInt(100000, 1000000));
|
|
47
48
|
}
|
|
48
49
|
function cleanExpired() {
|
|
49
50
|
const now = Date.now();
|
|
@@ -211,5 +212,22 @@ export async function authMiddleware(ctx, next) {
|
|
|
211
212
|
return;
|
|
212
213
|
}
|
|
213
214
|
// ── Callback queries (inline keyboards) ─────────
|
|
215
|
+
// Only allowedUsers may trigger admin action callbacks (approve/deny).
|
|
216
|
+
// Other callbacks (e.g. pairing-mode approved users) continue through.
|
|
217
|
+
if (userId && config.allowedUsers.includes(userId)) {
|
|
218
|
+
await next();
|
|
219
|
+
return;
|
|
220
|
+
}
|
|
221
|
+
// Unknown users: silently drop admin-action callbacks to prevent
|
|
222
|
+
// approval forgery / self-approval. Admin-action callbacks from pairing-
|
|
223
|
+
// approved (non-admin) users in "pairing" mode are also dropped here intentionally;
|
|
224
|
+
// the approve flow is an admin-only action.
|
|
225
|
+
const callbackData = ctx.callbackQuery?.data || "";
|
|
226
|
+
const isAdminCallback = /^(pair|access|wa):(approve|deny|block):/.test(callbackData);
|
|
227
|
+
if (isAdminCallback) {
|
|
228
|
+
// Silently drop — answerCallbackQuery is never called, so the Telegram client's loading spinner simply times out
|
|
229
|
+
return;
|
|
230
|
+
}
|
|
231
|
+
// Non-admin callbacks from unknown users: pass through (e.g. inline mode)
|
|
214
232
|
await next();
|
|
215
233
|
}
|
|
@@ -174,7 +174,9 @@ export class ClaudeSDKProvider {
|
|
|
174
174
|
const primaryIsHaiku = (modelOverride ?? "").toLowerCase().includes("haiku");
|
|
175
175
|
const fallbackModel = primaryIsHaiku ? undefined : "haiku";
|
|
176
176
|
const q = query({
|
|
177
|
-
prompt
|
|
177
|
+
prompt: options.steerChannel
|
|
178
|
+
? options.steerChannel
|
|
179
|
+
: prompt,
|
|
178
180
|
options: {
|
|
179
181
|
cwd: options.workingDir || process.cwd(),
|
|
180
182
|
abortController: internalAbortController,
|