alvin-bot 4.12.0 → 4.12.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +124 -0
- package/README.md +186 -21
- package/dist/handlers/commands.js +6 -0
- package/dist/handlers/message.js +54 -15
- package/dist/handlers/stuck-timer.js +54 -0
- package/dist/index.js +75 -3
- package/dist/providers/claude-sdk-provider.js +29 -1
- package/dist/services/allowed-users-gate.js +56 -0
- package/dist/services/cron.js +17 -0
- package/dist/services/exec-guard.js +26 -1
- package/dist/services/fallback-order.js +4 -1
- package/dist/services/file-permissions.js +93 -0
- package/dist/services/personality.js +55 -30
- package/dist/services/session-persistence.js +14 -2
- package/dist/services/subagents.js +23 -5
- package/dist/services/timing-safe-bearer.js +51 -0
- package/dist/web/doctor-api.js +8 -2
- package/dist/web/server.js +7 -3
- package/dist/web/setup-api.js +5 -2
- package/docs/security.md +279 -0
- package/package.json +4 -1
- package/skills/social-fetch/SKILL.md +385 -0
- package/skills/webcheck/SKILL.md +150 -0
- package/test/allowed-users-gate.test.ts +98 -0
- package/test/claude-sdk-tool-use-id.test.ts +180 -0
- package/test/exec-guard-metachars.test.ts +110 -0
- package/test/file-permissions.test.ts +130 -0
- package/test/stuck-timer.test.ts +116 -0
- package/test/subagent-toolset-allowlist.test.ts +146 -0
- package/test/subagents-toolset.test.ts +22 -2
- package/test/sync-task-timeout.test.ts +153 -0
- package/test/system-prompt-background-hint.test.ts +17 -0
- package/test/timing-safe-bearer.test.ts +65 -0
|
@@ -21,7 +21,7 @@ vi.mock("../src/engine.js", () => ({
|
|
|
21
21
|
}),
|
|
22
22
|
}));
|
|
23
23
|
|
|
24
|
-
describe("sub-agents toolset (G1)", () => {
|
|
24
|
+
describe("sub-agents toolset (G1, extended v4.12.2)", () => {
|
|
25
25
|
it("accepts toolset='full'", async () => {
|
|
26
26
|
const mod = await import("../src/services/subagents.js");
|
|
27
27
|
const id = await mod.spawnSubAgent({
|
|
@@ -38,13 +38,33 @@ describe("sub-agents toolset (G1)", () => {
|
|
|
38
38
|
expect(typeof id).toBe("string");
|
|
39
39
|
});
|
|
40
40
|
|
|
41
|
+
it("accepts toolset='readonly' (v4.12.2 — read-only sub-agents)", async () => {
|
|
42
|
+
const mod = await import("../src/services/subagents.js");
|
|
43
|
+
const id = await mod.spawnSubAgent({
|
|
44
|
+
name: "tool-readonly",
|
|
45
|
+
prompt: "hi",
|
|
46
|
+
toolset: "readonly",
|
|
47
|
+
});
|
|
48
|
+
expect(typeof id).toBe("string");
|
|
49
|
+
});
|
|
50
|
+
|
|
51
|
+
it("accepts toolset='research' (v4.12.2 — readonly + web)", async () => {
|
|
52
|
+
const mod = await import("../src/services/subagents.js");
|
|
53
|
+
const id = await mod.spawnSubAgent({
|
|
54
|
+
name: "tool-research",
|
|
55
|
+
prompt: "hi",
|
|
56
|
+
toolset: "research",
|
|
57
|
+
});
|
|
58
|
+
expect(typeof id).toBe("string");
|
|
59
|
+
});
|
|
60
|
+
|
|
41
61
|
it("rejects unknown toolset values at runtime", async () => {
|
|
42
62
|
const mod = await import("../src/services/subagents.js");
|
|
43
63
|
await expect(
|
|
44
64
|
mod.spawnSubAgent({
|
|
45
65
|
name: "tool-bogus",
|
|
46
66
|
prompt: "hi",
|
|
47
|
-
toolset: "
|
|
67
|
+
toolset: "nonsense-preset" as unknown as "full",
|
|
48
68
|
}),
|
|
49
69
|
).rejects.toThrow(/toolset/i);
|
|
50
70
|
});
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* v4.12.1 — Integration test: sync Agent tool call with long silence
|
|
3
|
+
* does NOT trigger the stuck timeout abort.
|
|
4
|
+
*
|
|
5
|
+
* Before v4.12.1: a Task tool call WITHOUT run_in_background: true
|
|
6
|
+
* running silently for >10 minutes triggered STUCK_TIMEOUT_MS and
|
|
7
|
+
* aborted the main session — even though the sub-agent was working
|
|
8
|
+
* legitimately (it just can't emit intermediate chunks to the parent
|
|
9
|
+
* stream).
|
|
10
|
+
*
|
|
11
|
+
* After v4.12.1: the stuck timer escalates to SYNC_AGENT_IDLE_TIMEOUT_MS
|
|
12
|
+
* (120 min) as soon as the sync tool_use is detected (tracked by
|
|
13
|
+
* toolUseId), and only reverts to the normal timeout after the matching
|
|
14
|
+
* tool_result arrives.
|
|
15
|
+
*
|
|
16
|
+
* This test uses the pure createStuckTimer state machine directly —
|
|
17
|
+
* the real integration into the message handler's for-await loop is
|
|
18
|
+
* covered by the Task A unit tests and manual smoke tests. What this
|
|
19
|
+
* file verifies is the COMBINED flow (normal → enterSync → exitSync →
|
|
20
|
+
* normal) over realistic timing scales.
|
|
21
|
+
*/
|
|
22
|
+
import { describe, it, expect, beforeEach, afterEach, vi } from "vitest";
|
|
23
|
+
import { createStuckTimer } from "../src/handlers/stuck-timer.js";
|
|
24
|
+
|
|
25
|
+
describe("sync Task tool call stuck-timer integration (v4.12.1)", () => {
|
|
26
|
+
beforeEach(() => vi.useFakeTimers());
|
|
27
|
+
afterEach(() => vi.useRealTimers());
|
|
28
|
+
|
|
29
|
+
it("30-min silent sync Task gap does NOT fire the 10-min normal timer", () => {
|
|
30
|
+
const onTimeout = vi.fn();
|
|
31
|
+
const t = createStuckTimer({
|
|
32
|
+
normalMs: 10 * 60 * 1000, // 10 min — production default
|
|
33
|
+
extendedMs: 120 * 60 * 1000, // 120 min — production default
|
|
34
|
+
onTimeout,
|
|
35
|
+
});
|
|
36
|
+
|
|
37
|
+
// Simulate: handler begins streaming, first chunk arrives
|
|
38
|
+
t.reset();
|
|
39
|
+
|
|
40
|
+
// Assistant text chunk arrives
|
|
41
|
+
t.reset();
|
|
42
|
+
|
|
43
|
+
// tool_use with Task, runInBackground NOT true → sync path
|
|
44
|
+
t.enterSync("toolu_sync_123");
|
|
45
|
+
|
|
46
|
+
// 30 min of silence (no chunks, no resets) — sub-agent is working
|
|
47
|
+
vi.advanceTimersByTime(30 * 60 * 1000);
|
|
48
|
+
|
|
49
|
+
// MUST NOT have fired — we're in extended mode (120 min cap)
|
|
50
|
+
expect(onTimeout).not.toHaveBeenCalled();
|
|
51
|
+
|
|
52
|
+
// tool_result finally arrives
|
|
53
|
+
t.exitSync("toolu_sync_123");
|
|
54
|
+
t.reset();
|
|
55
|
+
|
|
56
|
+
// Subsequent 10 minutes of silence SHOULD fire (back to normal mode)
|
|
57
|
+
vi.advanceTimersByTime(10 * 60 * 1000);
|
|
58
|
+
expect(onTimeout).toHaveBeenCalledTimes(1);
|
|
59
|
+
});
|
|
60
|
+
|
|
61
|
+
it("async Task (runInBackground=true) uses normal timeout (handler does NOT call enterSync)", () => {
|
|
62
|
+
// Simulates the decision flow: the handler only calls enterSync
|
|
63
|
+
// when chunk.runInBackground !== true. For async tasks, enterSync
|
|
64
|
+
// is NEVER called, so the normal 10-min timer applies to any gap
|
|
65
|
+
// before the watcher delivers (which is a separate path).
|
|
66
|
+
const onTimeout = vi.fn();
|
|
67
|
+
const t = createStuckTimer({
|
|
68
|
+
normalMs: 10 * 60 * 1000,
|
|
69
|
+
extendedMs: 120 * 60 * 1000,
|
|
70
|
+
onTimeout,
|
|
71
|
+
});
|
|
72
|
+
|
|
73
|
+
t.reset();
|
|
74
|
+
// Async path: the async tool_result arrives almost immediately
|
|
75
|
+
// (the SDK returns "Async agent launched successfully" quickly)
|
|
76
|
+
t.reset();
|
|
77
|
+
// Then the parent turn ends normally within a few seconds
|
|
78
|
+
// ... but if something went wrong and the parent stream hangs,
|
|
79
|
+
// the normal 10-min timeout applies:
|
|
80
|
+
vi.advanceTimersByTime(11 * 60 * 1000);
|
|
81
|
+
expect(onTimeout).toHaveBeenCalledTimes(1);
|
|
82
|
+
});
|
|
83
|
+
|
|
84
|
+
it("cancel during extended mode stops cleanly (handler finally block)", () => {
|
|
85
|
+
const onTimeout = vi.fn();
|
|
86
|
+
const t = createStuckTimer({
|
|
87
|
+
normalMs: 10 * 60 * 1000,
|
|
88
|
+
extendedMs: 120 * 60 * 1000,
|
|
89
|
+
onTimeout,
|
|
90
|
+
});
|
|
91
|
+
|
|
92
|
+
t.enterSync("toolu_1");
|
|
93
|
+
|
|
94
|
+
// Simulate: partway through a sync task, something errors out
|
|
95
|
+
// and the handler reaches its finally block
|
|
96
|
+
vi.advanceTimersByTime(60 * 60 * 1000);
|
|
97
|
+
t.cancel();
|
|
98
|
+
|
|
99
|
+
// Another 60 min pass — no firing because cancel cleared the timer
|
|
100
|
+
vi.advanceTimersByTime(60 * 60 * 1000);
|
|
101
|
+
expect(onTimeout).not.toHaveBeenCalled();
|
|
102
|
+
});
|
|
103
|
+
|
|
104
|
+
it("multiple parallel sync tasks (nested Agent calls): extended until ALL complete", () => {
|
|
105
|
+
// Edge case: if two parent-level sync tool_use blocks land in
|
|
106
|
+
// the same assistant message, both get tracked. The extended
|
|
107
|
+
// timer must stay armed until BOTH exit.
|
|
108
|
+
const onTimeout = vi.fn();
|
|
109
|
+
const t = createStuckTimer({
|
|
110
|
+
normalMs: 10 * 60 * 1000,
|
|
111
|
+
extendedMs: 120 * 60 * 1000,
|
|
112
|
+
onTimeout,
|
|
113
|
+
});
|
|
114
|
+
|
|
115
|
+
t.enterSync("toolu_parallel_1");
|
|
116
|
+
t.enterSync("toolu_parallel_2");
|
|
117
|
+
expect(t._pendingCount()).toBe(2);
|
|
118
|
+
|
|
119
|
+
// First finishes
|
|
120
|
+
vi.advanceTimersByTime(20 * 60 * 1000);
|
|
121
|
+
t.exitSync("toolu_parallel_1");
|
|
122
|
+
expect(t._pendingCount()).toBe(1);
|
|
123
|
+
|
|
124
|
+
// Second still running — another 30 min of silence
|
|
125
|
+
vi.advanceTimersByTime(30 * 60 * 1000);
|
|
126
|
+
expect(onTimeout).not.toHaveBeenCalled();
|
|
127
|
+
|
|
128
|
+
// Second finishes
|
|
129
|
+
t.exitSync("toolu_parallel_2");
|
|
130
|
+
t.reset();
|
|
131
|
+
|
|
132
|
+
// Now back to normal timeout — should fire after 10 min
|
|
133
|
+
vi.advanceTimersByTime(10 * 60 * 1000);
|
|
134
|
+
expect(onTimeout).toHaveBeenCalledTimes(1);
|
|
135
|
+
});
|
|
136
|
+
|
|
137
|
+
it("regression guard: old behavior (no task tracking, flat 10-min) would have false-aborted", () => {
|
|
138
|
+
// This test is a documentation-as-code artifact: it simulates
|
|
139
|
+
// what the OLD code did and verifies it WOULD have false-aborted.
|
|
140
|
+
// If we ever revert the fix, this test will catch the regression
|
|
141
|
+
// by asserting the old behavior fires at exactly 10 min of silence.
|
|
142
|
+
const onTimeout = vi.fn();
|
|
143
|
+
const flatTimer = createStuckTimer({
|
|
144
|
+
normalMs: 10 * 60 * 1000,
|
|
145
|
+
extendedMs: 10 * 60 * 1000, // identical → simulates pre-v4.12.1 behavior
|
|
146
|
+
onTimeout,
|
|
147
|
+
});
|
|
148
|
+
flatTimer.enterSync("toolu_1");
|
|
149
|
+
vi.advanceTimersByTime(10 * 60 * 1000);
|
|
150
|
+
// With the flat timer (pre-fix), a 10-min sync gap DOES fire
|
|
151
|
+
expect(onTimeout).toHaveBeenCalledTimes(1);
|
|
152
|
+
});
|
|
153
|
+
});
|
|
@@ -45,4 +45,21 @@ describe("buildSystemPrompt background-subagent hint (Stage 1)", () => {
|
|
|
45
45
|
expect(de).toMatch(/run_in_background/);
|
|
46
46
|
expect(es).toMatch(/run_in_background/);
|
|
47
47
|
});
|
|
48
|
+
|
|
49
|
+
it("uses CRITICAL framing and decision-tree structure (v4.12.1)", () => {
|
|
50
|
+
const prompt = buildSystemPrompt(true, "en", "1234");
|
|
51
|
+
expect(prompt).toMatch(/CRITICAL/);
|
|
52
|
+
expect(prompt).toMatch(/decision tree/i);
|
|
53
|
+
});
|
|
54
|
+
|
|
55
|
+
it("explicitly warns about Telegram session blocking (v4.12.1)", () => {
|
|
56
|
+
const prompt = buildSystemPrompt(true, "en", "1234");
|
|
57
|
+
expect(prompt.toLowerCase()).toMatch(/blocked|blocking/);
|
|
58
|
+
expect(prompt.toLowerCase()).toMatch(/telegram/);
|
|
59
|
+
});
|
|
60
|
+
|
|
61
|
+
it("aggressive 30-second threshold (v4.12.1, previously 2 minutes)", () => {
|
|
62
|
+
const prompt = buildSystemPrompt(true, "en", "1234");
|
|
63
|
+
expect(prompt).toMatch(/30\s*seconds?/i);
|
|
64
|
+
});
|
|
48
65
|
});
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* v4.12.2 — Timing-safe bearer token comparison.
|
|
3
|
+
*
|
|
4
|
+
* The webhook auth check at src/web/server.ts:127 previously used naive
|
|
5
|
+
* string equality on the Authorization header. That's vulnerable (in
|
|
6
|
+
* principle) to timing side-channel attacks where an attacker measures
|
|
7
|
+
* response times to leak the token character by character.
|
|
8
|
+
*
|
|
9
|
+
* Real-world exploitability over network is low due to jitter, but
|
|
10
|
+
* crypto.timingSafeEqual is the right tool regardless.
|
|
11
|
+
*
|
|
12
|
+
* This test covers the pure helper; the integration is in server.ts.
|
|
13
|
+
*/
|
|
14
|
+
import { describe, it, expect } from "vitest";
|
|
15
|
+
import { timingSafeBearerMatch } from "../src/services/timing-safe-bearer.js";
|
|
16
|
+
|
|
17
|
+
describe("timing-safe bearer token comparison (v4.12.2)", () => {
|
|
18
|
+
it("matches a correct token", () => {
|
|
19
|
+
expect(timingSafeBearerMatch("Bearer abc123xyz", "abc123xyz")).toBe(true);
|
|
20
|
+
});
|
|
21
|
+
|
|
22
|
+
it("rejects an incorrect token", () => {
|
|
23
|
+
expect(timingSafeBearerMatch("Bearer wrong", "abc123xyz")).toBe(false);
|
|
24
|
+
});
|
|
25
|
+
|
|
26
|
+
it("rejects when Bearer prefix is missing", () => {
|
|
27
|
+
expect(timingSafeBearerMatch("abc123xyz", "abc123xyz")).toBe(false);
|
|
28
|
+
});
|
|
29
|
+
|
|
30
|
+
it("rejects when auth header is empty", () => {
|
|
31
|
+
expect(timingSafeBearerMatch("", "abc123xyz")).toBe(false);
|
|
32
|
+
});
|
|
33
|
+
|
|
34
|
+
it("rejects when auth header is undefined", () => {
|
|
35
|
+
expect(timingSafeBearerMatch(undefined, "abc123xyz")).toBe(false);
|
|
36
|
+
});
|
|
37
|
+
|
|
38
|
+
it("rejects when expected token is empty (prevents accidental auth bypass)", () => {
|
|
39
|
+
expect(timingSafeBearerMatch("Bearer anything", "")).toBe(false);
|
|
40
|
+
expect(timingSafeBearerMatch("Bearer ", "")).toBe(false);
|
|
41
|
+
expect(timingSafeBearerMatch("", "")).toBe(false);
|
|
42
|
+
});
|
|
43
|
+
|
|
44
|
+
it("rejects tokens of different lengths without revealing prefix match", () => {
|
|
45
|
+
expect(timingSafeBearerMatch("Bearer abc", "abcdefg")).toBe(false);
|
|
46
|
+
expect(timingSafeBearerMatch("Bearer abcdefg", "abc")).toBe(false);
|
|
47
|
+
});
|
|
48
|
+
|
|
49
|
+
it("handles unicode tokens (not that we'd use them, but correctness)", () => {
|
|
50
|
+
expect(timingSafeBearerMatch("Bearer 🔒xyz", "🔒xyz")).toBe(true);
|
|
51
|
+
expect(timingSafeBearerMatch("Bearer 🔒xyz", "🔒xYz")).toBe(false);
|
|
52
|
+
});
|
|
53
|
+
|
|
54
|
+
it("case-sensitive comparison (tokens are opaque)", () => {
|
|
55
|
+
expect(timingSafeBearerMatch("Bearer AbCdEf", "abcdef")).toBe(false);
|
|
56
|
+
expect(timingSafeBearerMatch("Bearer AbCdEf", "AbCdEf")).toBe(true);
|
|
57
|
+
});
|
|
58
|
+
|
|
59
|
+
it("rejects Bearer with leading/trailing whitespace mismatches the expected format", () => {
|
|
60
|
+
// RFC 6750 says: Authorization: Bearer <token>
|
|
61
|
+
// Exactly one space between "Bearer" and the token.
|
|
62
|
+
expect(timingSafeBearerMatch("Bearer  abc", "abc")).toBe(false); // double space
|
|
63
|
+
expect(timingSafeBearerMatch(" Bearer abc", "abc")).toBe(false); // leading space
|
|
64
|
+
});
|
|
65
|
+
});
|