alvin-bot 4.12.0 → 4.12.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -21,7 +21,7 @@ vi.mock("../src/engine.js", () => ({
21
21
  }),
22
22
  }));
23
23
 
24
- describe("sub-agents toolset (G1)", () => {
24
+ describe("sub-agents toolset (G1, extended v4.12.2)", () => {
25
25
  it("accepts toolset='full'", async () => {
26
26
  const mod = await import("../src/services/subagents.js");
27
27
  const id = await mod.spawnSubAgent({
@@ -38,13 +38,33 @@ describe("sub-agents toolset (G1)", () => {
38
38
  expect(typeof id).toBe("string");
39
39
  });
40
40
 
41
+ it("accepts toolset='readonly' (v4.12.2 — read-only sub-agents)", async () => {
42
+ const mod = await import("../src/services/subagents.js");
43
+ const id = await mod.spawnSubAgent({
44
+ name: "tool-readonly",
45
+ prompt: "hi",
46
+ toolset: "readonly",
47
+ });
48
+ expect(typeof id).toBe("string");
49
+ });
50
+
51
+ it("accepts toolset='research' (v4.12.2 — readonly + web)", async () => {
52
+ const mod = await import("../src/services/subagents.js");
53
+ const id = await mod.spawnSubAgent({
54
+ name: "tool-research",
55
+ prompt: "hi",
56
+ toolset: "research",
57
+ });
58
+ expect(typeof id).toBe("string");
59
+ });
60
+
41
61
  it("rejects unknown toolset values at runtime", async () => {
42
62
  const mod = await import("../src/services/subagents.js");
43
63
  await expect(
44
64
  mod.spawnSubAgent({
45
65
  name: "tool-bogus",
46
66
  prompt: "hi",
47
- toolset: "readonly" as unknown as "full",
67
+ toolset: "nonsense-preset" as unknown as "full",
48
68
  }),
49
69
  ).rejects.toThrow(/toolset/i);
50
70
  });
@@ -0,0 +1,153 @@
1
+ /**
2
+ * v4.12.1 — Integration test: sync Agent tool call with long silence
3
+ * does NOT trigger the stuck timeout abort.
4
+ *
5
+ * Before v4.12.1: a Task tool call WITHOUT run_in_background: true
6
+ * running silently for >10 minutes triggered STUCK_TIMEOUT_MS and
7
+ * aborted the main session — even though the sub-agent was working
8
+ * legitimately (it just can't emit intermediate chunks to the parent
9
+ * stream).
10
+ *
11
+ * After v4.12.1: the stuck timer escalates to SYNC_AGENT_IDLE_TIMEOUT_MS
12
+ * (120 min) as soon as the sync tool_use is detected (tracked by
13
+ * toolUseId), and only reverts to the normal timeout after the matching
14
+ * tool_result arrives.
15
+ *
16
+ * This test uses the pure createStuckTimer state machine directly —
17
+ * the real integration into the message handler's for-await loop is
18
+ * covered by the Task A unit tests and manual smoke tests. What this
19
+ * file verifies is the COMBINED flow (normal → enterSync → exitSync →
20
+ * normal) over realistic timing scales.
21
+ */
22
+ import { describe, it, expect, beforeEach, afterEach, vi } from "vitest";
23
+ import { createStuckTimer } from "../src/handlers/stuck-timer.js";
24
+
25
+ describe("sync Task tool call stuck-timer integration (v4.12.1)", () => {
26
+ beforeEach(() => vi.useFakeTimers());
27
+ afterEach(() => vi.useRealTimers());
28
+
29
+ it("30-min silent sync Task gap does NOT fire the 10-min normal timer", () => {
30
+ const onTimeout = vi.fn();
31
+ const t = createStuckTimer({
32
+ normalMs: 10 * 60 * 1000, // 10 min — production default
33
+ extendedMs: 120 * 60 * 1000, // 120 min — production default
34
+ onTimeout,
35
+ });
36
+
37
+ // Simulate: handler begins streaming, first chunk arrives
38
+ t.reset();
39
+
40
+ // Assistant text chunk arrives
41
+ t.reset();
42
+
43
+ // tool_use with Task, runInBackground NOT true → sync path
44
+ t.enterSync("toolu_sync_123");
45
+
46
+ // 30 min of silence (no chunks, no resets) — sub-agent is working
47
+ vi.advanceTimersByTime(30 * 60 * 1000);
48
+
49
+ // MUST NOT have fired — we're in extended mode (120 min cap)
50
+ expect(onTimeout).not.toHaveBeenCalled();
51
+
52
+ // tool_result finally arrives
53
+ t.exitSync("toolu_sync_123");
54
+ t.reset();
55
+
56
+ // Subsequent 10 minutes of silence SHOULD fire (back to normal mode)
57
+ vi.advanceTimersByTime(10 * 60 * 1000);
58
+ expect(onTimeout).toHaveBeenCalledTimes(1);
59
+ });
60
+
61
+ it("async Task (runInBackground=true) uses normal timeout (handler does NOT call enterSync)", () => {
62
+ // Simulates the decision flow: the handler only calls enterSync
63
+ // when chunk.runInBackground !== true. For async tasks, enterSync
64
+ // is NEVER called, so the normal 10-min timer applies to any gap
65
+ // before the watcher delivers (which is a separate path).
66
+ const onTimeout = vi.fn();
67
+ const t = createStuckTimer({
68
+ normalMs: 10 * 60 * 1000,
69
+ extendedMs: 120 * 60 * 1000,
70
+ onTimeout,
71
+ });
72
+
73
+ t.reset();
74
+ // Async path: the async tool_result arrives almost immediately
75
+ // (the SDK returns "Async agent launched successfully" quickly)
76
+ t.reset();
77
+ // Then the parent turn ends normally within a few seconds
78
+ // ... but if something went wrong and the parent stream hangs,
79
+ // the normal 10-min timeout applies:
80
+ vi.advanceTimersByTime(11 * 60 * 1000);
81
+ expect(onTimeout).toHaveBeenCalledTimes(1);
82
+ });
83
+
84
+ it("cancel during extended mode stops cleanly (handler finally block)", () => {
85
+ const onTimeout = vi.fn();
86
+ const t = createStuckTimer({
87
+ normalMs: 10 * 60 * 1000,
88
+ extendedMs: 120 * 60 * 1000,
89
+ onTimeout,
90
+ });
91
+
92
+ t.enterSync("toolu_1");
93
+
94
+ // Simulate: partway through a sync task, something errors out
95
+ // and the handler reaches its finally block
96
+ vi.advanceTimersByTime(60 * 60 * 1000);
97
+ t.cancel();
98
+
99
+ // Another 60 min pass — no firing because cancel cleared the timer
100
+ vi.advanceTimersByTime(60 * 60 * 1000);
101
+ expect(onTimeout).not.toHaveBeenCalled();
102
+ });
103
+
104
+ it("multiple parallel sync tasks (nested Agent calls): extended until ALL complete", () => {
105
+ // Edge case: if two parent-level sync tool_use blocks land in
106
+ // the same assistant message, both get tracked. The extended
107
+ // timer must stay armed until BOTH exit.
108
+ const onTimeout = vi.fn();
109
+ const t = createStuckTimer({
110
+ normalMs: 10 * 60 * 1000,
111
+ extendedMs: 120 * 60 * 1000,
112
+ onTimeout,
113
+ });
114
+
115
+ t.enterSync("toolu_parallel_1");
116
+ t.enterSync("toolu_parallel_2");
117
+ expect(t._pendingCount()).toBe(2);
118
+
119
+ // First finishes
120
+ vi.advanceTimersByTime(20 * 60 * 1000);
121
+ t.exitSync("toolu_parallel_1");
122
+ expect(t._pendingCount()).toBe(1);
123
+
124
+ // Second still running — another 30 min of silence
125
+ vi.advanceTimersByTime(30 * 60 * 1000);
126
+ expect(onTimeout).not.toHaveBeenCalled();
127
+
128
+ // Second finishes
129
+ t.exitSync("toolu_parallel_2");
130
+ t.reset();
131
+
132
+ // Now back to normal timeout — should fire after 10 min
133
+ vi.advanceTimersByTime(10 * 60 * 1000);
134
+ expect(onTimeout).toHaveBeenCalledTimes(1);
135
+ });
136
+
137
+ it("regression guard: old behavior (no task tracking, flat 10-min) would have false-aborted", () => {
138
+ // This test is a documentation-as-code artifact: it simulates
139
+ // what the OLD code did and verifies it WOULD have false-aborted.
140
+ // If we ever revert the fix, this test will catch the regression
141
+ // by asserting the old behavior fires at exactly 10 min of silence.
142
+ const onTimeout = vi.fn();
143
+ const flatTimer = createStuckTimer({
144
+ normalMs: 10 * 60 * 1000,
145
+ extendedMs: 10 * 60 * 1000, // identical → simulates pre-v4.12.1 behavior
146
+ onTimeout,
147
+ });
148
+ flatTimer.enterSync("toolu_1");
149
+ vi.advanceTimersByTime(10 * 60 * 1000);
150
+ // With the flat timer (pre-fix), a 10-min sync gap DOES fire
151
+ expect(onTimeout).toHaveBeenCalledTimes(1);
152
+ });
153
+ });
@@ -45,4 +45,21 @@ describe("buildSystemPrompt background-subagent hint (Stage 1)", () => {
45
45
  expect(de).toMatch(/run_in_background/);
46
46
  expect(es).toMatch(/run_in_background/);
47
47
  });
48
+
49
+ it("uses CRITICAL framing and decision-tree structure (v4.12.1)", () => {
50
+ const prompt = buildSystemPrompt(true, "en", "1234");
51
+ expect(prompt).toMatch(/CRITICAL/);
52
+ expect(prompt).toMatch(/decision tree/i);
53
+ });
54
+
55
+ it("explicitly warns about Telegram session blocking (v4.12.1)", () => {
56
+ const prompt = buildSystemPrompt(true, "en", "1234");
57
+ expect(prompt.toLowerCase()).toMatch(/blocked|blocking/);
58
+ expect(prompt.toLowerCase()).toMatch(/telegram/);
59
+ });
60
+
61
+ it("aggressive 30-second threshold (v4.12.1, previously 2 minutes)", () => {
62
+ const prompt = buildSystemPrompt(true, "en", "1234");
63
+ expect(prompt).toMatch(/30\s*seconds?/i);
64
+ });
48
65
  });
@@ -0,0 +1,65 @@
1
+ /**
2
+ * v4.12.2 — Timing-safe bearer token comparison.
3
+ *
4
+ * The webhook auth check at src/web/server.ts:127 previously used naive
5
+ * string equality on the Authorization header. That's vulnerable (in
6
+ * principle) to timing side-channel attacks where an attacker measures
7
+ * response times to leak the token character by character.
8
+ *
9
+ * Real-world exploitability over network is low due to jitter, but
10
+ * crypto.timingSafeEqual is the right tool regardless.
11
+ *
12
+ * This test covers the pure helper; the integration is in server.ts.
13
+ */
14
+ import { describe, it, expect } from "vitest";
15
+ import { timingSafeBearerMatch } from "../src/services/timing-safe-bearer.js";
16
+
17
+ describe("timing-safe bearer token comparison (v4.12.2)", () => {
18
+ it("matches a correct token", () => {
19
+ expect(timingSafeBearerMatch("Bearer abc123xyz", "abc123xyz")).toBe(true);
20
+ });
21
+
22
+ it("rejects an incorrect token", () => {
23
+ expect(timingSafeBearerMatch("Bearer wrong", "abc123xyz")).toBe(false);
24
+ });
25
+
26
+ it("rejects when Bearer prefix is missing", () => {
27
+ expect(timingSafeBearerMatch("abc123xyz", "abc123xyz")).toBe(false);
28
+ });
29
+
30
+ it("rejects when auth header is empty", () => {
31
+ expect(timingSafeBearerMatch("", "abc123xyz")).toBe(false);
32
+ });
33
+
34
+ it("rejects when auth header is undefined", () => {
35
+ expect(timingSafeBearerMatch(undefined, "abc123xyz")).toBe(false);
36
+ });
37
+
38
+ it("rejects when expected token is empty (prevents accidental auth bypass)", () => {
39
+ expect(timingSafeBearerMatch("Bearer anything", "")).toBe(false);
40
+ expect(timingSafeBearerMatch("Bearer ", "")).toBe(false);
41
+ expect(timingSafeBearerMatch("", "")).toBe(false);
42
+ });
43
+
44
+ it("rejects tokens of different lengths without revealing prefix match", () => {
45
+ expect(timingSafeBearerMatch("Bearer abc", "abcdefg")).toBe(false);
46
+ expect(timingSafeBearerMatch("Bearer abcdefg", "abc")).toBe(false);
47
+ });
48
+
49
+ it("handles unicode tokens (not that we'd use them, but correctness)", () => {
50
+ expect(timingSafeBearerMatch("Bearer 🔒xyz", "🔒xyz")).toBe(true);
51
+ expect(timingSafeBearerMatch("Bearer 🔒xyz", "🔒xYz")).toBe(false);
52
+ });
53
+
54
+ it("case-sensitive comparison (tokens are opaque)", () => {
55
+ expect(timingSafeBearerMatch("Bearer AbCdEf", "abcdef")).toBe(false);
56
+ expect(timingSafeBearerMatch("Bearer AbCdEf", "AbCdEf")).toBe(true);
57
+ });
58
+
59
+ it("rejects Bearer with leading/trailing whitespace mismatches the expected format", () => {
60
+ // RFC 6750 defines the header as: Authorization: Bearer <token>
61
+ // The RFC grammar permits 1*SP after "Bearer"; this helper is deliberately stricter and expects exactly one space before the token.
62
+ expect(timingSafeBearerMatch("Bearer  abc", "abc")).toBe(false); // double space
63
+ expect(timingSafeBearerMatch(" Bearer abc", "abc")).toBe(false); // leading space
64
+ });
65
+ });