alvin-bot 4.12.2 → 4.12.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,417 @@
1
+ /**
2
+ * v4.12.3 — Stress + edge-case tests for the bypass path.
3
+ *
4
+ * These tests exercise scenarios that aren't part of the happy path
5
+ * but should hold up in real-world use:
6
+ * - Many parallel sessions
7
+ * - Rapid churn (launch/deliver cycles)
8
+ * - Memory hygiene (no residual in-memory state after delivery)
9
+ * - Race conditions: delivery fires while counter is mid-update
10
+ * - Extreme counter drift (more deliveries than launches)
11
+ * - waitUntilProcessingFalse timeout paths
12
+ */
13
+ import { describe, it, expect, beforeEach, afterEach, vi } from "vitest";
14
+ import fs from "fs";
15
+ import os from "os";
16
+ import { resolve } from "path";
17
+
18
+ const TEST_DATA_DIR = resolve(
19
+ os.tmpdir(),
20
+ `alvin-bypass-stress-${process.pid}-${Date.now()}`,
21
+ );
22
+
23
+ beforeEach(async () => { // Per-test setup: empty data dir, cleared module cache, stubbed delivery module.
24
+ if (fs.existsSync(TEST_DATA_DIR)) {
25
+ fs.rmSync(TEST_DATA_DIR, { recursive: true, force: true }); // drop files written by the previous test
26
+ }
27
+ fs.mkdirSync(TEST_DATA_DIR, { recursive: true });
28
+ process.env.ALVIN_DATA_DIR = TEST_DATA_DIR; // presumably read by the modules under test when they are imported — TODO confirm
29
+ vi.resetModules(); // clear the module cache so each test's dynamic import() re-evaluates the modules
30
+ vi.doMock("../src/services/subagent-delivery.js", () => ({ // doMock is not hoisted; it applies to imports made after this call
31
+ deliverSubAgentResult: async () => {}, // no-op stub
32
+ attachBotApi: () => {}, // no-op stub
33
+ __setBotApiForTest: () => {}, // no-op stub
34
+ }));
35
+ });
36
+
37
+ afterEach(async () => { // Per-test teardown: stop the watcher and call its test-reset hook.
38
+ try {
39
+ const mod = await import("../src/services/async-agent-watcher.js");
40
+ mod.stopWatcher();
41
+ mod.__resetForTest();
42
+ } catch {
43
+ /* Best-effort teardown: a failing import or reset here must not fail the test that just ran. */
44
+ }
45
+ }); // NOTE(review): TEST_DATA_DIR is only wiped in beforeEach, so the last test's directory stays in os.tmpdir() unless removed elsewhere.
46
+
47
+ function writeCompletedJsonl(path: string, text: string): void { // Writes a one-record JSONL file at `path`: an assistant message carrying `text`, creating the parent directory if needed.
48
+ const lines =
49
+ [
50
+ JSON.stringify({
51
+ type: "assistant",
52
+ isSidechain: true,
53
+ agentId: "x", // fixed placeholder; not tied to the agentId used at registration
54
+ message: {
55
+ role: "assistant",
56
+ content: [{ type: "text", text }],
57
+ stop_reason: "end_turn", // presumably the marker the watcher treats as "finished" — confirm against async-agent-watcher
58
+ usage: { input_tokens: 1, output_tokens: 1 },
59
+ },
60
+ }),
61
+ ].join("\n") + "\n"; // single element, so the result is one JSON line plus a trailing newline
62
+ fs.mkdirSync(resolve(path, ".."), { recursive: true }); // resolve(path, "..") is the parent directory of path
63
+ fs.writeFileSync(path, lines, "utf-8");
64
+ }
65
+
66
+ describe("v4.12.3 bypass — stress + edge cases", () => {
67
+ it("100 parallel sessions each launch and deliver one agent — counters isolated", async () => { // One agent per session across N sessions; every counter must go 0 → 1 → 0 independently.
68
+ const { getSession } = await import("../src/services/session.js");
69
+ const { handleToolResultChunk } = await import(
70
+ "../src/handlers/async-agent-chunk-handler.js"
71
+ );
72
+ const watcher = await import("../src/services/async-agent-watcher.js");
73
+
74
+ const N = 100;
75
+ const sessionKeys: string[] = [];
76
+
77
+ // Launch phase: feed one "launched" tool_result per session through the chunk handler
78
+ for (let i = 0; i < N; i++) {
79
+ const sk = `stress-parallel-${i}`;
80
+ sessionKeys.push(sk);
81
+ const s = getSession(sk);
82
+ s.pendingBackgroundCount = 0; // explicit baseline before the handler runs
83
+
84
+ const outPath = `${TEST_DATA_DIR}/p-${i}.jsonl`;
85
+ handleToolResultChunk(
86
+ {
87
+ type: "tool_result",
88
+ toolUseId: `p_${i}`,
89
+ toolResultContent:
90
+ "Async agent launched successfully.\n" +
91
+ `agentId: p-${i}\n` +
92
+ `output_file: ${outPath}\n`,
93
+ },
94
+ {
95
+ chatId: i,
96
+ userId: i,
97
+ sessionKey: sk,
98
+ lastToolUseInput: { description: `task ${i}`, prompt: "p" },
99
+ },
100
+ );
101
+ }
102
+
103
+ // Verify every session's counter is exactly 1 after its single launch
104
+ for (const sk of sessionKeys) {
105
+ expect(getSession(sk).pendingBackgroundCount).toBe(1);
106
+ }
107
+
108
+ // Complete phase: write all N output files first, then run a single poll
109
+ for (let i = 0; i < N; i++) {
110
+ writeCompletedJsonl(`${TEST_DATA_DIR}/p-${i}.jsonl`, `done ${i}`);
111
+ }
112
+ await watcher.pollOnce();
113
+
114
+ // Verify all back to 0
115
+ for (const sk of sessionKeys) {
116
+ expect(getSession(sk).pendingBackgroundCount).toBe(0);
117
+ }
118
+
119
+ // Verify the watcher reports no pending agents after delivery
120
+ expect(watcher.listPendingAgents()).toHaveLength(0);
121
+ });
122
+
123
+ it("churn: 200 rapid launch/deliver cycles on one session — counter stays [0,1]", async () => { // Strictly sequential launch → deliver cycles on one session; the counter must alternate 1, 0.
124
+ const { getSession } = await import("../src/services/session.js");
125
+ const { handleToolResultChunk } = await import(
126
+ "../src/handlers/async-agent-chunk-handler.js"
127
+ );
128
+ const watcher = await import("../src/services/async-agent-watcher.js");
129
+
130
+ const sk = "churn-hot";
131
+ const s = getSession(sk); // this one reference is asserted on for all 200 cycles
132
+ s.pendingBackgroundCount = 0;
133
+
134
+ for (let i = 0; i < 200; i++) {
135
+ const outPath = `${TEST_DATA_DIR}/churn-${i}.jsonl`;
136
+ handleToolResultChunk(
137
+ {
138
+ type: "tool_result",
139
+ toolUseId: `c_${i}`,
140
+ toolResultContent:
141
+ "Async agent launched successfully.\n" +
142
+ `agentId: c-${i}\n` +
143
+ `output_file: ${outPath}\n`,
144
+ },
145
+ {
146
+ chatId: 1,
147
+ userId: 1,
148
+ sessionKey: sk,
149
+ lastToolUseInput: { description: `task ${i}`, prompt: "p" },
150
+ },
151
+ );
152
+ expect(s.pendingBackgroundCount).toBe(1); // after launch
153
+
154
+ writeCompletedJsonl(outPath, `done ${i}`);
155
+ await watcher.pollOnce(); // one poll per cycle, right after the output file is written
156
+ expect(s.pendingBackgroundCount).toBe(0); // after delivery
157
+ }
158
+
159
+ // Final sanity: nothing left pending in the watcher
160
+ expect(watcher.listPendingAgents()).toHaveLength(0);
161
+ });
162
+
163
+ it("extreme drift: 10 deliveries but only 1 launch — counter clamps at 0", async () => { // Ten registered agents against a counter of 1: the result must be 0, never negative.
164
+ const { getSession } = await import("../src/services/session.js");
165
+ const watcher = await import("../src/services/async-agent-watcher.js");
166
+
167
+ const sk = "drift-extreme";
168
+ const s = getSession(sk);
169
+ s.pendingBackgroundCount = 1; // deliberately lower than the 10 agents registered below
170
+
171
+ // Register 10 agents to the same session, but keep the counter at 1
172
+ // (simulating a scenario where the handler increment got lost on 9 of them)
173
+ for (let i = 0; i < 10; i++) {
174
+ const outPath = `${TEST_DATA_DIR}/drift-${i}.jsonl`;
175
+ watcher.registerPendingAgent({ // registered with the watcher directly instead of going through handleToolResultChunk
176
+ agentId: `drift-${i}`,
177
+ outputFile: outPath,
178
+ description: `drift ${i}`,
179
+ prompt: "p",
180
+ chatId: 1,
181
+ userId: 1,
182
+ toolUseId: null,
183
+ sessionKey: sk,
184
+ });
185
+ writeCompletedJsonl(outPath, `done ${i}`);
186
+ }
187
+
188
+ await watcher.pollOnce(); // all ten output files already exist when this single poll runs
189
+
190
+ // First delivery takes counter from 1 → 0.
191
+ // The next 9 deliveries try to decrement from 0 and clamp.
192
+ expect(s.pendingBackgroundCount).toBe(0);
193
+ });
194
+
195
+ it("user /new during pending — counter reset is safe", async () => { // resetSession() while agents are pending zeroes the counter; later deliveries must leave it at 0.
196
+ const { getSession, resetSession } = await import("../src/services/session.js");
197
+ const { handleToolResultChunk } = await import(
198
+ "../src/handlers/async-agent-chunk-handler.js"
199
+ );
200
+ const watcher = await import("../src/services/async-agent-watcher.js");
201
+
202
+ const sk = "reset-during-pending";
203
+ const s = getSession(sk);
204
+ s.pendingBackgroundCount = 0;
205
+
206
+ // Launch 3 agents
207
+ for (let i = 0; i < 3; i++) {
208
+ const outPath = `${TEST_DATA_DIR}/reset-${i}.jsonl`;
209
+ handleToolResultChunk(
210
+ {
211
+ type: "tool_result",
212
+ toolUseId: `r_${i}`,
213
+ toolResultContent:
214
+ "Async agent launched successfully.\n" +
215
+ `agentId: reset-${i}\n` +
216
+ `output_file: ${outPath}\n`,
217
+ },
218
+ {
219
+ chatId: 1,
220
+ userId: 1,
221
+ sessionKey: sk,
222
+ lastToolUseInput: { description: `task ${i}`, prompt: "p" },
223
+ },
224
+ );
225
+ }
226
+ expect(s.pendingBackgroundCount).toBe(3);
227
+
228
+ // User issues /new while all 3 are running
229
+ resetSession(sk);
230
+ expect(s.pendingBackgroundCount).toBe(0); // checked on the reference taken before the reset, so the reset is expected to mutate that object
231
+
232
+ // Watcher delivers all 3 afterwards
233
+ for (let i = 0; i < 3; i++) {
234
+ writeCompletedJsonl(`${TEST_DATA_DIR}/reset-${i}.jsonl`, `done ${i}`);
235
+ }
236
+ await watcher.pollOnce();
237
+
238
+ // Counter should remain 0 (clamped), not drop to -3
239
+ expect(s.pendingBackgroundCount).toBe(0);
240
+ });
241
+
242
+ it("session removed from Map before delivery — decrement is no-op, no crash", async () => { // Deletes the session between launch and delivery; the poll must still resolve.
243
+ const { getAllSessions } = await import("../src/services/session.js");
244
+ const { handleToolResultChunk } = await import(
245
+ "../src/handlers/async-agent-chunk-handler.js"
246
+ );
247
+ const watcher = await import("../src/services/async-agent-watcher.js");
248
+
249
+ const sk = "ephemeral-session";
250
+ const s = getAllSessions(); // the session collection itself (not a session); delete() is called on it below
251
+ // Create the session through getSession (a second import of the same module) and zero its counter
252
+ const { getSession } = await import("../src/services/session.js");
253
+ const session = getSession(sk);
254
+ session.pendingBackgroundCount = 0;
255
+
256
+ const outPath = `${TEST_DATA_DIR}/eph.jsonl`;
257
+ handleToolResultChunk(
258
+ {
259
+ type: "tool_result",
260
+ toolUseId: "eph_1",
261
+ toolResultContent:
262
+ "Async agent launched successfully.\n" +
263
+ "agentId: eph-1\n" +
264
+ `output_file: ${outPath}\n`,
265
+ },
266
+ {
267
+ chatId: 1,
268
+ userId: 1,
269
+ sessionKey: sk,
270
+ lastToolUseInput: { description: "d", prompt: "p" },
271
+ },
272
+ );
273
+ expect(session.pendingBackgroundCount).toBe(1);
274
+
275
+ // Nuke the session from the map (simulates TTL cleanup)
276
+ s.delete(sk);
277
+
278
+ writeCompletedJsonl(outPath, "done");
279
+ await expect(watcher.pollOnce()).resolves.not.toThrow(); // only asserts that pollOnce() resolves; the counter is not inspected after the delete
280
+ });
281
+
282
+ it("mixed rollout: pre-v4.12.3 persisted entries (no sessionKey) mixed with new entries", async () => { // One entry with sessionKey and one without are delivered in the same poll.
283
+ const { getSession } = await import("../src/services/session.js");
284
+ const watcher = await import("../src/services/async-agent-watcher.js");
285
+
286
+ // v4.12.3 session with counter
287
+ const sk = "mixed-v412";
288
+ const s = getSession(sk);
289
+ s.pendingBackgroundCount = 1; // set by hand: the agents below are registered directly, not via the chunk handler
290
+
291
+ // New-style entry with sessionKey
292
+ const newPath = `${TEST_DATA_DIR}/new.jsonl`;
293
+ watcher.registerPendingAgent({
294
+ agentId: "new-agent",
295
+ outputFile: newPath,
296
+ description: "new",
297
+ prompt: "p",
298
+ chatId: 1,
299
+ userId: 1,
300
+ toolUseId: null,
301
+ sessionKey: sk,
302
+ });
303
+
304
+ // Old-style entry without sessionKey (pre-v4.12.3)
305
+ const oldPath = `${TEST_DATA_DIR}/old.jsonl`;
306
+ watcher.registerPendingAgent({
307
+ agentId: "old-agent",
308
+ outputFile: oldPath,
309
+ description: "old",
310
+ prompt: "p",
311
+ chatId: 2,
312
+ userId: 2,
313
+ toolUseId: null,
314
+ // sessionKey intentionally omitted
315
+ });
316
+
317
+ writeCompletedJsonl(newPath, "new done");
318
+ writeCompletedJsonl(oldPath, "old done");
319
+ await watcher.pollOnce();
320
+
321
+ // New agent decrements our counter; old agent is a no-op
322
+ expect(s.pendingBackgroundCount).toBe(0);
323
+ expect(watcher.listPendingAgents()).toHaveLength(0); // both entries, with and without sessionKey, are gone
324
+ });
325
+
326
+ it("waitUntilProcessingFalse: flag flips right at the tick boundary", async () => { // Real timers: the flag is cleared shortly after the wait starts and the wait must report true.
327
+ const { waitUntilProcessingFalse } = await import(
328
+ "../src/handlers/background-bypass.js"
329
+ );
330
+ const session = { isProcessing: true };
331
+ // Start waiting, then flip asynchronously
332
+ const waitPromise = waitUntilProcessingFalse(session, 2000, 10); // presumably 2000 ms timeout, 10 ms poll interval — confirm against background-bypass
333
+ setTimeout(() => { session.isProcessing = false; }, 15); // flips 15 ms in, well before the 2000 ms limit
334
+ const result = await waitPromise;
335
+ expect(result).toBe(true);
336
+ });
337
+
338
+ it("waitUntilProcessingFalse: timeout respected", async () => { // Real timers: the flag never clears, so the wait must report false after roughly 200 ms.
339
+ const { waitUntilProcessingFalse } = await import(
340
+ "../src/handlers/background-bypass.js"
341
+ );
342
+ const session = { isProcessing: true };
343
+ const start = Date.now();
344
+ const result = await waitUntilProcessingFalse(session, 200, 25);
345
+ const elapsed = Date.now() - start; // wall-clock duration of the wait
346
+ expect(result).toBe(false);
347
+ expect(elapsed).toBeGreaterThanOrEqual(180); // allow small jitter
348
+ expect(elapsed).toBeLessThan(400); // NOTE(review): wall-clock upper bound; may be tight on a heavily loaded machine
349
+ });
350
+
351
+ it( // S sessions × A agents each, delivered by a single poll; every counter must go 0 → A → 0.
352
+ "high load: 50 sessions, each with 4 parallel agents (200 total) — " +
353
+ "all deliver, all counters return to 0",
354
+ async () => {
355
+ const { getSession } = await import("../src/services/session.js");
356
+ const { handleToolResultChunk } = await import(
357
+ "../src/handlers/async-agent-chunk-handler.js"
358
+ );
359
+ const watcher = await import("../src/services/async-agent-watcher.js");
360
+
361
+ const S = 50; // number of sessions
362
+ const A = 4; // agents launched per session
363
+ const sessionKeys: string[] = [];
364
+ const allPaths: string[] = []; // every output file, in launch order
365
+
366
+ for (let i = 0; i < S; i++) {
367
+ const sk = `load-s-${i}`;
368
+ sessionKeys.push(sk);
369
+ const s = getSession(sk);
370
+ s.pendingBackgroundCount = 0;
371
+
372
+ for (let j = 0; j < A; j++) {
373
+ const outPath = `${TEST_DATA_DIR}/load-${i}-${j}.jsonl`;
374
+ allPaths.push(outPath);
375
+ handleToolResultChunk(
376
+ {
377
+ type: "tool_result",
378
+ toolUseId: `load_${i}_${j}`,
379
+ toolResultContent:
380
+ "Async agent launched successfully.\n" +
381
+ `agentId: load-${i}-${j}\n` +
382
+ `output_file: ${outPath}\n`,
383
+ },
384
+ {
385
+ chatId: i,
386
+ userId: i,
387
+ sessionKey: sk,
388
+ lastToolUseInput: {
389
+ description: `task ${i}-${j}`,
390
+ prompt: "p",
391
+ },
392
+ },
393
+ );
394
+ }
395
+ }
396
+
397
+ // Every session has A agents pending
398
+ for (const sk of sessionKeys) {
399
+ expect(getSession(sk).pendingBackgroundCount).toBe(A);
400
+ }
401
+
402
+ // Deliver all: write every output file, then run one poll
403
+ for (const p of allPaths) {
404
+ writeCompletedJsonl(p, "done");
405
+ }
406
+ await watcher.pollOnce();
407
+
408
+ // All counters back to 0
409
+ for (const sk of sessionKeys) {
410
+ expect(getSession(sk).pendingBackgroundCount).toBe(0);
411
+ }
412
+
413
+ // No residual state: the watcher reports no pending agents
414
+ expect(watcher.listPendingAgents()).toHaveLength(0);
415
+ },
416
+ );
417
+ });
@@ -0,0 +1,127 @@
1
+ /**
2
+ * v4.12.3 — background-bypass pure helpers.
3
+ *
4
+ * These helpers factor out the SDK-resume-bypass decision from the
5
+ * message handler so it can be unit tested without grammy Context
6
+ * mocks. The real handler composes these functions — they're only
7
+ * state machines over session fields + time.
8
+ */
9
+ import { describe, it, expect, beforeEach, afterEach, vi } from "vitest";
10
+ import {
11
+ shouldBypassQueue,
12
+ shouldBypassSdkResume,
13
+ waitUntilProcessingFalse,
14
+ } from "../src/handlers/background-bypass.js";
15
+
16
+ describe("shouldBypassQueue (v4.12.3)", () => { // Truth table: true only when isProcessing, pendingBackgroundCount > 0 and a non-aborted AbortController all hold.
17
+ it("returns false when session is not processing", () => {
18
+ expect(
19
+ shouldBypassQueue({
20
+ isProcessing: false, // the only failing condition in this case
21
+ pendingBackgroundCount: 5,
22
+ abortController: new AbortController(),
23
+ }),
24
+ ).toBe(false);
25
+ });
26
+
27
+ it("returns false when no background agent is pending", () => {
28
+ expect(
29
+ shouldBypassQueue({
30
+ isProcessing: true,
31
+ pendingBackgroundCount: 0, // the only failing condition in this case
32
+ abortController: new AbortController(),
33
+ }),
34
+ ).toBe(false);
35
+ });
36
+
37
+ it("returns false when no abortController exists (can't abort)", () => {
38
+ expect(
39
+ shouldBypassQueue({
40
+ isProcessing: true,
41
+ pendingBackgroundCount: 2,
42
+ abortController: null, // the only failing condition in this case
43
+ }),
44
+ ).toBe(false);
45
+ });
46
+
47
+ it("returns true when processing, background pending, and abortable", () => {
48
+ expect(
49
+ shouldBypassQueue({
50
+ isProcessing: true,
51
+ pendingBackgroundCount: 1,
52
+ abortController: new AbortController(),
53
+ }),
54
+ ).toBe(true);
55
+ });
56
+
57
+ it("returns true even with multiple pending agents", () => {
58
+ expect(
59
+ shouldBypassQueue({
60
+ isProcessing: true,
61
+ pendingBackgroundCount: 3,
62
+ abortController: new AbortController(),
63
+ }),
64
+ ).toBe(true);
65
+ });
66
+
67
+ it("returns false if abortController is already aborted — nothing left to abort", () => {
68
+ const ac = new AbortController();
69
+ ac.abort(); // controller exists but its signal is already aborted
70
+ expect(
71
+ shouldBypassQueue({
72
+ isProcessing: true,
73
+ pendingBackgroundCount: 1,
74
+ abortController: ac,
75
+ }),
76
+ ).toBe(false);
77
+ });
78
+ });
79
+
80
+ describe("shouldBypassSdkResume (v4.12.3)", () => { // Expected result depends only on whether pendingBackgroundCount is above zero.
81
+ it("returns true when pendingBackgroundCount > 0 — old SDK session is blocked, need fresh", () => {
82
+ expect(shouldBypassSdkResume({ pendingBackgroundCount: 1 })).toBe(true);
83
+ expect(shouldBypassSdkResume({ pendingBackgroundCount: 5 })).toBe(true); // any positive count, not just 1
84
+ });
85
+
86
+ it("returns false when no background pending — safe to resume", () => {
87
+ expect(shouldBypassSdkResume({ pendingBackgroundCount: 0 })).toBe(false);
88
+ });
89
+ });
90
+
91
+ describe("waitUntilProcessingFalse (v4.12.3)", () => { // Resolves true once isProcessing is false, or false when the timeout passes first.
92
+ beforeEach(() => vi.useFakeTimers()); // time advances only through vi.advanceTimersByTimeAsync in these tests
93
+ afterEach(() => vi.useRealTimers()); // restore real timers after each test
94
+
95
+ it("resolves immediately when already not processing", async () => {
96
+ const session = { isProcessing: false };
97
+ const p = waitUntilProcessingFalse(session, 5000);
98
+ await vi.advanceTimersByTimeAsync(0); // no fake time passes; only timers already due may run
99
+ await expect(p).resolves.toBe(true);
100
+ });
101
+
102
+ it("waits until isProcessing flips, then resolves true", async () => {
103
+ const session = { isProcessing: true };
104
+ const p = waitUntilProcessingFalse(session, 5000); // tick argument omitted, so the default interval is used
105
+ await vi.advanceTimersByTimeAsync(200);
106
+ session.isProcessing = false;
107
+ await vi.advanceTimersByTimeAsync(100); // enough fake time for a later poll to observe the flip
108
+ await expect(p).resolves.toBe(true);
109
+ });
110
+
111
+ it("gives up after timeout if still processing, resolves false", async () => {
112
+ const session = { isProcessing: true };
113
+ const p = waitUntilProcessingFalse(session, 1000);
114
+ await vi.advanceTimersByTimeAsync(1100); // advance past the 1000 ms timeout without clearing the flag
115
+ await expect(p).resolves.toBe(false);
116
+ });
117
+
118
+ it("uses the provided tick interval (default 50ms)", async () => {
119
+ const session = { isProcessing: true };
120
+ const p = waitUntilProcessingFalse(session, 500, 25); // explicit 25 ms tick instead of the default
121
+ // Flip after 130ms of "waiting" — should detect on the next 25ms tick
122
+ await vi.advanceTimersByTimeAsync(130);
123
+ session.isProcessing = false;
124
+ await vi.advanceTimersByTimeAsync(30); // 30 ms covers at least one further 25 ms tick
125
+ await expect(p).resolves.toBe(true);
126
+ });
127
+ });
@@ -0,0 +1,59 @@
1
+ /**
2
+ * v4.12.3 — UserSession.pendingBackgroundCount
3
+ *
4
+ * When Claude launches an Agent/Task tool with run_in_background: true,
5
+ * the SDK's CLI subprocess stays alive until the task-notification is
6
+ * ready to deliver. During that window the main Telegram session is
7
+ * effectively blocked — isProcessing=true, all new user messages get
8
+ * queued. For 5-minute+ background tasks that's unacceptable UX.
9
+ *
10
+ * v4.12.3 tracks the count of pending background agents on each session
11
+ * so the handler can detect the blocked state and bypass the SDK resume
12
+ * (start a fresh SDK session for the new user message while the old
13
+ * session drains in the background).
14
+ *
15
+ * The count is incremented by the message handler on async_launched
16
+ * tool_result and decremented by the async-agent-watcher when it
17
+ * delivers the sub-agent's result.
18
+ */
19
+ import { describe, it, expect, beforeEach, vi } from "vitest";
20
+
21
+ beforeEach(() => vi.resetModules()); // clear the module cache so each test's dynamic import() re-evaluates session.js
22
+
23
+ describe("UserSession.pendingBackgroundCount (v4.12.3)", () => { // Covers the counter field as seen through getSession and resetSession.
24
+ it("new session starts with pendingBackgroundCount=0", async () => {
25
+ const { getSession } = await import("../src/services/session.js");
26
+ const s = getSession("test-user-new");
27
+ expect(s.pendingBackgroundCount).toBe(0);
28
+ });
29
+
30
+ it("incrementing on the session persists across getSession calls", async () => {
31
+ const { getSession } = await import("../src/services/session.js");
32
+ const s1 = getSession("test-user-inc");
33
+ s1.pendingBackgroundCount = 2;
34
+ const s2 = getSession("test-user-inc");
35
+ expect(s2.pendingBackgroundCount).toBe(2);
36
+ expect(s1).toBe(s2); // toBe is an identity check: both calls must return the same object
37
+ });
38
+
39
+ it("resetSession zeroes pendingBackgroundCount", async () => {
40
+ const { getSession, resetSession } = await import("../src/services/session.js");
41
+ const s = getSession("test-user-reset");
42
+ s.pendingBackgroundCount = 3;
43
+ resetSession("test-user-reset");
44
+ expect(s.pendingBackgroundCount).toBe(0); // checked on the reference taken before the reset
45
+ });
46
+
47
+ it("count can be decremented without going negative via explicit guard", async () => {
48
+ // The handler/watcher code is responsible for not decrementing below
49
+ // zero. This test just documents that the field is a plain number
50
+ // with no built-in guard — decrement logic lives in the consumers.
51
+ const { getSession } = await import("../src/services/session.js");
52
+ const s = getSession("test-user-dec");
53
+ s.pendingBackgroundCount = 1;
54
+ s.pendingBackgroundCount = Math.max(0, s.pendingBackgroundCount - 1); // 1 → 0
55
+ expect(s.pendingBackgroundCount).toBe(0);
56
+ s.pendingBackgroundCount = Math.max(0, s.pendingBackgroundCount - 1); // 0 stays 0: Math.max clamps the -1
57
+ expect(s.pendingBackgroundCount).toBe(0);
58
+ });
59
+ });