alvin-bot 4.12.2 → 4.12.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,443 @@
1
+ /**
2
+ * v4.12.3 — End-to-end integration test for the background-agent bypass
3
+ * path. Simulates the following scenario:
4
+ *
5
+ * 1. User sends a message that causes Claude to launch an async Agent
6
+ * 2. While the SDK's CLI subprocess idles waiting for the
7
+ * task-notification, user sends a NEW message
8
+ * 3. The handler recognises the pending background state and:
9
+ * a. Aborts the blocked query
10
+ * b. Bypasses SDK resume for the new query (sessionId=null)
11
+ * c. Injects bridge preamble with history
12
+ * 4. The watcher delivers the background result via
13
+ * subagent-delivery.ts as a separate message
14
+ * 5. After delivery, pendingBackgroundCount returns to 0 and future
15
+ * queries use normal SDK resume again
16
+ *
17
+ * The full handler is too tightly coupled to grammy to unit-test end
18
+ * to end. Instead we exercise each layer directly:
19
+ * - session.pendingBackgroundCount updates (counter wiring)
20
+ * - shouldBypassQueue / shouldBypassSdkResume decision points
21
+ * - watcher delivery → counter decrement
22
+ * - abort + wait path
23
+ */
24
+ import { describe, it, expect, beforeEach, afterEach, vi } from "vitest";
25
+ import fs from "fs";
26
+ import os from "os";
27
+ import { resolve } from "path";
28
+
29
/**
 * Scratch directory for this test file. The pid + timestamp suffix keeps
 * separate runs from sharing a directory under the OS temp dir.
 */
const TEST_DATA_DIR = resolve(
  os.tmpdir(),
  `alvin-bypass-int-${process.pid}-${Date.now()}`,
);

/** Value ALVIN_DATA_DIR held before the current test overrode it. */
let previousDataDir: string | undefined;

/**
 * Gives every test an empty data directory, points ALVIN_DATA_DIR at it,
 * drops the module cache so the dynamic imports inside each test load
 * fresh module instances, and replaces the sub-agent delivery service
 * with no-op functions.
 */
beforeEach(() => {
  // `force: true` makes rmSync a no-op when the directory does not exist,
  // so no existence check is needed first.
  fs.rmSync(TEST_DATA_DIR, { recursive: true, force: true });
  fs.mkdirSync(TEST_DATA_DIR, { recursive: true });

  previousDataDir = process.env.ALVIN_DATA_DIR;
  process.env.ALVIN_DATA_DIR = TEST_DATA_DIR;

  vi.resetModules();
  vi.doMock("../src/services/subagent-delivery.js", () => ({
    deliverSubAgentResult: async () => {},
    attachBotApi: () => {},
    __setBotApiForTest: () => {},
  }));
});

/**
 * Stops and resets the watcher (best effort), then undoes the environment
 * override and removes the scratch directory so nothing is left behind in
 * the OS temp dir after the final test.
 */
afterEach(async () => {
  try {
    const mod = await import("../src/services/async-agent-watcher.js");
    mod.stopWatcher();
    mod.__resetForTest();
  } catch {
    /* best effort: a failing watcher teardown must not mask the test result */
  } finally {
    // Restore the environment only after the watcher has been torn down.
    if (previousDataDir === undefined) {
      delete process.env.ALVIN_DATA_DIR;
    } else {
      process.env.ALVIN_DATA_DIR = previousDataDir;
    }
    fs.rmSync(TEST_DATA_DIR, { recursive: true, force: true });
  }
});
57
+
58
/**
 * Writes a two-record JSONL transcript to `path`, creating the parent
 * directory first if necessary.
 *
 * The first record is a sidechain user turn; the second is a sidechain
 * assistant turn carrying `finalText` as its only text part, with
 * `stop_reason: "end_turn"`. Each record is on its own line and the file
 * ends with a newline.
 *
 * NOTE(review): presumably this is the shape the watcher recognises as a
 * finished agent — confirm against async-agent-watcher.
 *
 * @param path      Destination file for the transcript.
 * @param finalText Text placed in the assistant record.
 */
function writeCompletedJsonl(path: string, finalText: string): void {
  const userRecord = {
    type: "user",
    isSidechain: true,
    agentId: "x",
    message: { role: "user", content: "do it" },
  };
  const assistantRecord = {
    type: "assistant",
    isSidechain: true,
    agentId: "x",
    message: {
      role: "assistant",
      content: [{ type: "text", text: finalText }],
      stop_reason: "end_turn",
      usage: { input_tokens: 10, output_tokens: 5 },
    },
  };

  // One JSON document per line, each terminated by "\n".
  const payload = [userRecord, assistantRecord]
    .map((record) => `${JSON.stringify(record)}\n`)
    .join("");

  const parentDir = resolve(path, "..");
  fs.mkdirSync(parentDir, { recursive: true });
  fs.writeFileSync(path, payload, "utf-8");
}
82
+
83
describe("v4.12.3 background-bypass end-to-end", () => {
  /** The parts of an async-agent launch that differ between scenarios. */
  interface LaunchSpec {
    sessionKey: string;
    toolUseId: string;
    agentId: string;
    /** File the launched agent is announced to write its transcript to. */
    outPath: string;
    /** Sent as both chatId and userId; every scenario keeps the two equal. */
    chatId: number;
    description: string;
    /** Defaults to "p", the placeholder prompt most scenarios use. */
    prompt?: string;
  }

  /**
   * Dynamically imports the modules under test (session, chunk handler,
   * watcher — in that order) and returns them together with `launch`, a
   * helper that feeds the chunk handler a tool_result announcing a
   * successful async agent launch.
   *
   * Must be called from inside a test so the imports happen after the
   * module reset performed in `beforeEach`.
   */
  async function loadSubjects() {
    const { getSession, resetSession } = await import(
      "../src/services/session.js"
    );
    const { handleToolResultChunk } = await import(
      "../src/handlers/async-agent-chunk-handler.js"
    );
    const watcher = await import("../src/services/async-agent-watcher.js");

    const launch = ({
      sessionKey,
      toolUseId,
      agentId,
      outPath,
      chatId,
      description,
      prompt = "p",
    }: LaunchSpec): void => {
      handleToolResultChunk(
        {
          type: "tool_result",
          toolUseId,
          toolResultContent:
            "Async agent launched successfully.\n" +
            `agentId: ${agentId}\n` +
            `output_file: ${outPath}\n`,
        },
        {
          chatId,
          userId: chatId,
          sessionKey,
          lastToolUseInput: { description, prompt },
        },
      );
    };

    return { getSession, resetSession, watcher, launch };
  }

  it(
    "full scenario: async launch → counter incremented → new message triggers bypass → " +
      "watcher delivery → counter decremented",
    async () => {
      const { getSession, watcher, launch } = await loadSubjects();
      const { shouldBypassQueue, shouldBypassSdkResume } = await import(
        "../src/handlers/background-bypass.js"
      );

      const sessionKey = "int-session-1";
      const session = getSession(sessionKey);
      expect(session.pendingBackgroundCount).toBe(0);

      // === Step 1: simulate the tool_result chunk for an async launch ===
      const outPath = `${TEST_DATA_DIR}/int-out.jsonl`;
      launch({
        sessionKey,
        toolUseId: "toolu_int",
        agentId: "int-agent",
        outPath,
        chatId: 42,
        description: "Research Higgsfield",
        prompt: "do deep research",
      });

      // === Step 2: counter should have been incremented ===
      expect(session.pendingBackgroundCount).toBe(1);

      // === Step 3: simulate the handler noticing isProcessing=true AND
      // background pending. shouldBypassQueue must return true so it knows
      // to abort-and-replace instead of queueing. ===
      session.isProcessing = true;
      session.abortController = new AbortController();
      expect(
        shouldBypassQueue({
          isProcessing: session.isProcessing,
          pendingBackgroundCount: session.pendingBackgroundCount,
          abortController: session.abortController,
        }),
      ).toBe(true);

      // === Step 4: shouldBypassSdkResume must return true so the fresh
      // query uses sessionId=null ===
      expect(
        shouldBypassSdkResume({
          pendingBackgroundCount: session.pendingBackgroundCount,
        }),
      ).toBe(true);

      // === Step 5: simulate the watcher delivering the background result ===
      writeCompletedJsonl(outPath, "Higgsfield research complete");
      await watcher.pollOnce();

      // === Step 6: counter should now be 0 again ===
      expect(session.pendingBackgroundCount).toBe(0);

      // === Step 7: subsequent queries should NOT bypass resume anymore ===
      expect(
        shouldBypassSdkResume({
          pendingBackgroundCount: session.pendingBackgroundCount,
        }),
      ).toBe(false);
    },
  );

  it(
    "stress: 5 parallel background agents launched in one turn, " +
      "counter reflects all of them, all decrement on delivery",
    async () => {
      const { getSession, watcher, launch } = await loadSubjects();

      const sessionKey = "stress-session-5";
      const session = getSession(sessionKey);
      session.pendingBackgroundCount = 0;

      const outPaths = Array.from(
        { length: 5 },
        (_, i) => `${TEST_DATA_DIR}/stress-${i}.jsonl`,
      );
      outPaths.forEach((outPath, i) => {
        launch({
          sessionKey,
          toolUseId: `toolu_stress_${i}`,
          agentId: `stress-${i}`,
          outPath,
          chatId: 42,
          description: `task ${i}`,
        });
      });
      expect(session.pendingBackgroundCount).toBe(5);

      // Deliver 3 of them
      outPaths.slice(0, 3).forEach((outPath, i) => {
        writeCompletedJsonl(outPath, `result ${i}`);
      });
      await watcher.pollOnce();
      expect(session.pendingBackgroundCount).toBe(2);

      // Deliver the last 2
      outPaths.slice(3).forEach((outPath, offset) => {
        writeCompletedJsonl(outPath, `result ${offset + 3}`);
      });
      await watcher.pollOnce();
      expect(session.pendingBackgroundCount).toBe(0);
    },
  );

  it(
    "stress: agents from DIFFERENT sessions do not interfere with each other",
    async () => {
      const { getSession, watcher, launch } = await loadSubjects();

      const sessionA = getSession("stress-iso-a");
      const sessionB = getSession("stress-iso-b");
      const sessionC = getSession("stress-iso-c");
      sessionA.pendingBackgroundCount = 0;
      sessionB.pendingBackgroundCount = 0;
      sessionC.pendingBackgroundCount = 0;

      // Session A launches 2 agents
      for (const i of [0, 1]) {
        launch({
          sessionKey: "stress-iso-a",
          toolUseId: `a${i}`,
          agentId: `iso-a-${i}`,
          outPath: `${TEST_DATA_DIR}/iso-a-${i}.jsonl`,
          chatId: 1,
          description: "a",
        });
      }
      // Session B launches 1
      launch({
        sessionKey: "stress-iso-b",
        toolUseId: "b0",
        agentId: "iso-b-0",
        outPath: `${TEST_DATA_DIR}/iso-b-0.jsonl`,
        chatId: 2,
        description: "b",
      });
      // Session C launches 0

      expect(sessionA.pendingBackgroundCount).toBe(2);
      expect(sessionB.pendingBackgroundCount).toBe(1);
      expect(sessionC.pendingBackgroundCount).toBe(0);

      // Complete only A's agents
      writeCompletedJsonl(`${TEST_DATA_DIR}/iso-a-0.jsonl`, "a0 done");
      writeCompletedJsonl(`${TEST_DATA_DIR}/iso-a-1.jsonl`, "a1 done");
      await watcher.pollOnce();

      // A should be 0, B should still be 1, C unchanged
      expect(sessionA.pendingBackgroundCount).toBe(0);
      expect(sessionB.pendingBackgroundCount).toBe(1);
      expect(sessionC.pendingBackgroundCount).toBe(0);

      // Complete B's agent
      writeCompletedJsonl(`${TEST_DATA_DIR}/iso-b-0.jsonl`, "b0 done");
      await watcher.pollOnce();
      expect(sessionB.pendingBackgroundCount).toBe(0);
    },
  );

  it(
    "bypass decision is correct through a full lifecycle: " +
      "no-pending → launch → pending → deliver → no-pending",
    async () => {
      const { getSession, watcher, launch } = await loadSubjects();
      const { shouldBypassSdkResume } = await import(
        "../src/handlers/background-bypass.js"
      );

      const sessionKey = "lifecycle-session";
      const session = getSession(sessionKey);
      session.pendingBackgroundCount = 0;

      // Initially no bypass
      expect(
        shouldBypassSdkResume({
          pendingBackgroundCount: session.pendingBackgroundCount,
        }),
      ).toBe(false);

      // Launch
      const outPath = `${TEST_DATA_DIR}/lifecycle.jsonl`;
      launch({
        sessionKey,
        toolUseId: "t1",
        agentId: "life1",
        outPath,
        chatId: 1,
        description: "d",
      });

      // Now bypass
      expect(
        shouldBypassSdkResume({
          pendingBackgroundCount: session.pendingBackgroundCount,
        }),
      ).toBe(true);

      // Deliver
      writeCompletedJsonl(outPath, "life done");
      await watcher.pollOnce();

      // Back to no bypass
      expect(
        shouldBypassSdkResume({
          pendingBackgroundCount: session.pendingBackgroundCount,
        }),
      ).toBe(false);
    },
  );

  it(
    "stress: rapid launch+deliver+launch cycle (10 iterations) — " +
      "counter stays consistent, no drift, no negatives",
    async () => {
      const { getSession, watcher, launch } = await loadSubjects();

      const sessionKey = "churn-session";
      const session = getSession(sessionKey);
      session.pendingBackgroundCount = 0;

      // Iterations are deliberately sequential: each launch must be
      // delivered before the next one starts.
      for (let i = 0; i < 10; i++) {
        const outPath = `${TEST_DATA_DIR}/churn-${i}.jsonl`;
        launch({
          sessionKey,
          toolUseId: `churn_${i}`,
          agentId: `churn-${i}`,
          outPath,
          chatId: 1,
          description: `c${i}`,
        });
        expect(session.pendingBackgroundCount).toBe(1);

        writeCompletedJsonl(outPath, `c${i}`);
        await watcher.pollOnce();
        expect(session.pendingBackgroundCount).toBe(0);
      }
    },
  );

  it(
    "watcher decrement is robust against session being reset mid-flight",
    async () => {
      const { getSession, resetSession, watcher, launch } =
        await loadSubjects();

      const sessionKey = "reset-session";
      const session = getSession(sessionKey);
      session.pendingBackgroundCount = 0;

      const outPath = `${TEST_DATA_DIR}/reset.jsonl`;
      launch({
        sessionKey,
        toolUseId: "t1",
        agentId: "reset1",
        outPath,
        chatId: 1,
        description: "d",
      });
      expect(session.pendingBackgroundCount).toBe(1);

      // Simulate /new during background task
      resetSession(sessionKey);
      expect(session.pendingBackgroundCount).toBe(0);

      writeCompletedJsonl(outPath, "done");
      // Delivery should not crash, counter stays at 0 (Math.max clamp)
      await expect(watcher.pollOnce()).resolves.not.toThrow();
      expect(session.pendingBackgroundCount).toBe(0);
    },
  );
});