alvin-bot 4.18.0 → 4.18.2

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. package/AEC-PLUGINS-SOURCES.md +53 -0
  2. package/CHANGELOG.md +37 -2
  3. package/DESIGN-SKILLS-SOURCES.md +81 -0
  4. package/bin/cli.js +1 -1
  5. package/dist/providers/claude-sdk-provider.js +24 -0
  6. package/package.json +3 -1
  7. package/test/allowed-users-gate.test.ts +0 -98
  8. package/test/alvin-dispatch.test.ts +0 -220
  9. package/test/async-agent-chunk-flow.test.ts +0 -244
  10. package/test/async-agent-parser-staleness.test.ts +0 -412
  11. package/test/async-agent-parser-streamjson.test.ts +0 -273
  12. package/test/async-agent-parser.test.ts +0 -322
  13. package/test/async-agent-watcher.test.ts +0 -229
  14. package/test/background-bypass-integration.test.ts +0 -443
  15. package/test/background-bypass-stress.test.ts +0 -417
  16. package/test/background-bypass.test.ts +0 -127
  17. package/test/browser-webfetch.test.ts +0 -121
  18. package/test/claude-sdk-provider.test.ts +0 -115
  19. package/test/claude-sdk-tool-use-id.test.ts +0 -180
  20. package/test/console-timestamps.test.ts +0 -98
  21. package/test/cron-progress-ticker.test.ts +0 -76
  22. package/test/cron-restart-resilience.test.ts +0 -191
  23. package/test/cron-run-resolver.test.ts +0 -133
  24. package/test/cron-runjobnow-throw.test.ts +0 -100
  25. package/test/debounce.test.ts +0 -60
  26. package/test/delivery-registry.test.ts +0 -71
  27. package/test/exec-guard-metachars.test.ts +0 -110
  28. package/test/file-permissions.test.ts +0 -130
  29. package/test/i18n.test.ts +0 -108
  30. package/test/list-subagents-merged.test.ts +0 -172
  31. package/test/memory-extractor.test.ts +0 -151
  32. package/test/memory-layers.test.ts +0 -169
  33. package/test/memory-sdk-injection.test.ts +0 -146
  34. package/test/memory-stress-restart.test.ts +0 -337
  35. package/test/multi-session-stress.test.ts +0 -255
  36. package/test/platform-session-key.test.ts +0 -69
  37. package/test/process-manager.test.ts +0 -186
  38. package/test/registry.test.ts +0 -201
  39. package/test/session-pending-background.test.ts +0 -59
  40. package/test/session-persistence.test.ts +0 -195
  41. package/test/slack-progress-ticker.test.ts +0 -123
  42. package/test/slack-slash-command.test.ts +0 -61
  43. package/test/slack-test-connection.test.ts +0 -176
  44. package/test/stress-scenarios.test.ts +0 -356
  45. package/test/stuck-timer.test.ts +0 -116
  46. package/test/subagent-delivery-markdown-fallback.test.ts +0 -147
  47. package/test/subagent-delivery-platform-routing.test.ts +0 -232
  48. package/test/subagent-delivery.test.ts +0 -273
  49. package/test/subagent-final-text.test.ts +0 -132
  50. package/test/subagent-stats.test.ts +0 -119
  51. package/test/subagent-toolset-allowlist.test.ts +0 -146
  52. package/test/subagents-commands.test.ts +0 -64
  53. package/test/subagents-config.test.ts +0 -114
  54. package/test/subagents-depth.test.ts +0 -58
  55. package/test/subagents-inheritance.test.ts +0 -67
  56. package/test/subagents-name-resolver.test.ts +0 -122
  57. package/test/subagents-priority-reject.test.ts +0 -88
  58. package/test/subagents-queue.test.ts +0 -127
  59. package/test/subagents-shutdown.test.ts +0 -126
  60. package/test/subagents-toolset.test.ts +0 -71
  61. package/test/sync-task-timeout.test.ts +0 -153
  62. package/test/system-prompt-background-hint.test.ts +0 -65
  63. package/test/telegram-error-filter.test.ts +0 -85
  64. package/test/telegram-workspace-command.test.ts +0 -78
  65. package/test/timing-safe-bearer.test.ts +0 -65
  66. package/test/watchdog-brake.test.ts +0 -157
  67. package/test/watcher-pending-count.test.ts +0 -228
  68. package/test/watcher-zombie-fix.test.ts +0 -252
  69. package/test/web-server-integration.test.ts +0 -189
  70. package/test/web-server-resilience.test.ts +0 -118
  71. package/test/web-server-shutdown.test.ts +0 -117
  72. package/test/whatsapp-auth-resilience.test.ts +0 -96
  73. package/test/workspaces.test.ts +0 -196
  74. package/vitest.config.ts +0 -17
@@ -1,229 +0,0 @@
1
- /**
2
- * Fix #17 (Stage 2) — async-agent-watcher integration tests.
3
- *
4
- * The watcher polls outputFiles of pending agents, detects completion,
5
- * delivers via subagent-delivery.ts, and persists state to disk so the
6
- * pending list survives bot restarts.
7
- */
8
- import { describe, it, expect, beforeEach, afterEach, vi } from "vitest";
9
- import fs from "fs";
10
- import os from "os";
11
- import { resolve } from "path";
12
-
13
- const TEST_DATA_DIR = resolve(os.tmpdir(), `alvin-async-watcher-${process.pid}-${Date.now()}`);
14
-
15
- interface DeliveredCall {
16
- info: { name: string; source?: string; parentChatId?: number; status?: string };
17
- result: { status: string; output: string; duration: number; error?: string };
18
- }
19
-
20
- let delivered: DeliveredCall[] = [];
21
-
22
- beforeEach(async () => {
23
- if (fs.existsSync(TEST_DATA_DIR)) fs.rmSync(TEST_DATA_DIR, { recursive: true, force: true });
24
- fs.mkdirSync(TEST_DATA_DIR, { recursive: true });
25
- process.env.ALVIN_DATA_DIR = TEST_DATA_DIR;
26
- delivered = [];
27
- vi.resetModules();
28
- vi.doMock("../src/services/subagent-delivery.js", () => ({
29
- deliverSubAgentResult: async (info: unknown, result: unknown) => {
30
- delivered.push({ info: info as DeliveredCall["info"], result: result as DeliveredCall["result"] });
31
- },
32
- attachBotApi: () => {},
33
- __setBotApiForTest: () => {},
34
- }));
35
- });
36
-
37
- afterEach(async () => {
38
- try {
39
- const mod = await import("../src/services/async-agent-watcher.js");
40
- mod.stopWatcher();
41
- mod.__resetForTest();
42
- } catch { /* ignore */ }
43
- });
44
-
45
- function writeCompletedJsonl(path: string, finalText: string): void {
46
- const lines = [
47
- JSON.stringify({
48
- type: "user",
49
- isSidechain: true,
50
- agentId: "x",
51
- message: { role: "user", content: "do it" },
52
- }),
53
- JSON.stringify({
54
- type: "assistant",
55
- isSidechain: true,
56
- agentId: "x",
57
- message: {
58
- role: "assistant",
59
- content: [{ type: "text", text: finalText }],
60
- stop_reason: "end_turn",
61
- usage: { input_tokens: 100, output_tokens: 50 },
62
- },
63
- }),
64
- ].join("\n") + "\n";
65
- fs.mkdirSync(resolve(path, ".."), { recursive: true });
66
- fs.writeFileSync(path, lines, "utf-8");
67
- }
68
-
69
- describe("async-agent-watcher (Stage 2)", () => {
70
- it("registers a pending agent and persists it to disk", async () => {
71
- const mod = await import("../src/services/async-agent-watcher.js");
72
- mod.registerPendingAgent({
73
- agentId: "abc-1",
74
- outputFile: `${TEST_DATA_DIR}/out-abc-1.jsonl`,
75
- description: "Test SEO audit",
76
- prompt: "do a test",
77
- chatId: 42,
78
- userId: 42,
79
- toolUseId: "toolu_1",
80
- });
81
- const stateFile = `${TEST_DATA_DIR}/state/async-agents.json`;
82
- expect(fs.existsSync(stateFile)).toBe(true);
83
- const parsed = JSON.parse(fs.readFileSync(stateFile, "utf-8"));
84
- expect(parsed).toHaveLength(1);
85
- expect(parsed[0].agentId).toBe("abc-1");
86
- expect(parsed[0].description).toBe("Test SEO audit");
87
- });
88
-
89
- it("delivers a pending agent when its outputFile completes", async () => {
90
- const mod = await import("../src/services/async-agent-watcher.js");
91
- const outPath = `${TEST_DATA_DIR}/out-abc-2.jsonl`;
92
- mod.registerPendingAgent({
93
- agentId: "abc-2",
94
- outputFile: outPath,
95
- description: "quick task",
96
- prompt: "p",
97
- chatId: 42,
98
- userId: 42,
99
- toolUseId: null,
100
- });
101
- writeCompletedJsonl(outPath, "Here is the report");
102
-
103
- await mod.pollOnce();
104
-
105
- expect(delivered).toHaveLength(1);
106
- expect(delivered[0].info.name).toBe("quick task");
107
- expect(delivered[0].result.output).toContain("Here is the report");
108
- expect(delivered[0].result.status).toBe("completed");
109
- });
110
-
111
- it("removes a pending agent from persistence after delivery", async () => {
112
- const mod = await import("../src/services/async-agent-watcher.js");
113
- const outPath = `${TEST_DATA_DIR}/out-abc-3.jsonl`;
114
- mod.registerPendingAgent({
115
- agentId: "abc-3",
116
- outputFile: outPath,
117
- description: "cleanup test",
118
- prompt: "p",
119
- chatId: 42,
120
- userId: 42,
121
- toolUseId: null,
122
- });
123
- writeCompletedJsonl(outPath, "done");
124
- await mod.pollOnce();
125
-
126
- const stateFile = `${TEST_DATA_DIR}/state/async-agents.json`;
127
- const state = JSON.parse(fs.readFileSync(stateFile, "utf-8"));
128
- expect(state).toHaveLength(0);
129
- });
130
-
131
- it("loads pending agents from disk at startup (bot restart catchup)", async () => {
132
- fs.mkdirSync(`${TEST_DATA_DIR}/state`, { recursive: true });
133
- const outPath = `${TEST_DATA_DIR}/out-preexisting.jsonl`;
134
- fs.writeFileSync(
135
- `${TEST_DATA_DIR}/state/async-agents.json`,
136
- JSON.stringify([
137
- {
138
- agentId: "preexisting",
139
- outputFile: outPath,
140
- description: "Survived restart",
141
- prompt: "p",
142
- chatId: 42,
143
- userId: 42,
144
- startedAt: Date.now() - 5000,
145
- lastCheckedAt: Date.now() - 1000,
146
- giveUpAt: Date.now() + 86_400_000,
147
- toolUseId: null,
148
- },
149
- ]),
150
- );
151
- writeCompletedJsonl(outPath, "result from earlier session");
152
-
153
- const mod = await import("../src/services/async-agent-watcher.js");
154
- mod.startWatcher();
155
- await mod.pollOnce();
156
-
157
- expect(delivered).toHaveLength(1);
158
- expect(delivered[0].info.name).toBe("Survived restart");
159
- expect(delivered[0].result.output).toContain("result from earlier session");
160
- });
161
-
162
- it("gives up on agents older than giveUpAt and delivers a timeout banner", async () => {
163
- const mod = await import("../src/services/async-agent-watcher.js");
164
- const outPath = `${TEST_DATA_DIR}/out-timeout.jsonl`;
165
- mod.registerPendingAgent({
166
- agentId: "abc-4",
167
- outputFile: outPath,
168
- description: "forever task",
169
- prompt: "p",
170
- chatId: 42,
171
- userId: 42,
172
- toolUseId: null,
173
- giveUpAt: Date.now() - 1000,
174
- });
175
- // File never exists
176
- await mod.pollOnce();
177
-
178
- expect(delivered).toHaveLength(1);
179
- expect(delivered[0].result.status).toBe("timeout");
180
- expect(delivered[0].info.status).toBe("timeout");
181
- });
182
-
183
- it("multiple concurrent pending agents all get delivered as they complete", async () => {
184
- const mod = await import("../src/services/async-agent-watcher.js");
185
- const outA = `${TEST_DATA_DIR}/out-a.jsonl`;
186
- const outB = `${TEST_DATA_DIR}/out-b.jsonl`;
187
- const outC = `${TEST_DATA_DIR}/out-c.jsonl`;
188
- mod.registerPendingAgent({
189
- agentId: "a", outputFile: outA, description: "A",
190
- prompt: "p", chatId: 1, userId: 1, toolUseId: null,
191
- });
192
- mod.registerPendingAgent({
193
- agentId: "b", outputFile: outB, description: "B",
194
- prompt: "p", chatId: 2, userId: 2, toolUseId: null,
195
- });
196
- mod.registerPendingAgent({
197
- agentId: "c", outputFile: outC, description: "C",
198
- prompt: "p", chatId: 3, userId: 3, toolUseId: null,
199
- });
200
-
201
- writeCompletedJsonl(outA, "A done");
202
- writeCompletedJsonl(outB, "B done");
203
- // C still pending
204
-
205
- await mod.pollOnce();
206
- expect(delivered).toHaveLength(2);
207
- expect(delivered.map((d) => d.info.name).sort()).toEqual(["A", "B"]);
208
-
209
- writeCompletedJsonl(outC, "C done");
210
- await mod.pollOnce();
211
- expect(delivered).toHaveLength(3);
212
- });
213
-
214
- it("listPendingAgents reflects in-memory state", async () => {
215
- const mod = await import("../src/services/async-agent-watcher.js");
216
- expect(mod.listPendingAgents()).toEqual([]);
217
- mod.registerPendingAgent({
218
- agentId: "x",
219
- outputFile: `${TEST_DATA_DIR}/out-x.jsonl`,
220
- description: "test",
221
- prompt: "p",
222
- chatId: 1,
223
- userId: 1,
224
- toolUseId: null,
225
- });
226
- expect(mod.listPendingAgents()).toHaveLength(1);
227
- expect(mod.listPendingAgents()[0].agentId).toBe("x");
228
- });
229
- });
@@ -1,443 +0,0 @@
1
- /**
2
- * v4.12.3 — End-to-end integration test for the background-agent bypass
3
- * path. Simulates the following scenario:
4
- *
5
- * 1. User sends a message that causes Claude to launch an async Agent
6
- * 2. While the SDK's CLI subprocess idles waiting for the
7
- * task-notification, user sends a NEW message
8
- * 3. The handler recognises the pending background state and:
9
- * a. Aborts the blocked query
10
- * b. Bypasses SDK resume for the new query (sessionId=null)
11
- * c. Injects bridge preamble with history
12
- * 4. The watcher delivers the background result via
13
- * subagent-delivery.ts as a separate message
14
- * 5. After delivery, pendingBackgroundCount returns to 0 and future
15
- * queries use normal SDK resume again
16
- *
17
- * The full handler is too tightly coupled to grammy to unit-test end
18
- * to end. Instead we exercise each layer directly:
19
- * - session.pendingBackgroundCount updates (counter wiring)
20
- * - shouldBypassQueue / shouldBypassSdkResume decision points
21
- * - watcher delivery → counter decrement
22
- * - abort + wait path
23
- */
24
- import { describe, it, expect, beforeEach, afterEach, vi } from "vitest";
25
- import fs from "fs";
26
- import os from "os";
27
- import { resolve } from "path";
28
-
29
- const TEST_DATA_DIR = resolve(
30
- os.tmpdir(),
31
- `alvin-bypass-int-${process.pid}-${Date.now()}`,
32
- );
33
-
34
- beforeEach(async () => {
35
- if (fs.existsSync(TEST_DATA_DIR)) {
36
- fs.rmSync(TEST_DATA_DIR, { recursive: true, force: true });
37
- }
38
- fs.mkdirSync(TEST_DATA_DIR, { recursive: true });
39
- process.env.ALVIN_DATA_DIR = TEST_DATA_DIR;
40
- vi.resetModules();
41
- vi.doMock("../src/services/subagent-delivery.js", () => ({
42
- deliverSubAgentResult: async () => {},
43
- attachBotApi: () => {},
44
- __setBotApiForTest: () => {},
45
- }));
46
- });
47
-
48
- afterEach(async () => {
49
- try {
50
- const mod = await import("../src/services/async-agent-watcher.js");
51
- mod.stopWatcher();
52
- mod.__resetForTest();
53
- } catch {
54
- /* ignore */
55
- }
56
- });
57
-
58
- function writeCompletedJsonl(path: string, finalText: string): void {
59
- const lines =
60
- [
61
- JSON.stringify({
62
- type: "user",
63
- isSidechain: true,
64
- agentId: "x",
65
- message: { role: "user", content: "do it" },
66
- }),
67
- JSON.stringify({
68
- type: "assistant",
69
- isSidechain: true,
70
- agentId: "x",
71
- message: {
72
- role: "assistant",
73
- content: [{ type: "text", text: finalText }],
74
- stop_reason: "end_turn",
75
- usage: { input_tokens: 10, output_tokens: 5 },
76
- },
77
- }),
78
- ].join("\n") + "\n";
79
- fs.mkdirSync(resolve(path, ".."), { recursive: true });
80
- fs.writeFileSync(path, lines, "utf-8");
81
- }
82
-
83
- describe("v4.12.3 background-bypass end-to-end", () => {
84
- it(
85
- "full scenario: async launch → counter incremented → new message triggers bypass → " +
86
- "watcher delivery → counter decremented",
87
- async () => {
88
- const { getSession } = await import("../src/services/session.js");
89
- const { handleToolResultChunk } = await import(
90
- "../src/handlers/async-agent-chunk-handler.js"
91
- );
92
- const watcher = await import("../src/services/async-agent-watcher.js");
93
- const {
94
- shouldBypassQueue,
95
- shouldBypassSdkResume,
96
- } = await import("../src/handlers/background-bypass.js");
97
-
98
- const sessionKey = "int-session-1";
99
- const session = getSession(sessionKey);
100
- expect(session.pendingBackgroundCount).toBe(0);
101
-
102
- // === Step 1: simulate the tool_result chunk for an async launch ===
103
- const outPath = `${TEST_DATA_DIR}/int-out.jsonl`;
104
- handleToolResultChunk(
105
- {
106
- type: "tool_result",
107
- toolUseId: "toolu_int",
108
- toolResultContent:
109
- "Async agent launched successfully.\n" +
110
- "agentId: int-agent\n" +
111
- `output_file: ${outPath}\n`,
112
- },
113
- {
114
- chatId: 42,
115
- userId: 42,
116
- sessionKey,
117
- lastToolUseInput: {
118
- description: "Research Higgsfield",
119
- prompt: "do deep research",
120
- },
121
- },
122
- );
123
-
124
- // === Step 2: counter should have been incremented ===
125
- expect(session.pendingBackgroundCount).toBe(1);
126
-
127
- // === Step 3: simulate the handler noticing isProcessing=true AND
128
- // background pending. shouldBypassQueue must return true so it knows
129
- // to abort-and-replace instead of queueing. ===
130
- session.isProcessing = true;
131
- session.abortController = new AbortController();
132
- expect(
133
- shouldBypassQueue({
134
- isProcessing: session.isProcessing,
135
- pendingBackgroundCount: session.pendingBackgroundCount,
136
- abortController: session.abortController,
137
- }),
138
- ).toBe(true);
139
-
140
- // === Step 4: shouldBypassSdkResume must return true so the fresh
141
- // query uses sessionId=null ===
142
- expect(
143
- shouldBypassSdkResume({
144
- pendingBackgroundCount: session.pendingBackgroundCount,
145
- }),
146
- ).toBe(true);
147
-
148
- // === Step 5: simulate the watcher delivering the background result ===
149
- writeCompletedJsonl(outPath, "Higgsfield research complete");
150
- await watcher.pollOnce();
151
-
152
- // === Step 6: counter should now be 0 again ===
153
- expect(session.pendingBackgroundCount).toBe(0);
154
-
155
- // === Step 7: subsequent queries should NOT bypass resume anymore ===
156
- expect(
157
- shouldBypassSdkResume({
158
- pendingBackgroundCount: session.pendingBackgroundCount,
159
- }),
160
- ).toBe(false);
161
- },
162
- );
163
-
164
- it(
165
- "stress: 5 parallel background agents launched in one turn, " +
166
- "counter reflects all of them, all decrement on delivery",
167
- async () => {
168
- const { getSession } = await import("../src/services/session.js");
169
- const { handleToolResultChunk } = await import(
170
- "../src/handlers/async-agent-chunk-handler.js"
171
- );
172
- const watcher = await import("../src/services/async-agent-watcher.js");
173
-
174
- const sessionKey = "stress-session-5";
175
- const session = getSession(sessionKey);
176
- session.pendingBackgroundCount = 0;
177
-
178
- const outPaths: string[] = [];
179
- for (let i = 0; i < 5; i++) {
180
- const outPath = `${TEST_DATA_DIR}/stress-${i}.jsonl`;
181
- outPaths.push(outPath);
182
- handleToolResultChunk(
183
- {
184
- type: "tool_result",
185
- toolUseId: `toolu_stress_${i}`,
186
- toolResultContent:
187
- "Async agent launched successfully.\n" +
188
- `agentId: stress-${i}\n` +
189
- `output_file: ${outPath}\n`,
190
- },
191
- {
192
- chatId: 42,
193
- userId: 42,
194
- sessionKey,
195
- lastToolUseInput: {
196
- description: `task ${i}`,
197
- prompt: "p",
198
- },
199
- },
200
- );
201
- }
202
- expect(session.pendingBackgroundCount).toBe(5);
203
-
204
- // Deliver 3 of them
205
- for (let i = 0; i < 3; i++) {
206
- writeCompletedJsonl(outPaths[i], `result ${i}`);
207
- }
208
- await watcher.pollOnce();
209
- expect(session.pendingBackgroundCount).toBe(2);
210
-
211
- // Deliver the last 2
212
- writeCompletedJsonl(outPaths[3], "result 3");
213
- writeCompletedJsonl(outPaths[4], "result 4");
214
- await watcher.pollOnce();
215
- expect(session.pendingBackgroundCount).toBe(0);
216
- },
217
- );
218
-
219
- it(
220
- "stress: agents from DIFFERENT sessions do not interfere with each other",
221
- async () => {
222
- const { getSession } = await import("../src/services/session.js");
223
- const { handleToolResultChunk } = await import(
224
- "../src/handlers/async-agent-chunk-handler.js"
225
- );
226
- const watcher = await import("../src/services/async-agent-watcher.js");
227
-
228
- const sessionA = getSession("stress-iso-a");
229
- const sessionB = getSession("stress-iso-b");
230
- const sessionC = getSession("stress-iso-c");
231
- sessionA.pendingBackgroundCount = 0;
232
- sessionB.pendingBackgroundCount = 0;
233
- sessionC.pendingBackgroundCount = 0;
234
-
235
- // Session A launches 2 agents
236
- for (const i of [0, 1]) {
237
- const p = `${TEST_DATA_DIR}/iso-a-${i}.jsonl`;
238
- handleToolResultChunk(
239
- {
240
- type: "tool_result",
241
- toolUseId: `a${i}`,
242
- toolResultContent:
243
- `Async agent launched successfully.\n` +
244
- `agentId: iso-a-${i}\n` +
245
- `output_file: ${p}\n`,
246
- },
247
- {
248
- chatId: 1,
249
- userId: 1,
250
- sessionKey: "stress-iso-a",
251
- lastToolUseInput: { description: "a", prompt: "p" },
252
- },
253
- );
254
- }
255
- // Session B launches 1
256
- handleToolResultChunk(
257
- {
258
- type: "tool_result",
259
- toolUseId: "b0",
260
- toolResultContent:
261
- "Async agent launched successfully.\n" +
262
- "agentId: iso-b-0\n" +
263
- `output_file: ${TEST_DATA_DIR}/iso-b-0.jsonl\n`,
264
- },
265
- {
266
- chatId: 2,
267
- userId: 2,
268
- sessionKey: "stress-iso-b",
269
- lastToolUseInput: { description: "b", prompt: "p" },
270
- },
271
- );
272
- // Session C launches 0
273
-
274
- expect(sessionA.pendingBackgroundCount).toBe(2);
275
- expect(sessionB.pendingBackgroundCount).toBe(1);
276
- expect(sessionC.pendingBackgroundCount).toBe(0);
277
-
278
- // Complete only A's agents
279
- writeCompletedJsonl(`${TEST_DATA_DIR}/iso-a-0.jsonl`, "a0 done");
280
- writeCompletedJsonl(`${TEST_DATA_DIR}/iso-a-1.jsonl`, "a1 done");
281
- await watcher.pollOnce();
282
-
283
- // A should be 0, B should still be 1, C unchanged
284
- expect(sessionA.pendingBackgroundCount).toBe(0);
285
- expect(sessionB.pendingBackgroundCount).toBe(1);
286
- expect(sessionC.pendingBackgroundCount).toBe(0);
287
-
288
- // Complete B's agent
289
- writeCompletedJsonl(`${TEST_DATA_DIR}/iso-b-0.jsonl`, "b0 done");
290
- await watcher.pollOnce();
291
- expect(sessionB.pendingBackgroundCount).toBe(0);
292
- },
293
- );
294
-
295
- it(
296
- "bypass decision is correct through a full lifecycle: " +
297
- "no-pending → launch → pending → deliver → no-pending",
298
- async () => {
299
- const { getSession } = await import("../src/services/session.js");
300
- const { handleToolResultChunk } = await import(
301
- "../src/handlers/async-agent-chunk-handler.js"
302
- );
303
- const watcher = await import("../src/services/async-agent-watcher.js");
304
- const { shouldBypassSdkResume } = await import(
305
- "../src/handlers/background-bypass.js"
306
- );
307
-
308
- const sessionKey = "lifecycle-session";
309
- const session = getSession(sessionKey);
310
- session.pendingBackgroundCount = 0;
311
-
312
- // Initially no bypass
313
- expect(
314
- shouldBypassSdkResume({
315
- pendingBackgroundCount: session.pendingBackgroundCount,
316
- }),
317
- ).toBe(false);
318
-
319
- // Launch
320
- const outPath = `${TEST_DATA_DIR}/lifecycle.jsonl`;
321
- handleToolResultChunk(
322
- {
323
- type: "tool_result",
324
- toolUseId: "t1",
325
- toolResultContent:
326
- "Async agent launched successfully.\n" +
327
- "agentId: life1\n" +
328
- `output_file: ${outPath}\n`,
329
- },
330
- {
331
- chatId: 1,
332
- userId: 1,
333
- sessionKey,
334
- lastToolUseInput: { description: "d", prompt: "p" },
335
- },
336
- );
337
-
338
- // Now bypass
339
- expect(
340
- shouldBypassSdkResume({
341
- pendingBackgroundCount: session.pendingBackgroundCount,
342
- }),
343
- ).toBe(true);
344
-
345
- // Deliver
346
- writeCompletedJsonl(outPath, "life done");
347
- await watcher.pollOnce();
348
-
349
- // Back to no bypass
350
- expect(
351
- shouldBypassSdkResume({
352
- pendingBackgroundCount: session.pendingBackgroundCount,
353
- }),
354
- ).toBe(false);
355
- },
356
- );
357
-
358
- it(
359
- "stress: rapid launch+deliver+launch cycle (10 iterations) — " +
360
- "counter stays consistent, no drift, no negatives",
361
- async () => {
362
- const { getSession } = await import("../src/services/session.js");
363
- const { handleToolResultChunk } = await import(
364
- "../src/handlers/async-agent-chunk-handler.js"
365
- );
366
- const watcher = await import("../src/services/async-agent-watcher.js");
367
-
368
- const sessionKey = "churn-session";
369
- const session = getSession(sessionKey);
370
- session.pendingBackgroundCount = 0;
371
-
372
- for (let i = 0; i < 10; i++) {
373
- const outPath = `${TEST_DATA_DIR}/churn-${i}.jsonl`;
374
- handleToolResultChunk(
375
- {
376
- type: "tool_result",
377
- toolUseId: `churn_${i}`,
378
- toolResultContent:
379
- "Async agent launched successfully.\n" +
380
- `agentId: churn-${i}\n` +
381
- `output_file: ${outPath}\n`,
382
- },
383
- {
384
- chatId: 1,
385
- userId: 1,
386
- sessionKey,
387
- lastToolUseInput: { description: `c${i}`, prompt: "p" },
388
- },
389
- );
390
- expect(session.pendingBackgroundCount).toBe(1);
391
-
392
- writeCompletedJsonl(outPath, `c${i}`);
393
- await watcher.pollOnce();
394
- expect(session.pendingBackgroundCount).toBe(0);
395
- }
396
- },
397
- );
398
-
399
- it(
400
- "watcher decrement is robust against session being reset mid-flight",
401
- async () => {
402
- const { getSession, resetSession } = await import(
403
- "../src/services/session.js"
404
- );
405
- const { handleToolResultChunk } = await import(
406
- "../src/handlers/async-agent-chunk-handler.js"
407
- );
408
- const watcher = await import("../src/services/async-agent-watcher.js");
409
-
410
- const sessionKey = "reset-session";
411
- const session = getSession(sessionKey);
412
- session.pendingBackgroundCount = 0;
413
-
414
- const outPath = `${TEST_DATA_DIR}/reset.jsonl`;
415
- handleToolResultChunk(
416
- {
417
- type: "tool_result",
418
- toolUseId: "t1",
419
- toolResultContent:
420
- "Async agent launched successfully.\n" +
421
- "agentId: reset1\n" +
422
- `output_file: ${outPath}\n`,
423
- },
424
- {
425
- chatId: 1,
426
- userId: 1,
427
- sessionKey,
428
- lastToolUseInput: { description: "d", prompt: "p" },
429
- },
430
- );
431
- expect(session.pendingBackgroundCount).toBe(1);
432
-
433
- // Simulate /new during background task
434
- resetSession(sessionKey);
435
- expect(session.pendingBackgroundCount).toBe(0);
436
-
437
- writeCompletedJsonl(outPath, "done");
438
- // Delivery should not crash, counter stays at 0 (Math.max clamp)
439
- await expect(watcher.pollOnce()).resolves.not.toThrow();
440
- expect(session.pendingBackgroundCount).toBe(0);
441
- },
442
- );
443
- });