alvin-bot 4.18.0 → 4.18.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AEC-PLUGINS-SOURCES.md +53 -0
- package/CHANGELOG.md +37 -2
- package/DESIGN-SKILLS-SOURCES.md +81 -0
- package/bin/cli.js +1 -1
- package/dist/providers/claude-sdk-provider.js +24 -0
- package/package.json +3 -1
- package/test/allowed-users-gate.test.ts +0 -98
- package/test/alvin-dispatch.test.ts +0 -220
- package/test/async-agent-chunk-flow.test.ts +0 -244
- package/test/async-agent-parser-staleness.test.ts +0 -412
- package/test/async-agent-parser-streamjson.test.ts +0 -273
- package/test/async-agent-parser.test.ts +0 -322
- package/test/async-agent-watcher.test.ts +0 -229
- package/test/background-bypass-integration.test.ts +0 -443
- package/test/background-bypass-stress.test.ts +0 -417
- package/test/background-bypass.test.ts +0 -127
- package/test/browser-webfetch.test.ts +0 -121
- package/test/claude-sdk-provider.test.ts +0 -115
- package/test/claude-sdk-tool-use-id.test.ts +0 -180
- package/test/console-timestamps.test.ts +0 -98
- package/test/cron-progress-ticker.test.ts +0 -76
- package/test/cron-restart-resilience.test.ts +0 -191
- package/test/cron-run-resolver.test.ts +0 -133
- package/test/cron-runjobnow-throw.test.ts +0 -100
- package/test/debounce.test.ts +0 -60
- package/test/delivery-registry.test.ts +0 -71
- package/test/exec-guard-metachars.test.ts +0 -110
- package/test/file-permissions.test.ts +0 -130
- package/test/i18n.test.ts +0 -108
- package/test/list-subagents-merged.test.ts +0 -172
- package/test/memory-extractor.test.ts +0 -151
- package/test/memory-layers.test.ts +0 -169
- package/test/memory-sdk-injection.test.ts +0 -146
- package/test/memory-stress-restart.test.ts +0 -337
- package/test/multi-session-stress.test.ts +0 -255
- package/test/platform-session-key.test.ts +0 -69
- package/test/process-manager.test.ts +0 -186
- package/test/registry.test.ts +0 -201
- package/test/session-pending-background.test.ts +0 -59
- package/test/session-persistence.test.ts +0 -195
- package/test/slack-progress-ticker.test.ts +0 -123
- package/test/slack-slash-command.test.ts +0 -61
- package/test/slack-test-connection.test.ts +0 -176
- package/test/stress-scenarios.test.ts +0 -356
- package/test/stuck-timer.test.ts +0 -116
- package/test/subagent-delivery-markdown-fallback.test.ts +0 -147
- package/test/subagent-delivery-platform-routing.test.ts +0 -232
- package/test/subagent-delivery.test.ts +0 -273
- package/test/subagent-final-text.test.ts +0 -132
- package/test/subagent-stats.test.ts +0 -119
- package/test/subagent-toolset-allowlist.test.ts +0 -146
- package/test/subagents-commands.test.ts +0 -64
- package/test/subagents-config.test.ts +0 -114
- package/test/subagents-depth.test.ts +0 -58
- package/test/subagents-inheritance.test.ts +0 -67
- package/test/subagents-name-resolver.test.ts +0 -122
- package/test/subagents-priority-reject.test.ts +0 -88
- package/test/subagents-queue.test.ts +0 -127
- package/test/subagents-shutdown.test.ts +0 -126
- package/test/subagents-toolset.test.ts +0 -71
- package/test/sync-task-timeout.test.ts +0 -153
- package/test/system-prompt-background-hint.test.ts +0 -65
- package/test/telegram-error-filter.test.ts +0 -85
- package/test/telegram-workspace-command.test.ts +0 -78
- package/test/timing-safe-bearer.test.ts +0 -65
- package/test/watchdog-brake.test.ts +0 -157
- package/test/watcher-pending-count.test.ts +0 -228
- package/test/watcher-zombie-fix.test.ts +0 -252
- package/test/web-server-integration.test.ts +0 -189
- package/test/web-server-resilience.test.ts +0 -118
- package/test/web-server-shutdown.test.ts +0 -117
- package/test/whatsapp-auth-resilience.test.ts +0 -96
- package/test/workspaces.test.ts +0 -196
- package/vitest.config.ts +0 -17
|
@@ -1,229 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Fix #17 (Stage 2) — async-agent-watcher integration tests.
|
|
3
|
-
*
|
|
4
|
-
* The watcher polls outputFiles of pending agents, detects completion,
|
|
5
|
-
* delivers via subagent-delivery.ts, and persists state to disk so the
|
|
6
|
-
* pending list survives bot restarts.
|
|
7
|
-
*/
|
|
8
|
-
import { describe, it, expect, beforeEach, afterEach, vi } from "vitest";
|
|
9
|
-
import fs from "fs";
|
|
10
|
-
import os from "os";
|
|
11
|
-
import { resolve } from "path";
|
|
12
|
-
|
|
13
|
-
const TEST_DATA_DIR = resolve(os.tmpdir(), `alvin-async-watcher-${process.pid}-${Date.now()}`);
|
|
14
|
-
|
|
15
|
-
interface DeliveredCall {
|
|
16
|
-
info: { name: string; source?: string; parentChatId?: number; status?: string };
|
|
17
|
-
result: { status: string; output: string; duration: number; error?: string };
|
|
18
|
-
}
|
|
19
|
-
|
|
20
|
-
let delivered: DeliveredCall[] = [];
|
|
21
|
-
|
|
22
|
-
beforeEach(async () => {
|
|
23
|
-
if (fs.existsSync(TEST_DATA_DIR)) fs.rmSync(TEST_DATA_DIR, { recursive: true, force: true });
|
|
24
|
-
fs.mkdirSync(TEST_DATA_DIR, { recursive: true });
|
|
25
|
-
process.env.ALVIN_DATA_DIR = TEST_DATA_DIR;
|
|
26
|
-
delivered = [];
|
|
27
|
-
vi.resetModules();
|
|
28
|
-
vi.doMock("../src/services/subagent-delivery.js", () => ({
|
|
29
|
-
deliverSubAgentResult: async (info: unknown, result: unknown) => {
|
|
30
|
-
delivered.push({ info: info as DeliveredCall["info"], result: result as DeliveredCall["result"] });
|
|
31
|
-
},
|
|
32
|
-
attachBotApi: () => {},
|
|
33
|
-
__setBotApiForTest: () => {},
|
|
34
|
-
}));
|
|
35
|
-
});
|
|
36
|
-
|
|
37
|
-
afterEach(async () => {
|
|
38
|
-
try {
|
|
39
|
-
const mod = await import("../src/services/async-agent-watcher.js");
|
|
40
|
-
mod.stopWatcher();
|
|
41
|
-
mod.__resetForTest();
|
|
42
|
-
} catch { /* ignore */ }
|
|
43
|
-
});
|
|
44
|
-
|
|
45
|
-
function writeCompletedJsonl(path: string, finalText: string): void {
|
|
46
|
-
const lines = [
|
|
47
|
-
JSON.stringify({
|
|
48
|
-
type: "user",
|
|
49
|
-
isSidechain: true,
|
|
50
|
-
agentId: "x",
|
|
51
|
-
message: { role: "user", content: "do it" },
|
|
52
|
-
}),
|
|
53
|
-
JSON.stringify({
|
|
54
|
-
type: "assistant",
|
|
55
|
-
isSidechain: true,
|
|
56
|
-
agentId: "x",
|
|
57
|
-
message: {
|
|
58
|
-
role: "assistant",
|
|
59
|
-
content: [{ type: "text", text: finalText }],
|
|
60
|
-
stop_reason: "end_turn",
|
|
61
|
-
usage: { input_tokens: 100, output_tokens: 50 },
|
|
62
|
-
},
|
|
63
|
-
}),
|
|
64
|
-
].join("\n") + "\n";
|
|
65
|
-
fs.mkdirSync(resolve(path, ".."), { recursive: true });
|
|
66
|
-
fs.writeFileSync(path, lines, "utf-8");
|
|
67
|
-
}
|
|
68
|
-
|
|
69
|
-
describe("async-agent-watcher (Stage 2)", () => {
|
|
70
|
-
it("registers a pending agent and persists it to disk", async () => {
|
|
71
|
-
const mod = await import("../src/services/async-agent-watcher.js");
|
|
72
|
-
mod.registerPendingAgent({
|
|
73
|
-
agentId: "abc-1",
|
|
74
|
-
outputFile: `${TEST_DATA_DIR}/out-abc-1.jsonl`,
|
|
75
|
-
description: "Test SEO audit",
|
|
76
|
-
prompt: "do a test",
|
|
77
|
-
chatId: 42,
|
|
78
|
-
userId: 42,
|
|
79
|
-
toolUseId: "toolu_1",
|
|
80
|
-
});
|
|
81
|
-
const stateFile = `${TEST_DATA_DIR}/state/async-agents.json`;
|
|
82
|
-
expect(fs.existsSync(stateFile)).toBe(true);
|
|
83
|
-
const parsed = JSON.parse(fs.readFileSync(stateFile, "utf-8"));
|
|
84
|
-
expect(parsed).toHaveLength(1);
|
|
85
|
-
expect(parsed[0].agentId).toBe("abc-1");
|
|
86
|
-
expect(parsed[0].description).toBe("Test SEO audit");
|
|
87
|
-
});
|
|
88
|
-
|
|
89
|
-
it("delivers a pending agent when its outputFile completes", async () => {
|
|
90
|
-
const mod = await import("../src/services/async-agent-watcher.js");
|
|
91
|
-
const outPath = `${TEST_DATA_DIR}/out-abc-2.jsonl`;
|
|
92
|
-
mod.registerPendingAgent({
|
|
93
|
-
agentId: "abc-2",
|
|
94
|
-
outputFile: outPath,
|
|
95
|
-
description: "quick task",
|
|
96
|
-
prompt: "p",
|
|
97
|
-
chatId: 42,
|
|
98
|
-
userId: 42,
|
|
99
|
-
toolUseId: null,
|
|
100
|
-
});
|
|
101
|
-
writeCompletedJsonl(outPath, "Here is the report");
|
|
102
|
-
|
|
103
|
-
await mod.pollOnce();
|
|
104
|
-
|
|
105
|
-
expect(delivered).toHaveLength(1);
|
|
106
|
-
expect(delivered[0].info.name).toBe("quick task");
|
|
107
|
-
expect(delivered[0].result.output).toContain("Here is the report");
|
|
108
|
-
expect(delivered[0].result.status).toBe("completed");
|
|
109
|
-
});
|
|
110
|
-
|
|
111
|
-
it("removes a pending agent from persistence after delivery", async () => {
|
|
112
|
-
const mod = await import("../src/services/async-agent-watcher.js");
|
|
113
|
-
const outPath = `${TEST_DATA_DIR}/out-abc-3.jsonl`;
|
|
114
|
-
mod.registerPendingAgent({
|
|
115
|
-
agentId: "abc-3",
|
|
116
|
-
outputFile: outPath,
|
|
117
|
-
description: "cleanup test",
|
|
118
|
-
prompt: "p",
|
|
119
|
-
chatId: 42,
|
|
120
|
-
userId: 42,
|
|
121
|
-
toolUseId: null,
|
|
122
|
-
});
|
|
123
|
-
writeCompletedJsonl(outPath, "done");
|
|
124
|
-
await mod.pollOnce();
|
|
125
|
-
|
|
126
|
-
const stateFile = `${TEST_DATA_DIR}/state/async-agents.json`;
|
|
127
|
-
const state = JSON.parse(fs.readFileSync(stateFile, "utf-8"));
|
|
128
|
-
expect(state).toHaveLength(0);
|
|
129
|
-
});
|
|
130
|
-
|
|
131
|
-
it("loads pending agents from disk at startup (bot restart catchup)", async () => {
|
|
132
|
-
fs.mkdirSync(`${TEST_DATA_DIR}/state`, { recursive: true });
|
|
133
|
-
const outPath = `${TEST_DATA_DIR}/out-preexisting.jsonl`;
|
|
134
|
-
fs.writeFileSync(
|
|
135
|
-
`${TEST_DATA_DIR}/state/async-agents.json`,
|
|
136
|
-
JSON.stringify([
|
|
137
|
-
{
|
|
138
|
-
agentId: "preexisting",
|
|
139
|
-
outputFile: outPath,
|
|
140
|
-
description: "Survived restart",
|
|
141
|
-
prompt: "p",
|
|
142
|
-
chatId: 42,
|
|
143
|
-
userId: 42,
|
|
144
|
-
startedAt: Date.now() - 5000,
|
|
145
|
-
lastCheckedAt: Date.now() - 1000,
|
|
146
|
-
giveUpAt: Date.now() + 86_400_000,
|
|
147
|
-
toolUseId: null,
|
|
148
|
-
},
|
|
149
|
-
]),
|
|
150
|
-
);
|
|
151
|
-
writeCompletedJsonl(outPath, "result from earlier session");
|
|
152
|
-
|
|
153
|
-
const mod = await import("../src/services/async-agent-watcher.js");
|
|
154
|
-
mod.startWatcher();
|
|
155
|
-
await mod.pollOnce();
|
|
156
|
-
|
|
157
|
-
expect(delivered).toHaveLength(1);
|
|
158
|
-
expect(delivered[0].info.name).toBe("Survived restart");
|
|
159
|
-
expect(delivered[0].result.output).toContain("result from earlier session");
|
|
160
|
-
});
|
|
161
|
-
|
|
162
|
-
it("gives up on agents older than giveUpAt and delivers a timeout banner", async () => {
|
|
163
|
-
const mod = await import("../src/services/async-agent-watcher.js");
|
|
164
|
-
const outPath = `${TEST_DATA_DIR}/out-timeout.jsonl`;
|
|
165
|
-
mod.registerPendingAgent({
|
|
166
|
-
agentId: "abc-4",
|
|
167
|
-
outputFile: outPath,
|
|
168
|
-
description: "forever task",
|
|
169
|
-
prompt: "p",
|
|
170
|
-
chatId: 42,
|
|
171
|
-
userId: 42,
|
|
172
|
-
toolUseId: null,
|
|
173
|
-
giveUpAt: Date.now() - 1000,
|
|
174
|
-
});
|
|
175
|
-
// File never exists
|
|
176
|
-
await mod.pollOnce();
|
|
177
|
-
|
|
178
|
-
expect(delivered).toHaveLength(1);
|
|
179
|
-
expect(delivered[0].result.status).toBe("timeout");
|
|
180
|
-
expect(delivered[0].info.status).toBe("timeout");
|
|
181
|
-
});
|
|
182
|
-
|
|
183
|
-
it("multiple concurrent pending agents all get delivered as they complete", async () => {
|
|
184
|
-
const mod = await import("../src/services/async-agent-watcher.js");
|
|
185
|
-
const outA = `${TEST_DATA_DIR}/out-a.jsonl`;
|
|
186
|
-
const outB = `${TEST_DATA_DIR}/out-b.jsonl`;
|
|
187
|
-
const outC = `${TEST_DATA_DIR}/out-c.jsonl`;
|
|
188
|
-
mod.registerPendingAgent({
|
|
189
|
-
agentId: "a", outputFile: outA, description: "A",
|
|
190
|
-
prompt: "p", chatId: 1, userId: 1, toolUseId: null,
|
|
191
|
-
});
|
|
192
|
-
mod.registerPendingAgent({
|
|
193
|
-
agentId: "b", outputFile: outB, description: "B",
|
|
194
|
-
prompt: "p", chatId: 2, userId: 2, toolUseId: null,
|
|
195
|
-
});
|
|
196
|
-
mod.registerPendingAgent({
|
|
197
|
-
agentId: "c", outputFile: outC, description: "C",
|
|
198
|
-
prompt: "p", chatId: 3, userId: 3, toolUseId: null,
|
|
199
|
-
});
|
|
200
|
-
|
|
201
|
-
writeCompletedJsonl(outA, "A done");
|
|
202
|
-
writeCompletedJsonl(outB, "B done");
|
|
203
|
-
// C still pending
|
|
204
|
-
|
|
205
|
-
await mod.pollOnce();
|
|
206
|
-
expect(delivered).toHaveLength(2);
|
|
207
|
-
expect(delivered.map((d) => d.info.name).sort()).toEqual(["A", "B"]);
|
|
208
|
-
|
|
209
|
-
writeCompletedJsonl(outC, "C done");
|
|
210
|
-
await mod.pollOnce();
|
|
211
|
-
expect(delivered).toHaveLength(3);
|
|
212
|
-
});
|
|
213
|
-
|
|
214
|
-
it("listPendingAgents reflects in-memory state", async () => {
|
|
215
|
-
const mod = await import("../src/services/async-agent-watcher.js");
|
|
216
|
-
expect(mod.listPendingAgents()).toEqual([]);
|
|
217
|
-
mod.registerPendingAgent({
|
|
218
|
-
agentId: "x",
|
|
219
|
-
outputFile: `${TEST_DATA_DIR}/out-x.jsonl`,
|
|
220
|
-
description: "test",
|
|
221
|
-
prompt: "p",
|
|
222
|
-
chatId: 1,
|
|
223
|
-
userId: 1,
|
|
224
|
-
toolUseId: null,
|
|
225
|
-
});
|
|
226
|
-
expect(mod.listPendingAgents()).toHaveLength(1);
|
|
227
|
-
expect(mod.listPendingAgents()[0].agentId).toBe("x");
|
|
228
|
-
});
|
|
229
|
-
});
|
|
@@ -1,443 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* v4.12.3 — End-to-end integration test for the background-agent bypass
|
|
3
|
-
* path. Simulates the following scenario:
|
|
4
|
-
*
|
|
5
|
-
* 1. User sends a message that causes Claude to launch an async Agent
|
|
6
|
-
* 2. While the SDK's CLI subprocess idles waiting for the
|
|
7
|
-
* task-notification, user sends a NEW message
|
|
8
|
-
* 3. The handler recognises the pending background state and:
|
|
9
|
-
* a. Aborts the blocked query
|
|
10
|
-
* b. Bypasses SDK resume for the new query (sessionId=null)
|
|
11
|
-
* c. Injects bridge preamble with history
|
|
12
|
-
* 4. The watcher delivers the background result via
|
|
13
|
-
* subagent-delivery.ts as a separate message
|
|
14
|
-
* 5. After delivery, pendingBackgroundCount returns to 0 and future
|
|
15
|
-
* queries use normal SDK resume again
|
|
16
|
-
*
|
|
17
|
-
* The full handler is too tightly coupled to grammy to unit-test end
|
|
18
|
-
* to end. Instead we exercise each layer directly:
|
|
19
|
-
* - session.pendingBackgroundCount updates (counter wiring)
|
|
20
|
-
* - shouldBypassQueue / shouldBypassSdkResume decision points
|
|
21
|
-
* - watcher delivery → counter decrement
|
|
22
|
-
* - abort + wait path
|
|
23
|
-
*/
|
|
24
|
-
import { describe, it, expect, beforeEach, afterEach, vi } from "vitest";
|
|
25
|
-
import fs from "fs";
|
|
26
|
-
import os from "os";
|
|
27
|
-
import { resolve } from "path";
|
|
28
|
-
|
|
29
|
-
const TEST_DATA_DIR = resolve(
|
|
30
|
-
os.tmpdir(),
|
|
31
|
-
`alvin-bypass-int-${process.pid}-${Date.now()}`,
|
|
32
|
-
);
|
|
33
|
-
|
|
34
|
-
beforeEach(async () => {
|
|
35
|
-
if (fs.existsSync(TEST_DATA_DIR)) {
|
|
36
|
-
fs.rmSync(TEST_DATA_DIR, { recursive: true, force: true });
|
|
37
|
-
}
|
|
38
|
-
fs.mkdirSync(TEST_DATA_DIR, { recursive: true });
|
|
39
|
-
process.env.ALVIN_DATA_DIR = TEST_DATA_DIR;
|
|
40
|
-
vi.resetModules();
|
|
41
|
-
vi.doMock("../src/services/subagent-delivery.js", () => ({
|
|
42
|
-
deliverSubAgentResult: async () => {},
|
|
43
|
-
attachBotApi: () => {},
|
|
44
|
-
__setBotApiForTest: () => {},
|
|
45
|
-
}));
|
|
46
|
-
});
|
|
47
|
-
|
|
48
|
-
afterEach(async () => {
|
|
49
|
-
try {
|
|
50
|
-
const mod = await import("../src/services/async-agent-watcher.js");
|
|
51
|
-
mod.stopWatcher();
|
|
52
|
-
mod.__resetForTest();
|
|
53
|
-
} catch {
|
|
54
|
-
/* ignore */
|
|
55
|
-
}
|
|
56
|
-
});
|
|
57
|
-
|
|
58
|
-
function writeCompletedJsonl(path: string, finalText: string): void {
|
|
59
|
-
const lines =
|
|
60
|
-
[
|
|
61
|
-
JSON.stringify({
|
|
62
|
-
type: "user",
|
|
63
|
-
isSidechain: true,
|
|
64
|
-
agentId: "x",
|
|
65
|
-
message: { role: "user", content: "do it" },
|
|
66
|
-
}),
|
|
67
|
-
JSON.stringify({
|
|
68
|
-
type: "assistant",
|
|
69
|
-
isSidechain: true,
|
|
70
|
-
agentId: "x",
|
|
71
|
-
message: {
|
|
72
|
-
role: "assistant",
|
|
73
|
-
content: [{ type: "text", text: finalText }],
|
|
74
|
-
stop_reason: "end_turn",
|
|
75
|
-
usage: { input_tokens: 10, output_tokens: 5 },
|
|
76
|
-
},
|
|
77
|
-
}),
|
|
78
|
-
].join("\n") + "\n";
|
|
79
|
-
fs.mkdirSync(resolve(path, ".."), { recursive: true });
|
|
80
|
-
fs.writeFileSync(path, lines, "utf-8");
|
|
81
|
-
}
|
|
82
|
-
|
|
83
|
-
describe("v4.12.3 background-bypass end-to-end", () => {
|
|
84
|
-
it(
|
|
85
|
-
"full scenario: async launch → counter incremented → new message triggers bypass → " +
|
|
86
|
-
"watcher delivery → counter decremented",
|
|
87
|
-
async () => {
|
|
88
|
-
const { getSession } = await import("../src/services/session.js");
|
|
89
|
-
const { handleToolResultChunk } = await import(
|
|
90
|
-
"../src/handlers/async-agent-chunk-handler.js"
|
|
91
|
-
);
|
|
92
|
-
const watcher = await import("../src/services/async-agent-watcher.js");
|
|
93
|
-
const {
|
|
94
|
-
shouldBypassQueue,
|
|
95
|
-
shouldBypassSdkResume,
|
|
96
|
-
} = await import("../src/handlers/background-bypass.js");
|
|
97
|
-
|
|
98
|
-
const sessionKey = "int-session-1";
|
|
99
|
-
const session = getSession(sessionKey);
|
|
100
|
-
expect(session.pendingBackgroundCount).toBe(0);
|
|
101
|
-
|
|
102
|
-
// === Step 1: simulate the tool_result chunk for an async launch ===
|
|
103
|
-
const outPath = `${TEST_DATA_DIR}/int-out.jsonl`;
|
|
104
|
-
handleToolResultChunk(
|
|
105
|
-
{
|
|
106
|
-
type: "tool_result",
|
|
107
|
-
toolUseId: "toolu_int",
|
|
108
|
-
toolResultContent:
|
|
109
|
-
"Async agent launched successfully.\n" +
|
|
110
|
-
"agentId: int-agent\n" +
|
|
111
|
-
`output_file: ${outPath}\n`,
|
|
112
|
-
},
|
|
113
|
-
{
|
|
114
|
-
chatId: 42,
|
|
115
|
-
userId: 42,
|
|
116
|
-
sessionKey,
|
|
117
|
-
lastToolUseInput: {
|
|
118
|
-
description: "Research Higgsfield",
|
|
119
|
-
prompt: "do deep research",
|
|
120
|
-
},
|
|
121
|
-
},
|
|
122
|
-
);
|
|
123
|
-
|
|
124
|
-
// === Step 2: counter should have been incremented ===
|
|
125
|
-
expect(session.pendingBackgroundCount).toBe(1);
|
|
126
|
-
|
|
127
|
-
// === Step 3: simulate the handler noticing isProcessing=true AND
|
|
128
|
-
// background pending. shouldBypassQueue must return true so it knows
|
|
129
|
-
// to abort-and-replace instead of queueing. ===
|
|
130
|
-
session.isProcessing = true;
|
|
131
|
-
session.abortController = new AbortController();
|
|
132
|
-
expect(
|
|
133
|
-
shouldBypassQueue({
|
|
134
|
-
isProcessing: session.isProcessing,
|
|
135
|
-
pendingBackgroundCount: session.pendingBackgroundCount,
|
|
136
|
-
abortController: session.abortController,
|
|
137
|
-
}),
|
|
138
|
-
).toBe(true);
|
|
139
|
-
|
|
140
|
-
// === Step 4: shouldBypassSdkResume must return true so the fresh
|
|
141
|
-
// query uses sessionId=null ===
|
|
142
|
-
expect(
|
|
143
|
-
shouldBypassSdkResume({
|
|
144
|
-
pendingBackgroundCount: session.pendingBackgroundCount,
|
|
145
|
-
}),
|
|
146
|
-
).toBe(true);
|
|
147
|
-
|
|
148
|
-
// === Step 5: simulate the watcher delivering the background result ===
|
|
149
|
-
writeCompletedJsonl(outPath, "Higgsfield research complete");
|
|
150
|
-
await watcher.pollOnce();
|
|
151
|
-
|
|
152
|
-
// === Step 6: counter should now be 0 again ===
|
|
153
|
-
expect(session.pendingBackgroundCount).toBe(0);
|
|
154
|
-
|
|
155
|
-
// === Step 7: subsequent queries should NOT bypass resume anymore ===
|
|
156
|
-
expect(
|
|
157
|
-
shouldBypassSdkResume({
|
|
158
|
-
pendingBackgroundCount: session.pendingBackgroundCount,
|
|
159
|
-
}),
|
|
160
|
-
).toBe(false);
|
|
161
|
-
},
|
|
162
|
-
);
|
|
163
|
-
|
|
164
|
-
it(
|
|
165
|
-
"stress: 5 parallel background agents launched in one turn, " +
|
|
166
|
-
"counter reflects all of them, all decrement on delivery",
|
|
167
|
-
async () => {
|
|
168
|
-
const { getSession } = await import("../src/services/session.js");
|
|
169
|
-
const { handleToolResultChunk } = await import(
|
|
170
|
-
"../src/handlers/async-agent-chunk-handler.js"
|
|
171
|
-
);
|
|
172
|
-
const watcher = await import("../src/services/async-agent-watcher.js");
|
|
173
|
-
|
|
174
|
-
const sessionKey = "stress-session-5";
|
|
175
|
-
const session = getSession(sessionKey);
|
|
176
|
-
session.pendingBackgroundCount = 0;
|
|
177
|
-
|
|
178
|
-
const outPaths: string[] = [];
|
|
179
|
-
for (let i = 0; i < 5; i++) {
|
|
180
|
-
const outPath = `${TEST_DATA_DIR}/stress-${i}.jsonl`;
|
|
181
|
-
outPaths.push(outPath);
|
|
182
|
-
handleToolResultChunk(
|
|
183
|
-
{
|
|
184
|
-
type: "tool_result",
|
|
185
|
-
toolUseId: `toolu_stress_${i}`,
|
|
186
|
-
toolResultContent:
|
|
187
|
-
"Async agent launched successfully.\n" +
|
|
188
|
-
`agentId: stress-${i}\n` +
|
|
189
|
-
`output_file: ${outPath}\n`,
|
|
190
|
-
},
|
|
191
|
-
{
|
|
192
|
-
chatId: 42,
|
|
193
|
-
userId: 42,
|
|
194
|
-
sessionKey,
|
|
195
|
-
lastToolUseInput: {
|
|
196
|
-
description: `task ${i}`,
|
|
197
|
-
prompt: "p",
|
|
198
|
-
},
|
|
199
|
-
},
|
|
200
|
-
);
|
|
201
|
-
}
|
|
202
|
-
expect(session.pendingBackgroundCount).toBe(5);
|
|
203
|
-
|
|
204
|
-
// Deliver 3 of them
|
|
205
|
-
for (let i = 0; i < 3; i++) {
|
|
206
|
-
writeCompletedJsonl(outPaths[i], `result ${i}`);
|
|
207
|
-
}
|
|
208
|
-
await watcher.pollOnce();
|
|
209
|
-
expect(session.pendingBackgroundCount).toBe(2);
|
|
210
|
-
|
|
211
|
-
// Deliver the last 2
|
|
212
|
-
writeCompletedJsonl(outPaths[3], "result 3");
|
|
213
|
-
writeCompletedJsonl(outPaths[4], "result 4");
|
|
214
|
-
await watcher.pollOnce();
|
|
215
|
-
expect(session.pendingBackgroundCount).toBe(0);
|
|
216
|
-
},
|
|
217
|
-
);
|
|
218
|
-
|
|
219
|
-
it(
|
|
220
|
-
"stress: agents from DIFFERENT sessions do not interfere with each other",
|
|
221
|
-
async () => {
|
|
222
|
-
const { getSession } = await import("../src/services/session.js");
|
|
223
|
-
const { handleToolResultChunk } = await import(
|
|
224
|
-
"../src/handlers/async-agent-chunk-handler.js"
|
|
225
|
-
);
|
|
226
|
-
const watcher = await import("../src/services/async-agent-watcher.js");
|
|
227
|
-
|
|
228
|
-
const sessionA = getSession("stress-iso-a");
|
|
229
|
-
const sessionB = getSession("stress-iso-b");
|
|
230
|
-
const sessionC = getSession("stress-iso-c");
|
|
231
|
-
sessionA.pendingBackgroundCount = 0;
|
|
232
|
-
sessionB.pendingBackgroundCount = 0;
|
|
233
|
-
sessionC.pendingBackgroundCount = 0;
|
|
234
|
-
|
|
235
|
-
// Session A launches 2 agents
|
|
236
|
-
for (const i of [0, 1]) {
|
|
237
|
-
const p = `${TEST_DATA_DIR}/iso-a-${i}.jsonl`;
|
|
238
|
-
handleToolResultChunk(
|
|
239
|
-
{
|
|
240
|
-
type: "tool_result",
|
|
241
|
-
toolUseId: `a${i}`,
|
|
242
|
-
toolResultContent:
|
|
243
|
-
`Async agent launched successfully.\n` +
|
|
244
|
-
`agentId: iso-a-${i}\n` +
|
|
245
|
-
`output_file: ${p}\n`,
|
|
246
|
-
},
|
|
247
|
-
{
|
|
248
|
-
chatId: 1,
|
|
249
|
-
userId: 1,
|
|
250
|
-
sessionKey: "stress-iso-a",
|
|
251
|
-
lastToolUseInput: { description: "a", prompt: "p" },
|
|
252
|
-
},
|
|
253
|
-
);
|
|
254
|
-
}
|
|
255
|
-
// Session B launches 1
|
|
256
|
-
handleToolResultChunk(
|
|
257
|
-
{
|
|
258
|
-
type: "tool_result",
|
|
259
|
-
toolUseId: "b0",
|
|
260
|
-
toolResultContent:
|
|
261
|
-
"Async agent launched successfully.\n" +
|
|
262
|
-
"agentId: iso-b-0\n" +
|
|
263
|
-
`output_file: ${TEST_DATA_DIR}/iso-b-0.jsonl\n`,
|
|
264
|
-
},
|
|
265
|
-
{
|
|
266
|
-
chatId: 2,
|
|
267
|
-
userId: 2,
|
|
268
|
-
sessionKey: "stress-iso-b",
|
|
269
|
-
lastToolUseInput: { description: "b", prompt: "p" },
|
|
270
|
-
},
|
|
271
|
-
);
|
|
272
|
-
// Session C launches 0
|
|
273
|
-
|
|
274
|
-
expect(sessionA.pendingBackgroundCount).toBe(2);
|
|
275
|
-
expect(sessionB.pendingBackgroundCount).toBe(1);
|
|
276
|
-
expect(sessionC.pendingBackgroundCount).toBe(0);
|
|
277
|
-
|
|
278
|
-
// Complete only A's agents
|
|
279
|
-
writeCompletedJsonl(`${TEST_DATA_DIR}/iso-a-0.jsonl`, "a0 done");
|
|
280
|
-
writeCompletedJsonl(`${TEST_DATA_DIR}/iso-a-1.jsonl`, "a1 done");
|
|
281
|
-
await watcher.pollOnce();
|
|
282
|
-
|
|
283
|
-
// A should be 0, B should still be 1, C unchanged
|
|
284
|
-
expect(sessionA.pendingBackgroundCount).toBe(0);
|
|
285
|
-
expect(sessionB.pendingBackgroundCount).toBe(1);
|
|
286
|
-
expect(sessionC.pendingBackgroundCount).toBe(0);
|
|
287
|
-
|
|
288
|
-
// Complete B's agent
|
|
289
|
-
writeCompletedJsonl(`${TEST_DATA_DIR}/iso-b-0.jsonl`, "b0 done");
|
|
290
|
-
await watcher.pollOnce();
|
|
291
|
-
expect(sessionB.pendingBackgroundCount).toBe(0);
|
|
292
|
-
},
|
|
293
|
-
);
|
|
294
|
-
|
|
295
|
-
it(
|
|
296
|
-
"bypass decision is correct through a full lifecycle: " +
|
|
297
|
-
"no-pending → launch → pending → deliver → no-pending",
|
|
298
|
-
async () => {
|
|
299
|
-
const { getSession } = await import("../src/services/session.js");
|
|
300
|
-
const { handleToolResultChunk } = await import(
|
|
301
|
-
"../src/handlers/async-agent-chunk-handler.js"
|
|
302
|
-
);
|
|
303
|
-
const watcher = await import("../src/services/async-agent-watcher.js");
|
|
304
|
-
const { shouldBypassSdkResume } = await import(
|
|
305
|
-
"../src/handlers/background-bypass.js"
|
|
306
|
-
);
|
|
307
|
-
|
|
308
|
-
const sessionKey = "lifecycle-session";
|
|
309
|
-
const session = getSession(sessionKey);
|
|
310
|
-
session.pendingBackgroundCount = 0;
|
|
311
|
-
|
|
312
|
-
// Initially no bypass
|
|
313
|
-
expect(
|
|
314
|
-
shouldBypassSdkResume({
|
|
315
|
-
pendingBackgroundCount: session.pendingBackgroundCount,
|
|
316
|
-
}),
|
|
317
|
-
).toBe(false);
|
|
318
|
-
|
|
319
|
-
// Launch
|
|
320
|
-
const outPath = `${TEST_DATA_DIR}/lifecycle.jsonl`;
|
|
321
|
-
handleToolResultChunk(
|
|
322
|
-
{
|
|
323
|
-
type: "tool_result",
|
|
324
|
-
toolUseId: "t1",
|
|
325
|
-
toolResultContent:
|
|
326
|
-
"Async agent launched successfully.\n" +
|
|
327
|
-
"agentId: life1\n" +
|
|
328
|
-
`output_file: ${outPath}\n`,
|
|
329
|
-
},
|
|
330
|
-
{
|
|
331
|
-
chatId: 1,
|
|
332
|
-
userId: 1,
|
|
333
|
-
sessionKey,
|
|
334
|
-
lastToolUseInput: { description: "d", prompt: "p" },
|
|
335
|
-
},
|
|
336
|
-
);
|
|
337
|
-
|
|
338
|
-
// Now bypass
|
|
339
|
-
expect(
|
|
340
|
-
shouldBypassSdkResume({
|
|
341
|
-
pendingBackgroundCount: session.pendingBackgroundCount,
|
|
342
|
-
}),
|
|
343
|
-
).toBe(true);
|
|
344
|
-
|
|
345
|
-
// Deliver
|
|
346
|
-
writeCompletedJsonl(outPath, "life done");
|
|
347
|
-
await watcher.pollOnce();
|
|
348
|
-
|
|
349
|
-
// Back to no bypass
|
|
350
|
-
expect(
|
|
351
|
-
shouldBypassSdkResume({
|
|
352
|
-
pendingBackgroundCount: session.pendingBackgroundCount,
|
|
353
|
-
}),
|
|
354
|
-
).toBe(false);
|
|
355
|
-
},
|
|
356
|
-
);
|
|
357
|
-
|
|
358
|
-
it(
|
|
359
|
-
"stress: rapid launch+deliver+launch cycle (10 iterations) — " +
|
|
360
|
-
"counter stays consistent, no drift, no negatives",
|
|
361
|
-
async () => {
|
|
362
|
-
const { getSession } = await import("../src/services/session.js");
|
|
363
|
-
const { handleToolResultChunk } = await import(
|
|
364
|
-
"../src/handlers/async-agent-chunk-handler.js"
|
|
365
|
-
);
|
|
366
|
-
const watcher = await import("../src/services/async-agent-watcher.js");
|
|
367
|
-
|
|
368
|
-
const sessionKey = "churn-session";
|
|
369
|
-
const session = getSession(sessionKey);
|
|
370
|
-
session.pendingBackgroundCount = 0;
|
|
371
|
-
|
|
372
|
-
for (let i = 0; i < 10; i++) {
|
|
373
|
-
const outPath = `${TEST_DATA_DIR}/churn-${i}.jsonl`;
|
|
374
|
-
handleToolResultChunk(
|
|
375
|
-
{
|
|
376
|
-
type: "tool_result",
|
|
377
|
-
toolUseId: `churn_${i}`,
|
|
378
|
-
toolResultContent:
|
|
379
|
-
"Async agent launched successfully.\n" +
|
|
380
|
-
`agentId: churn-${i}\n` +
|
|
381
|
-
`output_file: ${outPath}\n`,
|
|
382
|
-
},
|
|
383
|
-
{
|
|
384
|
-
chatId: 1,
|
|
385
|
-
userId: 1,
|
|
386
|
-
sessionKey,
|
|
387
|
-
lastToolUseInput: { description: `c${i}`, prompt: "p" },
|
|
388
|
-
},
|
|
389
|
-
);
|
|
390
|
-
expect(session.pendingBackgroundCount).toBe(1);
|
|
391
|
-
|
|
392
|
-
writeCompletedJsonl(outPath, `c${i}`);
|
|
393
|
-
await watcher.pollOnce();
|
|
394
|
-
expect(session.pendingBackgroundCount).toBe(0);
|
|
395
|
-
}
|
|
396
|
-
},
|
|
397
|
-
);
|
|
398
|
-
|
|
399
|
-
it(
|
|
400
|
-
"watcher decrement is robust against session being reset mid-flight",
|
|
401
|
-
async () => {
|
|
402
|
-
const { getSession, resetSession } = await import(
|
|
403
|
-
"../src/services/session.js"
|
|
404
|
-
);
|
|
405
|
-
const { handleToolResultChunk } = await import(
|
|
406
|
-
"../src/handlers/async-agent-chunk-handler.js"
|
|
407
|
-
);
|
|
408
|
-
const watcher = await import("../src/services/async-agent-watcher.js");
|
|
409
|
-
|
|
410
|
-
const sessionKey = "reset-session";
|
|
411
|
-
const session = getSession(sessionKey);
|
|
412
|
-
session.pendingBackgroundCount = 0;
|
|
413
|
-
|
|
414
|
-
const outPath = `${TEST_DATA_DIR}/reset.jsonl`;
|
|
415
|
-
handleToolResultChunk(
|
|
416
|
-
{
|
|
417
|
-
type: "tool_result",
|
|
418
|
-
toolUseId: "t1",
|
|
419
|
-
toolResultContent:
|
|
420
|
-
"Async agent launched successfully.\n" +
|
|
421
|
-
"agentId: reset1\n" +
|
|
422
|
-
`output_file: ${outPath}\n`,
|
|
423
|
-
},
|
|
424
|
-
{
|
|
425
|
-
chatId: 1,
|
|
426
|
-
userId: 1,
|
|
427
|
-
sessionKey,
|
|
428
|
-
lastToolUseInput: { description: "d", prompt: "p" },
|
|
429
|
-
},
|
|
430
|
-
);
|
|
431
|
-
expect(session.pendingBackgroundCount).toBe(1);
|
|
432
|
-
|
|
433
|
-
// Simulate /new during background task
|
|
434
|
-
resetSession(sessionKey);
|
|
435
|
-
expect(session.pendingBackgroundCount).toBe(0);
|
|
436
|
-
|
|
437
|
-
writeCompletedJsonl(outPath, "done");
|
|
438
|
-
// Delivery should not crash, counter stays at 0 (Math.max clamp)
|
|
439
|
-
await expect(watcher.pollOnce()).resolves.not.toThrow();
|
|
440
|
-
expect(session.pendingBackgroundCount).toBe(0);
|
|
441
|
-
},
|
|
442
|
-
);
|
|
443
|
-
});
|