alvin-bot 4.18.0 → 4.18.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. package/AEC-PLUGINS-SOURCES.md +53 -0
  2. package/CHANGELOG.md +37 -2
  3. package/DESIGN-SKILLS-SOURCES.md +81 -0
  4. package/bin/cli.js +1 -1
  5. package/dist/providers/claude-sdk-provider.js +24 -0
  6. package/package.json +3 -1
  7. package/test/allowed-users-gate.test.ts +0 -98
  8. package/test/alvin-dispatch.test.ts +0 -220
  9. package/test/async-agent-chunk-flow.test.ts +0 -244
  10. package/test/async-agent-parser-staleness.test.ts +0 -412
  11. package/test/async-agent-parser-streamjson.test.ts +0 -273
  12. package/test/async-agent-parser.test.ts +0 -322
  13. package/test/async-agent-watcher.test.ts +0 -229
  14. package/test/background-bypass-integration.test.ts +0 -443
  15. package/test/background-bypass-stress.test.ts +0 -417
  16. package/test/background-bypass.test.ts +0 -127
  17. package/test/browser-webfetch.test.ts +0 -121
  18. package/test/claude-sdk-provider.test.ts +0 -115
  19. package/test/claude-sdk-tool-use-id.test.ts +0 -180
  20. package/test/console-timestamps.test.ts +0 -98
  21. package/test/cron-progress-ticker.test.ts +0 -76
  22. package/test/cron-restart-resilience.test.ts +0 -191
  23. package/test/cron-run-resolver.test.ts +0 -133
  24. package/test/cron-runjobnow-throw.test.ts +0 -100
  25. package/test/debounce.test.ts +0 -60
  26. package/test/delivery-registry.test.ts +0 -71
  27. package/test/exec-guard-metachars.test.ts +0 -110
  28. package/test/file-permissions.test.ts +0 -130
  29. package/test/i18n.test.ts +0 -108
  30. package/test/list-subagents-merged.test.ts +0 -172
  31. package/test/memory-extractor.test.ts +0 -151
  32. package/test/memory-layers.test.ts +0 -169
  33. package/test/memory-sdk-injection.test.ts +0 -146
  34. package/test/memory-stress-restart.test.ts +0 -337
  35. package/test/multi-session-stress.test.ts +0 -255
  36. package/test/platform-session-key.test.ts +0 -69
  37. package/test/process-manager.test.ts +0 -186
  38. package/test/registry.test.ts +0 -201
  39. package/test/session-pending-background.test.ts +0 -59
  40. package/test/session-persistence.test.ts +0 -195
  41. package/test/slack-progress-ticker.test.ts +0 -123
  42. package/test/slack-slash-command.test.ts +0 -61
  43. package/test/slack-test-connection.test.ts +0 -176
  44. package/test/stress-scenarios.test.ts +0 -356
  45. package/test/stuck-timer.test.ts +0 -116
  46. package/test/subagent-delivery-markdown-fallback.test.ts +0 -147
  47. package/test/subagent-delivery-platform-routing.test.ts +0 -232
  48. package/test/subagent-delivery.test.ts +0 -273
  49. package/test/subagent-final-text.test.ts +0 -132
  50. package/test/subagent-stats.test.ts +0 -119
  51. package/test/subagent-toolset-allowlist.test.ts +0 -146
  52. package/test/subagents-commands.test.ts +0 -64
  53. package/test/subagents-config.test.ts +0 -114
  54. package/test/subagents-depth.test.ts +0 -58
  55. package/test/subagents-inheritance.test.ts +0 -67
  56. package/test/subagents-name-resolver.test.ts +0 -122
  57. package/test/subagents-priority-reject.test.ts +0 -88
  58. package/test/subagents-queue.test.ts +0 -127
  59. package/test/subagents-shutdown.test.ts +0 -126
  60. package/test/subagents-toolset.test.ts +0 -71
  61. package/test/sync-task-timeout.test.ts +0 -153
  62. package/test/system-prompt-background-hint.test.ts +0 -65
  63. package/test/telegram-error-filter.test.ts +0 -85
  64. package/test/telegram-workspace-command.test.ts +0 -78
  65. package/test/timing-safe-bearer.test.ts +0 -65
  66. package/test/watchdog-brake.test.ts +0 -157
  67. package/test/watcher-pending-count.test.ts +0 -228
  68. package/test/watcher-zombie-fix.test.ts +0 -252
  69. package/test/web-server-integration.test.ts +0 -189
  70. package/test/web-server-resilience.test.ts +0 -118
  71. package/test/web-server-shutdown.test.ts +0 -117
  72. package/test/whatsapp-auth-resilience.test.ts +0 -96
  73. package/test/workspaces.test.ts +0 -196
  74. package/vitest.config.ts +0 -17
@@ -1,244 +0,0 @@
1
- /**
2
- * Fix #17 (Stage 2) — when the SDK yields a tool_result chunk with an
3
- * "Async agent launched successfully" payload, the message handler
4
- * must register the pending agent with the watcher.
5
- *
6
- * This tests the helper `handleToolResultChunk` in isolation —
7
- * the integration with message.ts is covered by the live e2e test.
8
- */
9
- import { describe, it, expect, beforeEach, vi } from "vitest";
10
-
11
- describe("async agent chunk flow (Stage 2)", () => {
12
- beforeEach(() => vi.resetModules());
13
-
14
- it("tool_result with async_launched gets registered with the watcher", async () => {
15
- const registered: unknown[] = [];
16
- vi.doMock("../src/services/async-agent-watcher.js", () => ({
17
- registerPendingAgent: (input: unknown) => registered.push(input),
18
- startWatcher: () => {},
19
- stopWatcher: () => {},
20
- pollOnce: async () => {},
21
- listPendingAgents: () => [],
22
- }));
23
-
24
- const { handleToolResultChunk } = await import(
25
- "../src/handlers/async-agent-chunk-handler.js"
26
- );
27
-
28
- const chunk = {
29
- type: "tool_result" as const,
30
- toolUseId: "toolu_1",
31
- toolResultContent:
32
- "Async agent launched successfully.\n" +
33
- "agentId: abc-1 (something)\n" +
34
- "output_file: /tmp/out-abc-1.jsonl\n" +
35
- "If asked, you can check progress.",
36
- };
37
- handleToolResultChunk(chunk, {
38
- chatId: 42,
39
- userId: 99,
40
- lastToolUseInput: {
41
- description: "SEO audit",
42
- prompt: "audit example.com",
43
- },
44
- });
45
-
46
- expect(registered).toHaveLength(1);
47
- const r = registered[0] as { agentId: string; description: string; outputFile: string };
48
- expect(r.agentId).toBe("abc-1");
49
- expect(r.description).toBe("SEO audit");
50
- expect(r.outputFile).toBe("/tmp/out-abc-1.jsonl");
51
- });
52
-
53
- it("v4.12.3 — passes sessionKey to registerPendingAgent and increments session counter", async () => {
54
- const registered: Array<{ sessionKey?: string }> = [];
55
- vi.doMock("../src/services/async-agent-watcher.js", () => ({
56
- registerPendingAgent: (input: { sessionKey?: string }) =>
57
- registered.push(input),
58
- startWatcher: () => {},
59
- stopWatcher: () => {},
60
- pollOnce: async () => {},
61
- listPendingAgents: () => [],
62
- }));
63
-
64
- const { getSession } = await import("../src/services/session.js");
65
- const session = getSession("v412-chunk-test-session");
66
- session.pendingBackgroundCount = 0;
67
-
68
- const { handleToolResultChunk } = await import(
69
- "../src/handlers/async-agent-chunk-handler.js"
70
- );
71
- handleToolResultChunk(
72
- {
73
- type: "tool_result",
74
- toolUseId: "toolu_sess",
75
- toolResultContent:
76
- "Async agent launched successfully.\n" +
77
- "agentId: ag-sess\n" +
78
- "output_file: /tmp/ag-sess.jsonl\n",
79
- },
80
- {
81
- chatId: 10,
82
- userId: 20,
83
- sessionKey: "v412-chunk-test-session",
84
- lastToolUseInput: { description: "SEO", prompt: "do it" },
85
- },
86
- );
87
-
88
- expect(registered).toHaveLength(1);
89
- expect(registered[0].sessionKey).toBe("v412-chunk-test-session");
90
- expect(session.pendingBackgroundCount).toBe(1);
91
- });
92
-
93
- it("v4.12.3 — multiple async launches in same turn stack the counter", async () => {
94
- vi.doMock("../src/services/async-agent-watcher.js", () => ({
95
- registerPendingAgent: () => {},
96
- startWatcher: () => {},
97
- stopWatcher: () => {},
98
- pollOnce: async () => {},
99
- listPendingAgents: () => [],
100
- }));
101
-
102
- const { getSession } = await import("../src/services/session.js");
103
- const session = getSession("v412-chunk-stack");
104
- session.pendingBackgroundCount = 0;
105
-
106
- const { handleToolResultChunk } = await import(
107
- "../src/handlers/async-agent-chunk-handler.js"
108
- );
109
-
110
- for (let i = 0; i < 3; i++) {
111
- handleToolResultChunk(
112
- {
113
- type: "tool_result",
114
- toolUseId: `toolu_${i}`,
115
- toolResultContent:
116
- `Async agent launched successfully.\n` +
117
- `agentId: ag-${i}\n` +
118
- `output_file: /tmp/ag-${i}.jsonl\n`,
119
- },
120
- {
121
- chatId: 10,
122
- userId: 20,
123
- sessionKey: "v412-chunk-stack",
124
- lastToolUseInput: { description: `task ${i}`, prompt: "p" },
125
- },
126
- );
127
- }
128
-
129
- expect(session.pendingBackgroundCount).toBe(3);
130
- });
131
-
132
- it("v4.12.3 — non-async tool_result does not increment the counter", async () => {
133
- vi.doMock("../src/services/async-agent-watcher.js", () => ({
134
- registerPendingAgent: () => {
135
- throw new Error("should not be called");
136
- },
137
- startWatcher: () => {},
138
- stopWatcher: () => {},
139
- pollOnce: async () => {},
140
- listPendingAgents: () => [],
141
- }));
142
-
143
- const { getSession } = await import("../src/services/session.js");
144
- const session = getSession("v412-chunk-nonasync");
145
- session.pendingBackgroundCount = 0;
146
-
147
- const { handleToolResultChunk } = await import(
148
- "../src/handlers/async-agent-chunk-handler.js"
149
- );
150
- handleToolResultChunk(
151
- {
152
- type: "tool_result",
153
- toolUseId: "toolu_read",
154
- toolResultContent: "plain read result — no async_launched marker",
155
- },
156
- {
157
- chatId: 1,
158
- userId: 1,
159
- sessionKey: "v412-chunk-nonasync",
160
- lastToolUseInput: { description: "read", prompt: "p" },
161
- },
162
- );
163
- expect(session.pendingBackgroundCount).toBe(0);
164
- });
165
-
166
- it("falls back to a generic description when no toolUseInput is provided", async () => {
167
- const registered: unknown[] = [];
168
- vi.doMock("../src/services/async-agent-watcher.js", () => ({
169
- registerPendingAgent: (input: unknown) => registered.push(input),
170
- startWatcher: () => {},
171
- stopWatcher: () => {},
172
- pollOnce: async () => {},
173
- listPendingAgents: () => [],
174
- }));
175
-
176
- const { handleToolResultChunk } = await import(
177
- "../src/handlers/async-agent-chunk-handler.js"
178
- );
179
-
180
- handleToolResultChunk(
181
- {
182
- type: "tool_result",
183
- toolUseId: "toolu_2",
184
- toolResultContent:
185
- "Async agent launched successfully.\n" +
186
- "agentId: x\n" +
187
- "output_file: /tmp/o\n",
188
- },
189
- { chatId: 42, userId: 99 },
190
- );
191
-
192
- expect(registered).toHaveLength(1);
193
- const r = registered[0] as { description: string };
194
- expect(r.description.length).toBeGreaterThan(0);
195
- });
196
-
197
- it("non-async tool_result (e.g. Read) is ignored", async () => {
198
- const registered: unknown[] = [];
199
- vi.doMock("../src/services/async-agent-watcher.js", () => ({
200
- registerPendingAgent: (input: unknown) => registered.push(input),
201
- startWatcher: () => {},
202
- stopWatcher: () => {},
203
- pollOnce: async () => {},
204
- listPendingAgents: () => [],
205
- }));
206
-
207
- const { handleToolResultChunk } = await import(
208
- "../src/handlers/async-agent-chunk-handler.js"
209
- );
210
-
211
- handleToolResultChunk(
212
- {
213
- type: "tool_result",
214
- toolUseId: "toolu_3",
215
- toolResultContent: "file contents here (plain Read result)",
216
- },
217
- { chatId: 42, userId: 99 },
218
- );
219
- expect(registered).toHaveLength(0);
220
- });
221
-
222
- it("non-tool_result chunks are ignored without throwing", async () => {
223
- vi.doMock("../src/services/async-agent-watcher.js", () => ({
224
- registerPendingAgent: () => {
225
- throw new Error("should not be called");
226
- },
227
- startWatcher: () => {},
228
- stopWatcher: () => {},
229
- pollOnce: async () => {},
230
- listPendingAgents: () => [],
231
- }));
232
-
233
- const { handleToolResultChunk } = await import(
234
- "../src/handlers/async-agent-chunk-handler.js"
235
- );
236
-
237
- expect(() =>
238
- handleToolResultChunk(
239
- { type: "text", text: "hi" },
240
- { chatId: 42, userId: 99 },
241
- ),
242
- ).not.toThrow();
243
- });
244
- });
@@ -1,412 +0,0 @@
1
- /**
2
- * v4.12.4 — parseOutputFileStatus staleness detection.
3
- *
4
- * Problem this fixes: when a background sub-agent is interrupted (e.g. by
5
- * v4.12.3's bypass-abort propagating through the SDK subprocess), its
6
- * outputFile is left with partial JSONL — real work, real text — but
7
- * without the `stop_reason: "end_turn"` marker the pre-v4.12.4 parser
8
- * required for "completed" state.
9
- *
10
- * Real-world evidence (2026-04-16):
11
- * - Three agents (a03ce829, af61fa6e, ac47c4a2) pending in state file
12
- * - Each outputFile has 81-131 lines of REAL work (WebSearch, tool_use,
13
- * partial reports like "Here's the summary:\n\n## Critical Bugs")
14
- * - Last event is either "[Request interrupted by user for tool use]"
15
- * or a mid-streaming assistant text that never got end_turn
16
- * - Watcher polls forever, hits 12h giveUpAt, delivers "empty output"
17
- * - User sees useless "720m timeout · 0 in / 0 out · (empty output)"
18
- * messages hours later, while the actual work is sitting on disk
19
- *
20
- * Fix behavior:
21
- * - If no end_turn is found, check mtime/size of the file
22
- * - If file hasn't been touched for `stalenessMs` (default 5 min) AND
23
- * there's usable text content in the tail, mark as "completed"
24
- * with the partial output PREFIXED by an "⚠️ interrupted, partial
25
- * output" header so the user knows it's not a clean finish
26
- * - If file IS fresh or has no text content, stay in "running" state
27
- * (normal polling continues)
28
- *
29
- * This deliberately biases toward delivering SOMETHING rather than
30
- * nothing. Worst case: an agent that's still alive but genuinely idle
31
- * for >5 min gets its partial text delivered early. Best case: dozens
32
- * of stuck interrupted agents get their real work back to the user.
33
- */
34
- import { describe, it, expect, beforeEach, afterEach } from "vitest";
35
- import fs from "fs";
36
- import os from "os";
37
- import { resolve } from "path";
38
- import { parseOutputFileStatus } from "../src/services/async-agent-parser.js";
39
-
40
- const TMP_BASE = resolve(os.tmpdir(), `alvin-parser-stale-${process.pid}`);
41
-
42
- beforeEach(() => {
43
- fs.mkdirSync(TMP_BASE, { recursive: true });
44
- });
45
- afterEach(() => {
46
- try {
47
- fs.rmSync(TMP_BASE, { recursive: true, force: true });
48
- } catch {
49
- /* ignore */
50
- }
51
- });
52
-
53
- /**
54
- * Write a JSONL file with a mid-execution interrupted state. No end_turn,
55
- * but contains real assistant text + tool calls. Last line is the
56
- * "Request interrupted" marker.
57
- */
58
- function writeInterruptedJsonl(name: string): string {
59
- const path = resolve(TMP_BASE, name);
60
- const lines = [
61
- JSON.stringify({
62
- type: "user",
63
- isSidechain: true,
64
- agentId: "x",
65
- message: { role: "user", content: "do a report" },
66
- }),
67
- JSON.stringify({
68
- type: "assistant",
69
- isSidechain: true,
70
- agentId: "x",
71
- message: {
72
- role: "assistant",
73
- content: [{ type: "text", text: "Starting research..." }],
74
- stop_reason: "tool_use",
75
- },
76
- }),
77
- JSON.stringify({
78
- type: "assistant",
79
- isSidechain: true,
80
- agentId: "x",
81
- message: {
82
- role: "assistant",
83
- content: [
84
- {
85
- type: "text",
86
- text:
87
- "Here's what I found:\n\n## Key Findings\n- Finding A\n- Finding B\n- Finding C",
88
- },
89
- ],
90
- stop_reason: "tool_use",
91
- },
92
- }),
93
- JSON.stringify({
94
- type: "user",
95
- isSidechain: true,
96
- agentId: "x",
97
- message: {
98
- role: "user",
99
- content: [
100
- {
101
- type: "tool_result",
102
- content: "[Request interrupted by user for tool use]",
103
- },
104
- ],
105
- },
106
- }),
107
- ];
108
- fs.writeFileSync(path, lines.join("\n") + "\n", "utf-8");
109
- return path;
110
- }
111
-
112
- /** Set file mtime to N ms in the past. */
113
- function setStale(path: string, ageMs: number): void {
114
- const target = Date.now() - ageMs;
115
- fs.utimesSync(path, target / 1000, target / 1000);
116
- }
117
-
118
- describe("parseOutputFileStatus — staleness detection (v4.12.4)", () => {
119
- it("still returns 'completed' when end_turn is present (staleness is a fallback only)", async () => {
120
- const path = resolve(TMP_BASE, "complete.jsonl");
121
- fs.writeFileSync(
122
- path,
123
- JSON.stringify({
124
- type: "assistant",
125
- agentId: "x",
126
- message: {
127
- content: [{ type: "text", text: "clean end" }],
128
- stop_reason: "end_turn",
129
- },
130
- }) + "\n",
131
- "utf-8",
132
- );
133
- setStale(path, 3600_000); // 1h old
134
- const status = await parseOutputFileStatus(path, {
135
- stalenessMs: 300_000,
136
- });
137
- expect(status.state).toBe("completed");
138
- if (status.state === "completed") {
139
- expect(status.output).toContain("clean end");
140
- // No interrupted banner for clean end_turn
141
- expect(status.output).not.toMatch(/interrupt/i);
142
- }
143
- });
144
-
145
- it("returns 'running' when file is fresh and no end_turn (normal polling)", async () => {
146
- const path = writeInterruptedJsonl("fresh-interrupted.jsonl");
147
- // File is fresh (just written)
148
- const status = await parseOutputFileStatus(path, {
149
- stalenessMs: 300_000,
150
- });
151
- expect(status.state).toBe("running");
152
- });
153
-
154
- it("returns 'completed' (partial) when file is stale AND has text content", async () => {
155
- const path = writeInterruptedJsonl("stale-interrupted.jsonl");
156
- setStale(path, 600_000); // 10 min old
157
- const status = await parseOutputFileStatus(path, {
158
- stalenessMs: 300_000, // 5 min threshold
159
- });
160
- expect(status.state).toBe("completed");
161
- if (status.state === "completed") {
162
- // Should contain the real report content
163
- expect(status.output).toContain("Key Findings");
164
- expect(status.output).toContain("Finding A");
165
- // Should be prefixed with an interrupted banner so user knows
166
- // (German "unterbrochen" / "partielle" OR English "interrupted"/"partial")
167
- expect(status.output).toMatch(/interrupt|partial|unterbroch|partiell|⚠️/i);
168
- }
169
- });
170
-
171
- it("returns 'running' when file is stale but has NO text content (nothing to deliver)", async () => {
172
- // Only tool-use events, no text. Delivery would be useless.
173
- const path = resolve(TMP_BASE, "no-text.jsonl");
174
- fs.writeFileSync(
175
- path,
176
- [
177
- JSON.stringify({
178
- type: "user",
179
- agentId: "x",
180
- message: { role: "user", content: "go" },
181
- }),
182
- JSON.stringify({
183
- type: "assistant",
184
- agentId: "x",
185
- message: {
186
- content: [
187
- { type: "tool_use", name: "Bash", input: { command: "ls" } },
188
- ],
189
- stop_reason: "tool_use",
190
- },
191
- }),
192
- ].join("\n") + "\n",
193
- "utf-8",
194
- );
195
- setStale(path, 600_000);
196
- const status = await parseOutputFileStatus(path, {
197
- stalenessMs: 300_000,
198
- });
199
- expect(status.state).toBe("running");
200
- });
201
-
202
- it("default stalenessMs is applied when not provided (no crashes on legacy callers)", async () => {
203
- const path = writeInterruptedJsonl("default-cfg.jsonl");
204
- setStale(path, 24 * 3600_000); // 24h old — very stale
205
- const status = await parseOutputFileStatus(path);
206
- // Whatever the default is, 24h should definitely exceed it
207
- expect(status.state).toBe("completed");
208
- });
209
-
210
- it("stalenessMs: 0 disables the staleness fallback entirely", async () => {
211
- const path = writeInterruptedJsonl("disabled.jsonl");
212
- setStale(path, 24 * 3600_000);
213
- const status = await parseOutputFileStatus(path, { stalenessMs: 0 });
214
- // With staleness disabled, we're back to strict end_turn requirement
215
- expect(status.state).toBe("running");
216
- });
217
-
218
- it("aggregates ALL text blocks from ALL assistant turns when delivering partial", async () => {
219
- const path = resolve(TMP_BASE, "multi-turn-interrupted.jsonl");
220
- const lines = [
221
- { type: "user", agentId: "x", message: { role: "user", content: "go" } },
222
- {
223
- type: "assistant",
224
- agentId: "x",
225
- message: {
226
- content: [{ type: "text", text: "First thought." }],
227
- stop_reason: "tool_use",
228
- },
229
- },
230
- {
231
- type: "assistant",
232
- agentId: "x",
233
- message: {
234
- content: [{ type: "text", text: "Second thought." }],
235
- stop_reason: "tool_use",
236
- },
237
- },
238
- {
239
- type: "assistant",
240
- agentId: "x",
241
- message: {
242
- content: [{ type: "text", text: "Final partial report." }],
243
- stop_reason: "tool_use",
244
- },
245
- },
246
- ];
247
- fs.writeFileSync(
248
- path,
249
- lines.map((l) => JSON.stringify(l)).join("\n") + "\n",
250
- "utf-8",
251
- );
252
- setStale(path, 600_000);
253
- const status = await parseOutputFileStatus(path, {
254
- stalenessMs: 300_000,
255
- });
256
- expect(status.state).toBe("completed");
257
- if (status.state === "completed") {
258
- // Should contain text from all three turns (bias toward delivering more)
259
- expect(status.output).toContain("First thought");
260
- expect(status.output).toContain("Second thought");
261
- expect(status.output).toContain("Final partial report");
262
- }
263
- });
264
-
265
- it("ignores thinking blocks in partial delivery (user doesn't want Claude's scratchpad)", async () => {
266
- const path = resolve(TMP_BASE, "thinking-filter.jsonl");
267
- const lines = [
268
- {
269
- type: "assistant",
270
- agentId: "x",
271
- message: {
272
- content: [
273
- { type: "thinking", text: "internal reasoning nobody should see" },
274
- { type: "text", text: "Actual output text." },
275
- ],
276
- stop_reason: "tool_use",
277
- },
278
- },
279
- ];
280
- fs.writeFileSync(
281
- path,
282
- lines.map((l) => JSON.stringify(l)).join("\n") + "\n",
283
- "utf-8",
284
- );
285
- setStale(path, 600_000);
286
- const status = await parseOutputFileStatus(path, {
287
- stalenessMs: 300_000,
288
- });
289
- expect(status.state).toBe("completed");
290
- if (status.state === "completed") {
291
- expect(status.output).toContain("Actual output text");
292
- expect(status.output).not.toContain("internal reasoning");
293
- }
294
- });
295
-
296
- it("extracts usage tokens from the last assistant event when available", async () => {
297
- const path = resolve(TMP_BASE, "tokens-partial.jsonl");
298
- const lines = [
299
- {
300
- type: "assistant",
301
- agentId: "x",
302
- message: {
303
- content: [{ type: "text", text: "partial text" }],
304
- stop_reason: "tool_use",
305
- usage: { input_tokens: 500, output_tokens: 200 },
306
- },
307
- },
308
- ];
309
- fs.writeFileSync(
310
- path,
311
- lines.map((l) => JSON.stringify(l)).join("\n") + "\n",
312
- "utf-8",
313
- );
314
- setStale(path, 600_000);
315
- const status = await parseOutputFileStatus(path, {
316
- stalenessMs: 300_000,
317
- });
318
- expect(status.state).toBe("completed");
319
- if (status.state === "completed") {
320
- expect(status.tokensUsed).toEqual({ input: 500, output: 200 });
321
- }
322
- });
323
-
324
- it("handles file that only has the interruption marker (nothing useful to deliver)", async () => {
325
- // Edge case: only interruption, no prior text
326
- const path = resolve(TMP_BASE, "only-interrupt.jsonl");
327
- const lines = [
328
- {
329
- type: "user",
330
- agentId: "x",
331
- message: {
332
- role: "user",
333
- content: [
334
- {
335
- type: "tool_result",
336
- content: "[Request interrupted by user for tool use]",
337
- },
338
- ],
339
- },
340
- },
341
- ];
342
- fs.writeFileSync(
343
- path,
344
- lines.map((l) => JSON.stringify(l)).join("\n") + "\n",
345
- "utf-8",
346
- );
347
- setStale(path, 600_000);
348
- const status = await parseOutputFileStatus(path, {
349
- stalenessMs: 300_000,
350
- });
351
- // No assistant text content at all → still running (nothing useful)
352
- expect(status.state).toBe("running");
353
- });
354
-
355
- it("preserves ordering of text across turns (earlier text first, later text last)", async () => {
356
- const path = resolve(TMP_BASE, "order.jsonl");
357
- const lines = [
358
- {
359
- type: "assistant",
360
- agentId: "x",
361
- message: {
362
- content: [{ type: "text", text: "ALPHA" }],
363
- stop_reason: "tool_use",
364
- },
365
- },
366
- {
367
- type: "user",
368
- agentId: "x",
369
- message: { content: [{ type: "tool_result", content: "..." }] },
370
- },
371
- {
372
- type: "assistant",
373
- agentId: "x",
374
- message: {
375
- content: [{ type: "text", text: "BETA" }],
376
- stop_reason: "tool_use",
377
- },
378
- },
379
- {
380
- type: "user",
381
- agentId: "x",
382
- message: { content: [{ type: "tool_result", content: "..." }] },
383
- },
384
- {
385
- type: "assistant",
386
- agentId: "x",
387
- message: {
388
- content: [{ type: "text", text: "GAMMA" }],
389
- stop_reason: "tool_use",
390
- },
391
- },
392
- ];
393
- fs.writeFileSync(
394
- path,
395
- lines.map((l) => JSON.stringify(l)).join("\n") + "\n",
396
- "utf-8",
397
- );
398
- setStale(path, 600_000);
399
- const status = await parseOutputFileStatus(path, {
400
- stalenessMs: 300_000,
401
- });
402
- expect(status.state).toBe("completed");
403
- if (status.state === "completed") {
404
- const alphaIdx = status.output.indexOf("ALPHA");
405
- const betaIdx = status.output.indexOf("BETA");
406
- const gammaIdx = status.output.indexOf("GAMMA");
407
- expect(alphaIdx).toBeGreaterThan(-1);
408
- expect(betaIdx).toBeGreaterThan(alphaIdx);
409
- expect(gammaIdx).toBeGreaterThan(betaIdx);
410
- }
411
- });
412
- });