@polderlabs/bizar-plugin 0.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. package/LICENSE +21 -0
  2. package/README.md +448 -0
  3. package/bun.lock +88 -0
  4. package/index.ts +1113 -0
  5. package/package.json +42 -0
  6. package/scripts/check-forbidden-imports.sh +33 -0
  7. package/src/background-state.ts +463 -0
  8. package/src/background.ts +964 -0
  9. package/src/commands-impl.ts +369 -0
  10. package/src/commands.ts +880 -0
  11. package/src/event-stream.ts +574 -0
  12. package/src/fingerprint.ts +120 -0
  13. package/src/handoff.ts +79 -0
  14. package/src/http-client.ts +467 -0
  15. package/src/logger.ts +144 -0
  16. package/src/loop.ts +176 -0
  17. package/src/options.ts +421 -0
  18. package/src/plan-fs.ts +323 -0
  19. package/src/report.ts +178 -0
  20. package/src/research-prompt.ts +35 -0
  21. package/src/serve.ts +476 -0
  22. package/src/settings.ts +349 -0
  23. package/src/state.ts +298 -0
  24. package/src/tools/bg-collect.ts +104 -0
  25. package/src/tools/bg-get-comments.ts +239 -0
  26. package/src/tools/bg-kill.ts +87 -0
  27. package/src/tools/bg-spawn.ts +263 -0
  28. package/src/tools/bg-status.ts +99 -0
  29. package/src/tools/plan-action.ts +767 -0
  30. package/src/tools/wait-for-feedback.ts +402 -0
  31. package/tests/attach-handler-bug.test.ts +166 -0
  32. package/tests/background-state.test.ts +277 -0
  33. package/tests/background.test.ts +402 -0
  34. package/tests/block.test.ts +193 -0
  35. package/tests/canonical-key-order.test.ts +71 -0
  36. package/tests/commands-impl.test.ts +442 -0
  37. package/tests/commands.test.ts +548 -0
  38. package/tests/config.test.ts +122 -0
  39. package/tests/dispose.test.ts +336 -0
  40. package/tests/event-stream.test.ts +409 -0
  41. package/tests/event.test.ts +262 -0
  42. package/tests/fingerprint.test.ts +161 -0
  43. package/tests/http-client.test.ts +403 -0
  44. package/tests/init-helpers.test.ts +203 -0
  45. package/tests/integration/slash-command.test.ts +348 -0
  46. package/tests/integration/tool-routing.test.ts +314 -0
  47. package/tests/loop.test.ts +397 -0
  48. package/tests/options.test.ts +274 -0
  49. package/tests/serve.test.ts +335 -0
  50. package/tests/settings.test.ts +351 -0
  51. package/tests/stall-think.test.ts +749 -0
  52. package/tests/state.test.ts +275 -0
  53. package/tests/tools/bg-collect.test.ts +337 -0
  54. package/tests/tools/bg-get-comments.test.ts +485 -0
  55. package/tests/tools/bg-kill.test.ts +231 -0
  56. package/tests/tools/bg-spawn.test.ts +311 -0
  57. package/tests/tools/bg-status.test.ts +216 -0
  58. package/tests/tools/plan-action.test.ts +599 -0
  59. package/tests/tools/wait-for-feedback.test.ts +390 -0
  60. package/tsconfig.json +29 -0
@@ -0,0 +1,275 @@
1
+ /**
2
+ * state.test.ts
3
+ *
4
+ * Tests for StateStore: read/write round-trip, atomic writes, rolling window,
5
+ * corrupt-state fallback, and per-session mutex. Per §4.3, §4.7.
6
+ */
7
+
8
+ import { describe, test, expect, beforeEach, afterEach } from "bun:test";
9
+ import { StateStore, SessionState, EMPTY_STATE } from "../src/state";
10
+ import { mkdirSync, rmSync, writeFileSync, existsSync, utimesSync } from "node:fs";
11
+ import path from "node:path";
12
+
/**
 * In-memory logger stand-in for tests.
 *
 * Every entry handed to `log` is appended to `messages`, so a test can assert
 * on what was logged (level and message text) after exercising the store.
 */
class MockLogger {
  /** Entries received so far, in call order. */
  messages: { level: string; message: string }[] = [];

  /** Records `opts` as-is (the same object reference, not a copy). */
  log(opts: { level: string; message: string }): void {
    this.messages.push(opts);
  }
}
20
+
21
+ const TEST_DIR = "/tmp/bizar-state-test";
22
+ const TEST_SESSION_A = "session-a-123";
23
+ const TEST_SESSION_B = "session-b-456";
24
+
/**
 * Builds a SessionState fixture.
 *
 * Starts from an all-empty state for TEST_SESSION_A (null parent, zeroed
 * timestamps and counters, no tool calls) and lets `overrides` replace any
 * subset of those fields.
 *
 * @param overrides Fields that should differ from the empty defaults.
 * @returns A new state object; override values win over defaults.
 */
function makeState(overrides: Partial<SessionState> = {}): SessionState {
  const defaults: SessionState = {
    sessionId: TEST_SESSION_A,
    parentAgent: null,
    startedAt: 0,
    lastActivityAt: 0,
    turnCount: 0,
    toolCalls: [],
    warningsIssued: 0,
    blocksTriggered: 0,
  };
  return { ...defaults, ...overrides };
}
38
+
/**
 * Returns the path these tests use for a session's state file:
 * `<TEST_DIR>/<sessionId>.json`.
 */
function stateFilePath(sessionId: string) {
  const fileName = `${sessionId}.json`;
  return path.join(TEST_DIR, fileName);
}
42
+
/**
 * StateStore suite.
 *
 * Each test runs against a scratch directory (TEST_DIR) that is wiped and
 * recreated in `beforeEach` and removed again in `afterEach`, with a fresh
 * MockLogger and StateStore per test.
 */
describe("StateStore", () => {
  let logger: MockLogger;
  let store: StateStore;

  beforeEach(() => {
    // Fresh temp dir for each test
    try { rmSync(TEST_DIR, { recursive: true, force: true }); } catch { /* ok */ }
    mkdirSync(TEST_DIR, { recursive: true });
    logger = new MockLogger();
    store = new StateStore(TEST_DIR, logger);
  });

  afterEach(() => {
    try { rmSync(TEST_DIR, { recursive: true, force: true }); } catch { /* ok */ }
  });

  // ── read/write round-trip ──────────────────────────────────────────────────

  test("read/write round-trip preserves all fields", async () => {
    const state = makeState({
      sessionId: TEST_SESSION_A,
      parentAgent: "odin",
      startedAt: 1700000000000,
      lastActivityAt: 1700000005000,
      turnCount: 3,
      toolCalls: [
        { tool: "read", fingerprint: "abc123", at: 1700000001000, outcome: "ok" },
        { tool: "edit", fingerprint: "def456", at: 1700000002000, outcome: "error" },
      ],
      warningsIssued: 1,
      blocksTriggered: 0,
    });

    await store.save(state);
    const loaded = await store.load(TEST_SESSION_A);

    expect(loaded.sessionId).toBe(TEST_SESSION_A);
    expect(loaded.parentAgent).toBe("odin");
    expect(loaded.startedAt).toBe(1700000000000);
    expect(loaded.lastActivityAt).toBe(1700000005000);
    expect(loaded.turnCount).toBe(3);
    expect(loaded.toolCalls).toHaveLength(2);
    expect(loaded.toolCalls[0]!.fingerprint).toBe("abc123");
    expect(loaded.toolCalls[1]!.outcome).toBe("error");
    expect(loaded.warningsIssued).toBe(1);
    expect(loaded.blocksTriggered).toBe(0);
  });

  test("load returns EMPTY_STATE when file does not exist", async () => {
    const loaded = await store.load("nonexistent-session-xyz");
    expect(loaded.sessionId).toBe("nonexistent-session-xyz");
    expect(loaded.parentAgent).toBe(null);
    expect(loaded.startedAt).toBe(0);
    expect(loaded.lastActivityAt).toBe(0);
    expect(loaded.turnCount).toBe(0);
    expect(loaded.toolCalls).toHaveLength(0);
    expect(loaded.warningsIssued).toBe(0);
    expect(loaded.blocksTriggered).toBe(0);
  });

  // ── rolling window ─────────────────────────────────────────────────────────

  test("toolCalls array is pruned to last 50 entries on write", async () => {
    const manyCalls = Array.from({ length: 60 }, (_, i) => ({
      tool: "read",
      fingerprint: `fp-${i}`,
      at: 1000 + i,
      outcome: "ok" as const,
    }));

    const state = makeState({ toolCalls: manyCalls });
    await store.save(state);
    const loaded = await store.load(TEST_SESSION_A);

    expect(loaded.toolCalls).toHaveLength(50);
    // Original indices 0-9 are pruned, so the first survivor is index 10
    expect(loaded.toolCalls[0]!.fingerprint).toBe("fp-10");
    // The last survivor is the final original entry, index 59
    expect(loaded.toolCalls[49]!.fingerprint).toBe("fp-59");
  });

  test("fewer than 50 tool calls are preserved intact", async () => {
    const calls = Array.from({ length: 10 }, (_, i) => ({
      tool: "read",
      fingerprint: `fp-${i}`,
      at: 1000 + i,
      outcome: "ok" as const,
    }));
    const state = makeState({ toolCalls: calls });
    await store.save(state);
    const loaded = await store.load(TEST_SESSION_A);
    expect(loaded.toolCalls).toHaveLength(10);
  });

  // ── corrupt-state fallback ─────────────────────────────────────────────────

  test("corrupt JSON file → warning logged, in-memory state starts empty, file preserved", async () => {
    const filePath = stateFilePath(TEST_SESSION_A);
    writeFileSync(filePath, "{ invalid json }", "utf8");

    const loaded = await store.load(TEST_SESSION_A);

    // Returns empty state with the sessionId from filename
    expect(loaded.sessionId).toBe(TEST_SESSION_A);
    expect(loaded.parentAgent).toBe(null);
    expect(loaded.startedAt).toBe(0);
    expect(loaded.toolCalls).toHaveLength(0);

    // Warning was logged
    expect(logger.messages.some((m) => m.level === "warn" && m.message.includes("corrupt"))).toBe(true);

    // File is preserved for forensic inspection
    expect(existsSync(filePath)).toBe(true);
  });

  test("valid JSON but wrong schema (missing toolCalls) → corrupt fallback", async () => {
    const filePath = stateFilePath(TEST_SESSION_A);
    writeFileSync(filePath, JSON.stringify({ sessionId: TEST_SESSION_A, parentAgent: null }), "utf8");

    const loaded = await store.load(TEST_SESSION_A);
    expect(loaded.sessionId).toBe(TEST_SESSION_A);
    expect(loaded.toolCalls).toHaveLength(0);
    expect(logger.messages.some((m) => m.level === "warn")).toBe(true);
  });

  // ── per-session mutex ──────────────────────────────────────────────────────

  test("concurrent writes to same session are serialized (no lost updates)", async () => {
    // A shared counter alone cannot expose a missing mutex: `counter += 1`
    // never races on a single-threaded event loop, so the total is 4 whether
    // or not the callbacks overlap. Tracking how many critical sections are
    // open at once does: if the lock admitted both callbacks, the second would
    // enter during the first one's 10ms sleep and `maxActive` would reach 2.
    let counter = 0;
    let active = 0;
    let maxActive = 0;
    const criticalSection = async () => {
      active += 1;
      maxActive = Math.max(maxActive, active);
      counter += 1;
      await new Promise((r) => setTimeout(r, 10));
      counter += 1;
      active -= 1;
    };

    await Promise.all([
      store.withLock(TEST_SESSION_A, criticalSection),
      store.withLock(TEST_SESSION_A, criticalSection),
    ]);

    expect(counter).toBe(4); // each task increments twice
    expect(maxActive).toBe(1); // never more than one lock holder at a time
  });

  test("different sessions do not block each other", async () => {
    const storeA = new StateStore(TEST_DIR, logger);
    const storeB = new StateStore(TEST_DIR, logger);

    const [resultA, resultB] = await Promise.all([
      storeA.load(TEST_SESSION_A),
      storeB.load(TEST_SESSION_B),
    ]);

    // Each session loaded its own empty state independently
    expect(resultA.sessionId).toBe(TEST_SESSION_A);
    expect(resultB.sessionId).toBe(TEST_SESSION_B);
  });

  // ── atomic write ───────────────────────────────────────────────────────────

  test("save uses atomic rename (no partial file on disk)", async () => {
    const state = makeState({ parentAgent: "odin", startedAt: 1000 });
    await store.save(state);

    const filePath = stateFilePath(TEST_SESSION_A);
    expect(existsSync(filePath)).toBe(true);
    // tmp file should not exist after rename
    expect(existsSync(`${filePath}.tmp`)).toBe(false);

    // The file on disk must be complete, parseable JSON
    const { readFile } = await import("node:fs/promises");
    const content = await readFile(filePath, "utf8");
    const parsed = JSON.parse(content);
    expect(parsed.parentAgent).toBe("odin");
  });

  // ── delete ─────────────────────────────────────────────────────────────────

  test("delete removes the state file", async () => {
    const state = makeState({ parentAgent: "odin" });
    await store.save(state);
    expect(existsSync(stateFilePath(TEST_SESSION_A))).toBe(true);

    await store.delete(TEST_SESSION_A);
    expect(existsSync(stateFilePath(TEST_SESSION_A))).toBe(false);
  });

  test("delete is idempotent (no error if file already gone)", async () => {
    // Passes as long as the awaited call does not reject
    await store.delete("already-gone-session");
  });

  // ── cleanup ────────────────────────────────────────────────────────────────

  test("cleanup removes files older than maxAgeDays", async () => {
    const filePath = stateFilePath(TEST_SESSION_A);
    writeFileSync(filePath, JSON.stringify(makeState({ lastActivityAt: Date.now() - 10 * 24 * 60 * 60 * 1000 })), "utf8");

    // Set the file's mtime to 10 days ago so cleanup considers it stale
    // (utimesSync takes seconds, hence the division by 1000)
    const tenDaysAgo = (Date.now() - 10 * 24 * 60 * 60 * 1000) / 1000;
    utimesSync(filePath, tenDaysAgo, tenDaysAgo);

    const deleted = await store.cleanup(7);
    expect(deleted).toBe(1);
    expect(existsSync(filePath)).toBe(false);
  });

  test("cleanup removes orphaned sessions (sessionId not in validSessionIds)", async () => {
    const filePath = stateFilePath(TEST_SESSION_A);
    writeFileSync(filePath, JSON.stringify(makeState({ lastActivityAt: Date.now() })), "utf8");

    const deleted = await store.cleanup(7, new Set(["some-other-session"]));
    expect(deleted).toBe(1);
    expect(existsSync(filePath)).toBe(false);
  });

  test("cleanup keeps files that are recent AND in validSessionIds", async () => {
    const filePath = stateFilePath(TEST_SESSION_A);
    writeFileSync(filePath, JSON.stringify(makeState({ lastActivityAt: Date.now() })), "utf8");

    const deleted = await store.cleanup(7, new Set([TEST_SESSION_A]));
    expect(deleted).toBe(0);
    expect(existsSync(filePath)).toBe(true);
  });

  test("cleanup returns 0 when state dir cannot be read", async () => {
    const unreadableStore = new StateStore("/nonexistent-dir-xyz", logger);
    const deleted = await unreadableStore.cleanup(7);
    expect(deleted).toBe(0);
  });
});
@@ -0,0 +1,337 @@
1
+ /**
2
+ * bizar_collect tool tests.
3
+ *
4
+ * Tests: timeout behavior (MEDIUM-31), killed/done/failed result,
5
+ * loop guard marker prepended at collect time (MEDIUM-30),
6
+ * timeoutMs clamping (MEDIUM-33), result construction from messages
7
+ * (MEDIUM-19), collect on already-killed instance (HIGH-37).
8
+ */
9
+
10
+ import { describe, it, expect } from "bun:test";
11
+
12
+ // ---------------------------------------------------------------------------
13
+ // Types
14
+ // ---------------------------------------------------------------------------
15
+
/** Status values a background instance can carry in these tests. */
type BackgroundStatus =
  | "pending"
  | "running"
  | "done"
  | "failed"
  | "killed"
  | "timed_out";

/** Per-instance record the fake `bizar_collect` reads from. */
interface BackgroundState {
  instanceId: string;
  /** Key used to look up this instance's messages. */
  sessionId: string;
  status: BackgroundStatus;
  toolCallCount: number;
  /** When set, a loop-guard marker naming this tool is prepended to the collected result. */
  loopGuardTool?: string;
  error?: string;
  /** Returned as the result when collect is called on a non-terminal instance. */
  resultPreview?: string;
  resultMessageIds?: string[];
  /** Start time; the fixtures populate it from Date.now() (epoch milliseconds). */
  startedAt: number;
  /** Completion time; when absent, duration is measured up to the current time. */
  completedAt?: number;
  timeoutMs: number;
}

/** A text part of a message — the only part kind modelled in this file. */
interface MessagePart {
  type: "text";
  text: string;
}

/** A session message: the author's role plus its ordered parts. */
interface Message {
  info: { role: "user" | "assistant" };
  parts: MessagePart[];
}
41
+
42
+ // ---------------------------------------------------------------------------
43
+ // Result construction algorithm (mirrors §4.4)
44
+ // ---------------------------------------------------------------------------
45
+
/**
 * Builds the collected result text for a session.
 *
 * Takes the text of every "text" part of every assistant message, in message
 * and part order, and joins the pieces with newlines. User messages and any
 * part whose type is not "text" contribute nothing.
 *
 * @param messages      Session messages in chronological order.
 * @param loopGuardTool When non-empty, a `[loop guard: 12 identical calls to <tool>]`
 *                      line is placed in front of the text.
 * @returns The joined text, optionally preceded by the loop-guard marker line.
 */
function constructResult(messages: Message[], loopGuardTool?: string): string {
  const pieces: string[] = [];
  for (const message of messages) {
    if (message.info.role !== "assistant") continue;
    for (const part of message.parts) {
      if (part.type === "text") pieces.push(part.text);
    }
  }
  const body = pieces.join("\n");

  // The marker is added here, at collect time; it is not part of the messages.
  return loopGuardTool
    ? `[loop guard: 12 identical calls to ${loopGuardTool}]\n${body}`
    : body;
}
63
+
/**
 * Validates a collect timeout.
 *
 * Despite the name, nothing is clamped: a value inside the allowed range is
 * returned unchanged and anything else is rejected.
 *
 * @param timeoutMs Requested timeout in milliseconds.
 * @returns `timeoutMs` when it lies in [1000, 1_800_000].
 * @throws Error when `timeoutMs` is below 1000, above 1_800_000, or not a
 *         finite number (NaN, ±Infinity).
 */
function clampTimeout(timeoutMs: number): number {
  const MIN = 1000;
  const MAX = 1_800_000;
  // NaN compares false against both bounds, so the range check alone would
  // let it through as a "valid" timeout; reject non-finite values explicitly.
  if (!Number.isFinite(timeoutMs) || timeoutMs < MIN || timeoutMs > MAX) {
    throw new Error(`timeoutMs must be between ${MIN} (1s) and ${MAX} (30min). Got ${timeoutMs}.`);
  }
  return timeoutMs;
}
72
+
73
+ // ---------------------------------------------------------------------------
74
+ // Fake bizar_collect
75
+ // ---------------------------------------------------------------------------
76
+
/** Successful return shape of the fake `bizar_collect`. */
interface CollectResult {
  instanceId: string;
  /** Instance status at the time of the call ("running" when the collect timed out). */
  status: BackgroundStatus;
  /** Collected text: the constructed result for terminal instances, otherwise the preview. */
  result: string;
  toolCallCount: number;
  /** Milliseconds between the instance's start and its completion (or now). */
  durationMs: number;
  /** The instance's own error, or the collect-timeout message. */
  error?: string;
}
85
+
/**
 * In-test stand-in for the `bizar_collect` tool.
 *
 * Order of checks:
 *  1. Unknown `instanceId` → `{ error }` ("... not found").
 *  2. `timeoutMs` (default 60_000) is validated via `clampTimeout`; a rejected
 *     value → `{ error }` carrying the validation message.
 *  3. Instance already done / failed / killed / timed_out → its result is
 *     built from the session's messages and returned straight away.
 *  4. Anything else is reported as a collect timeout: status "running", the
 *     instance's `resultPreview` as the result, and a "timed out" error. No
 *     actual waiting takes place.
 *
 * @param args      Target instance and optional timeout in milliseconds.
 * @param instances Known instances keyed by instanceId.
 * @param messages  Session messages keyed by sessionId.
 * @returns A CollectResult, or `{ error }` for an unknown instance or an
 *          invalid timeout.
 */
function bizar_collect(
  args: { instanceId: string; timeoutMs?: number },
  instances: Map<string, BackgroundState>,
  messages: Map<string, Message[]>,
): CollectResult | { error: string } {
  const inst = instances.get(args.instanceId);
  if (!inst) {
    return { error: `Instance ${args.instanceId} not found` };
  }

  let timeoutMs = args.timeoutMs ?? 60_000;
  try {
    timeoutMs = clampTimeout(timeoutMs);
  } catch (e: unknown) {
    // Narrow instead of asserting: a non-Error throw has no `.message`, and
    // `error` is typed as string.
    return { error: e instanceof Error ? e.message : String(e) };
  }

  // If instance is already in a terminal state, return immediately
  if (inst.status === "done" || inst.status === "failed" || inst.status === "killed" || inst.status === "timed_out") {
    const sessionMessages = messages.get(inst.sessionId) ?? [];
    const result = constructResult(sessionMessages, inst.loopGuardTool);
    // Fall back to "now" when the instance never recorded a completion time
    const durationMs = (inst.completedAt ?? Date.now()) - inst.startedAt;
    return {
      instanceId: inst.instanceId,
      status: inst.status,
      result,
      toolCallCount: inst.toolCallCount,
      durationMs,
      error: inst.error,
    };
  }

  // Simulate timeout for this test
  return {
    instanceId: inst.instanceId,
    status: "running",
    result: inst.resultPreview ?? "",
    toolCallCount: inst.toolCallCount,
    durationMs: Date.now() - inst.startedAt,
    error: `collect timed out after ${timeoutMs}ms`,
  };
}
128
+
129
+ // ---------------------------------------------------------------------------
130
+ // In-memory test data
131
+ // ---------------------------------------------------------------------------
132
+
/**
 * Builds a conversation fixture: one user message ("Do the task") followed by
 * one assistant message per entry of `assistantTexts`, each holding a single
 * text part.
 *
 * @param assistantTexts Assistant replies, in the order they should appear.
 * @returns The user prompt followed by the assistant messages.
 */
function makeMessages(assistantTexts: string[]): Message[] {
  const prompt: Message = {
    info: { role: "user" },
    parts: [{ type: "text", text: "Do the task" }],
  };
  const replies = assistantTexts.map(
    (text): Message => ({
      info: { role: "assistant" },
      parts: [{ type: "text", text }],
    }),
  );
  return [prompt, ...replies];
}
145
+
// Shared fixtures: three instances that have already finished (done, failed
// via the loop guard, killed), each with one session's worth of messages.

/** Finished normally; completed 60s after it started. */
const doneInst: BackgroundState = {
  instanceId: "bgr_done",
  sessionId: "sess_done",
  status: "done",
  toolCallCount: 5,
  loopGuardTool: undefined,
  error: undefined,
  resultPreview: "Research complete.",
  resultMessageIds: ["msg_01", "msg_02"],
  startedAt: Date.now() - 120_000,
  completedAt: Date.now() - 60_000,
  timeoutMs: 300_000,
};

/** Failed with the loop guard recorded against the "read" tool. */
const loopGuardInst: BackgroundState = {
  instanceId: "bgr_loop",
  sessionId: "sess_loop",
  status: "failed",
  toolCallCount: 12,
  loopGuardTool: "read",
  error: "Loop protection: 12 identical calls to read",
  resultPreview: "",
  resultMessageIds: ["msg_loop_1"],
  startedAt: Date.now() - 60_000,
  completedAt: Date.now(),
  timeoutMs: 300_000,
};

/** Killed after partial work; carries no error. */
const killedInst: BackgroundState = {
  instanceId: "bgr_killed",
  sessionId: "sess_killed",
  status: "killed",
  toolCallCount: 2,
  loopGuardTool: undefined,
  error: undefined,
  resultPreview: "Partial result...",
  resultMessageIds: ["msg_k_1"],
  startedAt: Date.now() - 30_000,
  completedAt: Date.now(),
  timeoutMs: 300_000,
};

/** Instances keyed by instanceId. */
const instances = new Map<string, BackgroundState>([
  ["bgr_done", doneInst],
  ["bgr_loop", loopGuardInst],
  ["bgr_killed", killedInst],
]);

/** Session messages keyed by sessionId. */
const messages = new Map<string, Message[]>([
  ["sess_done", makeMessages(["The research shows that X is true.", "Findings have been saved."])],
  ["sess_loop", makeMessages(["I tried to read the file but got stuck."])],
  ["sess_killed", makeMessages(["Partial work done before kill."])],
]);
196
+
197
+ // ---------------------------------------------------------------------------
198
+ // Result construction (MEDIUM-19)
199
+ // ---------------------------------------------------------------------------
200
+
// Checks constructResult directly: which text ends up in the result and when
// the loop-guard marker is added.
describe("bizar_collect — result construction (MEDIUM-19)", () => {
  it("concatenates assistant text parts in order", () => {
    const combined = constructResult(makeMessages(["First response.", "Second response."]));
    expect(combined).toBe("First response.\nSecond response.");
  });

  it("skips user messages", () => {
    const combined = constructResult(makeMessages(["Assistant text"]));
    // "Do the task" is the user prompt that makeMessages always prepends
    expect(combined).not.toContain("Do the task");
    expect(combined).toBe("Assistant text");
  });

  it("prepends loop guard marker when loopGuardTool is set (MEDIUM-30)", () => {
    const combined = constructResult(makeMessages(["I got stuck in a loop."]), "read");
    expect(combined).toStartWith("[loop guard: 12 identical calls to read]");
  });

  it("marker is NOT in resultPreview or resultMessageIds — added at collect time only (MEDIUM-30)", () => {
    // The stored instance data records the tool but not the marker text
    expect(loopGuardInst.loopGuardTool).toBe("read");
    expect(loopGuardInst.resultPreview).not.toContain("[loop guard:");
    expect(loopGuardInst.resultMessageIds).toBeDefined();
  });

  it("result without loop guard has no marker prefix", () => {
    const combined = constructResult(makeMessages(["All good!"]));
    expect(combined).not.toStartWith("[loop guard:");
  });
});
235
+
236
+ // ---------------------------------------------------------------------------
237
+ // Collect on terminal state
238
+ // ---------------------------------------------------------------------------
239
+
// Instances that have already finished are answered from the in-memory
// fixtures without any waiting.
describe("bizar_collect — terminal state", () => {
  /** Collects a fixture instance using the default timeout. */
  const collect = (instanceId: string): CollectResult =>
    bizar_collect({ instanceId }, instances, messages) as CollectResult;

  it("done instance returns the result immediately (no waiting)", () => {
    const outcome = collect("bgr_done");
    expect(outcome.status).toBe("done");
    expect(outcome.result).toContain("research shows that X is true");
  });

  it("failed instance with loop guard returns marker + result (MEDIUM-30)", () => {
    const outcome = collect("bgr_loop");
    expect(outcome.status).toBe("failed");
    expect(outcome.result).toStartWith("[loop guard: 12 identical calls to read]");
    expect(outcome.error).toContain("Loop protection");
  });

  it("killed instance returns immediately (HIGH-37)", () => {
    const outcome = collect("bgr_killed");
    expect(outcome.status).toBe("killed");
    expect(outcome.result).toContain("Partial work done before kill.");
  });

  it("collect on already-killed instance makes no HTTP calls (HIGH-37)", () => {
    // Verified by the fact we return immediately from in-memory state
    const outcome = collect("bgr_killed");
    expect(outcome.status).toBe("killed");
    // The `error` key is present on the result but its value is undefined
    expect(outcome.error).toBeUndefined();
  });
});
268
+
269
+ // ---------------------------------------------------------------------------
270
+ // Collect on running instance (timeout)
271
+ // ---------------------------------------------------------------------------
272
+
// A still-running instance is reported as a collect timeout, and an
// out-of-range timeoutMs is rejected with an error object.
describe("bizar_collect — timeout on running instance", () => {
  it("running instance times out and returns partial result", () => {
    const stillRunning: BackgroundState = {
      instanceId: "bgr_running",
      sessionId: "sess_running",
      status: "running",
      toolCallCount: 3,
      resultPreview: "Still working...",
      startedAt: Date.now() - 5_000,
      timeoutMs: 60_000,
    };
    const onlyRunning = new Map([["bgr_running", stillRunning]]);
    const noMessages = new Map<string, Message[]>();

    const outcome = bizar_collect(
      { instanceId: "bgr_running", timeoutMs: 1000 },
      onlyRunning,
      noMessages,
    ) as CollectResult;

    expect(outcome.status).toBe("running");
    expect(outcome.error).toContain("timed out");
    expect(outcome.result).toContain("Still working...");
  });

  it("timeoutMs clamped per §7.3 (MEDIUM-33)", () => {
    const freshInst: BackgroundState = {
      instanceId: "bgr_clamped",
      sessionId: "sess_clamped",
      status: "running",
      toolCallCount: 0,
      resultPreview: "",
      startedAt: Date.now(),
      timeoutMs: 60_000,
    };
    const registry = new Map([["bgr_clamped", freshInst]]);

    // Below minimum
    const tooShort = bizar_collect({ instanceId: "bgr_clamped", timeoutMs: 500 }, registry, new Map());
    expect(tooShort).toHaveProperty("error");

    // Above maximum
    const tooLong = bizar_collect({ instanceId: "bgr_clamped", timeoutMs: 2_000_000 }, registry, new Map());
    expect(tooLong).toHaveProperty("error");
  });
});
314
+
315
+ // ---------------------------------------------------------------------------
316
+ // Duration calculation
317
+ // ---------------------------------------------------------------------------
318
+
// The "bgr_done" fixture started ~120s ago and completed ~60s ago, so its
// reported duration should be about 60s; the bounds leave 1s of slack.
describe("bizar_collect — durationMs", () => {
  it("durationMs = completedAt - startedAt for terminal instances", () => {
    const outcome = bizar_collect({ instanceId: "bgr_done" }, instances, messages) as CollectResult;
    const elapsed = outcome.durationMs;
    expect(elapsed).toBeGreaterThanOrEqual(59_000);
    expect(elapsed).toBeLessThanOrEqual(61_000);
  });
});
326
+
327
+ // ---------------------------------------------------------------------------
328
+ // Unknown instance
329
+ // ---------------------------------------------------------------------------
330
+
// An instanceId that is absent from the instances map yields an error object.
describe("bizar_collect — unknown instance", () => {
  it("returns error for unknown instanceId", () => {
    const outcome = bizar_collect({ instanceId: "bgr_no_such" }, instances, messages);
    expect(outcome).toHaveProperty("error");
    const { error } = outcome as { error: string };
    expect(error).toContain("not found");
  });
});