@polderlabs/bizar-plugin 0.5.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +448 -0
- package/bun.lock +88 -0
- package/index.ts +1113 -0
- package/package.json +42 -0
- package/scripts/check-forbidden-imports.sh +33 -0
- package/src/background-state.ts +463 -0
- package/src/background.ts +964 -0
- package/src/commands-impl.ts +369 -0
- package/src/commands.ts +880 -0
- package/src/event-stream.ts +574 -0
- package/src/fingerprint.ts +120 -0
- package/src/handoff.ts +79 -0
- package/src/http-client.ts +467 -0
- package/src/logger.ts +144 -0
- package/src/loop.ts +176 -0
- package/src/options.ts +421 -0
- package/src/plan-fs.ts +323 -0
- package/src/report.ts +178 -0
- package/src/research-prompt.ts +35 -0
- package/src/serve.ts +476 -0
- package/src/settings.ts +349 -0
- package/src/state.ts +298 -0
- package/src/tools/bg-collect.ts +104 -0
- package/src/tools/bg-get-comments.ts +239 -0
- package/src/tools/bg-kill.ts +87 -0
- package/src/tools/bg-spawn.ts +263 -0
- package/src/tools/bg-status.ts +99 -0
- package/src/tools/plan-action.ts +767 -0
- package/src/tools/wait-for-feedback.ts +402 -0
- package/tests/attach-handler-bug.test.ts +166 -0
- package/tests/background-state.test.ts +277 -0
- package/tests/background.test.ts +402 -0
- package/tests/block.test.ts +193 -0
- package/tests/canonical-key-order.test.ts +71 -0
- package/tests/commands-impl.test.ts +442 -0
- package/tests/commands.test.ts +548 -0
- package/tests/config.test.ts +122 -0
- package/tests/dispose.test.ts +336 -0
- package/tests/event-stream.test.ts +409 -0
- package/tests/event.test.ts +262 -0
- package/tests/fingerprint.test.ts +161 -0
- package/tests/http-client.test.ts +403 -0
- package/tests/init-helpers.test.ts +203 -0
- package/tests/integration/slash-command.test.ts +348 -0
- package/tests/integration/tool-routing.test.ts +314 -0
- package/tests/loop.test.ts +397 -0
- package/tests/options.test.ts +274 -0
- package/tests/serve.test.ts +335 -0
- package/tests/settings.test.ts +351 -0
- package/tests/stall-think.test.ts +749 -0
- package/tests/state.test.ts +275 -0
- package/tests/tools/bg-collect.test.ts +337 -0
- package/tests/tools/bg-get-comments.test.ts +485 -0
- package/tests/tools/bg-kill.test.ts +231 -0
- package/tests/tools/bg-spawn.test.ts +311 -0
- package/tests/tools/bg-status.test.ts +216 -0
- package/tests/tools/plan-action.test.ts +599 -0
- package/tests/tools/wait-for-feedback.test.ts +390 -0
- package/tsconfig.json +29 -0
|
@@ -0,0 +1,275 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* state.test.ts
|
|
3
|
+
*
|
|
4
|
+
* Tests for StateStore: read/write round-trip, atomic writes, rolling window,
|
|
5
|
+
* corrupt-state fallback, and per-session mutex. Per §4.3, §4.7.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { describe, test, expect, beforeEach, afterEach } from "bun:test";
|
|
9
|
+
import { StateStore, SessionState, EMPTY_STATE } from "../src/state";
|
|
10
|
+
import { mkdirSync, rmSync, writeFileSync, existsSync, utimesSync } from "node:fs";
|
|
11
|
+
import path from "node:path";
|
|
12
|
+
|
|
13
|
+
// Minimal mock logger that collects all messages
|
|
14
|
+
/**
 * In-memory logger stand-in for the tests: instead of emitting anything it
 * keeps every entry so assertions can inspect what was logged.
 */
class MockLogger {
  /** Entries passed to `log`, in call order. */
  messages: Array<{ level: string; message: string }> = [];

  /** Appends the given entry (same object reference) to `messages`. */
  log(opts: MockLogger["messages"][number]): void {
    this.messages.push(opts);
  }
}
|
|
20
|
+
|
|
21
|
+
const TEST_DIR = "/tmp/bizar-state-test";
|
|
22
|
+
const TEST_SESSION_A = "session-a-123";
|
|
23
|
+
const TEST_SESSION_B = "session-b-456";
|
|
24
|
+
|
|
25
|
+
/**
 * Builds a SessionState fixture for TEST_SESSION_A with zeroed counters and
 * no tool calls; any field supplied in `overrides` replaces the default.
 */
function makeState(overrides: Partial<SessionState> = {}): SessionState {
  const defaults: SessionState = {
    sessionId: TEST_SESSION_A,
    parentAgent: null,
    startedAt: 0,
    lastActivityAt: 0,
    turnCount: 0,
    toolCalls: [],
    warningsIssued: 0,
    blocksTriggered: 0,
  };
  // Later spread wins, so overrides take precedence over the defaults.
  return { ...defaults, ...overrides };
}
|
|
38
|
+
|
|
39
|
+
/** Returns the path `<TEST_DIR>/<sessionId>.json` that these tests read and write directly. */
function stateFilePath(sessionId: string): string {
  const fileName = `${sessionId}.json`;
  return path.join(TEST_DIR, fileName);
}
|
|
42
|
+
|
|
43
|
+
describe("StateStore", () => {
|
|
44
|
+
let logger: MockLogger;
|
|
45
|
+
let store: StateStore;
|
|
46
|
+
|
|
47
|
+
beforeEach(() => {
|
|
48
|
+
// Fresh temp dir for each test
|
|
49
|
+
try { rmSync(TEST_DIR, { recursive: true, force: true }); } catch { /* ok */ }
|
|
50
|
+
mkdirSync(TEST_DIR, { recursive: true });
|
|
51
|
+
logger = new MockLogger();
|
|
52
|
+
store = new StateStore(TEST_DIR, logger);
|
|
53
|
+
});
|
|
54
|
+
|
|
55
|
+
afterEach(() => {
|
|
56
|
+
try { rmSync(TEST_DIR, { recursive: true, force: true }); } catch { /* ok */ }
|
|
57
|
+
});
|
|
58
|
+
|
|
59
|
+
// ── read/write round-trip ──────────────────────────────────────────────────
|
|
60
|
+
|
|
61
|
+
test("read/write round-trip preserves all fields", async () => {
|
|
62
|
+
const state = makeState({
|
|
63
|
+
sessionId: TEST_SESSION_A,
|
|
64
|
+
parentAgent: "odin",
|
|
65
|
+
startedAt: 1700000000000,
|
|
66
|
+
lastActivityAt: 1700000005000,
|
|
67
|
+
turnCount: 3,
|
|
68
|
+
toolCalls: [
|
|
69
|
+
{ tool: "read", fingerprint: "abc123", at: 1700000001000, outcome: "ok" },
|
|
70
|
+
{ tool: "edit", fingerprint: "def456", at: 1700000002000, outcome: "error" },
|
|
71
|
+
],
|
|
72
|
+
warningsIssued: 1,
|
|
73
|
+
blocksTriggered: 0,
|
|
74
|
+
});
|
|
75
|
+
|
|
76
|
+
await store.save(state);
|
|
77
|
+
const loaded = await store.load(TEST_SESSION_A);
|
|
78
|
+
|
|
79
|
+
expect(loaded.sessionId).toBe(TEST_SESSION_A);
|
|
80
|
+
expect(loaded.parentAgent).toBe("odin");
|
|
81
|
+
expect(loaded.startedAt).toBe(1700000000000);
|
|
82
|
+
expect(loaded.lastActivityAt).toBe(1700000005000);
|
|
83
|
+
expect(loaded.turnCount).toBe(3);
|
|
84
|
+
expect(loaded.toolCalls).toHaveLength(2);
|
|
85
|
+
expect(loaded.toolCalls[0]!.fingerprint).toBe("abc123");
|
|
86
|
+
expect(loaded.toolCalls[1]!.outcome).toBe("error");
|
|
87
|
+
expect(loaded.warningsIssued).toBe(1);
|
|
88
|
+
expect(loaded.blocksTriggered).toBe(0);
|
|
89
|
+
});
|
|
90
|
+
|
|
91
|
+
test("load returns EMPTY_STATE when file does not exist", async () => {
|
|
92
|
+
const loaded = await store.load("nonexistent-session-xyz");
|
|
93
|
+
expect(loaded.sessionId).toBe("nonexistent-session-xyz");
|
|
94
|
+
expect(loaded.parentAgent).toBe(null);
|
|
95
|
+
expect(loaded.startedAt).toBe(0);
|
|
96
|
+
expect(loaded.lastActivityAt).toBe(0);
|
|
97
|
+
expect(loaded.turnCount).toBe(0);
|
|
98
|
+
expect(loaded.toolCalls).toHaveLength(0);
|
|
99
|
+
expect(loaded.warningsIssued).toBe(0);
|
|
100
|
+
expect(loaded.blocksTriggered).toBe(0);
|
|
101
|
+
});
|
|
102
|
+
|
|
103
|
+
// ── rolling window ─────────────────────────────────────────────────────────
|
|
104
|
+
|
|
105
|
+
test("toolCalls array is pruned to last 50 entries on write", async () => {
|
|
106
|
+
const manyCalls = Array.from({ length: 60 }, (_, i) => ({
|
|
107
|
+
tool: "read",
|
|
108
|
+
fingerprint: `fp-${i}`,
|
|
109
|
+
at: 1000 + i,
|
|
110
|
+
outcome: "ok" as const,
|
|
111
|
+
}));
|
|
112
|
+
|
|
113
|
+
const state = makeState({ toolCalls: manyCalls });
|
|
114
|
+
await store.save(state);
|
|
115
|
+
const loaded = await store.load(TEST_SESSION_A);
|
|
116
|
+
|
|
117
|
+
expect(loaded.toolCalls).toHaveLength(50);
|
|
118
|
+
// First entry should be the 11th original (index 10), since indices 0-9 are pruned
|
|
119
|
+
expect(loaded.toolCalls[0]!.fingerprint).toBe("fp-10");
|
|
120
|
+
// Last entry should be the 60th original (index 59)
|
|
121
|
+
expect(loaded.toolCalls[49]!.fingerprint).toBe("fp-59");
|
|
122
|
+
});
|
|
123
|
+
|
|
124
|
+
test("fewer than 50 tool calls are preserved intact", async () => {
|
|
125
|
+
const calls = Array.from({ length: 10 }, (_, i) => ({
|
|
126
|
+
tool: "read",
|
|
127
|
+
fingerprint: `fp-${i}`,
|
|
128
|
+
at: 1000 + i,
|
|
129
|
+
outcome: "ok" as const,
|
|
130
|
+
}));
|
|
131
|
+
const state = makeState({ toolCalls: calls });
|
|
132
|
+
await store.save(state);
|
|
133
|
+
const loaded = await store.load(TEST_SESSION_A);
|
|
134
|
+
expect(loaded.toolCalls).toHaveLength(10);
|
|
135
|
+
});
|
|
136
|
+
|
|
137
|
+
// ── corrupt-state fallback ─────────────────────────────────────────────────
|
|
138
|
+
|
|
139
|
+
test("corrupt JSON file → warning logged, in-memory state starts empty, file preserved", async () => {
|
|
140
|
+
const filePath = stateFilePath(TEST_SESSION_A);
|
|
141
|
+
writeFileSync(filePath, "{ invalid json }", "utf8");
|
|
142
|
+
|
|
143
|
+
const loaded = await store.load(TEST_SESSION_A);
|
|
144
|
+
|
|
145
|
+
// Returns empty state with the sessionId from filename
|
|
146
|
+
expect(loaded.sessionId).toBe(TEST_SESSION_A);
|
|
147
|
+
expect(loaded.parentAgent).toBe(null);
|
|
148
|
+
expect(loaded.startedAt).toBe(0);
|
|
149
|
+
expect(loaded.toolCalls).toHaveLength(0);
|
|
150
|
+
|
|
151
|
+
// Warning was logged
|
|
152
|
+
expect(logger.messages.some((m) => m.level === "warn" && m.message.includes("corrupt"))).toBe(true);
|
|
153
|
+
|
|
154
|
+
// File is preserved for forensic inspection
|
|
155
|
+
expect(existsSync(filePath)).toBe(true);
|
|
156
|
+
});
|
|
157
|
+
|
|
158
|
+
test("valid JSON but wrong schema (missing toolCalls) → corrupt fallback", async () => {
|
|
159
|
+
const filePath = stateFilePath(TEST_SESSION_A);
|
|
160
|
+
writeFileSync(filePath, JSON.stringify({ sessionId: TEST_SESSION_A, parentAgent: null }), "utf8");
|
|
161
|
+
|
|
162
|
+
const loaded = await store.load(TEST_SESSION_A);
|
|
163
|
+
expect(loaded.sessionId).toBe(TEST_SESSION_A);
|
|
164
|
+
expect(loaded.toolCalls).toHaveLength(0);
|
|
165
|
+
expect(logger.messages.some((m) => m.level === "warn")).toBe(true);
|
|
166
|
+
});
|
|
167
|
+
|
|
168
|
+
// ── per-session mutex ──────────────────────────────────────────────────────
|
|
169
|
+
|
|
170
|
+
test("concurrent writes to same session are serialized (no lost updates)", async () => {
  // Verify the per-session mutex serializes concurrent operations.
  // Counting increments alone cannot detect a missing lock (the total is 4
  // whether or not the callbacks interleave), so we also track how many
  // callbacks are inside the critical section at the same time.
  let counter = 0;
  let inFlight = 0;
  let maxInFlight = 0;

  const criticalSection = async () => {
    inFlight += 1;
    maxInFlight = Math.max(maxInFlight, inFlight);
    counter += 1;
    // Yield to the event loop so an unserialized second task could sneak in.
    await new Promise((r) => setTimeout(r, 10));
    counter += 1;
    inFlight -= 1;
  };

  await Promise.all([
    store.withLock(TEST_SESSION_A, criticalSection),
    store.withLock(TEST_SESSION_A, criticalSection),
  ]);

  expect(counter).toBe(4); // each task increments twice
  expect(maxInFlight).toBe(1); // never more than one task holding the lock
});
|
|
188
|
+
|
|
189
|
+
test("different sessions do not block each other", async () => {
  const storeA = new StateStore(TEST_DIR, logger);
  const storeB = new StateStore(TEST_DIR, logger);

  const [resultA, resultB] = await Promise.all([
    storeA.load(TEST_SESSION_A),
    storeB.load(TEST_SESSION_B),
  ]);

  // Each session loaded its own empty state independently
  expect(resultA.sessionId).toBe(TEST_SESSION_A);
  expect(resultB.sessionId).toBe(TEST_SESSION_B);

  // Loading through two separate stores never contends on a lock, so also
  // exercise the mutex directly: keep session A's lock held on one store and
  // check that session B's lock on that same store can still be acquired.
  // If the lock were global, the inner await would hang until the test times out.
  let releaseA: () => void = () => {};
  const gate = new Promise<void>((resolve) => {
    releaseA = resolve;
  });
  const holdingA = store.withLock(TEST_SESSION_A, async () => {
    await gate;
  });

  let sessionBRan = false;
  await store.withLock(TEST_SESSION_B, async () => {
    sessionBRan = true;
  });
  expect(sessionBRan).toBe(true);

  releaseA();
  await holdingA;
});
|
|
202
|
+
|
|
203
|
+
// ── atomic write ───────────────────────────────────────────────────────────
|
|
204
|
+
|
|
205
|
+
test("save uses atomic rename (no partial file on disk)", async () => {
|
|
206
|
+
const state = makeState({ parentAgent: "odin", startedAt: 1000 });
|
|
207
|
+
await store.save(state);
|
|
208
|
+
|
|
209
|
+
const filePath = stateFilePath(TEST_SESSION_A);
|
|
210
|
+
expect(existsSync(filePath)).toBe(true);
|
|
211
|
+
// tmp file should not exist after rename
|
|
212
|
+
expect(existsSync(`${filePath}.tmp`)).toBe(false);
|
|
213
|
+
|
|
214
|
+
const content = await import("node:fs/promises").then((fs) =>
|
|
215
|
+
fs.readFile(filePath, "utf8")
|
|
216
|
+
);
|
|
217
|
+
const parsed = JSON.parse(content);
|
|
218
|
+
expect(parsed.parentAgent).toBe("odin");
|
|
219
|
+
});
|
|
220
|
+
|
|
221
|
+
// ── delete ─────────────────────────────────────────────────────────────────
|
|
222
|
+
|
|
223
|
+
test("delete removes the state file", async () => {
|
|
224
|
+
const state = makeState({ parentAgent: "odin" });
|
|
225
|
+
await store.save(state);
|
|
226
|
+
expect(existsSync(stateFilePath(TEST_SESSION_A))).toBe(true);
|
|
227
|
+
|
|
228
|
+
await store.delete(TEST_SESSION_A);
|
|
229
|
+
expect(existsSync(stateFilePath(TEST_SESSION_A))).toBe(false);
|
|
230
|
+
});
|
|
231
|
+
|
|
232
|
+
test("delete is idempotent (no error if file already gone)", async () => {
|
|
233
|
+
await store.delete("already-gone-session");
|
|
234
|
+
// Should not throw
|
|
235
|
+
});
|
|
236
|
+
|
|
237
|
+
// ── cleanup ────────────────────────────────────────────────────────────────
|
|
238
|
+
|
|
239
|
+
test("cleanup removes files older than maxAgeDays", async () => {
|
|
240
|
+
const filePath = stateFilePath(TEST_SESSION_A);
|
|
241
|
+
writeFileSync(filePath, JSON.stringify(makeState({ lastActivityAt: Date.now() - 10 * 24 * 60 * 60 * 1000 })), "utf8");
|
|
242
|
+
|
|
243
|
+
// Set the file's mtime to 10 days ago so cleanup considers it stale
|
|
244
|
+
const tenDaysAgo = (Date.now() - 10 * 24 * 60 * 60 * 1000) / 1000;
|
|
245
|
+
utimesSync(filePath, tenDaysAgo, tenDaysAgo);
|
|
246
|
+
|
|
247
|
+
const deleted = await store.cleanup(7);
|
|
248
|
+
expect(deleted).toBe(1);
|
|
249
|
+
expect(existsSync(filePath)).toBe(false);
|
|
250
|
+
});
|
|
251
|
+
|
|
252
|
+
test("cleanup removes orphaned sessions (sessionId not in validSessionIds)", async () => {
|
|
253
|
+
const filePath = stateFilePath(TEST_SESSION_A);
|
|
254
|
+
writeFileSync(filePath, JSON.stringify(makeState({ lastActivityAt: Date.now() })), "utf8");
|
|
255
|
+
|
|
256
|
+
const deleted = await store.cleanup(7, new Set(["some-other-session"]));
|
|
257
|
+
expect(deleted).toBe(1);
|
|
258
|
+
expect(existsSync(filePath)).toBe(false);
|
|
259
|
+
});
|
|
260
|
+
|
|
261
|
+
test("cleanup keeps files that are recent AND in validSessionIds", async () => {
|
|
262
|
+
const filePath = stateFilePath(TEST_SESSION_A);
|
|
263
|
+
writeFileSync(filePath, JSON.stringify(makeState({ lastActivityAt: Date.now() })), "utf8");
|
|
264
|
+
|
|
265
|
+
const deleted = await store.cleanup(7, new Set([TEST_SESSION_A]));
|
|
266
|
+
expect(deleted).toBe(0);
|
|
267
|
+
expect(existsSync(filePath)).toBe(true);
|
|
268
|
+
});
|
|
269
|
+
|
|
270
|
+
test("cleanup returns 0 when state dir cannot be read", async () => {
|
|
271
|
+
const unreadableStore = new StateStore("/nonexistent-dir-xyz", logger);
|
|
272
|
+
const deleted = await unreadableStore.cleanup(7);
|
|
273
|
+
expect(deleted).toBe(0);
|
|
274
|
+
});
|
|
275
|
+
});
|
|
@@ -0,0 +1,337 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* bizar_collect tool tests.
|
|
3
|
+
*
|
|
4
|
+
* Tests: timeout behavior (MEDIUM-31), killed/done/failed result,
|
|
5
|
+
* loop guard marker prepended at collect time (MEDIUM-30),
|
|
6
|
+
* timeoutMs clamping (MEDIUM-33), result construction from messages
|
|
7
|
+
* (MEDIUM-19), collect on already-killed instance (HIGH-37).
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import { describe, it, expect } from "bun:test";
|
|
11
|
+
|
|
12
|
+
// ---------------------------------------------------------------------------
|
|
13
|
+
// Types
|
|
14
|
+
// ---------------------------------------------------------------------------
|
|
15
|
+
|
|
16
|
+
/**
 * Status values a background instance can carry in these tests.
 * `bizar_collect` below treats "done", "failed", "killed" and "timed_out" as terminal.
 */
type BackgroundStatus =
  | "pending"
  | "running"
  | "done"
  | "failed"
  | "killed"
  | "timed_out";
|
|
17
|
+
|
|
18
|
+
/** Test-local shape of one background instance record as consumed by `bizar_collect`. */
interface BackgroundState {
  /** Identifier echoed back in the collect result. */
  instanceId: string;
  /** Key used to look up this instance's messages. */
  sessionId: string;
  status: BackgroundStatus;
  /** Copied verbatim into the collect result. */
  toolCallCount: number;
  /** Epoch milliseconds; start of the duration calculation. */
  startedAt: number;
  /** NOTE(review): not read by the fake collect in this file — presumably the instance's own run timeout; confirm. */
  timeoutMs: number;

  /** When set, the collect result is prefixed with a loop-guard marker naming this tool. */
  loopGuardTool?: string;
  error?: string;
  /** Returned as the partial result when the instance is not yet terminal. */
  resultPreview?: string;
  resultMessageIds?: string[];
  /** Epoch milliseconds; when absent the duration is measured up to "now". */
  completedAt?: number;
}
|
|
31
|
+
|
|
32
|
+
/** Minimal message part used by these tests: only text parts are modelled. */
interface MessagePart {
  text: string;
  type: "text";
}
|
|
36
|
+
|
|
37
|
+
/** One message in a session: who wrote it plus its ordered parts. */
interface Message {
  /** Author metadata; only the role is needed here. */
  info: {
    role: "user" | "assistant";
  };
  parts: MessagePart[];
}
|
|
41
|
+
|
|
42
|
+
// ---------------------------------------------------------------------------
|
|
43
|
+
// Result construction algorithm (mirrors §4.4)
|
|
44
|
+
// ---------------------------------------------------------------------------
|
|
45
|
+
|
|
46
|
+
/**
 * Builds the collect result text from a session's messages.
 *
 * Only assistant messages contribute; their text parts are joined with "\n"
 * in message order. When `loopGuardTool` is a non-empty string, a loop-guard
 * marker line naming that tool is placed in front of the text.
 *
 * @param messages Session messages in chronological order.
 * @param loopGuardTool Tool name captured by the loop guard, if any.
 * @returns The joined assistant text, optionally prefixed by the marker.
 */
function constructResult(messages: Message[], loopGuardTool?: string): string {
  const chunks: string[] = [];
  for (const message of messages) {
    if (message.info.role !== "assistant") {
      continue; // user messages never appear in the result
    }
    for (const part of message.parts) {
      if (part.type === "text") {
        chunks.push(part.text);
      }
    }
  }
  const body = chunks.join("\n");

  // The marker is added here, at collect time, not stored with the messages.
  return loopGuardTool
    ? `[loop guard: 12 identical calls to ${loopGuardTool}]\n${body}`
    : body;
}
|
|
63
|
+
|
|
64
|
+
/**
 * Validates a collect timeout.
 *
 * Despite the name, out-of-range values are rejected rather than clamped.
 * Non-finite input is rejected too: `NaN` fails both range comparisons and
 * would otherwise be returned as if it were a valid timeout.
 *
 * @param timeoutMs Requested timeout in milliseconds.
 * @returns `timeoutMs` unchanged when it lies within [1000, 1_800_000].
 * @throws Error when `timeoutMs` is not a finite number inside that range.
 */
function clampTimeout(timeoutMs: number): number {
  const MIN = 1000;
  const MAX = 1_800_000;
  if (!Number.isFinite(timeoutMs) || timeoutMs < MIN || timeoutMs > MAX) {
    throw new Error(`timeoutMs must be between ${MIN} (1s) and ${MAX} (30min). Got ${timeoutMs}.`);
  }
  return timeoutMs;
}
|
|
72
|
+
|
|
73
|
+
// ---------------------------------------------------------------------------
|
|
74
|
+
// Fake bizar_collect
|
|
75
|
+
// ---------------------------------------------------------------------------
|
|
76
|
+
|
|
77
|
+
/** Successful return shape of the fake `bizar_collect` in this file. */
interface CollectResult {
  instanceId: string;
  status: BackgroundStatus;
  /** Assistant text for terminal instances, or the stored preview otherwise. */
  result: string;
  toolCallCount: number;
  /** Elapsed milliseconds since the instance's `startedAt`. */
  durationMs: number;
  /** The instance's own error, or the collect-timeout message. */
  error?: string;
}
|
|
85
|
+
|
|
86
|
+
/**
 * Test double for the collect tool, driven entirely by in-memory maps.
 *
 * Order of checks: unknown instance, then timeout validation (applied even
 * when the instance is already terminal), then the status branch.
 *
 * @param args Instance to collect and an optional timeout (defaults to 60_000 ms).
 * @param instances Instance records keyed by instanceId.
 * @param messages Session messages keyed by sessionId.
 * @returns A CollectResult, or `{ error }` for an unknown instance or an invalid timeout.
 */
function bizar_collect(
  args: { instanceId: string; timeoutMs?: number },
  instances: Map<string, BackgroundState>,
  messages: Map<string, Message[]>,
): CollectResult | { error: string } {
  const instance = instances.get(args.instanceId);
  if (!instance) {
    return { error: `Instance ${args.instanceId} not found` };
  }

  let effectiveTimeout: number;
  try {
    effectiveTimeout = clampTimeout(args.timeoutMs ?? 60_000);
  } catch (e: unknown) {
    return { error: (e as Error).message };
  }

  switch (instance.status) {
    // Terminal states: answer immediately from what is already stored.
    case "done":
    case "failed":
    case "killed":
    case "timed_out": {
      const transcript = messages.get(instance.sessionId) ?? [];
      const result = constructResult(transcript, instance.loopGuardTool);
      const finishedAt = instance.completedAt ?? Date.now();
      return {
        instanceId: instance.instanceId,
        status: instance.status,
        result,
        toolCallCount: instance.toolCallCount,
        durationMs: finishedAt - instance.startedAt,
        error: instance.error,
      };
    }
    // Anything else is treated as still running: simulate the collect timing out.
    default:
      return {
        instanceId: instance.instanceId,
        status: "running",
        result: instance.resultPreview ?? "",
        toolCallCount: instance.toolCallCount,
        durationMs: Date.now() - instance.startedAt,
        error: `collect timed out after ${effectiveTimeout}ms`,
      };
  }
}
|
|
128
|
+
|
|
129
|
+
// ---------------------------------------------------------------------------
|
|
130
|
+
// In-memory test data
|
|
131
|
+
// ---------------------------------------------------------------------------
|
|
132
|
+
|
|
133
|
+
/**
 * Builds a message list fixture: one fixed user prompt ("Do the task")
 * followed by one single-text-part assistant message per entry of
 * `assistantTexts`, in the given order.
 */
function makeMessages(assistantTexts: string[]): Message[] {
  const transcript: Message[] = [
    { info: { role: "user" }, parts: [{ type: "text", text: "Do the task" }] },
  ];
  for (const reply of assistantTexts) {
    transcript.push({
      info: { role: "assistant" },
      parts: [{ type: "text", text: reply }],
    });
  }
  return transcript;
}
|
|
145
|
+
|
|
146
|
+
const instances = new Map<string, BackgroundState>();
|
|
147
|
+
const messages = new Map<string, Message[]>();
|
|
148
|
+
|
|
149
|
+
const doneInst: BackgroundState = {
|
|
150
|
+
instanceId: "bgr_done",
|
|
151
|
+
sessionId: "sess_done",
|
|
152
|
+
status: "done",
|
|
153
|
+
toolCallCount: 5,
|
|
154
|
+
loopGuardTool: undefined,
|
|
155
|
+
error: undefined,
|
|
156
|
+
resultPreview: "Research complete.",
|
|
157
|
+
resultMessageIds: ["msg_01", "msg_02"],
|
|
158
|
+
startedAt: Date.now() - 120_000,
|
|
159
|
+
completedAt: Date.now() - 60_000,
|
|
160
|
+
timeoutMs: 300_000,
|
|
161
|
+
};
|
|
162
|
+
instances.set("bgr_done", doneInst);
|
|
163
|
+
messages.set("sess_done", makeMessages(["The research shows that X is true.", "Findings have been saved."]));
|
|
164
|
+
|
|
165
|
+
const loopGuardInst: BackgroundState = {
|
|
166
|
+
instanceId: "bgr_loop",
|
|
167
|
+
sessionId: "sess_loop",
|
|
168
|
+
status: "failed",
|
|
169
|
+
toolCallCount: 12,
|
|
170
|
+
loopGuardTool: "read",
|
|
171
|
+
error: "Loop protection: 12 identical calls to read",
|
|
172
|
+
resultPreview: "",
|
|
173
|
+
resultMessageIds: ["msg_loop_1"],
|
|
174
|
+
startedAt: Date.now() - 60_000,
|
|
175
|
+
completedAt: Date.now(),
|
|
176
|
+
timeoutMs: 300_000,
|
|
177
|
+
};
|
|
178
|
+
instances.set("bgr_loop", loopGuardInst);
|
|
179
|
+
messages.set("sess_loop", makeMessages(["I tried to read the file but got stuck."]));
|
|
180
|
+
|
|
181
|
+
const killedInst: BackgroundState = {
|
|
182
|
+
instanceId: "bgr_killed",
|
|
183
|
+
sessionId: "sess_killed",
|
|
184
|
+
status: "killed",
|
|
185
|
+
toolCallCount: 2,
|
|
186
|
+
loopGuardTool: undefined,
|
|
187
|
+
error: undefined,
|
|
188
|
+
resultPreview: "Partial result...",
|
|
189
|
+
resultMessageIds: ["msg_k_1"],
|
|
190
|
+
startedAt: Date.now() - 30_000,
|
|
191
|
+
completedAt: Date.now(),
|
|
192
|
+
timeoutMs: 300_000,
|
|
193
|
+
};
|
|
194
|
+
instances.set("bgr_killed", killedInst);
|
|
195
|
+
messages.set("sess_killed", makeMessages(["Partial work done before kill."]));
|
|
196
|
+
|
|
197
|
+
// ---------------------------------------------------------------------------
|
|
198
|
+
// Result construction (MEDIUM-19)
|
|
199
|
+
// ---------------------------------------------------------------------------
|
|
200
|
+
|
|
201
|
+
describe("bizar_collect — result construction (MEDIUM-19)", () => {
|
|
202
|
+
it("concatenates assistant text parts in order", () => {
|
|
203
|
+
const msgs = makeMessages(["First response.", "Second response."]);
|
|
204
|
+
const result = constructResult(msgs);
|
|
205
|
+
expect(result).toBe("First response.\nSecond response.");
|
|
206
|
+
});
|
|
207
|
+
|
|
208
|
+
it("skips user messages", () => {
|
|
209
|
+
const msgs = makeMessages(["Assistant text"]);
|
|
210
|
+
const result = constructResult(msgs);
|
|
211
|
+
expect(result).not.toContain("Do the task");
|
|
212
|
+
expect(result).toBe("Assistant text");
|
|
213
|
+
});
|
|
214
|
+
|
|
215
|
+
it("prepends loop guard marker when loopGuardTool is set (MEDIUM-30)", () => {
|
|
216
|
+
const msgs = makeMessages(["I got stuck in a loop."]);
|
|
217
|
+
const result = constructResult(msgs, "read");
|
|
218
|
+
expect(result).toStartWith("[loop guard: 12 identical calls to read]");
|
|
219
|
+
});
|
|
220
|
+
|
|
221
|
+
it("marker is NOT in resultPreview or resultMessageIds — added at collect time only (MEDIUM-30)", () => {
  // Verify that the marker is NOT in the stored data
  const inst = loopGuardInst;
  expect(inst.loopGuardTool).toBe("read");
  expect(inst.resultPreview).not.toContain("[loop guard:");
  expect(inst.resultMessageIds).toBeDefined();
  // Checking that the ids exist is not enough: each stored id must itself
  // be free of the marker for the claim in the test title to hold.
  for (const messageId of inst.resultMessageIds ?? []) {
    expect(messageId).not.toContain("[loop guard:");
  }
});
|
|
228
|
+
|
|
229
|
+
it("result without loop guard has no marker prefix", () => {
|
|
230
|
+
const msgs = makeMessages(["All good!"]);
|
|
231
|
+
const result = constructResult(msgs);
|
|
232
|
+
expect(result).not.toStartWith("[loop guard:");
|
|
233
|
+
});
|
|
234
|
+
});
|
|
235
|
+
|
|
236
|
+
// ---------------------------------------------------------------------------
|
|
237
|
+
// Collect on terminal state
|
|
238
|
+
// ---------------------------------------------------------------------------
|
|
239
|
+
|
|
240
|
+
describe("bizar_collect — terminal state", () => {
|
|
241
|
+
it("done instance returns the result immediately (no waiting)", () => {
|
|
242
|
+
const result = bizar_collect({ instanceId: "bgr_done" }, instances, messages) as CollectResult;
|
|
243
|
+
expect(result.status).toBe("done");
|
|
244
|
+
expect(result.result).toContain("research shows that X is true");
|
|
245
|
+
});
|
|
246
|
+
|
|
247
|
+
it("failed instance with loop guard returns marker + result (MEDIUM-30)", () => {
|
|
248
|
+
const result = bizar_collect({ instanceId: "bgr_loop" }, instances, messages) as CollectResult;
|
|
249
|
+
expect(result.status).toBe("failed");
|
|
250
|
+
expect(result.result).toStartWith("[loop guard: 12 identical calls to read]");
|
|
251
|
+
expect(result.error).toContain("Loop protection");
|
|
252
|
+
});
|
|
253
|
+
|
|
254
|
+
it("killed instance returns immediately (HIGH-37)", () => {
|
|
255
|
+
const result = bizar_collect({ instanceId: "bgr_killed" }, instances, messages) as CollectResult;
|
|
256
|
+
expect(result.status).toBe("killed");
|
|
257
|
+
expect(result.result).toContain("Partial work done before kill.");
|
|
258
|
+
});
|
|
259
|
+
|
|
260
|
+
it("collect on already-killed instance makes no HTTP calls (HIGH-37)", () => {
|
|
261
|
+
// Verified by the fact we return immediately from in-memory state
|
|
262
|
+
const result = bizar_collect({ instanceId: "bgr_killed" }, instances, messages) as CollectResult;
|
|
263
|
+
expect(result.status).toBe("killed");
|
|
264
|
+
// error is undefined (not "no error property"), which is falsy — not an error
|
|
265
|
+
expect(result.error).toBeUndefined();
|
|
266
|
+
});
|
|
267
|
+
});
|
|
268
|
+
|
|
269
|
+
// ---------------------------------------------------------------------------
|
|
270
|
+
// Collect on running instance (timeout)
|
|
271
|
+
// ---------------------------------------------------------------------------
|
|
272
|
+
|
|
273
|
+
describe("bizar_collect — timeout on running instance", () => {
|
|
274
|
+
it("running instance times out and returns partial result", () => {
|
|
275
|
+
const runningInst: BackgroundState = {
|
|
276
|
+
instanceId: "bgr_running",
|
|
277
|
+
sessionId: "sess_running",
|
|
278
|
+
status: "running",
|
|
279
|
+
toolCallCount: 3,
|
|
280
|
+
resultPreview: "Still working...",
|
|
281
|
+
startedAt: Date.now() - 5_000,
|
|
282
|
+
timeoutMs: 60_000,
|
|
283
|
+
};
|
|
284
|
+
const runningMap = new Map([["bgr_running", runningInst]]);
|
|
285
|
+
const runningMsgs = new Map<string, Message[]>();
|
|
286
|
+
|
|
287
|
+
const result = bizar_collect({ instanceId: "bgr_running", timeoutMs: 1000 }, runningMap, runningMsgs) as CollectResult;
|
|
288
|
+
expect(result.status).toBe("running");
|
|
289
|
+
expect(result.error).toContain("timed out");
|
|
290
|
+
expect(result.result).toContain("Still working...");
|
|
291
|
+
});
|
|
292
|
+
|
|
293
|
+
it("timeoutMs clamped per §7.3 (MEDIUM-33)", () => {
|
|
294
|
+
const runningInst: BackgroundState = {
|
|
295
|
+
instanceId: "bgr_clamped",
|
|
296
|
+
sessionId: "sess_clamped",
|
|
297
|
+
status: "running",
|
|
298
|
+
toolCallCount: 0,
|
|
299
|
+
resultPreview: "",
|
|
300
|
+
startedAt: Date.now(),
|
|
301
|
+
timeoutMs: 60_000,
|
|
302
|
+
};
|
|
303
|
+
const map = new Map([["bgr_clamped", runningInst]]);
|
|
304
|
+
|
|
305
|
+
// Below minimum
|
|
306
|
+
const r1 = bizar_collect({ instanceId: "bgr_clamped", timeoutMs: 500 }, map, new Map());
|
|
307
|
+
expect(r1).toHaveProperty("error");
|
|
308
|
+
|
|
309
|
+
// Above maximum
|
|
310
|
+
const r2 = bizar_collect({ instanceId: "bgr_clamped", timeoutMs: 2_000_000 }, map, new Map());
|
|
311
|
+
expect(r2).toHaveProperty("error");
|
|
312
|
+
});
|
|
313
|
+
});
|
|
314
|
+
|
|
315
|
+
// ---------------------------------------------------------------------------
|
|
316
|
+
// Duration calculation
|
|
317
|
+
// ---------------------------------------------------------------------------
|
|
318
|
+
|
|
319
|
+
describe("bizar_collect — durationMs", () => {
|
|
320
|
+
it("durationMs = completedAt - startedAt for terminal instances", () => {
|
|
321
|
+
const result = bizar_collect({ instanceId: "bgr_done" }, instances, messages) as CollectResult;
|
|
322
|
+
expect(result.durationMs).toBeGreaterThanOrEqual(59_000);
|
|
323
|
+
expect(result.durationMs).toBeLessThanOrEqual(61_000);
|
|
324
|
+
});
|
|
325
|
+
});
|
|
326
|
+
|
|
327
|
+
// ---------------------------------------------------------------------------
|
|
328
|
+
// Unknown instance
|
|
329
|
+
// ---------------------------------------------------------------------------
|
|
330
|
+
|
|
331
|
+
describe("bizar_collect — unknown instance", () => {
|
|
332
|
+
it("returns error for unknown instanceId", () => {
|
|
333
|
+
const result = bizar_collect({ instanceId: "bgr_no_such" }, instances, messages);
|
|
334
|
+
expect(result).toHaveProperty("error");
|
|
335
|
+
expect((result as { error: string }).error).toContain("not found");
|
|
336
|
+
});
|
|
337
|
+
});
|