@polderlabs/bizar-plugin 0.5.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +448 -0
- package/bun.lock +88 -0
- package/index.ts +1113 -0
- package/package.json +42 -0
- package/scripts/check-forbidden-imports.sh +33 -0
- package/src/background-state.ts +463 -0
- package/src/background.ts +964 -0
- package/src/commands-impl.ts +369 -0
- package/src/commands.ts +880 -0
- package/src/event-stream.ts +574 -0
- package/src/fingerprint.ts +120 -0
- package/src/handoff.ts +79 -0
- package/src/http-client.ts +467 -0
- package/src/logger.ts +144 -0
- package/src/loop.ts +176 -0
- package/src/options.ts +421 -0
- package/src/plan-fs.ts +323 -0
- package/src/report.ts +178 -0
- package/src/research-prompt.ts +35 -0
- package/src/serve.ts +476 -0
- package/src/settings.ts +349 -0
- package/src/state.ts +298 -0
- package/src/tools/bg-collect.ts +104 -0
- package/src/tools/bg-get-comments.ts +239 -0
- package/src/tools/bg-kill.ts +87 -0
- package/src/tools/bg-spawn.ts +263 -0
- package/src/tools/bg-status.ts +99 -0
- package/src/tools/plan-action.ts +767 -0
- package/src/tools/wait-for-feedback.ts +402 -0
- package/tests/attach-handler-bug.test.ts +166 -0
- package/tests/background-state.test.ts +277 -0
- package/tests/background.test.ts +402 -0
- package/tests/block.test.ts +193 -0
- package/tests/canonical-key-order.test.ts +71 -0
- package/tests/commands-impl.test.ts +442 -0
- package/tests/commands.test.ts +548 -0
- package/tests/config.test.ts +122 -0
- package/tests/dispose.test.ts +336 -0
- package/tests/event-stream.test.ts +409 -0
- package/tests/event.test.ts +262 -0
- package/tests/fingerprint.test.ts +161 -0
- package/tests/http-client.test.ts +403 -0
- package/tests/init-helpers.test.ts +203 -0
- package/tests/integration/slash-command.test.ts +348 -0
- package/tests/integration/tool-routing.test.ts +314 -0
- package/tests/loop.test.ts +397 -0
- package/tests/options.test.ts +274 -0
- package/tests/serve.test.ts +335 -0
- package/tests/settings.test.ts +351 -0
- package/tests/stall-think.test.ts +749 -0
- package/tests/state.test.ts +275 -0
- package/tests/tools/bg-collect.test.ts +337 -0
- package/tests/tools/bg-get-comments.test.ts +485 -0
- package/tests/tools/bg-kill.test.ts +231 -0
- package/tests/tools/bg-spawn.test.ts +311 -0
- package/tests/tools/bg-status.test.ts +216 -0
- package/tests/tools/plan-action.test.ts +599 -0
- package/tests/tools/wait-for-feedback.test.ts +390 -0
- package/tsconfig.json +29 -0
|
@@ -0,0 +1,749 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* stall-think.test.ts
|
|
3
|
+
*
|
|
4
|
+
* v0.3.0 stall timeout + thinking-loop protection tests.
|
|
5
|
+
*
|
|
6
|
+
* Groups:
|
|
7
|
+
* 1. researchInterventionPrompt() — 5 tests
|
|
8
|
+
* 2. normalizeOptions() v0.3.0 fields — 6 tests
|
|
9
|
+
* 3. BackgroundState schema backfill — 3 tests
|
|
10
|
+
* 4. Stall + thinking-loop detection logic — 7 tests
|
|
11
|
+
* 5. bg-status toView v0.3.0 fields — 2 tests
|
|
12
|
+
*
|
|
13
|
+
* Total: 23 tests
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
import { describe, it, expect, beforeEach, afterEach } from "bun:test";
|
|
17
|
+
import { writeFileSync, mkdirSync, unlinkSync, rmSync } from "node:fs";
|
|
18
|
+
import path from "node:path";
|
|
19
|
+
|
|
20
|
+
// ---------------------------------------------------------------------------
|
|
21
|
+
// Group 1 — researchInterventionPrompt
|
|
22
|
+
// ---------------------------------------------------------------------------
|
|
23
|
+
|
|
24
|
+
import { researchInterventionPrompt } from "../src/research-prompt.js";
|
|
25
|
+
|
|
26
|
+
/**
 * Group 1 — researchInterventionPrompt(durationMs).
 *
 * Checks the banner text, the duration formatting ("Xm Ys" / "Ys"), the
 * clamping of non-positive durations and the three suggested actions.
 */
describe("researchInterventionPrompt", () => {
  it("returns a string containing '[SYSTEM REMINDER — Thinking Loop Detected]'", () => {
    const result = researchInterventionPrompt(60_000);
    expect(result).toContain("[SYSTEM REMINDER — Thinking Loop Detected]");
  });

  it("includes the duration formatted as 'Xm Ys' for multi-minute durations", () => {
    const result = researchInterventionPrompt(330_000); // 5m 30s
    expect(result).toContain("5m 30s");
  });

  it("formats under-a-minute durations as 'Ys' (not '0m Ys')", () => {
    const result = researchInterventionPrompt(45_000); // 45s
    expect(result).toContain("45s");
    // Must NOT contain "0m" in the duration portion
    expect(result).not.toMatch(/0m \d+s/);
  });

  it("clamps negative or zero duration to 0s", () => {
    const neg = researchInterventionPrompt(-99_000);
    const zero = researchInterventionPrompt(0);
    // Both should produce "0s" (Math.max(0, …) in the function).
    // A plain toContain("0s") would also accept "10s", "30s", …, so require
    // that the zero is not the trailing digit of a larger number.
    const standaloneZeroSeconds = /(?<!\d)0s/;
    expect(neg).toMatch(standaloneZeroSeconds);
    expect(zero).toMatch(standaloneZeroSeconds);
  });

  it("mentions 'task tool', 'Mimir', and 'bash' — the three action options", () => {
    const result = researchInterventionPrompt(60_000);
    expect(result).toContain("task tool");
    expect(result).toContain("Mimir");
    expect(result).toContain("bash");
  });
});
|
|
59
|
+
|
|
60
|
+
// ---------------------------------------------------------------------------
|
|
61
|
+
// Group 2 — normalizeOptions v0.3.0 fields
|
|
62
|
+
// ---------------------------------------------------------------------------
|
|
63
|
+
|
|
64
|
+
import { normalizeOptions } from "../src/options.js";
|
|
65
|
+
|
|
66
|
+
/**
 * Group 2 — normalizeOptions() handling of the v0.3.0 fields
 * (backgroundStallTimeoutMs, backgroundThinkingLoopTimeoutMs,
 * backgroundMaxInterventions): defaults, clamping with notes, env override.
 */
describe("normalizeOptions — v0.3.0 fields", () => {
  /** True when some note mentions `field` and says it was clamped. */
  const hasClampNote = (notes: string[], field: string): boolean =>
    notes.some((n) => n.includes(field) && n.includes("clamped"));

  it("defaults: backgroundStallTimeoutMs === 180_000, backgroundThinkingLoopTimeoutMs === 300_000, backgroundMaxInterventions === 1", () => {
    const { options } = normalizeOptions(undefined);
    expect(options.backgroundStallTimeoutMs).toBe(180_000);
    expect(options.backgroundThinkingLoopTimeoutMs).toBe(300_000);
    expect(options.backgroundMaxInterventions).toBe(1);
  });

  it("clamps backgroundStallTimeoutMs < 10000 to 10000 and pushes a note", () => {
    const { options, notes } = normalizeOptions({ backgroundStallTimeoutMs: 999 });
    expect(options.backgroundStallTimeoutMs).toBe(10_000);
    expect(hasClampNote(notes, "backgroundStallTimeoutMs")).toBe(true);
  });

  it("clamps backgroundStallTimeoutMs > 600000 to 600000 and pushes a note", () => {
    const { options, notes } = normalizeOptions({ backgroundStallTimeoutMs: 999_999 });
    expect(options.backgroundStallTimeoutMs).toBe(600_000);
    expect(hasClampNote(notes, "backgroundStallTimeoutMs")).toBe(true);
  });

  it("clamps backgroundMaxInterventions < 1 to 1", () => {
    const { options, notes } = normalizeOptions({ backgroundMaxInterventions: 0 });
    expect(options.backgroundMaxInterventions).toBe(1);
    expect(hasClampNote(notes, "backgroundMaxInterventions")).toBe(true);
  });

  it("clamps backgroundMaxInterventions > 3 to 3", () => {
    const { options, notes } = normalizeOptions({ backgroundMaxInterventions: 99 });
    expect(options.backgroundMaxInterventions).toBe(3);
    expect(hasClampNote(notes, "backgroundMaxInterventions")).toBe(true);
  });

  it("honors BIZAR_STALL_TIMEOUT_MS env var when no option is set", () => {
    // The options module is already loaded by this file's static import, and
    // ES modules are evaluated once and cached, so a dynamic import() here
    // would return the very same instance. The test therefore relies on
    // normalizeOptions reading process.env at call time, and calls the
    // statically imported function directly.
    const ENV_KEY = "BIZAR_STALL_TIMEOUT_MS";
    const previous = process.env[ENV_KEY];
    process.env[ENV_KEY] = "42000";
    try {
      const { options } = normalizeOptions(undefined);
      expect(options.backgroundStallTimeoutMs).toBe(42_000);
    } finally {
      // Restore rather than blindly delete, so a value set by the outer
      // environment is still visible to tests that run afterwards.
      if (previous === undefined) {
        delete process.env[ENV_KEY];
      } else {
        process.env[ENV_KEY] = previous;
      }
    }
  });
});
|
|
115
|
+
|
|
116
|
+
// ---------------------------------------------------------------------------
|
|
117
|
+
// Group 3 — BackgroundState schema backfill
|
|
118
|
+
// ---------------------------------------------------------------------------
|
|
119
|
+
|
|
120
|
+
import { BackgroundStateStore } from "../src/background-state.js";
|
|
121
|
+
import { TERMINAL_STATUSES } from "../src/background-state.js";
|
|
122
|
+
|
|
123
|
+
// Logger stand-in for the schema-backfill tests: exposes log/debug/info/warn/error
// and discards every message.
const silentLogger = {
  log(_entry: { level: "debug" | "info" | "warn" | "error"; message: string }): void {
    // intentionally empty
  },
  debug(_message: string): void {
    // intentionally empty
  },
  info(_message: string): void {
    // intentionally empty
  },
  warn(_message: string): void {
    // intentionally empty
  },
  error(_message: string): void {
    // intentionally empty
  },
};
|
|
131
|
+
|
|
132
|
+
/**
 * Creates (if needed) and returns the scratch directory for one test group.
 *
 * The path is `/tmp/bizar-stall-test-<prefix>-<pid>`, so the same prefix in
 * the same process always maps to the same directory.
 *
 * @param prefix label identifying the test group
 * @returns the directory path
 */
function makeTempDir(prefix: string): string {
  const scratchPath = ["/tmp/bizar-stall-test", prefix, process.pid].join("-");
  mkdirSync(scratchPath, { recursive: true });
  return scratchPath;
}
|
|
137
|
+
|
|
138
|
+
/**
 * Writes `state` as JSON to `<dir>/bg/<instanceId>.json`, creating the `bg`
 * directory when it does not exist yet.
 *
 * @param dir        root directory under which `bg/` is created
 * @param instanceId used as the file name (without extension)
 * @param state      arbitrary record serialized with JSON.stringify
 */
function writeOldStateFile(dir: string, instanceId: string, state: Record<string, unknown>): void {
  const stateDir = path.join(dir, "bg");
  const target = path.join(stateDir, `${instanceId}.json`);
  const serialized = JSON.stringify(state);

  mkdirSync(stateDir, { recursive: true });
  writeFileSync(target, serialized, "utf8");
}
|
|
143
|
+
|
|
144
|
+
/**
 * Group 3 — loading state files that are missing one of the v0.3.0 fields.
 *
 * Each test writes a JSON file without exactly one field, loads it through
 * BackgroundStateStore.load() and checks the value the store reports for it.
 */
describe("BackgroundState schema backfill", () => {
  const STARTED_AT = 1_700_000_000_000;
  let tmpDir: string;

  beforeEach(() => {
    tmpDir = makeTempDir("bg-state-backfill");
  });

  afterEach(() => {
    rmSync(tmpDir, { recursive: true, force: true });
  });

  /**
   * Writes a state file made of the common fields followed by `v030Fields`
   * (the subset of v0.3.0 fields the test wants present), then loads it.
   */
  async function loadLegacyState(
    instanceId: string,
    sessionId: string,
    v030Fields: Record<string, unknown>,
  ) {
    writeOldStateFile(tmpDir, instanceId, {
      instanceId,
      sessionId,
      agent: "mimir",
      status: "running",
      startedAt: STARTED_AT,
      model: "minimax/MiniMax-M3",
      promptPreview: "Do the thing",
      toolCallCount: 0,
      parentAgent: "odin",
      logPath: "/tmp/test.log",
      timeoutMs: 300_000,
      ...v030Fields,
    });

    const store = new BackgroundStateStore(tmpDir, silentLogger);
    return await store.load(instanceId);
  }

  it("loads an old-format state file (no lastEventAt) and backfills it to startedAt", async () => {
    // lastEventAt is intentionally absent
    const loaded = await loadLegacyState("bgr_old_no_lastEventAt", "sess_abc", {
      lastToolOrTextAt: STARTED_AT,
      interventionCount: 0,
    });

    expect(loaded).not.toBeNull();
    expect(loaded!.lastEventAt).toBe(STARTED_AT);
  });

  it("loads an old-format state file (no lastToolOrTextAt) and backfills it to startedAt", async () => {
    // lastToolOrTextAt is intentionally absent
    const loaded = await loadLegacyState("bgr_old_no_lastToolOrTextAt", "sess_def", {
      lastEventAt: STARTED_AT,
      interventionCount: 0,
    });

    expect(loaded).not.toBeNull();
    expect(loaded!.lastToolOrTextAt).toBe(STARTED_AT);
  });

  it("loads an old-format state file (no interventionCount) and backfills it to 0", async () => {
    // interventionCount is intentionally absent
    const loaded = await loadLegacyState("bgr_old_no_interventionCount", "sess_ghi", {
      lastEventAt: STARTED_AT,
      lastToolOrTextAt: STARTED_AT,
    });

    expect(loaded).not.toBeNull();
    expect(loaded!.interventionCount).toBe(0);
  });
});
|
|
236
|
+
|
|
237
|
+
// ---------------------------------------------------------------------------
|
|
238
|
+
// Group 4 — Stall + thinking-loop detection logic
|
|
239
|
+
//
|
|
240
|
+
// FakeInstanceManager replicates the v0.3.0 stall and thinking-loop
|
|
241
|
+
// detection algorithm from the real InstanceManager. It mirrors the
|
|
242
|
+
// real class surface and implements the exact same logic.
|
|
243
|
+
// ---------------------------------------------------------------------------
|
|
244
|
+
|
|
245
|
+
import type { BackgroundState, BackgroundStatus } from "../src/background-state.js";
|
|
246
|
+
import { TERMINAL_STATUSES as TERMINAL } from "../src/background-state.js";
|
|
247
|
+
|
|
248
|
+
/**
 * Renders a millisecond duration as "Xm Ys", or just "Ys" when it is shorter
 * than one minute. Negative input is treated as zero and fractional
 * milliseconds are dropped.
 *
 * @param ms duration in milliseconds
 * @returns the formatted duration
 */
function formatDuration(ms: number): string {
  const wholeMs = Math.max(0, Math.floor(ms));
  const totalSeconds = Math.floor(wholeMs / 1000);
  const minutes = Math.floor(totalSeconds / 60);
  const seconds = totalSeconds % 60;

  if (minutes > 0) {
    return `${minutes}m ${seconds}s`;
  }
  return `${seconds}s`;
}
|
|
255
|
+
|
|
256
|
+
/** Logger stand-in used by FakeInstanceManagerForStall: every method discards its input. */
const noopLogger = {
  log(_entry: { level: "debug" | "info" | "warn" | "error"; message: string }): void {
    // intentionally empty
  },
  debug(_message: string): void {
    // intentionally empty
  },
  info(_message: string): void {
    // intentionally empty
  },
  warn(_message: string): void {
    // intentionally empty
  },
  error(_message: string): void {
    // intentionally empty
  },
};
|
|
264
|
+
|
|
265
|
+
/**
 * FakeInstanceManagerForStall — in-memory test double carrying the stall and
 * thinking-loop detection logic exercised by the tests in this file.
 *
 * Nothing leaves the process: instead of aborting a session or sending a
 * prompt, the fake records the session id in `abortedSessions` or the prompt
 * in `sentPrompts`, and mutates the stored BackgroundState in place.
 */
class FakeInstanceManagerForStall {
  /** Intervention prompts recorded by _sendIntervention, in call order. */
  sentPrompts: Array<{ sessionId: string; text: string }> = [];
  /** Session ids recorded by the two abort helpers, in call order. */
  abortedSessions: string[] = [];

  private instances = new Map<string, BackgroundState>();
  private stallTimeoutMs: number;
  private thinkingLoopTimeoutMs: number;
  private maxInterventions: number;
  private stallCheckerDisabled = false;

  /**
   * @param opts optional thresholds; missing values fall back to
   *             180_000 ms (stall), 300_000 ms (thinking loop) and 1 intervention.
   */
  constructor(
    opts: {
      stallTimeoutMs?: number;
      thinkingLoopTimeoutMs?: number;
      maxInterventions?: number;
    } = {},
  ) {
    this.stallTimeoutMs = opts.stallTimeoutMs ?? 180_000;
    this.thinkingLoopTimeoutMs = opts.thinkingLoopTimeoutMs ?? 300_000;
    this.maxInterventions = opts.maxInterventions ?? 1;
  }

  /** Configured stall threshold in milliseconds. */
  get stallTimeoutMsValue(): number {
    return this.stallTimeoutMs;
  }

  /** Configured thinking-loop threshold in milliseconds. */
  get thinkingLoopTimeoutMsValue(): number {
    return this.thinkingLoopTimeoutMs;
  }

  /** Configured maximum number of interventions before aborting. */
  get maxInterventionsValue(): number {
    return this.maxInterventions;
  }

  /** Turns runStallAndLoopChecks() into a no-op from now on. */
  disablePeriodicChecks(): void {
    this.stallCheckerDisabled = true;
  }

  // ---------------------------------------------------------------------------
  // Test set-up API
  // ---------------------------------------------------------------------------

  /** Stores a shallow copy of `state`, keyed by its instanceId. */
  addInstance(state: BackgroundState): void {
    const copy = { ...state };
    this.instances.set(state.instanceId, copy);
  }

  /** Returns the stored (mutable) state for `instanceId`, if any. */
  getInstance(instanceId: string): BackgroundState | undefined {
    return this.instances.get(instanceId);
  }

  // ---------------------------------------------------------------------------
  // Periodic check
  // ---------------------------------------------------------------------------

  /**
   * Runs one pass over all non-terminal instances:
   *  1. no event for longer than stallTimeoutMs → abort as stalled;
   *  2. otherwise, for "running" instances with no tool/text part for longer
   *     than thinkingLoopTimeoutMs → send an intervention while the
   *     intervention budget is not used up, else abort as thinking loop.
   */
  async runStallAndLoopChecks(): Promise<void> {
    if (this.stallCheckerDisabled) return;

    // Snapshot the candidate ids first; each one is re-fetched and
    // re-checked below because the helpers are awaited in between.
    const candidateIds = [...this.instances.values()]
      .filter((candidate) => !TERMINAL.has(candidate.status))
      .map((candidate) => candidate.instanceId);

    for (const candidateId of candidateIds) {
      const inst = this.instances.get(candidateId);
      if (!inst || TERMINAL.has(inst.status)) continue;

      const now = Date.now();
      const sinceLastEvent = now - (inst.lastEventAt ?? 0);
      const since = now - (inst.lastToolOrTextAt ?? 0);

      // Stall check
      if (sinceLastEvent > this.stallTimeoutMs) {
        await this._abortAsStalled(inst);
        continue;
      }

      // Thinking-loop check — only for running instances
      if (inst.status !== "running") continue;
      if (since > this.thinkingLoopTimeoutMs) {
        const budgetLeft = (inst.interventionCount ?? 0) < this.maxInterventions;
        if (budgetLeft) {
          await this._sendIntervention(inst, since);
        } else {
          await this._abortAsThinkingLoop(inst, since);
        }
      }
    }
  }

  // ---------------------------------------------------------------------------
  // Event simulation
  // ---------------------------------------------------------------------------

  /**
   * Simulates a part-updated event for `instanceId`. Unknown ids are ignored.
   * Every part refreshes lastEventAt; only "tool" and "text" parts refresh
   * lastToolOrTextAt and clear any recorded intervention.
   */
  simulatePartUpdated(
    instanceId: string,
    partType: "tool" | "text" | "thinking",
  ): void {
    const inst = this.instances.get(instanceId);
    if (!inst) return;

    // Every event advances lastEventAt (heartbeat)
    inst.lastEventAt = Date.now();

    // 'thinking' parts do NOT advance lastToolOrTextAt — that is intentional
    const isProgress = partType === "tool" || partType === "text";
    if (!isProgress) return;

    inst.lastToolOrTextAt = Date.now();

    // Reset intervention bookkeeping after progress
    if ((inst.interventionCount ?? 0) > 0) {
      inst.interventionCount = 0;
      delete inst.interventionAt;
      delete inst.interventionReason;
    }
  }

  // ---------------------------------------------------------------------------
  // Private helpers
  // ---------------------------------------------------------------------------

  /** Records the abort and marks the instance failed with the stall message. */
  private async _abortAsStalled(inst: BackgroundState): Promise<void> {
    const sinceMs = Date.now() - (inst.lastEventAt ?? 0);
    noopLogger.warn(
      `bizar: instance ${inst.instanceId} stalled (no event for ${sinceMs}ms); aborting`,
    );

    this.abortedSessions.push(inst.sessionId);
    inst.status = "failed";
    inst.error = `No activity for ${this.stallTimeoutMs}ms — LLM appears stalled`;
    inst.completedAt = Date.now();
  }

  /** Records an intervention prompt and updates the intervention bookkeeping. */
  private async _sendIntervention(inst: BackgroundState, sinceMs: number): Promise<void> {
    const text = researchInterventionPrompt(sinceMs);
    const previousCount = inst.interventionCount ?? 0;
    noopLogger.warn(
      `bizar: instance ${inst.instanceId} thinking loop (${sinceMs}ms without tool/text); sending intervention #${previousCount + 1}/${this.maxInterventions}`,
    );

    this.sentPrompts.push({ sessionId: inst.sessionId, text });

    const reason = `thinking loop (${formatDuration(sinceMs)} without tool/text)`;
    inst.interventionCount = previousCount + 1;
    inst.interventionAt = Date.now();
    inst.interventionReason = reason;
    // Bumping lastEventAt here is intentional (mirrors real impl)
    inst.lastEventAt = Date.now();
  }

  /** Records the abort and marks the instance failed with the thinking-loop message. */
  private async _abortAsThinkingLoop(inst: BackgroundState, sinceMs: number): Promise<void> {
    noopLogger.warn(
      `bizar: instance ${inst.instanceId} thinking loop exhausted ${this.maxInterventions} intervention(s) over ${sinceMs}ms; aborting`,
    );

    this.abortedSessions.push(inst.sessionId);
    inst.status = "failed";
    inst.error = `Thinking loop detected: ${formatDuration(sinceMs)} of thinking without tool calls or output. Spawn a Mimir agent for research.`;
    inst.completedAt = Date.now();
  }
}
|
|
430
|
+
|
|
431
|
+
/**
 * Builds a BackgroundState fixture: a "running" mimir instance started just
 * now, with random instance/session ids and all activity timestamps set to
 * the creation time. Any field in `overrides` replaces the default.
 *
 * @param overrides fields to override on the default fixture
 * @returns the merged state object
 */
function makeBgState(overrides: Partial<BackgroundState> = {}): BackgroundState {
  const createdAt = Date.now();
  const randomSuffix = (): string => Math.random().toString(36).slice(2, 10);

  const defaults: BackgroundState = {
    instanceId: `bgr_${randomSuffix()}`,
    sessionId: `sess_${randomSuffix()}`,
    agent: "mimir",
    status: "running",
    startedAt: createdAt,
    model: "minimax/MiniMax-M3",
    promptPreview: "Do the thing",
    resultPreview: undefined,
    resultMessageIds: [],
    error: undefined,
    parentAgent: "odin",
    parentInstanceId: undefined,
    logPath: "~/.cache/bizar/logs/test.log",
    timeoutMs: 300_000,
    toolCallCount: 0,
    loopGuardTool: undefined,
    lastEventAt: createdAt,
    lastToolOrTextAt: createdAt,
    interventionCount: 0,
  };

  return { ...defaults, ...overrides };
}
|
|
456
|
+
|
|
457
|
+
/**
 * Group 4 — stall and thinking-loop detection, driven through
 * FakeInstanceManagerForStall.
 *
 * Tests 1-5 call runStallAndLoopChecks() on instances whose timestamps are
 * placed before/after the thresholds; tests 6-7 call simulatePartUpdated()
 * and inspect the bookkeeping fields.
 */
describe("Stall + thinking-loop detection logic", () => {
  // Test 1: Stall timeout fires when no events
  it("stall timeout fires when no events (lastEventAt is old)", async () => {
    const mgr = new FakeInstanceManagerForStall({
      stallTimeoutMs: 180_000,
      thinkingLoopTimeoutMs: 300_000,
      maxInterventions: 1,
    });

    const oldTime = Date.now() - (180_000 + 1000); // past the stall threshold
    mgr.addInstance(
      makeBgState({
        instanceId: "bgr_stall_fire",
        status: "running",
        lastEventAt: oldTime,
        lastToolOrTextAt: oldTime,
      }),
    );

    await mgr.runStallAndLoopChecks();

    const inst = mgr.getInstance("bgr_stall_fire")!;
    expect(mgr.abortedSessions).toContain(inst.sessionId);
    expect(inst.status).toBe("failed");
    expect(inst.error!).toContain("No activity for");
  });

  // Test 2: Stall timeout does NOT fire when events are recent
  it("stall timeout does NOT fire when events are recent", async () => {
    const mgr = new FakeInstanceManagerForStall({ stallTimeoutMs: 180_000 });

    const recentTime = Date.now();
    mgr.addInstance(
      makeBgState({
        instanceId: "bgr_stall_recent",
        status: "running",
        lastEventAt: recentTime,
        lastToolOrTextAt: recentTime,
      }),
    );

    await mgr.runStallAndLoopChecks();

    expect(mgr.abortedSessions).toHaveLength(0);
    const inst = mgr.getInstance("bgr_stall_recent")!;
    expect(inst.status).toBe("running");
  });

  // Test 3: Stall timeout does NOT fire for terminal instances
  it("stall timeout does NOT fire for terminal instances", async () => {
    const mgr = new FakeInstanceManagerForStall({ stallTimeoutMs: 180_000 });

    const oldTime = Date.now() - (180_000 + 1000);
    mgr.addInstance(
      makeBgState({
        instanceId: "bgr_stall_terminal",
        status: "done",
        lastEventAt: oldTime, // would fire if not terminal
        lastToolOrTextAt: oldTime,
      }),
    );

    await mgr.runStallAndLoopChecks();

    expect(mgr.abortedSessions).toHaveLength(0);
    const inst = mgr.getInstance("bgr_stall_terminal")!;
    expect(inst.status).toBe("done");
  });

  // Test 4: Thinking loop detection fires after threshold
  it("thinking loop detection fires after threshold, sends intervention, increments counter", async () => {
    const mgr = new FakeInstanceManagerForStall({
      stallTimeoutMs: 180_000,
      thinkingLoopTimeoutMs: 300_000,
      maxInterventions: 1,
    });

    // lastEventAt must be within stall timeout so we reach the thinking-loop
    // check. lastToolOrTextAt must be beyond the thinking-loop threshold.
    const now = Date.now();
    const lastEventAt = now - 60_000; // 60s ago — within 180s stall timeout
    const lastToolOrTextAt = now - 301_000; // 301s ago — beyond 300s loop threshold

    mgr.addInstance(
      makeBgState({
        instanceId: "bgr_thinking_loop",
        status: "running",
        lastEventAt,
        lastToolOrTextAt,
        interventionCount: 0,
      }),
    );

    await mgr.runStallAndLoopChecks();

    expect(mgr.sentPrompts).toHaveLength(1);
    expect(mgr.sentPrompts[0]!.text).toContain("[SYSTEM REMINDER — Thinking Loop Detected]");

    const inst = mgr.getInstance("bgr_thinking_loop")!;
    expect(inst.interventionCount).toBe(1);
    expect(inst.interventionAt).toBeDefined();
    expect(inst.interventionReason).toContain("thinking loop");
    expect(inst.status).toBe("running"); // still running — not failed yet
  });

  // Test 5: Thinking loop intervention respects maxInterventions
  it("when interventionCount >= maxInterventions, instance is aborted as thinking loop", async () => {
    const mgr = new FakeInstanceManagerForStall({
      stallTimeoutMs: 180_000,
      thinkingLoopTimeoutMs: 300_000,
      maxInterventions: 1,
    });

    // lastEventAt recent (within stall timeout) so we reach the thinking-loop check
    const now = Date.now();
    const lastEventAt = now - 60_000; // 60s ago — within 180s stall timeout
    const lastToolOrTextAt = now - 301_000; // 301s ago — beyond 300s loop threshold

    mgr.addInstance(
      makeBgState({
        instanceId: "bgr_max_interventions",
        status: "running",
        lastEventAt,
        lastToolOrTextAt,
        // Already at max interventions — should abort immediately without sending prompt
        interventionCount: 1,
      }),
    );

    await mgr.runStallAndLoopChecks();

    const inst = mgr.getInstance("bgr_max_interventions")!;
    expect(mgr.sentPrompts).toHaveLength(0); // No new prompt sent
    expect(mgr.abortedSessions).toContain(inst.sessionId);
    expect(inst.status).toBe("failed");
    expect(inst.error!).toMatch(/thinking loop/i); // case-insensitive
  });

  // Test 6: Tool/text events reset intervention counter
  it("a tool or text part after intervention resets interventionCount to 0", async () => {
    const mgr = new FakeInstanceManagerForStall({
      stallTimeoutMs: 180_000,
      thinkingLoopTimeoutMs: 300_000,
      maxInterventions: 1,
    });

    const oldTime = Date.now() - (300_000 + 1000);
    mgr.addInstance(
      makeBgState({
        instanceId: "bgr_reset_counter",
        status: "running",
        lastEventAt: oldTime,
        lastToolOrTextAt: oldTime,
        interventionCount: 1,
        interventionAt: oldTime,
        interventionReason: "thinking loop (1m 0s without tool/text)",
      }),
    );

    // Simulate a text part arriving (progress signal)
    mgr.simulatePartUpdated("bgr_reset_counter", "text");

    const inst = mgr.getInstance("bgr_reset_counter")!;
    expect(inst.interventionCount).toBe(0);
    expect(inst.interventionAt).toBeUndefined();
    expect(inst.interventionReason).toBeUndefined();
    // Strictly greater: oldTime is ~5 minutes in the past, so ">=" would also
    // pass if the timestamps were never touched.
    expect(inst.lastToolOrTextAt).toBeGreaterThan(oldTime);
    expect(inst.lastEventAt).toBeGreaterThan(oldTime);
  });

  // Test 7: Thinking-only events do NOT reset intervention counter
  it("a thinking part does NOT update lastToolOrTextAt or reset interventionCount", async () => {
    const mgr = new FakeInstanceManagerForStall({
      stallTimeoutMs: 180_000,
      thinkingLoopTimeoutMs: 300_000,
      maxInterventions: 1,
    });

    const baseTime = Date.now() - 60_000;
    const reason = "thinking loop (5m 1s without tool/text)";
    mgr.addInstance(
      makeBgState({
        instanceId: "bgr_thinking_only",
        status: "running",
        lastEventAt: baseTime,
        lastToolOrTextAt: baseTime, // set to 60s ago
        // Start from a non-zero count so an accidental reset is observable.
        interventionCount: 1,
        interventionAt: baseTime,
        interventionReason: reason,
      }),
    );

    // Simulate a thinking part arriving
    mgr.simulatePartUpdated("bgr_thinking_only", "thinking");

    const inst = mgr.getInstance("bgr_thinking_only")!;
    // The heartbeat still advances on a thinking part…
    expect(inst.lastEventAt).toBeGreaterThan(baseTime);
    // …but lastToolOrTextAt must NOT have been updated by it
    expect(inst.lastToolOrTextAt).toBe(baseTime);
    // and the intervention bookkeeping must be left untouched
    expect(inst.interventionCount).toBe(1);
    expect(inst.interventionAt).toBe(baseTime);
    expect(inst.interventionReason).toBe(reason);
  });
});
|
|
659
|
+
|
|
660
|
+
// ---------------------------------------------------------------------------
|
|
661
|
+
// Group 5 — bg-status toView includes v0.3.0 fields
|
|
662
|
+
// ---------------------------------------------------------------------------
|
|
663
|
+
|
|
664
|
+
import type { InstanceView } from "../src/background.js";
|
|
665
|
+
|
|
666
|
+
/**
 * Test-local projection of a BackgroundState onto an InstanceView.
 *
 * - The nine core fields (including `lastEventAt`) are always copied, so
 *   their keys exist on the view even when the source value is undefined.
 * - `completedAt`, `resultPreview`, `error` and `parentInstanceId` are only
 *   added when the source value is not undefined.
 * - v0.3.0 intervention metadata is only added once `interventionCount`
 *   is greater than zero; otherwise those keys are left off entirely.
 *
 * @param inst background instance state to project
 * @returns the view object built from `inst`
 */
function toViewForTest(inst: import("../src/background-state.js").BackgroundState): InstanceView {
  const {
    instanceId,
    agent,
    status,
    startedAt,
    toolCallCount,
    promptPreview,
    parentAgent,
    sessionId,
    lastEventAt,
  } = inst;

  const view: InstanceView = {
    instanceId,
    agent,
    status,
    startedAt,
    toolCallCount,
    promptPreview,
    parentAgent,
    sessionId,
    lastEventAt,
  };

  // Optional fields: attach the key only when there is a value to report.
  if (inst.completedAt !== undefined) {
    view.completedAt = inst.completedAt;
  }
  if (inst.resultPreview !== undefined) {
    view.resultPreview = inst.resultPreview;
  }
  if (inst.error !== undefined) {
    view.error = inst.error;
  }
  if (inst.parentInstanceId !== undefined) {
    view.parentInstanceId = inst.parentInstanceId;
  }

  // v0.3.0: intervention metadata stays hidden until at least one
  // intervention has actually been sent.
  const sentInterventions = inst.interventionCount ?? 0;
  if (!(sentInterventions > 0)) {
    return view;
  }

  view.interventionCount = sentInterventions;
  if (inst.interventionAt !== undefined) {
    view.interventionAt = inst.interventionAt;
  }
  if (inst.interventionReason !== undefined) {
    view.interventionReason = inst.interventionReason;
  }
  return view;
}
|
|
692
|
+
|
|
693
|
+
/**
 * Group 5: checks which v0.3.0 fields the view projection exposes.
 *
 * Both cases run against `toViewForTest`, the test-local projection defined
 * above this suite.
 */
describe("bg-status toView — v0.3.0 fields", () => {
  type BackgroundState = import("../src/background-state.js").BackgroundState;

  it("toView includes lastEventAt, interventionCount, interventionAt, interventionReason after intervention", () => {
    const now = Date.now();
    const inst: BackgroundState = {
      instanceId: "bgr_view_test",
      sessionId: "sess_view",
      agent: "mimir",
      status: "running",
      startedAt: now - 600_000,
      model: "minimax/MiniMax-M3",
      promptPreview: "Research X",
      toolCallCount: 0,
      parentAgent: "odin",
      logPath: "/tmp/test.log",
      timeoutMs: 300_000,
      lastEventAt: now - 60_000,
      lastToolOrTextAt: now - 60_000,
      interventionCount: 2,
      interventionAt: now - 60_000,
      interventionReason: "thinking loop (5m 0s without tool/text)",
    };

    const view = toViewForTest(inst);

    expect(view.lastEventAt).toBe(now - 60_000);
    expect(view.interventionCount).toBe(2);
    expect(view.interventionAt).toBe(now - 60_000);
    expect(view.interventionReason).toBe("thinking loop (5m 0s without tool/text)");
  });

  it("toView omits intervention fields (not null) when interventionCount is 0 or undefined", () => {
    const now = Date.now();
    const inst: BackgroundState = {
      instanceId: "bgr_view_no_intervention",
      sessionId: "sess_view2",
      agent: "mimir",
      status: "running",
      startedAt: now,
      model: "minimax/MiniMax-M3",
      promptPreview: "Research Y",
      toolCallCount: 0,
      parentAgent: "odin",
      logPath: "/tmp/test.log",
      timeoutMs: 300_000,
      // v0.3.0 fields are absent (no intervention yet)
    };

    // Case 1: interventionCount is undefined.
    const view = toViewForTest(inst);

    // lastEventAt is copied through unconditionally; this fixture never sets
    // it, so the projected value is undefined.
    expect(view.lastEventAt).toBeUndefined();
    // Intervention fields should be absent — not null, not 0, not present
    expect("interventionCount" in view).toBe(false);
    expect("interventionAt" in view).toBe(false);
    expect("interventionReason" in view).toBe(false);

    // Case 2: interventionCount is an explicit 0. Leftover timestamp/reason
    // values must not leak into the view while the count is zero.
    const zeroView = toViewForTest({
      ...inst,
      interventionCount: 0,
      interventionAt: now,
      interventionReason: "thinking loop (5m 0s without tool/text)",
    });
    expect("interventionCount" in zeroView).toBe(false);
    expect("interventionAt" in zeroView).toBe(false);
    expect("interventionReason" in zeroView).toBe(false);
  });
});
|