@polderlabs/bizar-plugin 0.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60):
  1. package/LICENSE +21 -0
  2. package/README.md +448 -0
  3. package/bun.lock +88 -0
  4. package/index.ts +1113 -0
  5. package/package.json +42 -0
  6. package/scripts/check-forbidden-imports.sh +33 -0
  7. package/src/background-state.ts +463 -0
  8. package/src/background.ts +964 -0
  9. package/src/commands-impl.ts +369 -0
  10. package/src/commands.ts +880 -0
  11. package/src/event-stream.ts +574 -0
  12. package/src/fingerprint.ts +120 -0
  13. package/src/handoff.ts +79 -0
  14. package/src/http-client.ts +467 -0
  15. package/src/logger.ts +144 -0
  16. package/src/loop.ts +176 -0
  17. package/src/options.ts +421 -0
  18. package/src/plan-fs.ts +323 -0
  19. package/src/report.ts +178 -0
  20. package/src/research-prompt.ts +35 -0
  21. package/src/serve.ts +476 -0
  22. package/src/settings.ts +349 -0
  23. package/src/state.ts +298 -0
  24. package/src/tools/bg-collect.ts +104 -0
  25. package/src/tools/bg-get-comments.ts +239 -0
  26. package/src/tools/bg-kill.ts +87 -0
  27. package/src/tools/bg-spawn.ts +263 -0
  28. package/src/tools/bg-status.ts +99 -0
  29. package/src/tools/plan-action.ts +767 -0
  30. package/src/tools/wait-for-feedback.ts +402 -0
  31. package/tests/attach-handler-bug.test.ts +166 -0
  32. package/tests/background-state.test.ts +277 -0
  33. package/tests/background.test.ts +402 -0
  34. package/tests/block.test.ts +193 -0
  35. package/tests/canonical-key-order.test.ts +71 -0
  36. package/tests/commands-impl.test.ts +442 -0
  37. package/tests/commands.test.ts +548 -0
  38. package/tests/config.test.ts +122 -0
  39. package/tests/dispose.test.ts +336 -0
  40. package/tests/event-stream.test.ts +409 -0
  41. package/tests/event.test.ts +262 -0
  42. package/tests/fingerprint.test.ts +161 -0
  43. package/tests/http-client.test.ts +403 -0
  44. package/tests/init-helpers.test.ts +203 -0
  45. package/tests/integration/slash-command.test.ts +348 -0
  46. package/tests/integration/tool-routing.test.ts +314 -0
  47. package/tests/loop.test.ts +397 -0
  48. package/tests/options.test.ts +274 -0
  49. package/tests/serve.test.ts +335 -0
  50. package/tests/settings.test.ts +351 -0
  51. package/tests/stall-think.test.ts +749 -0
  52. package/tests/state.test.ts +275 -0
  53. package/tests/tools/bg-collect.test.ts +337 -0
  54. package/tests/tools/bg-get-comments.test.ts +485 -0
  55. package/tests/tools/bg-kill.test.ts +231 -0
  56. package/tests/tools/bg-spawn.test.ts +311 -0
  57. package/tests/tools/bg-status.test.ts +216 -0
  58. package/tests/tools/plan-action.test.ts +599 -0
  59. package/tests/tools/wait-for-feedback.test.ts +390 -0
  60. package/tsconfig.json +29 -0
@@ -0,0 +1,749 @@
1
+ /**
2
+ * stall-think.test.ts
3
+ *
4
+ * v0.3.0 stall timeout + thinking-loop protection tests.
5
+ *
6
+ * Groups:
7
+ * 1. researchInterventionPrompt() — 5 tests
8
+ * 2. normalizeOptions() v0.3.0 fields — 6 tests
9
+ * 3. BackgroundState schema backfill — 3 tests
10
+ * 4. Stall + thinking-loop detection logic — 7 tests
11
+ * 5. bg-status toView v0.3.0 fields — 2 tests
12
+ *
13
+ * Total: 23 tests
14
+ */
15
+
16
import { describe, it, expect, beforeEach, afterEach } from "bun:test";
import { writeFileSync, mkdirSync, mkdtempSync, unlinkSync, rmSync } from "node:fs";
import { tmpdir } from "node:os";
import path from "node:path";
19
+
20
+ // ---------------------------------------------------------------------------
21
+ // Group 1 — researchInterventionPrompt
22
+ // ---------------------------------------------------------------------------
23
+
24
+ import { researchInterventionPrompt } from "../src/research-prompt.js";
25
+
26
/**
 * Group 1 — checks the text produced by researchInterventionPrompt():
 * the banner line, the duration formatting, and the three suggested actions.
 */
describe("researchInterventionPrompt", () => {
  it("returns a string containing '[SYSTEM REMINDER — Thinking Loop Detected]'", () => {
    const result = researchInterventionPrompt(60_000);
    expect(result).toContain("[SYSTEM REMINDER — Thinking Loop Detected]");
  });

  it("includes the duration formatted as 'Xm Ys' for multi-minute durations", () => {
    const result = researchInterventionPrompt(330_000); // 5m 30s
    expect(result).toContain("5m 30s");
  });

  it("formats under-a-minute durations as 'Ys' (not '0m Ys')", () => {
    const result = researchInterventionPrompt(45_000); // 45s
    expect(result).toContain("45s");
    // Must NOT contain "0m" in the duration portion
    expect(result).not.toMatch(/0m \d+s/);
  });

  it("clamps negative or zero duration to 0s", () => {
    // A plain substring check for "0s" would also be satisfied by "10s" or
    // "30s", so require that the zero is not the tail of a longer number.
    const standaloneZeroSeconds = /(?<!\d)0s\b/;
    for (const ms of [-99_000, 0]) {
      expect(researchInterventionPrompt(ms)).toMatch(standaloneZeroSeconds);
    }
  });

  it("mentions 'task tool', 'Mimir', and 'bash' — the three action options", () => {
    const result = researchInterventionPrompt(60_000);
    for (const keyword of ["task tool", "Mimir", "bash"]) {
      expect(result).toContain(keyword);
    }
  });
});
59
+
60
+ // ---------------------------------------------------------------------------
61
+ // Group 2 — normalizeOptions v0.3.0 fields
62
+ // ---------------------------------------------------------------------------
63
+
64
+ import { normalizeOptions } from "../src/options.js";
65
+
66
/**
 * Group 2 — defaults, clamping and env-var handling of the v0.3.0 fields
 * returned by normalizeOptions().
 */
describe("normalizeOptions — v0.3.0 fields", () => {
  it("defaults: backgroundStallTimeoutMs === 180_000, backgroundThinkingLoopTimeoutMs === 300_000, backgroundMaxInterventions === 1", () => {
    const { options } = normalizeOptions(undefined);
    expect(options.backgroundStallTimeoutMs).toBe(180_000);
    expect(options.backgroundThinkingLoopTimeoutMs).toBe(300_000);
    expect(options.backgroundMaxInterventions).toBe(1);
  });

  it("clamps backgroundStallTimeoutMs < 10000 to 10000 and pushes a note", () => {
    const { options, notes } = normalizeOptions({ backgroundStallTimeoutMs: 999 });
    expect(options.backgroundStallTimeoutMs).toBe(10_000);
    expect(notes.some((n) => n.includes("backgroundStallTimeoutMs") && n.includes("clamped"))).toBe(true);
  });

  it("clamps backgroundStallTimeoutMs > 600000 to 600000 and pushes a note", () => {
    const { options, notes } = normalizeOptions({ backgroundStallTimeoutMs: 999_999 });
    expect(options.backgroundStallTimeoutMs).toBe(600_000);
    expect(notes.some((n) => n.includes("backgroundStallTimeoutMs") && n.includes("clamped"))).toBe(true);
  });

  it("clamps backgroundMaxInterventions < 1 to 1", () => {
    const { options, notes } = normalizeOptions({ backgroundMaxInterventions: 0 });
    expect(options.backgroundMaxInterventions).toBe(1);
    expect(notes.some((n) => n.includes("backgroundMaxInterventions") && n.includes("clamped"))).toBe(true);
  });

  it("clamps backgroundMaxInterventions > 3 to 3", () => {
    const { options, notes } = normalizeOptions({ backgroundMaxInterventions: 99 });
    expect(options.backgroundMaxInterventions).toBe(3);
    expect(notes.some((n) => n.includes("backgroundMaxInterventions") && n.includes("clamped"))).toBe(true);
  });

  it("honors BIZAR_STALL_TIMEOUT_MS env var when no option is set", () => {
    // ES modules are evaluated once and then served from the module cache,
    // so a later `import()` of options.js cannot re-run its initialisation.
    // For this test to pass, normalizeOptions() therefore has to read the
    // variable at call time; the statically imported function is sufficient.
    const previous = process.env.BIZAR_STALL_TIMEOUT_MS;
    process.env.BIZAR_STALL_TIMEOUT_MS = "42000";
    try {
      const { options } = normalizeOptions(undefined);
      expect(options.backgroundStallTimeoutMs).toBe(42000);
    } finally {
      // Restore rather than blindly delete, so a value exported by the
      // developer's shell or CI survives for the remaining tests.
      // (Assigning `undefined` would store the string "undefined".)
      if (previous === undefined) {
        delete process.env.BIZAR_STALL_TIMEOUT_MS;
      } else {
        process.env.BIZAR_STALL_TIMEOUT_MS = previous;
      }
    }
  });
});
115
+
116
+ // ---------------------------------------------------------------------------
117
+ // Group 3 — BackgroundState schema backfill
118
+ // ---------------------------------------------------------------------------
119
+
120
+ import { BackgroundStateStore } from "../src/background-state.js";
121
+ import { TERMINAL_STATUSES } from "../src/background-state.js";
122
+
123
// Logger stub for tests: exposes log/debug/info/warn/error and discards
// everything passed to them.
const silentLogger = {
  log: (_opts: { level: "debug" | "info" | "warn" | "error"; message: string }): void => {},
  debug: (_m: string): void => {},
  info: (_m: string): void => {},
  warn: (_m: string): void => {},
  error: (_m: string): void => {},
};
131
+
132
/**
 * Creates a fresh, uniquely named scratch directory for a test and returns
 * its path.
 *
 * The directory lives under the platform temp dir (os.tmpdir()) instead of a
 * hard-coded `/tmp`, and mkdtempSync appends a random suffix so two calls —
 * even with the same prefix in the same process — never share a directory.
 *
 * @param prefix Short label embedded in the directory name; must not contain
 *   path separators.
 * @returns Path of the newly created directory.
 */
function makeTempDir(prefix: string): string {
  return mkdtempSync(path.join(tmpdir(), `bizar-stall-test-${prefix}-`));
}
137
+
138
/**
 * Serialises `state` as JSON into `<dir>/bg/<instanceId>.json`, creating the
 * `bg` sub-directory when it does not exist yet.
 *
 * @param dir Root directory that will contain the `bg` folder.
 * @param instanceId Used as the file's base name.
 * @param state Arbitrary record written verbatim via JSON.stringify.
 */
function writeOldStateFile(dir: string, instanceId: string, state: Record<string, unknown>): void {
  const stateDir = path.join(dir, "bg");
  mkdirSync(stateDir, { recursive: true });

  const target = path.join(stateDir, `${instanceId}.json`);
  const payload = JSON.stringify(state);
  writeFileSync(target, payload, "utf8");
}
143
+
144
/**
 * Group 3 — each test writes a state file that lacks exactly one v0.3.0
 * field, loads it through BackgroundStateStore, and checks the value the
 * loader filled in for the missing field.
 */
describe("BackgroundState schema backfill", () => {
  const startedAt = 1_700_000_000_000;
  let tmpDir: string;

  beforeEach(() => {
    tmpDir = makeTempDir("bg-state-backfill");
  });

  afterEach(() => {
    rmSync(tmpDir, { recursive: true, force: true });
  });

  /**
   * Writes an otherwise complete state file with `missingField` removed and
   * returns whatever a fresh store loads for that instance.
   */
  async function loadWithout(
    instanceId: string,
    sessionId: string,
    missingField: "lastEventAt" | "lastToolOrTextAt" | "interventionCount",
  ) {
    const legacy: Record<string, unknown> = {
      instanceId,
      sessionId,
      agent: "mimir",
      status: "running",
      startedAt,
      model: "minimax/MiniMax-M3",
      promptPreview: "Do the thing",
      toolCallCount: 0,
      parentAgent: "odin",
      logPath: "/tmp/test.log",
      timeoutMs: 300_000,
      lastEventAt: startedAt,
      lastToolOrTextAt: startedAt,
      interventionCount: 0,
    };
    // Removing the key keeps the remaining keys in their original order.
    delete legacy[missingField];
    writeOldStateFile(tmpDir, instanceId, legacy);

    const store = new BackgroundStateStore(tmpDir, silentLogger);
    return store.load(instanceId);
  }

  it("loads an old-format state file (no lastEventAt) and backfills it to startedAt", async () => {
    const loaded = await loadWithout("bgr_old_no_lastEventAt", "sess_abc", "lastEventAt");

    expect(loaded).not.toBeNull();
    expect(loaded!.lastEventAt).toBe(startedAt);
  });

  it("loads an old-format state file (no lastToolOrTextAt) and backfills it to startedAt", async () => {
    const loaded = await loadWithout("bgr_old_no_lastToolOrTextAt", "sess_def", "lastToolOrTextAt");

    expect(loaded).not.toBeNull();
    expect(loaded!.lastToolOrTextAt).toBe(startedAt);
  });

  it("loads an old-format state file (no interventionCount) and backfills it to 0", async () => {
    const loaded = await loadWithout("bgr_old_no_interventionCount", "sess_ghi", "interventionCount");

    expect(loaded).not.toBeNull();
    expect(loaded!.interventionCount).toBe(0);
  });
});
236
+
237
+ // ---------------------------------------------------------------------------
238
+ // Group 4 — Stall + thinking-loop detection logic
239
+ //
240
+ // FakeInstanceManager replicates the v0.3.0 stall and thinking-loop
241
+ // detection algorithm from the real InstanceManager. It mirrors the
242
+ // real class surface and implements the exact same logic.
243
+ // ---------------------------------------------------------------------------
244
+
245
+ import type { BackgroundState, BackgroundStatus } from "../src/background-state.js";
246
+ import { TERMINAL_STATUSES as TERMINAL } from "../src/background-state.js";
247
+
248
/**
 * Renders a millisecond duration as `"Xm Ys"`, or just `"Ys"` when it is
 * shorter than one minute. Negative input is treated as zero and fractional
 * milliseconds are dropped.
 */
function formatDuration(ms: number): string {
  const clamped = Math.max(0, Math.floor(ms));
  const wholeMinutes = Math.floor(clamped / 60_000);
  const leftoverSeconds = Math.floor((clamped % 60_000) / 1000);

  if (wholeMinutes > 0) {
    return `${wholeMinutes}m ${leftoverSeconds}s`;
  }
  return `${leftoverSeconds}s`;
}
255
+
256
/** Logger stub used by the fake manager; every method ignores its input. */
const noopLogger = {
  log: (_opts: { level: "debug" | "info" | "warn" | "error"; message: string }): void => {},
  debug: (_m: string): void => {},
  info: (_m: string): void => {},
  warn: (_m: string): void => {},
  error: (_m: string): void => {},
};
264
+
265
/**
 * FakeInstanceManagerForStall — in-memory test double that carries the
 * stall and thinking-loop checks. Nothing here talks to HTTP, an event
 * stream or a child process: "sending" a prompt appends to `sentPrompts`
 * and "aborting" a session appends to `abortedSessions`, so tests can
 * assert on both arrays.
 */
class FakeInstanceManagerForStall {
  /** Prompts that would have been sent, in order. */
  sentPrompts: Array<{ sessionId: string; text: string }> = [];
  /** Session ids that would have been aborted, in order. */
  abortedSessions: string[] = [];

  private instances = new Map<string, BackgroundState>();
  private stallTimeoutMs: number;
  private thinkingLoopTimeoutMs: number;
  private maxInterventions: number;
  private stallCheckerDisabled = false;

  /**
   * @param opts Optional thresholds; omitted values fall back to
   *   180 000 ms (stall), 300 000 ms (thinking loop) and 1 intervention.
   */
  constructor(opts: {
    stallTimeoutMs?: number;
    thinkingLoopTimeoutMs?: number;
    maxInterventions?: number;
  } = {}) {
    this.stallTimeoutMs = opts.stallTimeoutMs ?? 180_000;
    this.thinkingLoopTimeoutMs = opts.thinkingLoopTimeoutMs ?? 300_000;
    this.maxInterventions = opts.maxInterventions ?? 1;
  }

  get stallTimeoutMsValue(): number {
    return this.stallTimeoutMs;
  }

  get thinkingLoopTimeoutMsValue(): number {
    return this.thinkingLoopTimeoutMs;
  }

  get maxInterventionsValue(): number {
    return this.maxInterventions;
  }

  /** Turns every later runStallAndLoopChecks() call into a no-op. */
  disablePeriodicChecks(): void {
    this.stallCheckerDisabled = true;
  }

  // -------------------------------------------------------------------------
  // Test set-up helpers
  // -------------------------------------------------------------------------

  /** Stores a shallow copy of `state`, keyed by its instanceId. */
  addInstance(state: BackgroundState): void {
    const copy = { ...state };
    this.instances.set(copy.instanceId, copy);
  }

  /** Returns the stored (mutable) state, or undefined for unknown ids. */
  getInstance(instanceId: string): BackgroundState | undefined {
    return this.instances.get(instanceId);
  }

  // -------------------------------------------------------------------------
  // Periodic check
  // -------------------------------------------------------------------------

  /**
   * Walks every non-terminal instance once. An instance whose last event is
   * older than the stall timeout is failed as stalled. Otherwise a running
   * instance whose last tool/text part is older than the thinking-loop
   * timeout either receives an intervention prompt or, once the
   * intervention budget is used up, is failed as a thinking loop.
   */
  async runStallAndLoopChecks(): Promise<void> {
    if (this.stallCheckerDisabled) return;

    // Snapshot the ids up front, then re-read each instance so that entries
    // which turned terminal in the meantime are skipped.
    const liveIds = [...this.instances.values()]
      .filter((candidate) => !TERMINAL.has(candidate.status))
      .map((candidate) => candidate.instanceId);

    for (const id of liveIds) {
      const inst = this.instances.get(id);
      if (inst === undefined || TERMINAL.has(inst.status)) continue;

      const now = Date.now();
      const msSinceEvent = now - (inst.lastEventAt ?? 0);
      const msSinceProgress = now - (inst.lastToolOrTextAt ?? 0);

      // The stall check wins over the thinking-loop check.
      if (msSinceEvent > this.stallTimeoutMs) {
        await this._abortAsStalled(inst);
        continue;
      }

      // Thinking-loop handling applies to running instances only.
      if (inst.status !== "running") continue;

      if (msSinceProgress > this.thinkingLoopTimeoutMs) {
        const used = inst.interventionCount ?? 0;
        const budgetLeft = used < this.maxInterventions;
        if (budgetLeft) {
          await this._sendIntervention(inst, msSinceProgress);
        } else {
          await this._abortAsThinkingLoop(inst, msSinceProgress);
        }
      }
    }
  }

  // -------------------------------------------------------------------------
  // Event simulation
  // -------------------------------------------------------------------------

  /**
   * Applies the bookkeeping for one incoming message part. Every part kind
   * refreshes `lastEventAt`; only "tool" and "text" parts refresh
   * `lastToolOrTextAt` and clear a pending intervention record.
   */
  simulatePartUpdated(
    instanceId: string,
    partType: "tool" | "text" | "thinking",
  ): void {
    const inst = this.instances.get(instanceId);
    if (!inst) return;

    // Heartbeat: any part counts as activity.
    inst.lastEventAt = Date.now();

    const isProgress = partType === "tool" || partType === "text";
    // "thinking" parts deliberately leave lastToolOrTextAt untouched.
    if (!isProgress) return;

    inst.lastToolOrTextAt = Date.now();
    // Real progress wipes any earlier intervention bookkeeping.
    if ((inst.interventionCount ?? 0) > 0) {
      inst.interventionCount = 0;
      delete inst.interventionAt;
      delete inst.interventionReason;
    }
  }

  // -------------------------------------------------------------------------
  // Private helpers
  // -------------------------------------------------------------------------

  /** Records the abort and marks the instance failed with a stall message. */
  private async _abortAsStalled(inst: BackgroundState): Promise<void> {
    const silentForMs = Date.now() - (inst.lastEventAt ?? 0);
    noopLogger.warn(
      `bizar: instance ${inst.instanceId} stalled (no event for ${silentForMs}ms); aborting`,
    );
    this.abortedSessions.push(inst.sessionId);
    Object.assign(inst, {
      status: "failed",
      error: `No activity for ${this.stallTimeoutMs}ms — LLM appears stalled`,
      completedAt: Date.now(),
    });
  }

  /**
   * Records an intervention prompt for the instance and bumps its
   * intervention bookkeeping.
   */
  private async _sendIntervention(inst: BackgroundState, sinceMs: number): Promise<void> {
    const text = researchInterventionPrompt(sinceMs);
    const alreadySent = inst.interventionCount ?? 0;
    noopLogger.warn(
      `bizar: instance ${inst.instanceId} thinking loop (${sinceMs}ms without tool/text); sending intervention #${alreadySent + 1}/${this.maxInterventions}`,
    );
    this.sentPrompts.push({ sessionId: inst.sessionId, text });
    const why = `thinking loop (${formatDuration(sinceMs)} without tool/text)`;
    Object.assign(inst, {
      interventionCount: alreadySent + 1,
      interventionAt: Date.now(),
      interventionReason: why,
      // Intentional: the intervention itself refreshes lastEventAt.
      lastEventAt: Date.now(),
    });
  }

  /** Records the abort and marks the instance failed as a thinking loop. */
  private async _abortAsThinkingLoop(inst: BackgroundState, sinceMs: number): Promise<void> {
    noopLogger.warn(
      `bizar: instance ${inst.instanceId} thinking loop exhausted ${this.maxInterventions} intervention(s) over ${sinceMs}ms; aborting`,
    );
    this.abortedSessions.push(inst.sessionId);
    Object.assign(inst, {
      status: "failed",
      error: `Thinking loop detected: ${formatDuration(sinceMs)} of thinking without tool calls or output. Spawn a Mimir agent for research.`,
      completedAt: Date.now(),
    });
  }
}
430
+
431
/**
 * Produces a BackgroundState fixture: a running "mimir" instance with random
 * instance/session ids and all three timestamps set to the current time.
 * Any field supplied in `overrides` replaces the corresponding default.
 */
function makeBgState(overrides: Partial<BackgroundState> = {}): BackgroundState {
  const createdAt = Date.now();
  const randomSuffix = (): string => Math.random().toString(36).slice(2, 10);

  const defaults: BackgroundState = {
    instanceId: `bgr_${randomSuffix()}`,
    sessionId: `sess_${randomSuffix()}`,
    agent: "mimir",
    status: "running",
    startedAt: createdAt,
    model: "minimax/MiniMax-M3",
    promptPreview: "Do the thing",
    resultPreview: undefined,
    resultMessageIds: [],
    error: undefined,
    parentAgent: "odin",
    parentInstanceId: undefined,
    logPath: "~/.cache/bizar/logs/test.log",
    timeoutMs: 300_000,
    toolCallCount: 0,
    loopGuardTool: undefined,
    lastEventAt: createdAt,
    lastToolOrTextAt: createdAt,
    interventionCount: 0,
  };

  return { ...defaults, ...overrides };
}
456
+
457
/**
 * Group 4 — drives FakeInstanceManagerForStall through the stall check, the
 * thinking-loop check, and the part-updated bookkeeping.
 */
describe("Stall + thinking-loop detection logic", () => {
  const STALL_MS = 180_000;
  const LOOP_MS = 300_000;

  /** Fresh fake manager with the thresholds every test in this group uses. */
  const newManager = (): FakeInstanceManagerForStall =>
    new FakeInstanceManagerForStall({
      stallTimeoutMs: STALL_MS,
      thinkingLoopTimeoutMs: LOOP_MS,
      maxInterventions: 1,
    });

  // Test 1: Stall timeout fires when no events
  it("stall timeout fires when no events (lastEventAt is old)", async () => {
    const mgr = newManager();
    const oldTime = Date.now() - (STALL_MS + 1000); // past the stall threshold
    mgr.addInstance(
      makeBgState({
        instanceId: "bgr_stall_fire",
        status: "running",
        lastEventAt: oldTime,
        lastToolOrTextAt: oldTime,
      }),
    );

    await mgr.runStallAndLoopChecks();

    const inst = mgr.getInstance("bgr_stall_fire")!;
    expect(mgr.abortedSessions).toContain(inst.sessionId);
    expect(inst.status).toBe("failed");
    expect(inst.error!).toContain("No activity for");
  });

  // Test 2: Stall timeout does NOT fire when events are recent
  it("stall timeout does NOT fire when events are recent", async () => {
    const mgr = newManager();
    const recentTime = Date.now();
    mgr.addInstance(
      makeBgState({
        instanceId: "bgr_stall_recent",
        status: "running",
        lastEventAt: recentTime,
        lastToolOrTextAt: recentTime,
      }),
    );

    await mgr.runStallAndLoopChecks();

    expect(mgr.abortedSessions).toHaveLength(0);
    expect(mgr.getInstance("bgr_stall_recent")!.status).toBe("running");
  });

  // Test 3: Stall timeout does NOT fire for terminal instances
  it("stall timeout does NOT fire for terminal instances", async () => {
    const mgr = newManager();
    const oldTime = Date.now() - (STALL_MS + 1000); // would fire if not terminal
    mgr.addInstance(
      makeBgState({
        instanceId: "bgr_stall_terminal",
        status: "done",
        lastEventAt: oldTime,
        lastToolOrTextAt: oldTime,
      }),
    );

    await mgr.runStallAndLoopChecks();

    expect(mgr.abortedSessions).toHaveLength(0);
    expect(mgr.getInstance("bgr_stall_terminal")!.status).toBe("done");
  });

  // Test 4: Thinking loop detection fires after threshold
  it("thinking loop detection fires after threshold, sends intervention, increments counter", async () => {
    const mgr = newManager();

    // lastEventAt must be within the stall timeout so the thinking-loop
    // check is reached; lastToolOrTextAt must be beyond the loop threshold.
    const now = Date.now();
    mgr.addInstance(
      makeBgState({
        instanceId: "bgr_thinking_loop",
        status: "running",
        lastEventAt: now - 60_000, // 60s ago — within 180s stall timeout
        lastToolOrTextAt: now - (LOOP_MS + 1000), // 301s ago — beyond 300s loop threshold
        interventionCount: 0,
      }),
    );

    await mgr.runStallAndLoopChecks();

    expect(mgr.sentPrompts).toHaveLength(1);
    expect(mgr.sentPrompts[0]!.text).toContain("[SYSTEM REMINDER — Thinking Loop Detected]");

    const inst = mgr.getInstance("bgr_thinking_loop")!;
    expect(inst.interventionCount).toBe(1);
    expect(inst.interventionAt).toBeDefined();
    expect(inst.interventionReason).toContain("thinking loop");
    expect(inst.status).toBe("running"); // still running — not failed yet
  });

  // Test 5: Thinking loop intervention respects maxInterventions
  it("when interventionCount >= maxInterventions, instance is aborted as thinking loop", async () => {
    const mgr = newManager();

    const now = Date.now();
    mgr.addInstance(
      makeBgState({
        instanceId: "bgr_max_interventions",
        status: "running",
        lastEventAt: now - 60_000, // within stall timeout
        lastToolOrTextAt: now - (LOOP_MS + 1000), // beyond loop threshold
        // Already at max interventions — should abort without sending a prompt
        interventionCount: 1,
      }),
    );

    await mgr.runStallAndLoopChecks();

    const inst = mgr.getInstance("bgr_max_interventions")!;
    expect(mgr.sentPrompts).toHaveLength(0); // No new prompt sent
    expect(mgr.abortedSessions).toContain(inst.sessionId);
    expect(inst.status).toBe("failed");
    expect(inst.error!).toMatch(/thinking loop/i); // case-insensitive
  });

  // Test 6: Tool/text events reset intervention counter
  it("a tool or text part after intervention resets interventionCount to 0", async () => {
    const mgr = newManager();

    const oldTime = Date.now() - (LOOP_MS + 1000);
    mgr.addInstance(
      makeBgState({
        instanceId: "bgr_reset_counter",
        status: "running",
        lastEventAt: oldTime,
        lastToolOrTextAt: oldTime,
        interventionCount: 1,
        interventionAt: oldTime,
        interventionReason: "thinking loop (1m 0s without tool/text)",
      }),
    );

    // Simulate a text part arriving (progress signal)
    mgr.simulatePartUpdated("bgr_reset_counter", "text");

    const inst = mgr.getInstance("bgr_reset_counter")!;
    expect(inst.interventionCount).toBe(0);
    expect(inst.interventionAt).toBeUndefined();
    expect(inst.interventionReason).toBeUndefined();
    // Strictly greater: ">=" would also pass if the timestamp were never
    // touched, because the fixture starts at exactly oldTime.
    expect(inst.lastToolOrTextAt).toBeGreaterThan(oldTime);
    expect(inst.lastEventAt).toBeGreaterThan(oldTime);
  });

  // Test 7: Thinking-only events do NOT reset intervention counter
  it("a thinking part does NOT update lastToolOrTextAt or reset interventionCount", async () => {
    const mgr = newManager();

    const baseTime = Date.now() - 60_000;
    mgr.addInstance(
      makeBgState({
        instanceId: "bgr_thinking_only",
        status: "running",
        lastEventAt: baseTime,
        lastToolOrTextAt: baseTime, // set to 60s ago
        // Start from a non-zero count; starting at 0 could not reveal a reset.
        interventionCount: 1,
      }),
    );

    // Simulate a thinking part arriving
    mgr.simulatePartUpdated("bgr_thinking_only", "thinking");

    const inst = mgr.getInstance("bgr_thinking_only")!;
    // lastToolOrTextAt must NOT have been updated by the thinking part
    expect(inst.lastToolOrTextAt).toBe(baseTime);
    // ...and the pending intervention must still be on record
    expect(inst.interventionCount).toBe(1);
    // The part still counts as a heartbeat, so lastEventAt moves forward
    expect(inst.lastEventAt).toBeGreaterThan(baseTime);
  });
});
659
+
660
+ // ---------------------------------------------------------------------------
661
+ // Group 5 — bg-status toView includes v0.3.0 fields
662
+ // ---------------------------------------------------------------------------
663
+
664
+ import type { InstanceView } from "../src/background.js";
665
+
666
/**
 * Projects a BackgroundState onto an InstanceView for the assertions below.
 *
 * The nine core fields (including `lastEventAt`) are always copied. The
 * optional fields `completedAt`, `resultPreview`, `error` and
 * `parentInstanceId` are added only when defined. Intervention metadata is
 * added only when `interventionCount` is greater than zero.
 */
function toViewForTest(inst: import("../src/background-state.js").BackgroundState): InstanceView {
  const sentCount = inst.interventionCount ?? 0;

  // v0.3.0: intervention metadata appears only once an intervention was sent.
  const interventionFields =
    sentCount > 0
      ? {
          interventionCount: sentCount,
          ...(inst.interventionAt !== undefined ? { interventionAt: inst.interventionAt } : {}),
          ...(inst.interventionReason !== undefined
            ? { interventionReason: inst.interventionReason }
            : {}),
        }
      : {};

  const view: InstanceView = {
    instanceId: inst.instanceId,
    agent: inst.agent,
    status: inst.status,
    startedAt: inst.startedAt,
    toolCallCount: inst.toolCallCount,
    promptPreview: inst.promptPreview,
    parentAgent: inst.parentAgent,
    sessionId: inst.sessionId,
    lastEventAt: inst.lastEventAt,
    ...(inst.completedAt !== undefined ? { completedAt: inst.completedAt } : {}),
    ...(inst.resultPreview !== undefined ? { resultPreview: inst.resultPreview } : {}),
    ...(inst.error !== undefined ? { error: inst.error } : {}),
    ...(inst.parentInstanceId !== undefined ? { parentInstanceId: inst.parentInstanceId } : {}),
    ...interventionFields,
  };
  return view;
}
692
+
693
/**
 * Group 5 — checks which v0.3.0 fields toViewForTest() surfaces, with and
 * without a recorded intervention.
 */
describe("bg-status toView — v0.3.0 fields", () => {
  it("toView includes lastEventAt, interventionCount, interventionAt, interventionReason after intervention", () => {
    const now = Date.now();
    const inst: import("../src/background-state.js").BackgroundState = {
      instanceId: "bgr_view_test",
      sessionId: "sess_view",
      agent: "mimir",
      status: "running",
      startedAt: now - 600_000,
      model: "minimax/MiniMax-M3",
      promptPreview: "Research X",
      toolCallCount: 0,
      parentAgent: "odin",
      logPath: "/tmp/test.log",
      timeoutMs: 300_000,
      lastEventAt: now - 60_000,
      lastToolOrTextAt: now - 60_000,
      interventionCount: 2,
      interventionAt: now - 60_000,
      interventionReason: "thinking loop (5m 0s without tool/text)",
    };

    const view = toViewForTest(inst);

    expect(view.lastEventAt).toBe(now - 60_000);
    expect(view.interventionCount).toBe(2);
    expect(view.interventionAt).toBe(now - 60_000);
    expect(view.interventionReason).toBe("thinking loop (5m 0s without tool/text)");
  });

  it("toView omits intervention fields (not null) when interventionCount is 0 or undefined", () => {
    const now = Date.now();
    const inst: import("../src/background-state.js").BackgroundState = {
      instanceId: "bgr_view_no_intervention",
      sessionId: "sess_view2",
      agent: "mimir",
      status: "running",
      startedAt: now,
      model: "minimax/MiniMax-M3",
      promptPreview: "Research Y",
      toolCallCount: 0,
      parentAgent: "odin",
      logPath: "/tmp/test.log",
      timeoutMs: 300_000,
      // v0.3.0 fields are absent (no intervention yet)
    };

    // Cover both halves of the title: the count missing, and an explicit 0.
    for (const candidate of [inst, { ...inst, interventionCount: 0 }]) {
      const view = toViewForTest(candidate);

      // lastEventAt is copied through unconditionally; it is undefined here
      // only because this fixture never sets it.
      expect(view.lastEventAt).toBeUndefined();
      // Intervention fields must be absent as keys — not null, not 0.
      expect("interventionCount" in view).toBe(false);
      expect("interventionAt" in view).toBe(false);
      expect("interventionReason" in view).toBe(false);
    }
  });
});