@polderlabs/bizar-plugin 0.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +448 -0
  3. package/bun.lock +88 -0
  4. package/index.ts +1113 -0
  5. package/package.json +42 -0
  6. package/scripts/check-forbidden-imports.sh +33 -0
  7. package/src/background-state.ts +463 -0
  8. package/src/background.ts +964 -0
  9. package/src/commands-impl.ts +369 -0
  10. package/src/commands.ts +880 -0
  11. package/src/event-stream.ts +574 -0
  12. package/src/fingerprint.ts +120 -0
  13. package/src/handoff.ts +79 -0
  14. package/src/http-client.ts +467 -0
  15. package/src/logger.ts +144 -0
  16. package/src/loop.ts +176 -0
  17. package/src/options.ts +421 -0
  18. package/src/plan-fs.ts +323 -0
  19. package/src/report.ts +178 -0
  20. package/src/research-prompt.ts +35 -0
  21. package/src/serve.ts +476 -0
  22. package/src/settings.ts +349 -0
  23. package/src/state.ts +298 -0
  24. package/src/tools/bg-collect.ts +104 -0
  25. package/src/tools/bg-get-comments.ts +239 -0
  26. package/src/tools/bg-kill.ts +87 -0
  27. package/src/tools/bg-spawn.ts +263 -0
  28. package/src/tools/bg-status.ts +99 -0
  29. package/src/tools/plan-action.ts +767 -0
  30. package/src/tools/wait-for-feedback.ts +402 -0
  31. package/tests/attach-handler-bug.test.ts +166 -0
  32. package/tests/background-state.test.ts +277 -0
  33. package/tests/background.test.ts +402 -0
  34. package/tests/block.test.ts +193 -0
  35. package/tests/canonical-key-order.test.ts +71 -0
  36. package/tests/commands-impl.test.ts +442 -0
  37. package/tests/commands.test.ts +548 -0
  38. package/tests/config.test.ts +122 -0
  39. package/tests/dispose.test.ts +336 -0
  40. package/tests/event-stream.test.ts +409 -0
  41. package/tests/event.test.ts +262 -0
  42. package/tests/fingerprint.test.ts +161 -0
  43. package/tests/http-client.test.ts +403 -0
  44. package/tests/init-helpers.test.ts +203 -0
  45. package/tests/integration/slash-command.test.ts +348 -0
  46. package/tests/integration/tool-routing.test.ts +314 -0
  47. package/tests/loop.test.ts +397 -0
  48. package/tests/options.test.ts +274 -0
  49. package/tests/serve.test.ts +335 -0
  50. package/tests/settings.test.ts +351 -0
  51. package/tests/stall-think.test.ts +749 -0
  52. package/tests/state.test.ts +275 -0
  53. package/tests/tools/bg-collect.test.ts +337 -0
  54. package/tests/tools/bg-get-comments.test.ts +485 -0
  55. package/tests/tools/bg-kill.test.ts +231 -0
  56. package/tests/tools/bg-spawn.test.ts +311 -0
  57. package/tests/tools/bg-status.test.ts +216 -0
  58. package/tests/tools/plan-action.test.ts +599 -0
  59. package/tests/tools/wait-for-feedback.test.ts +390 -0
  60. package/tsconfig.json +29 -0
@@ -0,0 +1,964 @@
1
+ /**
2
+ * background.ts
3
+ *
4
+ * InstanceManager — owns the in-memory map of background instances and
5
+ * orchestrates the per-instance event handlers (v0.4.2 spec §2.2, §4, §5.4, §6.2).
6
+ *
7
+ * Responsibilities:
8
+ * - `add()` is the single entry point for inserting a new instance. The
9
+ * cap check and the map insertion happen inside one async mutex, so
10
+ * concurrent `add()` calls can never exceed the cap (HIGH-10 / HIGH-12 /
11
+ * HIGH-21 / HIGH-38).
12
+ * - `update()` patches the in-memory state and persists to disk. The
13
+ * per-instance mutex from {@link BackgroundStateStore} serializes
14
+ * concurrent updates to the same instance.
15
+ * - `kill()` and `collect()` operate on the in-memory state; the HTTP
16
+ * calls go through {@link HttpClient}.
17
+ * - `rebuildInMemoryMap()` is called on init (spec §5.4). Any in-flight
18
+ * `running` or `pending` instance is marked `failed` because the
19
+ * serve child is new and the opencode sessions are gone.
20
+ * - `shutdownAll()` is called on `dispose` / SIGTERM. Marks all in-memory
21
+ * instances `failed` with `error: "plugin shutting down"`, aborts
22
+ * each via `POST /session/{id}/abort` (best-effort, 5s timeout per
23
+ * call), then returns. Terminating the serve child is the caller's job.
24
+ *
25
+ * Per-instance event handler (spec §4.1, §4.3, §6.2):
26
+ * - For every `EventMessagePartUpdated` of `type: "tool"`, increment
27
+ * `toolCallCount`. If the count reaches the per-instance cap, abort
28
+ * the session and mark the instance `failed` with
29
+ * `error: "Tool-call cap reached (N). Aborted to prevent cost runaway."`.
30
+ * - If the tool part's error matches the loop-guard regex
31
+ * `Loop protection: 12 identical calls to (\S+)`, capture the tool
32
+ * name into `loopGuardTool`, set `error` to the canonical string,
33
+ * and mark the instance `failed`.
34
+ * - For every `EventMessagePartUpdated` of `type: "text"` on an
35
+ * assistant message, refresh `resultPreview` (last 200 chars).
36
+ * - On `EventSessionIdle`, mark the instance `done`.
37
+ * - On `EventSessionError`, mark the instance `failed` with the error.
38
+ *
39
+ * v0.3.0 — stall and thinking-loop protection:
40
+ * - Every event handler updates `lastEventAt` (the "heartbeat"). The
41
+ * stall checker fires every `STALL_CHECK_INTERVAL_MS`; if a non-terminal
42
+ * instance has `now - lastEventAt > backgroundStallTimeoutMs`, the
43
+ * session is aborted and the instance marked `failed`.
44
+ * - `tool` and `text` parts advance `lastToolOrTextAt`. `thinking`
45
+ * parts do NOT advance it; that is the loop indicator.
46
+ * - The thinking-loop checker fires every `STALL_CHECK_INTERVAL_MS`. For
47
+ * a `running` instance with `now - lastToolOrTextAt >
48
+ * backgroundThinkingLoopTimeoutMs`:
49
+ * - If `interventionCount < backgroundMaxInterventions`: send a
50
+ * research-intervention prompt (fire-and-forget) and increment the
51
+ * counter.
52
+ * - Otherwise: abort the session and mark `failed`.
53
+ * - When a `tool` or `text` part arrives after one or more interventions,
54
+ * the counter is reset to 0 (sign of progress). The intervention
55
+ * metadata is cleared so a later status check does not show stale
56
+ * intervention info.
57
+ *
58
+ * "Track BEFORE HTTP" invariant (spec §2.2 / HIGH-21):
59
+ * - The instance is added to the map (status `pending`) BEFORE any HTTP
60
+ * call. If the HTTP call fails, the instance is marked `failed`. The
61
+ * map is never left in a half-state.
62
+ */
63
+
64
+ import type { BackgroundState, BackgroundStateStore, Logger } from "./background-state.js";
65
+ import { TERMINAL_STATUSES } from "./background-state.js";
66
+ import type { HttpClient } from "./http-client.js";
67
+ import type { EventStream, StreamEvent, SessionEventHandler } from "./event-stream.js";
68
+ import type { ServeLifecycle } from "./serve.js";
69
+ import { researchInterventionPrompt } from "./research-prompt.js";
70
+
71
+ // --- Public surface -------------------------------------------------------
72
+
73
/**
 * Read-only snapshot of a single background instance, produced for the
 * `bizar_status` tool. Status values come from `BackgroundState["status"]`.
 */
export interface InstanceView {
  /** Identifier the instance is stored under in the manager. */
  instanceId: string;
  /** Name of the agent running in the instance. */
  agent: string;
  /** Current lifecycle status. */
  status: BackgroundState["status"];
  /** Epoch milliseconds at which the manager accepted the instance. */
  startedAt: number;
  /** Epoch milliseconds at which a terminal status was recorded, if any. */
  completedAt?: number;
  /** Number of tool parts observed so far. */
  toolCallCount: number;
  /** Truncated copy of the prompt that started the instance. */
  promptPreview: string;
  /** Truncated preview of the assistant output, when available. */
  resultPreview?: string;
  /** Failure description, set when the instance did not finish cleanly. */
  error?: string;
  /** Agent that spawned this instance. */
  parentAgent: string;
  /** Instance that spawned this one, if it was spawned by another instance. */
  parentInstanceId?: string;
  /** opencode session id backing the instance. */
  sessionId: string;

  // v0.3.0 — stall and thinking-loop protection
  /** Epoch milliseconds of the most recent event seen for the session. */
  lastEventAt?: number;
  /** How many research interventions have been sent since the last progress. */
  interventionCount?: number;
  /** Epoch milliseconds of the most recent intervention. */
  interventionAt?: number;
  /** Human-readable reason recorded with the most recent intervention. */
  interventionReason?: string;
}
93
+
94
/** Value returned by `bizar_collect` (see `InstanceManager.collect`). */
export interface CollectResult {
  /** Status of the instance at the moment the result was built. */
  status: BackgroundState["status"];
  /** Collected result text (or the stored preview when the wait timed out). */
  result: string;
  /** Number of tool parts counted for the instance. */
  toolCallCount: number;
  /** Milliseconds elapsed since the instance's `startedAt`. */
  durationMs: number;
  /** Failure or timeout description, when there is one. */
  error?: string;
}
102
+
103
/**
 * Optional criteria accepted by `list()`. A field that is omitted (or
 * falsy) does not restrict the result.
 */
export interface InstanceListFilter {
  /** Keep only instances whose `agent` equals this value. */
  agent?: string;
  /** Keep only instances whose `status` equals this value. */
  status?: BackgroundState["status"];
}
108
+
109
/**
 * Input accepted by `add()`: a `BackgroundState` without `status` and
 * `startedAt`. The manager forces the status to `pending` and stamps
 * `startedAt` itself.
 */
export type AddDraft = Omit<BackgroundState, "status" | "startedAt">;
112
+
113
/**
 * Outcome of `add()`: the fully populated state on success, or the
 * sentinel string `"cap_reached"` when the concurrency cap rejected the
 * insert.
 */
export type AddResult = BackgroundState | "cap_reached";
116
+
117
+ // --- Constants ------------------------------------------------------------
118
+
119
/** Upper bound on the length of `resultPreview` (spec §3.2). */
const RESULT_PREVIEW_MAX = 200;

/** Upper bound on the length of the `promptPreview` stored in the JSON. */
const PROMPT_PREVIEW_MAX = 200;

/**
 * Loop-guard regex (spec §4.1, NEW-H8 pin). Matches the loop-protection
 * error text and captures the offending tool name in group 1.
 */
const LOOP_GUARD_RE = /Loop protection: 12 identical calls to (\S+)/;

/**
 * Period of the stall + thinking-loop checker, in milliseconds. Fifteen
 * seconds is short enough to notice a stall within one tick of the
 * default 3-minute stall timeout, and long enough that the per-instance
 * mutex is not constantly contested. Spec §v0.3.0.
 */
const STALL_CHECK_INTERVAL_MS = 15 * 1_000;
133
+
134
+ // --- Class ---------------------------------------------------------------
135
+
136
+ /**
137
+ * Manages the in-memory map of background instances. Created once at
138
+ * plugin init; lives for the life of the plugin process.
139
+ */
140
+ export class InstanceManager {
141
+ private instances = new Map<string, BackgroundState>();
142
+ private addLock: Promise<unknown> = Promise.resolve();
143
+ private stateStore: BackgroundStateStore;
144
+ private maxConcurrent: number;
145
+ private toolCallCap: number;
146
+ private logger: Logger;
147
+ private serve: ServeLifecycle;
148
+ private http: HttpClient;
149
+ private stream: EventStream;
150
+ private worktree: string;
151
+ // v0.3.0 — stall and thinking-loop protection
152
+ private stallTimeoutMs: number;
153
+ private thinkingLoopTimeoutMs: number;
154
+ private maxInterventions: number;
155
+ /** Interval handle for the periodic stall + thinking-loop checker. */
156
+ private stallCheckerTimer: ReturnType<typeof setInterval> | null = null;
157
+ /** Guard so tests can disable the interval without monkey-patching. */
158
+ private stallCheckerDisabled = false;
159
+
160
/**
 * Wire up collaborators, normalise the numeric limits, and start the
 * periodic stall + thinking-loop checker.
 *
 * Every numeric option is floored and clamped to a minimum:
 * `maxConcurrent`, `toolCallCap` and `maxInterventions` to at least 1,
 * both timeouts to at least 1000 ms. Defaults when omitted: stall
 * timeout 180 000 ms, thinking-loop timeout 300 000 ms, one intervention.
 */
constructor(opts: {
  stateStore: BackgroundStateStore;
  maxConcurrent: number;
  toolCallCap: number;
  logger: Logger;
  serve: ServeLifecycle;
  http: HttpClient;
  stream: EventStream;
  // v0.3.0
  stallTimeoutMs?: number;
  thinkingLoopTimeoutMs?: number;
  maxInterventions?: number;
}) {
  // Floor to an integer, then clamp from below.
  const flooredAtLeast = (minimum: number, raw: number): number =>
    Math.max(minimum, Math.floor(raw));

  // Collaborators.
  this.stateStore = opts.stateStore;
  this.logger = opts.logger;
  this.serve = opts.serve;
  this.http = opts.http;
  this.stream = opts.stream;
  this.worktree = opts.serve.worktree;

  // Limits.
  this.maxConcurrent = flooredAtLeast(1, opts.maxConcurrent);
  this.toolCallCap = flooredAtLeast(1, opts.toolCallCap);
  this.stallTimeoutMs = flooredAtLeast(1_000, opts.stallTimeoutMs ?? 180_000);
  this.thinkingLoopTimeoutMs = flooredAtLeast(
    1_000,
    opts.thinkingLoopTimeoutMs ?? 300_000,
  );
  this.maxInterventions = flooredAtLeast(1, opts.maxInterventions ?? 1);

  // Keep the interval handle so `shutdownAll()` and
  // `disablePeriodicChecks()` can cancel the checker later.
  this.stallCheckerTimer = setInterval(() => {
    void this.runStallAndLoopChecks();
  }, STALL_CHECK_INTERVAL_MS);
}
197
+
198
+ // --- Getters ------------------------------------------------------------
199
+
200
/** Number of instances currently held in the in-memory map (any status). */
get size(): number {
  const { size } = this.instances;
  return size;
}

/** Effective stall timeout in milliseconds, after clamping. Exposed for tests. */
get stallTimeoutMsValue(): number {
  const { stallTimeoutMs } = this;
  return stallTimeoutMs;
}

/** Effective thinking-loop timeout in milliseconds, after clamping. Exposed for tests. */
get thinkingLoopTimeoutMsValue(): number {
  const { thinkingLoopTimeoutMs } = this;
  return thinkingLoopTimeoutMs;
}

/** Effective intervention budget, after clamping. Exposed for tests. */
get maxInterventionsValue(): number {
  const { maxInterventions } = this;
  return maxInterventions;
}
218
+
219
/**
 * Turn the periodic stall + thinking-loop checker off for good: sets the
 * disabled flag (which also makes `runStallAndLoopChecks()` return
 * immediately) and cancels the interval if one is still registered.
 * Safe to call more than once.
 */
disablePeriodicChecks(): void {
  this.stallCheckerDisabled = true;
  const handle = this.stallCheckerTimer;
  if (handle === null) return;
  clearInterval(handle);
  this.stallCheckerTimer = null;
}
231
+
232
/**
 * Perform a single pass of the stall + thinking-loop checker over every
 * non-terminal instance. The constructor's `setInterval` drives this in
 * production; it is public so tests can trigger a pass deterministically.
 *
 * Per instance, in order:
 *  1. If no event has been seen for longer than the stall timeout, the
 *     instance is aborted as stalled and nothing else is checked.
 *  2. Otherwise, for `running` instances only, if no tool/text part has
 *     been seen for longer than the thinking-loop timeout, either an
 *     intervention is sent (budget remaining) or the instance is aborted.
 *
 * Does nothing once the checker has been disabled.
 */
async runStallAndLoopChecks(): Promise<void> {
  if (this.stallCheckerDisabled) return;

  // Take the ids up front: the awaits below let the map change under us.
  const candidateIds = [...this.instances.values()]
    .filter((entry) => !TERMINAL_STATUSES.has(entry.status))
    .map((entry) => entry.instanceId);

  for (const candidateId of candidateIds) {
    // Re-read on every iteration; an earlier await may have finished it.
    const entry = this.instances.get(candidateId);
    if (entry === undefined || TERMINAL_STATUSES.has(entry.status)) continue;

    const tickAt = Date.now();
    // A missing timestamp is treated as epoch 0, so such an instance
    // counts as maximally stale.
    const idleFor = tickAt - (entry.lastEventAt ?? 0);
    const quietFor = tickAt - (entry.lastToolOrTextAt ?? 0);

    // The stall check wins: it is the more severe failure.
    if (idleFor > this.stallTimeoutMs) {
      await this._abortAsStalled(entry);
      continue;
    }

    // Only `running` instances can be in a thinking loop.
    if (entry.status !== "running") continue;
    if (!(quietFor > this.thinkingLoopTimeoutMs)) continue;

    const used = entry.interventionCount ?? 0;
    const budgetLeft = used < this.maxInterventions;
    if (budgetLeft) {
      await this._sendIntervention(entry, quietFor);
    } else {
      await this._abortAsThinkingLoop(entry, quietFor);
    }
  }
}
278
+
279
+ // --- Atomic add (spec §2.2) ---------------------------------------------
280
+
281
/**
 * Add a new instance. The cap check and the map insertion run as one
 * step on the `addLock` promise chain, so concurrent `add()` calls are
 * processed one after another and can never overshoot the cap.
 *
 * The lock chain is kept alive even when one attempt throws: the caller
 * of the failing `add()` still receives the rejection, but later calls
 * run normally instead of inheriting a permanently rejected lock.
 *
 * @param draft Instance fields supplied by the caller. `status` and
 *   `startedAt` are set here; `promptPreview` is truncated.
 * @returns The stored `BackgroundState`, or `"cap_reached"` when the
 *   number of non-terminal instances is already at `maxConcurrent`.
 */
async add(draft: AddDraft): Promise<AddResult> {
  const attempt = this.addLock.then((): AddResult => {
    // Count "live" instances: anything not yet terminal.
    let live = 0;
    for (const inst of this.instances.values()) {
      if (!TERMINAL_STATUSES.has(inst.status)) live += 1;
    }
    if (live >= this.maxConcurrent) {
      this.logger.warn(
        `bizar: max concurrent instances reached (${this.maxConcurrent}); rejecting add`,
      );
      return "cap_reached";
    }

    const now = Date.now();
    const full: BackgroundState = {
      ...draft,
      status: "pending",
      startedAt: now,
      toolCallCount: draft.toolCallCount ?? 0,
      // Trim the prompt preview so the JSON stays small.
      promptPreview: (draft.promptPreview ?? "").slice(0, PROMPT_PREVIEW_MAX),
      // v0.3.0 — seed both liveness timestamps from the start time so a
      // freshly spawned instance is not flagged as stalled while its
      // session is still being created.
      lastEventAt: now,
      lastToolOrTextAt: now,
      interventionCount: 0,
    };
    this.instances.set(draft.instanceId, full);

    // Persist asynchronously; a failure is logged but does not roll back
    // the in-memory insert (the instance is "tracked" either way).
    this.stateStore.save(full).catch((err: unknown) => {
      this.logger.warn(
        `bizar: failed to persist new instance ${draft.instanceId}: ${
          err instanceof Error ? err.message : String(err)
        }`,
      );
    });

    // BUGFIX (v0.5.1): do NOT call attachEventHandler() here. The
    // instance is added with sessionId="" (filled in later by
    // POST /session) and EventStream.onSessionEvent rejects empty
    // strings. Callers attach explicitly once the real sessionId is
    // known.
    return full;
  });

  // Store a never-rejecting tail. Storing `attempt` itself would poison
  // the lock: after one throw, every later `.then()` on it would skip its
  // callback and reject with the same stale error.
  this.addLock = attempt.catch(() => undefined);
  return attempt;
}
337
+
338
+ // --- Read access --------------------------------------------------------
339
+
340
/**
 * Fetch the live in-memory state for `instanceId`.
 *
 * @returns The stored object itself (not a copy), or `null` when the id
 *   is unknown.
 */
async get(instanceId: string): Promise<BackgroundState | null> {
  const found = this.instances.get(instanceId);
  return found === undefined ? null : found;
}
346
+
347
/**
 * Build view snapshots of the in-memory instances for `bizar_status`.
 *
 * @param filter Optional `agent` / `status` criteria; a missing or empty
 *   criterion matches everything.
 * @returns Matching views ordered by `startedAt`, oldest first.
 */
async list(filter?: InstanceListFilter): Promise<InstanceView[]> {
  const wantedAgent = filter?.agent;
  const wantedStatus = filter?.status;

  const views = [...this.instances.values()]
    .filter((entry) => !wantedAgent || entry.agent === wantedAgent)
    .filter((entry) => !wantedStatus || entry.status === wantedStatus)
    .map((entry) => toView(entry));

  views.sort((earlier, later) => earlier.startedAt - later.startedAt);
  return views;
}
361
+
362
+ // --- Update -------------------------------------------------------------
363
+
364
/**
 * Apply `patch` to the in-memory instance and write the result to disk,
 * all under the state store's per-instance lock.
 *
 * - Unknown ids are ignored (checked before and again inside the lock).
 * - If the instance is in a terminal status after the patch and has no
 *   `completedAt` yet, `completedAt` is stamped with the current time.
 * - A failed save is logged as a warning; the in-memory change is kept.
 */
async update(instanceId: string, patch: Partial<BackgroundState>): Promise<void> {
  if (!this.instances.get(instanceId)) return;

  await this.stateStore.withLock(instanceId, async () => {
    // Look the instance up again: it may have gone while we waited.
    const target = this.instances.get(instanceId);
    if (!target) return;

    Object.assign(target, patch);

    // After the assign, `target.status` already reflects any status the
    // patch carried, so checking it covers both cases.
    const needsCompletionStamp =
      TERMINAL_STATUSES.has(target.status) && !target.completedAt;
    if (needsCompletionStamp) {
      target.completedAt = Date.now();
    }

    try {
      await this.stateStore.save(target);
    } catch (err: unknown) {
      const reason = err instanceof Error ? err.message : String(err);
      this.logger.warn(
        `bizar: failed to persist update for ${instanceId}: ${reason}`,
      );
    }
  });
}
390
+
391
+ // --- Kill ---------------------------------------------------------------
392
+
393
/**
 * Abort the opencode session behind an instance and record it as
 * `killed`. Unknown ids and instances that are already terminal are
 * left untouched (spec §1.5, MEDIUM-40).
 *
 * The `killed` status is written whether or not the abort request
 * succeeded, so the user always sees the deliberate kill. Session events
 * that arrive afterwards do not change it, because the per-instance
 * event handler ignores events for terminal instances.
 */
async kill(instanceId: string): Promise<void> {
  const target = this.instances.get(instanceId);
  if (!target) return;

  const alreadyFinished = TERMINAL_STATUSES.has(target.status);
  if (alreadyFinished) {
    this.logger.debug(
      `bizar: kill(${instanceId}) is a no-op (status=${target.status})`,
    );
    return;
  }

  // Ask the server to abort. A failure is only logged; see above.
  const abort = await this.http.abortSession(target.sessionId, this.worktree);
  if (!abort.ok) {
    this.logger.warn(
      `bizar: kill(${instanceId}): abort failed: ${abort.error}`,
    );
  }

  const killedAt = Date.now();
  await this.update(instanceId, { status: "killed", completedAt: killedAt });
  this.logger.info(`bizar: killed background instance ${instanceId}`);
}
424
+ // --- Collect ------------------------------------------------------------
425
+
426
/**
 * Wait for the instance to reach a terminal state (or until `timeoutMs`
 * elapses), then build the result per spec §4.4.
 *
 * If the instance is already terminal on entry, the wait is skipped.
 * If the wait times out, a still-running instance is marked `timed_out`
 * and the stored preview is returned with a timeout error.
 *
 * @param instanceId Id of the instance to collect.
 * @param timeoutMs Maximum time to wait; negative values count as 0.
 * @returns Status, result text, tool-call count, duration and optional
 *   error for the instance.
 * @throws Error when the instance is unknown or disappears mid-collect.
 *   Also rejects with whatever `stream.onSessionEvent` throws if the
 *   subscription cannot be created.
 */
async collect(instanceId: string, timeoutMs: number): Promise<CollectResult> {
  const inst = this.instances.get(instanceId);
  if (!inst) {
    throw new Error(`collect: instance ${instanceId} not found`);
  }
  const startedAt = inst.startedAt;
  const deadline = Date.now() + Math.max(0, timeoutMs);

  // True once the manager itself has recorded a terminal status.
  const isTerminalNow = (): boolean => {
    const cur = this.instances.get(instanceId);
    return cur !== undefined && TERMINAL_STATUSES.has(cur.status);
  };

  // 1. Wait for terminal state.
  if (!TERMINAL_STATUSES.has(inst.status)) {
    const reachedTerminal = await new Promise<boolean>((resolve, reject) => {
      const remaining = Math.max(0, deadline - Date.now());
      if (remaining === 0) {
        resolve(false);
        return;
      }

      let settled = false;
      let timer: ReturnType<typeof setTimeout> | undefined;
      // Starts as a no-op so `settle()` is safe to call at any point,
      // including before (or during) the subscription call below.
      let unsubscribe: () => void = () => undefined;

      // Single exit point: cancels the timer, drops the subscription and
      // resolves exactly once.
      const settle = (value: boolean): void => {
        if (settled) return;
        settled = true;
        if (timer !== undefined) clearTimeout(timer);
        unsubscribe();
        resolve(value);
      };

      timer = setTimeout(() => settle(false), remaining);

      try {
        unsubscribe = this.stream.onSessionEvent(inst.sessionId, (ev) => {
          // Either the session ended, or we marked the instance terminal
          // ourselves (tool-cap / loop-guard).
          if (
            ev.type === "session.idle" ||
            ev.type === "session.error" ||
            isTerminalNow()
          ) {
            settle(true);
          }
        });
      } catch (err: unknown) {
        // Subscription failed (e.g. the session id is still empty). Do
        // not leave the timer armed: nothing will ever cancel it.
        settled = true;
        clearTimeout(timer);
        reject(err);
        return;
      }

      if (settled) {
        // The handler fired while subscribing, before the real
        // unsubscribe function had been assigned; release it now.
        unsubscribe();
        return;
      }

      // Re-check after subscribing in case the state already changed.
      if (isTerminalNow()) settle(true);
    });

    if (!reachedTerminal) {
      // Timed out. Mark the instance if nothing else finished it, then
      // return what we have.
      const atTimeout = this.instances.get(instanceId);
      if (atTimeout && !TERMINAL_STATUSES.has(atTimeout.status)) {
        await this.update(instanceId, {
          status: "timed_out",
          completedAt: Date.now(),
        });
      }
      const dur = Date.now() - startedAt;
      const afterUpdate = this.instances.get(instanceId);
      const timedOut: CollectResult = {
        status: afterUpdate?.status ?? "timed_out",
        result: afterUpdate?.resultPreview ?? "",
        toolCallCount: afterUpdate?.toolCallCount ?? 0,
        durationMs: dur,
        error: `collect timed out after ${timeoutMs}ms`,
      };
      return timedOut;
    }
  }

  // 2. Build the result. Fetch messages from the opencode server and
  //    concatenate the assistant text parts.
  const final = this.instances.get(instanceId);
  if (!final) {
    throw new Error(`collect: instance ${instanceId} disappeared`);
  }
  const resultText = await this.buildResultText(final);
  const dur = (final.completedAt ?? Date.now()) - startedAt;
  const out: CollectResult = {
    status: final.status,
    result: resultText,
    toolCallCount: final.toolCallCount,
    durationMs: dur,
  };
  if (final.error !== undefined) out.error = final.error;
  return out;
}
518
+
519
+ // --- Rebuild on init (spec §5.4) ----------------------------------------
520
+
521
/**
 * Repopulate the in-memory map from the state store (spec §5.4).
 *
 * Every stored instance is inserted. Instances that were still
 * `running` or `pending` are then marked `failed`, because the serve
 * child is new and their opencode sessions no longer exist. Records that
 * were already terminal are kept as they are.
 *
 * If listing the store fails, a warning is logged and the map is left
 * unchanged.
 */
async rebuildInMemoryMap(): Promise<void> {
  let persisted: BackgroundState[];
  try {
    persisted = await this.stateStore.list();
  } catch (err: unknown) {
    const reason = err instanceof Error ? err.message : String(err);
    this.logger.warn(`bizar: rebuildInMemoryMap: list() failed: ${reason}`);
    return;
  }

  let markedFailed = 0;
  for (const record of persisted) {
    this.instances.set(record.instanceId, record);

    // Read the status before `update()` overwrites it.
    const wasPending = record.status === "pending";
    const wasRunning = record.status === "running";
    if (!wasPending && !wasRunning) continue;

    await this.update(record.instanceId, {
      status: "failed",
      error: wasPending
        ? "plugin restarted while instance was pending"
        : "plugin restarted; serve child is new",
      completedAt: Date.now(),
    });
    markedFailed += 1;
  }

  const rebuilt = persisted.length;
  if (rebuilt > 0) {
    this.logger.info(
      `bizar: rebuilt in-memory map (${rebuilt} instances, ${markedFailed} marked failed)`,
    );
  }
}
563
+
564
+ // --- Shutdown (spec §5.3) ----------------------------------------------
565
+
566
/**
 * Shut the manager down (spec §5.3).
 *
 * 1. Stop the periodic stall checker so it cannot race the updates below.
 * 2. Mark every non-terminal instance `failed` with
 *    `error: "plugin shutting down"`, one at a time.
 * 3. Fire abort requests for those sessions in parallel, each limited to
 *    5 seconds; failures are ignored.
 *
 * Terminating the serve child is left to the caller. After this returns
 * the manager object still exists but no further periodic checks run.
 */
async shutdownAll(): Promise<void> {
  const handle = this.stallCheckerTimer;
  if (handle !== null) {
    clearInterval(handle);
    this.stallCheckerTimer = null;
  }
  this.stallCheckerDisabled = true;

  // Snapshot the live instances before any status changes.
  const live: BackgroundState[] = [...this.instances.values()].filter(
    (entry) => !TERMINAL_STATUSES.has(entry.status),
  );

  // Phase 1: mark failed first (spec §5.3 step 1).
  for (const entry of live) {
    await this.update(entry.instanceId, {
      status: "failed",
      error: "plugin shutting down",
      completedAt: Date.now(),
    });
  }

  // Phase 2: best-effort aborts in parallel, 5s per call.
  await Promise.allSettled(
    live.map((entry) => {
      const request = this.http.abortSession(entry.sessionId, this.worktree);
      return withTimeout(request, 5_000).catch(() => undefined);
    }),
  );

  this.logger.info(`bizar: shutdownAll complete (${live.length} instances aborted)`);
}
607
+
608
+ // --- v0.3.0 stall and thinking-loop helpers ----------------------------
609
+
610
/**
 * Fail an instance that has gone silent. Logs the observed idle time,
 * starts an abort request without waiting for it, and records the
 * canonical stall error on the instance.
 *
 * The instance is marked `failed` regardless of how the abort request
 * turns out (for example when the serve child is already dead).
 */
private async _abortAsStalled(inst: BackgroundState): Promise<void> {
  const idleMs = Date.now() - (inst.lastEventAt ?? 0);
  this.logger.warn(
    `bizar: instance ${inst.instanceId} stalled (no event for ${idleMs}ms); aborting`,
  );

  // Fire-and-forget: a rejection is deliberately discarded.
  const pendingAbort = this.http.abortSession(inst.sessionId, this.worktree);
  pendingAbort.catch(() => undefined);

  // The stored message reports the configured timeout, not the measured
  // idle time logged above.
  const stallError = `No activity for ${this.stallTimeoutMs}ms — LLM appears stalled`;
  await this.update(inst.instanceId, {
    status: "failed",
    error: stallError,
    completedAt: Date.now(),
  });
}
633
+
634
/**
 * Send the research-intervention prompt to a session that appears stuck
 * in a thinking loop, then record the intervention on the instance.
 *
 * Only the HTTP call is awaited, not the model's response. The
 * intervention counter is incremented even when the send throws, so
 * repeated failures still escalate to an abort once the budget is spent.
 *
 * @param inst Instance to nudge.
 * @param sinceMs Milliseconds since the last tool/text part was seen.
 */
private async _sendIntervention(
  inst: BackgroundState,
  sinceMs: number,
): Promise<void> {
  const messageID = generateMessageId();
  const promptText = researchInterventionPrompt(sinceMs);
  const alreadySent = inst.interventionCount ?? 0;
  const attemptNumber = alreadySent + 1;

  this.logger.warn(
    `bizar: instance ${inst.instanceId} thinking loop (${sinceMs}ms without tool/text); sending intervention #${attemptNumber}/${this.maxInterventions}`,
  );

  try {
    // NOTE(review): the value returned by sendPrompt is not inspected.
    // If it reports failures through a result object (as abortSession
    // appears to) rather than by throwing, those go unlogged — confirm.
    await this.http.sendPrompt(
      {
        sessionId: inst.sessionId,
        messageID,
        agent: inst.agent,
        parts: [{ type: "text", text: promptText }],
      },
      this.worktree,
    );
  } catch (err: unknown) {
    // Swallowed on purpose; the bookkeeping below still runs.
    const detail = err instanceof Error ? err.message : String(err);
    this.logger.warn(
      `bizar: intervention prompt send failed for ${inst.instanceId}: ${detail}`,
    );
  }

  await this.update(inst.instanceId, {
    interventionCount: attemptNumber,
    interventionAt: Date.now(),
    interventionReason: `thinking loop (${formatDuration(sinceMs)} without tool/text)`,
    // Count the intervention as activity so the stall checker does not
    // fire right after it. `lastToolOrTextAt` is intentionally left alone.
    lastEventAt: Date.now(),
  });
}
682
+
683
/**
 * Fail an instance whose intervention budget is used up while it is
 * still producing no tool calls or text. Starts an abort request without
 * waiting for it and records the canonical thinking-loop error.
 *
 * @param inst Instance to fail.
 * @param sinceMs Milliseconds since the last tool/text part was seen.
 */
private async _abortAsThinkingLoop(
  inst: BackgroundState,
  sinceMs: number,
): Promise<void> {
  this.logger.warn(
    `bizar: instance ${inst.instanceId} thinking loop exhausted ${this.maxInterventions} intervention(s) over ${sinceMs}ms; aborting`,
  );

  // Fire-and-forget: a rejection is deliberately discarded.
  const pendingAbort = this.http.abortSession(inst.sessionId, this.worktree);
  pendingAbort.catch(() => undefined);

  const loopError = `Thinking loop detected: ${formatDuration(sinceMs)} of thinking without tool calls or output. Spawn a Mimir agent for research.`;
  await this.update(inst.instanceId, {
    status: "failed",
    error: loopError,
    completedAt: Date.now(),
  });
}
703
+
704
+ // --- Internal: per-session event handler -------------------------------
705
+
706
/**
 * Subscribe this manager to the event stream for `inst.sessionId`. Each
 * event is forwarded to the per-instance handler without being awaited.
 *
 * Call this only after the instance has its real session id: `add()`
 * stores instances with an empty id, which the event stream rejects.
 *
 * @returns The unsubscribe function handed back by the event stream.
 */
public attachEventHandler(inst: BackgroundState): () => void {
  return this.stream.onSessionEvent(inst.sessionId, (ev: StreamEvent) => {
    void this.handleInstanceEvent(inst.instanceId, ev);
  });
}
713
+
714
/**
 * Route one stream event to the instance it belongs to.
 *
 * Events for unknown or already-terminal instances are dropped (after a
 * kill, for example, a late session error may still arrive). Otherwise
 * the heartbeat is refreshed and the event is handled by type:
 * part updates go to `onPartUpdated`, `session.idle` marks the instance
 * `done`, and `session.error` marks it `failed` with the reported error.
 * Any other event type only refreshes the heartbeat.
 */
private async handleInstanceEvent(
  instanceId: string,
  ev: StreamEvent,
): Promise<void> {
  const target = this.instances.get(instanceId);
  if (!target || TERMINAL_STATUSES.has(target.status)) return;

  // v0.3.0 — refresh the heartbeat before anything else so the stall
  // checker sees it no matter how the rest of the handler goes.
  target.lastEventAt = Date.now();

  switch (ev.type) {
    case "message.part.updated":
      await this.onPartUpdated(instanceId, ev);
      return;
    case "session.idle":
      await this.update(instanceId, {
        status: "done",
        completedAt: Date.now(),
      });
      return;
    case "session.error":
      await this.update(instanceId, {
        status: "failed",
        error: ev.error ?? "session error",
        completedAt: Date.now(),
      });
      return;
    default:
      return;
  }
}
746
+
747
/**
 * React to a `message.part.updated` event for one instance.
 *
 * In order:
 *   1. `tool` and `text` parts refresh `lastToolOrTextAt` and reset any
 *      intervention bookkeeping; other part types do not.
 *   2. `tool` parts bump `toolCallCount`; reaching `toolCallCap` aborts
 *      the session and marks the instance `failed` (spec §6.2).
 *   3. `tool` parts whose error text matches `LOOP_GUARD_RE` mark the
 *      instance `failed` and record `loopGuardTool` (spec §4.1).
 *   4. `text` parts refresh `resultPreview` and append the message id to
 *      `resultMessageIds` without duplicates (spec §3.2).
 *
 * @param instanceId Key of the instance in `this.instances`; unknown ids are ignored.
 * @param ev         The part-updated event (carries `part` and `messageID`).
 */
private async onPartUpdated(
  instanceId: string,
  ev: Extract<StreamEvent, { type: "message.part.updated" }>,
): Promise<void> {
  const inst = this.instances.get(instanceId);
  if (!inst) return;
  const { part } = ev;

  // v0.3.0 — tool and text parts count as concrete progress. `thinking`
  // parts deliberately do not: they are the loop indicator.
  if (part.type === "tool" || part.type === "text") {
    inst.lastToolOrTextAt = Date.now();
    // Progress after one or more interventions gives the next thinking
    // loop a fresh budget; drop the metadata so a later status check
    // does not show stale info.
    if ((inst.interventionCount ?? 0) > 0) {
      inst.interventionCount = 0;
      delete inst.interventionAt;
      delete inst.interventionReason;
    }
  }

  if (part.type === "tool") {
    // --- Tool-call cap (spec §6.2) ---
    const used = inst.toolCallCount + 1;
    const capReached = used >= this.toolCallCap;
    if (capReached) {
      // Fire-and-forget: do not block the handler on a network call.
      void this.http
        .abortSession(inst.sessionId, this.worktree)
        .catch(() => undefined);
    }
    const patch: Partial<BackgroundState> = capReached
      ? {
          toolCallCount: used,
          status: "failed",
          error: `Tool-call cap reached (${used}). Aborted to prevent cost runaway.`,
          completedAt: Date.now(),
        }
      : { toolCallCount: used };
    await this.update(instanceId, patch);
    if (capReached) return;

    // --- Loop-guard threshold-12 detection (spec §4.1) ---
    if (!inst.loopGuardTool) {
      const tool = readToolError(part)?.match(LOOP_GUARD_RE)?.[1];
      if (tool) {
        await this.update(instanceId, {
          status: "failed",
          error: `Loop protection: 12 identical calls to ${tool}`,
          loopGuardTool: tool,
          completedAt: Date.now(),
        });
        return;
      }
    }
  }

  // --- Text-part result preview refresh (spec §3.2) ---
  if (part.type === "text" && typeof part.text === "string") {
    // A Set keeps first-occurrence order, so this de-duplicates the ids
    // while preserving the order in which messages were first seen.
    const resultMessageIds = [
      ...new Set([...(inst.resultMessageIds ?? []), ev.messageID]),
    ];
    await this.update(instanceId, {
      resultPreview: part.text.slice(-RESULT_PREVIEW_MAX),
      resultMessageIds,
    });
  }
}
823
+
824
/**
 * Assemble the result text returned by `collect` (spec §4.4).
 *
 * Lists the session's messages via `http.listMessages` and concatenates,
 * in order, the `text` of every text part belonging to an assistant
 * message; all other roles and part types are skipped. When
 * `inst.loopGuardTool` is set, a loop-guard marker line is prepended.
 *
 * @param inst Instance whose session is read.
 * @returns The concatenated text, or `inst.resultPreview` (empty string
 *          if unset) when listing the messages fails.
 */
private async buildResultText(inst: BackgroundState): Promise<string> {
  const listed = await this.http.listMessages(inst.sessionId, this.worktree);
  if (!listed.ok) {
    this.logger.warn(`bizar: collect: listMessages failed: ${listed.error}`);
    return inst.resultPreview ?? "";
  }

  let body = "";
  for (const message of listed.value) {
    if (message.role !== "assistant") continue;
    for (const piece of message.parts) {
      if (piece.type === "text" && typeof piece.text === "string") {
        body += piece.text;
      }
    }
  }

  const guardedTool = inst.loopGuardTool;
  return guardedTool
    ? `[loop guard: 12 identical calls to ${guardedTool}]\n${body}`
    : body;
}
852
+ }
853
+
854
+ // --- Helpers --------------------------------------------------------------
855
+
856
/**
 * Format a millisecond duration as `Xm Ys` (or just `Ys` if under a minute).
 * Used in stall and thinking-loop error messages.
 *
 * Fractions of a millisecond are floored and negative values clamp to 0.
 * Non-finite input (`NaN`, `±Infinity`) — e.g. the result of subtracting
 * a missing timestamp — is rendered as `0s` instead of leaking `NaNs` or
 * `Infinitym NaNs` into a user-facing message.
 *
 * @param ms Duration in milliseconds.
 * @returns Human-readable duration; minutes are not rolled up into hours.
 */
function formatDuration(ms: number): string {
  const safeMs = Number.isFinite(ms) ? Math.max(0, Math.floor(ms)) : 0;
  const minutes = Math.floor(safeMs / 60_000);
  const seconds = Math.floor((safeMs % 60_000) / 1000);
  return minutes > 0 ? `${minutes}m ${seconds}s` : `${seconds}s`;
}
866
+
867
/**
 * Create a fresh instance id of the form `bgr_<base32>`.
 *
 * Sixteen bytes are drawn from `crypto.getRandomValues` and passed
 * through this file's `base32` helper, which returns 22 characters for
 * that input. The `bgr_` prefix makes the file naming scheme obvious.
 *
 * @returns The prefixed id string.
 */
export function generateInstanceId(): string {
  // getRandomValues fills the array in place and returns the same array.
  const entropy = globalThis.crypto.getRandomValues(new Uint8Array(16));
  return "bgr_" + base32(entropy);
}
877
+
878
/**
 * Encode bytes with the Crockford base32 alphabet (no I, L, O, U) and
 * return at most the first 22 characters.
 *
 * Bits are consumed most-significant first, five at a time; a trailing
 * group shorter than five bits is right-padded with zeros. Sixteen input
 * bytes (128 bits) would encode to 26 characters, so for the id
 * generators the last 4 characters are dropped and the result carries
 * 110 bits of the input.
 *
 * @param bytes Bytes to encode.
 * @returns Up to 22 base32 characters (fewer for inputs under 14 bytes).
 */
function base32(bytes: Uint8Array): string {
  const alphabet = "0123456789ABCDEFGHJKMNPQRSTVWXYZ";
  const chars: string[] = [];
  // `acc` is never masked: only its low `pending` bits (at most 12) are
  // ever read, so the 32-bit truncation performed by `<<` is harmless.
  let acc = 0;
  let pending = 0;

  for (const byte of bytes) {
    acc = (acc << 8) | byte;
    pending += 8;
    for (; pending >= 5; pending -= 5) {
      chars.push(alphabet.charAt((acc >>> (pending - 5)) & 0x1f));
    }
  }

  if (pending > 0) {
    // Left-align the leftover bits inside a final 5-bit group.
    chars.push(alphabet.charAt((acc << (5 - pending)) & 0x1f));
  }

  return chars.join("").slice(0, 22);
}
900
+
901
/**
 * Create a fresh message id of the form `msg_<base32>`, built the same
 * way as the instance id: sixteen bytes from `crypto.getRandomValues`
 * run through this file's `base32` helper. Used for
 * `POST /session/{id}/prompt_async`.
 *
 * @returns The prefixed id string.
 */
export function generateMessageId(): string {
  // getRandomValues fills the array in place and returns the same array.
  const entropy = globalThis.crypto.getRandomValues(new Uint8Array(16));
  return "msg_" + base32(entropy);
}
910
+
911
/**
 * Project a stored `BackgroundState` onto the `InstanceView` shape.
 *
 * Required fields are always copied. Optional fields are assigned only
 * when defined, so absent values never appear as explicit `undefined`
 * properties. Intervention metadata is included only when
 * `interventionCount` is greater than zero.
 *
 * @param inst Stored instance state.
 * @returns A new view object; `inst` is not modified.
 */
function toView(inst: BackgroundState): InstanceView {
  const view: InstanceView = {
    instanceId: inst.instanceId,
    agent: inst.agent,
    status: inst.status,
    startedAt: inst.startedAt,
    toolCallCount: inst.toolCallCount,
    promptPreview: inst.promptPreview,
    parentAgent: inst.parentAgent,
    sessionId: inst.sessionId,
    // v0.3.0 — stall and thinking-loop protection. Always present so a
    // status caller can see how fresh the activity is.
    lastEventAt: inst.lastEventAt,
  };

  if (inst.completedAt !== undefined) {
    view.completedAt = inst.completedAt;
  }
  if (inst.resultPreview !== undefined) {
    view.resultPreview = inst.resultPreview;
  }
  if (inst.error !== undefined) {
    view.error = inst.error;
  }
  if (inst.parentInstanceId !== undefined) {
    view.parentInstanceId = inst.parentInstanceId;
  }

  // A positive count is the canonical "we have intervened" signal; in
  // the common case nothing was sent and the metadata stays off the view.
  const interventions = inst.interventionCount ?? 0;
  if (!(interventions > 0)) return view;

  view.interventionCount = interventions;
  if (inst.interventionAt !== undefined) {
    view.interventionAt = inst.interventionAt;
  }
  if (inst.interventionReason !== undefined) {
    view.interventionReason = inst.interventionReason;
  }
  return view;
}
940
+
941
/**
 * Pull the error string off a tool part. The error may sit on
 * `part.error` or on `part.state.error` (per spec §4.1); `part.error`
 * wins when both hold a non-empty string.
 *
 * @param part Tool part (only the two error locations are inspected).
 * @returns The first non-empty error string found, or `null` if neither
 *          location holds one.
 */
function readToolError(part: { error?: string; state?: { error?: string } }): string | null {
  const isNonEmptyString = (value: unknown): value is string =>
    typeof value === "string" && value.length > 0;

  if (isNonEmptyString(part.error)) return part.error;

  const nested = part.state?.error;
  return isNonEmptyString(nested) ? nested : null;
}
953
+
954
/**
 * Await `promise`, but reject if it has not settled within `ms`
 * milliseconds.
 *
 * The input promise is raced against a timer-backed promise. The timer
 * is always cleared once the race settles, so no pending timeout is
 * left behind. The input promise itself is not cancelled on timeout.
 *
 * @param promise Operation to wait for.
 * @param ms      Timer delay in milliseconds, passed to `setTimeout`.
 * @returns The value `promise` resolves with.
 * @throws The rejection of `promise`, or `Error("timed out after <ms>ms")`
 *         when the timer fires first.
 */
async function withTimeout<T>(promise: Promise<T>, ms: number): Promise<T> {
  // Assigned synchronously by the executor below, before the race starts.
  let handle: ReturnType<typeof setTimeout> | undefined;
  const deadline = new Promise<never>((_resolve, reject) => {
    handle = setTimeout(() => {
      reject(new Error(`timed out after ${ms}ms`));
    }, ms);
  });

  try {
    return await Promise.race([promise, deadline]);
  } finally {
    clearTimeout(handle);
  }
}