opencode-auto-resume 1.0.10 → 1.0.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +9 -9
- package/dist/index.js +60 -3
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# opencode-auto-resume
|
|
2
2
|
|
|
3
|
-
**Plugin for [OpenCode](https://github.com/
|
|
3
|
+
**Plugin for [OpenCode](https://github.com/anomalyco/opencode) that automatically detects and recovers from LLM session failures — stalls, broken tool calls, hallucination loops, and stuck subagent parents. Fully silent, zero UI pollution.**
|
|
4
4
|
|
|
5
5
|
## What it does
|
|
6
6
|
|
|
@@ -9,32 +9,32 @@ LLM sessions fail in predictable ways. This plugin monitors all sessions and aut
|
|
|
9
9
|
**Stall recovery** — the stream goes silent but the session stays "busy". The UI shows a blinking cursor with no progress. If no events arrive for 48 seconds, the plugin sends `"continue"` with exponential backoff. After 3 failed attempts it gives up.
|
|
10
10
|
|
|
11
11
|
The plugin extracts the **agent, model, and provider** from the last session message, so it resumes with the exact same configuration the user was using (build, sisyphus, prometheus, etc.).
|
|
12
|
-
_( [#55](https://github.com/
|
|
12
|
+
_( [#55](https://github.com/anomalyco/opencode/issues/55), [#199](https://github.com/anomalyco/opencode/issues/199), [#283](https://github.com/anomalyco/opencode/issues/283) )_
|
|
13
13
|
|
|
14
14
|
**Tool calls as raw text** — the model prints tool invocations as raw XML (`<function=edit>...`) instead of executing them. The session goes idle normally but the tool was never run. On idle, the plugin fetches the last messages and scans for XML tool-call patterns (including truncated and alternative formats). If found, it sends a specific recovery prompt.
|
|
15
|
-
_( [#150](https://github.com/
|
|
15
|
+
_( [#150](https://github.com/anomalyco/opencode/issues/150), [#313](https://github.com/anomalyco/opencode/issues/313), [#353](https://github.com/anomalyco/opencode/issues/353) )_
|
|
16
16
|
|
|
17
17
|
**Hallucination loop** — the model generates the same broken output repeatedly. Each continue just picks up the broken generation. If a session needs 3+ continues within 10 minutes, the plugin aborts the request and sends `"continue"` fresh, forcing a clean restart.
|
|
18
|
-
_( [#283](https://github.com/
|
|
18
|
+
_( [#283](https://github.com/anomalyco/opencode/issues/283), [#353](https://github.com/anomalyco/opencode/issues/353) )_
|
|
19
19
|
|
|
20
20
|
**Orphan parent** — a subagent finishes but the parent session stays stuck as "busy" forever. The plugin detects when busyCount drops from >1 to 1, waits 15 seconds, then aborts and resumes the parent.
|
|
21
|
-
_( [#122](https://github.com/
|
|
21
|
+
_( [#122](https://github.com/anomalyco/opencode/issues/122), [#199](https://github.com/anomalyco/opencode/issues/199), [#246](https://github.com/anomalyco/opencode/issues/246) )_
|
|
22
22
|
|
|
23
23
|
**False positives during subagent work** — long tool execution or active subagents can look like a stall. Only the session emitting events gets its timer reset (not all sessions). When multiple sessions are busy, stall detection is paused entirely.
|
|
24
|
-
_( [#55](https://github.com/
|
|
24
|
+
_( [#55](https://github.com/anomalyco/opencode/issues/55), [#221](https://github.com/anomalyco/opencode/issues/221) )_
|
|
25
25
|
|
|
26
26
|
**ESC cancel** — user presses ESC to cancel a request. The plugin detects `MessageAbortedError` and marks all busy sessions as cancelled, never resuming them.
|
|
27
27
|
|
|
28
28
|
**Spurious error messages** — after normal completion, OpenCode sometimes fires a `session.error`. All logging goes through `ctx.client.app.log()` (zero `console.log`), and errors on already-idle sessions are silently ignored.
|
|
29
|
-
_( [#128](https://github.com/
|
|
29
|
+
_( [#128](https://github.com/anomalyco/opencode/pulls/128), [#22](https://github.com/anomalyco/opencode/pulls/22) )_
|
|
30
30
|
|
|
31
31
|
**Session discovery** — periodically calls `session.list()` to pick up sessions that were missed by event tracking. Idle sessions are cleaned up after 10 minutes to prevent memory leaks.
|
|
32
32
|
|
|
33
33
|
**Model preservation** — when resuming with "continue", the plugin extracts agent, model and provider from the last session message (not from `info` field) to preserve the user's UI selection.
|
|
34
|
-
_( [#111](https://github.com/
|
|
34
|
+
_( [#111](https://github.com/anomalyco/opencode/issues/111), [#277](https://github.com/anomalyco/opencode/issues/277) )_
|
|
35
35
|
|
|
36
36
|
**Subagent stuck detection** — detects when a subagent hasn't received new text for >1 minute (or >3 minutes if a tool call is in progress). If stuck, sends a recovery prompt before triggering abort+resume on the parent.
|
|
37
|
-
_( [#55](https://github.com/
|
|
37
|
+
_( [#55](https://github.com/anomalyco/opencode/issues/55), [#60](https://github.com/anomalyco/opencode/issues/60), [#246](https://github.com/anomalyco/opencode/issues/246) )_
|
|
38
38
|
|
|
39
39
|
## Architecture
|
|
40
40
|
|
package/dist/index.js
CHANGED
|
@@ -121,7 +121,10 @@ var AutoResumePlugin = async (ctx, options) => {
|
|
|
121
121
|
toolTextAttempts: 0,
|
|
122
122
|
continueTimestamps: [],
|
|
123
123
|
idleSince: null,
|
|
124
|
-
continuing: false
|
|
124
|
+
continuing: false,
|
|
125
|
+
todos: [],
|
|
126
|
+
todoCheckAttempts: 0,
|
|
127
|
+
toolTextTimer: null
|
|
125
128
|
};
|
|
126
129
|
sessions.set(sid, w);
|
|
127
130
|
}
|
|
@@ -269,6 +272,11 @@ var AutoResumePlugin = async (ctx, options) => {
|
|
|
269
272
|
}
|
|
270
273
|
} finally {
|
|
271
274
|
w.continuing = false;
|
|
275
|
+
w.todoCheckAttempts = 0;
|
|
276
|
+
if (w.toolTextTimer) {
|
|
277
|
+
clearTimeout(w.toolTextTimer);
|
|
278
|
+
w.toolTextTimer = null;
|
|
279
|
+
}
|
|
272
280
|
}
|
|
273
281
|
}
|
|
274
282
|
function extractMessages(response) {
|
|
@@ -347,6 +355,11 @@ var AutoResumePlugin = async (ctx, options) => {
|
|
|
347
355
|
w.continueTimestamps = [];
|
|
348
356
|
w.idleSince = null;
|
|
349
357
|
w.continuing = false;
|
|
358
|
+
w.todoCheckAttempts = 0;
|
|
359
|
+
if (w.toolTextTimer) {
|
|
360
|
+
clearTimeout(w.toolTextTimer);
|
|
361
|
+
w.toolTextTimer = null;
|
|
362
|
+
}
|
|
350
363
|
}
|
|
351
364
|
function resetIdleFlags(w) {
|
|
352
365
|
w.userCancelled = false;
|
|
@@ -359,6 +372,8 @@ var AutoResumePlugin = async (ctx, options) => {
|
|
|
359
372
|
return;
|
|
360
373
|
if (w.userCancelled || w.toolTextRecovered)
|
|
361
374
|
return;
|
|
375
|
+
if (w.status !== "idle")
|
|
376
|
+
return;
|
|
362
377
|
if (w.toolTextAttempts > 0) {
|
|
363
378
|
const elapsed = Date.now() - w.lastRetryAt;
|
|
364
379
|
const requiredBackoff = backoffMs(w.toolTextAttempts);
|
|
@@ -375,6 +390,7 @@ var AutoResumePlugin = async (ctx, options) => {
|
|
|
375
390
|
const messages = extractMessages(response);
|
|
376
391
|
const recent = messages.slice(-3);
|
|
377
392
|
let bestCandidate = null;
|
|
393
|
+
let allAssistantText = "";
|
|
378
394
|
for (const msg of recent) {
|
|
379
395
|
const rawRole = msg.role ?? msg.info?.role;
|
|
380
396
|
if (rawRole !== "assistant")
|
|
@@ -394,6 +410,8 @@ var AutoResumePlugin = async (ctx, options) => {
|
|
|
394
410
|
} else {
|
|
395
411
|
continue;
|
|
396
412
|
}
|
|
413
|
+
allAssistantText += text + `
|
|
414
|
+
`;
|
|
397
415
|
if (containsToolCallAsText(text)) {
|
|
398
416
|
const candidate = {
|
|
399
417
|
prompt: isReasoning ? THINKING_TOOL_RECOVERY_PROMPT : TOOL_TEXT_RECOVERY_PROMPT,
|
|
@@ -405,6 +423,25 @@ var AutoResumePlugin = async (ctx, options) => {
|
|
|
405
423
|
}
|
|
406
424
|
}
|
|
407
425
|
if (containsReadyToContinuePattern(text)) {
|
|
426
|
+
const todos = w.todos || [];
|
|
427
|
+
const hasOpenTodos = todos.some((t) => t.status === "pending" || t.status === "in_progress");
|
|
428
|
+
if (!hasOpenTodos && todos.length > 0) {
|
|
429
|
+
w.todoCheckAttempts++;
|
|
430
|
+
if (w.todoCheckAttempts >= 2) {
|
|
431
|
+
await log("info", `${short(sid)} - todos completed but agent hasn't closed them. Sending reminder...`);
|
|
432
|
+
const candidate2 = {
|
|
433
|
+
prompt: "Please close all completed todos and finish your message.",
|
|
434
|
+
source: "todo-reminder",
|
|
435
|
+
priority: 1
|
|
436
|
+
};
|
|
437
|
+
if (!bestCandidate || candidate2.priority < bestCandidate.priority) {
|
|
438
|
+
bestCandidate = candidate2;
|
|
439
|
+
}
|
|
440
|
+
continue;
|
|
441
|
+
}
|
|
442
|
+
await log("info", `${short(sid)} - skipping continue, todos appear completed (attempt ${w.todoCheckAttempts}/2)`);
|
|
443
|
+
continue;
|
|
444
|
+
}
|
|
408
445
|
const candidate = {
|
|
409
446
|
prompt: "continue",
|
|
410
447
|
source: "ready-to-continue",
|
|
@@ -421,6 +458,11 @@ var AutoResumePlugin = async (ctx, options) => {
|
|
|
421
458
|
w.toolTextRecovered = true;
|
|
422
459
|
w.toolTextAttempts++;
|
|
423
460
|
await log("info", `${bestCandidate.source} detected on ${short(sid)}! ` + `Attempt ${w.toolTextAttempts}/${maxRetries}. Sending recovery prompt...`);
|
|
461
|
+
const timeSinceActivity = Date.now() - w.lastActivityAt;
|
|
462
|
+
if (timeSinceActivity < TOOL_TEXT_CHECK_DELAY_MS) {
|
|
463
|
+
await log("info", `${short(sid)} - skipping ${bestCandidate.source}, session was active ${Math.round(timeSinceActivity / 1000)}s ago`);
|
|
464
|
+
return;
|
|
465
|
+
}
|
|
424
466
|
if (isHallucinationLoop(sid)) {
|
|
425
467
|
await log("warn", `Hallucination loop detected on ${short(sid)} \u2014 aborting instead`);
|
|
426
468
|
await tryAbortAndResume(sid, w);
|
|
@@ -640,7 +682,7 @@ var AutoResumePlugin = async (ctx, options) => {
|
|
|
640
682
|
prevBusyCount = currentBusy;
|
|
641
683
|
log("debug", `${short(sid)} -> idle (${currentBusy})`);
|
|
642
684
|
if (!w.toolTextRecovered && w.toolTextAttempts < maxRetries) {
|
|
643
|
-
setTimeout(() => {
|
|
685
|
+
w.toolTextTimer = setTimeout(() => {
|
|
644
686
|
checkForToolCallAsText(sid, w);
|
|
645
687
|
}, TOOL_TEXT_CHECK_DELAY_MS);
|
|
646
688
|
}
|
|
@@ -670,13 +712,28 @@ var AutoResumePlugin = async (ctx, options) => {
|
|
|
670
712
|
w.status = "idle";
|
|
671
713
|
resetIdleFlags(w);
|
|
672
714
|
if (!w.toolTextRecovered && w.toolTextAttempts < maxRetries) {
|
|
673
|
-
setTimeout(() => {
|
|
715
|
+
w.toolTextTimer = setTimeout(() => {
|
|
674
716
|
checkForToolCallAsText(sid, w);
|
|
675
717
|
}, TOOL_TEXT_CHECK_DELAY_MS);
|
|
676
718
|
}
|
|
677
719
|
}
|
|
678
720
|
break;
|
|
679
721
|
}
|
|
722
|
+
case "todo.updated": {
|
|
723
|
+
if (!sid)
|
|
724
|
+
break;
|
|
725
|
+
const props = ev.properties;
|
|
726
|
+
const todos = props?.todos ?? [];
|
|
727
|
+
const w = sessions.get(sid);
|
|
728
|
+
if (w) {
|
|
729
|
+
w.todos = todos.map((t) => ({
|
|
730
|
+
content: t.content ?? "",
|
|
731
|
+
status: t.status ?? "pending",
|
|
732
|
+
priority: t.priority ?? "medium"
|
|
733
|
+
}));
|
|
734
|
+
}
|
|
735
|
+
break;
|
|
736
|
+
}
|
|
680
737
|
case "session.error": {
|
|
681
738
|
const errorObj = getError(ev);
|
|
682
739
|
const errorName = errorObj?.name ?? "";
|
package/package.json
CHANGED