opencode-auto-resume 1.0.9 → 1.0.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +9 -9
- package/dist/index.js +67 -4
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# opencode-auto-resume
|
|
2
2
|
|
|
3
|
-
**Plugin for [OpenCode](https://github.com/
|
|
3
|
+
**Plugin for [OpenCode](https://github.com/anomalyco/opencode) that automatically detects and recovers from LLM session failures — stalls, broken tool calls, hallucination loops, and stuck subagent parents. Fully silent, zero UI pollution.**
|
|
4
4
|
|
|
5
5
|
## What it does
|
|
6
6
|
|
|
@@ -9,32 +9,32 @@ LLM sessions fail in predictable ways. This plugin monitors all sessions and aut
|
|
|
9
9
|
**Stall recovery** — the stream goes silent but the session stays "busy". The UI shows a blinking cursor with no progress. If no events arrive for 48 seconds, the plugin sends `"continue"` with exponential backoff. After 3 failed attempts it gives up.
|
|
10
10
|
|
|
11
11
|
The plugin extracts the **agent, model, and provider** from the last session message, so it resumes with the exact same configuration the user was using (build, sisyphus, prometheus, etc.).
|
|
12
|
-
_( [#55](https://github.com/
|
|
12
|
+
_( [#55](https://github.com/anomalyco/opencode/issues/55), [#199](https://github.com/anomalyco/opencode/issues/199), [#283](https://github.com/anomalyco/opencode/issues/283) )_
|
|
13
13
|
|
|
14
14
|
**Tool calls as raw text** — the model prints tool invocations as raw XML (`<function=edit>...`) instead of executing them. The session goes idle normally but the tool was never run. On idle, the plugin fetches the last messages and scans for XML tool-call patterns (including truncated and alternative formats). If found, it sends a specific recovery prompt.
|
|
15
|
-
_( [#150](https://github.com/
|
|
15
|
+
_( [#150](https://github.com/anomalyco/opencode/issues/150), [#313](https://github.com/anomalyco/opencode/issues/313), [#353](https://github.com/anomalyco/opencode/issues/353) )_
|
|
16
16
|
|
|
17
17
|
**Hallucination loop** — the model generates the same broken output repeatedly. Each continue just picks up the broken generation. If a session needs 3+ continues within 10 minutes, the plugin aborts the request and sends `"continue"` fresh, forcing a clean restart.
|
|
18
|
-
_( [#283](https://github.com/
|
|
18
|
+
_( [#283](https://github.com/anomalyco/opencode/issues/283), [#353](https://github.com/anomalyco/opencode/issues/353) )_
|
|
19
19
|
|
|
20
20
|
**Orphan parent** — a subagent finishes but the parent session stays stuck as "busy" forever. The plugin detects when busyCount drops from >1 to 1, waits 15 seconds, then aborts and resumes the parent.
|
|
21
|
-
_( [#122](https://github.com/
|
|
21
|
+
_( [#122](https://github.com/anomalyco/opencode/issues/122), [#199](https://github.com/anomalyco/opencode/issues/199), [#246](https://github.com/anomalyco/opencode/issues/246) )_
|
|
22
22
|
|
|
23
23
|
**False positives during subagent work** — long tool execution or active subagents can look like a stall. Only the session emitting events gets its timer reset (not all sessions). When multiple sessions are busy, stall detection is paused entirely.
|
|
24
|
-
_( [#55](https://github.com/
|
|
24
|
+
_( [#55](https://github.com/anomalyco/opencode/issues/55), [#221](https://github.com/anomalyco/opencode/issues/221) )_
|
|
25
25
|
|
|
26
26
|
**ESC cancel** — user presses ESC to cancel a request. The plugin detects `MessageAbortedError` and marks all busy sessions as cancelled, never resuming them.
|
|
27
27
|
|
|
28
28
|
**Spurious error messages** — after normal completion, OpenCode sometimes fires a `session.error`. All logging goes through `ctx.client.app.log()` (zero `console.log`), and errors on already-idle sessions are silently ignored.
|
|
29
|
-
_( [#128](https://github.com/
|
|
29
|
+
_( [#128](https://github.com/anomalyco/opencode/pull/128), [#22](https://github.com/anomalyco/opencode/pull/22) )_
|
|
30
30
|
|
|
31
31
|
**Session discovery** — periodically calls `session.list()` to pick up sessions that were missed by event tracking. Idle sessions are cleaned up after 10 minutes to prevent memory leaks.
|
|
32
32
|
|
|
33
33
|
**Model preservation** — when resuming with "continue", the plugin extracts the agent, model, and provider from the last session message (not from the `info` field) to preserve the user's UI selection.
|
|
34
|
-
_( [#111](https://github.com/
|
|
34
|
+
_( [#111](https://github.com/anomalyco/opencode/issues/111), [#277](https://github.com/anomalyco/opencode/issues/277) )_
|
|
35
35
|
|
|
36
36
|
**Subagent stuck detection** — detects when a subagent hasn't received new text for >1 minute (or >3 minutes if a tool call is in progress). If stuck, sends a recovery prompt before triggering abort+resume on the parent.
|
|
37
|
-
_( [#55](https://github.com/
|
|
37
|
+
_( [#55](https://github.com/anomalyco/opencode/issues/55), [#60](https://github.com/anomalyco/opencode/issues/60), [#246](https://github.com/anomalyco/opencode/issues/246) )_
|
|
38
38
|
|
|
39
39
|
## Architecture
|
|
40
40
|
|
package/dist/index.js
CHANGED
|
@@ -121,7 +121,10 @@ var AutoResumePlugin = async (ctx, options) => {
|
|
|
121
121
|
toolTextAttempts: 0,
|
|
122
122
|
continueTimestamps: [],
|
|
123
123
|
idleSince: null,
|
|
124
|
-
continuing: false
|
|
124
|
+
continuing: false,
|
|
125
|
+
todos: [],
|
|
126
|
+
todoCheckAttempts: 0,
|
|
127
|
+
toolTextTimer: null
|
|
125
128
|
};
|
|
126
129
|
sessions.set(sid, w);
|
|
127
130
|
}
|
|
@@ -220,8 +223,14 @@ var AutoResumePlugin = async (ctx, options) => {
|
|
|
220
223
|
const role = msg.role ?? msg.info?.role;
|
|
221
224
|
if (role === "user") {
|
|
222
225
|
const rawAgent = msg.agent;
|
|
223
|
-
if (typeof rawAgent === "string")
|
|
226
|
+
if (typeof rawAgent === "string") {
|
|
224
227
|
agent2 = rawAgent;
|
|
228
|
+
} else {
|
|
229
|
+
const fallbackAgent = msg.info?.agent;
|
|
230
|
+
if (typeof fallbackAgent === "string") {
|
|
231
|
+
agent2 = fallbackAgent;
|
|
232
|
+
}
|
|
233
|
+
}
|
|
225
234
|
let rawModel = msg.model;
|
|
226
235
|
if (!rawModel) {
|
|
227
236
|
rawModel = msg.info?.model;
|
|
@@ -263,6 +272,11 @@ var AutoResumePlugin = async (ctx, options) => {
|
|
|
263
272
|
}
|
|
264
273
|
} finally {
|
|
265
274
|
w.continuing = false;
|
|
275
|
+
w.todoCheckAttempts = 0;
|
|
276
|
+
if (w.toolTextTimer) {
|
|
277
|
+
clearTimeout(w.toolTextTimer);
|
|
278
|
+
w.toolTextTimer = null;
|
|
279
|
+
}
|
|
266
280
|
}
|
|
267
281
|
}
|
|
268
282
|
function extractMessages(response) {
|
|
@@ -341,6 +355,11 @@ var AutoResumePlugin = async (ctx, options) => {
|
|
|
341
355
|
w.continueTimestamps = [];
|
|
342
356
|
w.idleSince = null;
|
|
343
357
|
w.continuing = false;
|
|
358
|
+
w.todoCheckAttempts = 0;
|
|
359
|
+
if (w.toolTextTimer) {
|
|
360
|
+
clearTimeout(w.toolTextTimer);
|
|
361
|
+
w.toolTextTimer = null;
|
|
362
|
+
}
|
|
344
363
|
}
|
|
345
364
|
function resetIdleFlags(w) {
|
|
346
365
|
w.userCancelled = false;
|
|
@@ -353,6 +372,8 @@ var AutoResumePlugin = async (ctx, options) => {
|
|
|
353
372
|
return;
|
|
354
373
|
if (w.userCancelled || w.toolTextRecovered)
|
|
355
374
|
return;
|
|
375
|
+
if (w.status !== "idle")
|
|
376
|
+
return;
|
|
356
377
|
if (w.toolTextAttempts > 0) {
|
|
357
378
|
const elapsed = Date.now() - w.lastRetryAt;
|
|
358
379
|
const requiredBackoff = backoffMs(w.toolTextAttempts);
|
|
@@ -369,6 +390,7 @@ var AutoResumePlugin = async (ctx, options) => {
|
|
|
369
390
|
const messages = extractMessages(response);
|
|
370
391
|
const recent = messages.slice(-3);
|
|
371
392
|
let bestCandidate = null;
|
|
393
|
+
let allAssistantText = "";
|
|
372
394
|
for (const msg of recent) {
|
|
373
395
|
const rawRole = msg.role ?? msg.info?.role;
|
|
374
396
|
if (rawRole !== "assistant")
|
|
@@ -388,6 +410,8 @@ var AutoResumePlugin = async (ctx, options) => {
|
|
|
388
410
|
} else {
|
|
389
411
|
continue;
|
|
390
412
|
}
|
|
413
|
+
allAssistantText += text + `
|
|
414
|
+
`;
|
|
391
415
|
if (containsToolCallAsText(text)) {
|
|
392
416
|
const candidate = {
|
|
393
417
|
prompt: isReasoning ? THINKING_TOOL_RECOVERY_PROMPT : TOOL_TEXT_RECOVERY_PROMPT,
|
|
@@ -399,6 +423,25 @@ var AutoResumePlugin = async (ctx, options) => {
|
|
|
399
423
|
}
|
|
400
424
|
}
|
|
401
425
|
if (containsReadyToContinuePattern(text)) {
|
|
426
|
+
const todos = w.todos || [];
|
|
427
|
+
const hasOpenTodos = todos.some((t) => t.status === "pending" || t.status === "in_progress");
|
|
428
|
+
if (!hasOpenTodos && todos.length > 0) {
|
|
429
|
+
w.todoCheckAttempts++;
|
|
430
|
+
if (w.todoCheckAttempts >= 2) {
|
|
431
|
+
await log("info", `${short(sid)} - todos completed but agent hasn't closed them. Sending reminder...`);
|
|
432
|
+
const candidate2 = {
|
|
433
|
+
prompt: "Please close all completed todos and finish your message.",
|
|
434
|
+
source: "todo-reminder",
|
|
435
|
+
priority: 1
|
|
436
|
+
};
|
|
437
|
+
if (!bestCandidate || candidate2.priority < bestCandidate.priority) {
|
|
438
|
+
bestCandidate = candidate2;
|
|
439
|
+
}
|
|
440
|
+
continue;
|
|
441
|
+
}
|
|
442
|
+
await log("info", `${short(sid)} - skipping continue, todos appear completed (attempt ${w.todoCheckAttempts}/2)`);
|
|
443
|
+
continue;
|
|
444
|
+
}
|
|
402
445
|
const candidate = {
|
|
403
446
|
prompt: "continue",
|
|
404
447
|
source: "ready-to-continue",
|
|
@@ -415,6 +458,11 @@ var AutoResumePlugin = async (ctx, options) => {
|
|
|
415
458
|
w.toolTextRecovered = true;
|
|
416
459
|
w.toolTextAttempts++;
|
|
417
460
|
await log("info", `${bestCandidate.source} detected on ${short(sid)}! ` + `Attempt ${w.toolTextAttempts}/${maxRetries}. Sending recovery prompt...`);
|
|
461
|
+
const timeSinceActivity = Date.now() - w.lastActivityAt;
|
|
462
|
+
if (timeSinceActivity < TOOL_TEXT_CHECK_DELAY_MS) {
|
|
463
|
+
await log("info", `${short(sid)} - skipping ${bestCandidate.source}, session was active ${Math.round(timeSinceActivity / 1000)}s ago`);
|
|
464
|
+
return;
|
|
465
|
+
}
|
|
418
466
|
if (isHallucinationLoop(sid)) {
|
|
419
467
|
await log("warn", `Hallucination loop detected on ${short(sid)} \u2014 aborting instead`);
|
|
420
468
|
await tryAbortAndResume(sid, w);
|
|
@@ -634,7 +682,7 @@ var AutoResumePlugin = async (ctx, options) => {
|
|
|
634
682
|
prevBusyCount = currentBusy;
|
|
635
683
|
log("debug", `${short(sid)} -> idle (${currentBusy})`);
|
|
636
684
|
if (!w.toolTextRecovered && w.toolTextAttempts < maxRetries) {
|
|
637
|
-
setTimeout(() => {
|
|
685
|
+
w.toolTextTimer = setTimeout(() => {
|
|
638
686
|
checkForToolCallAsText(sid, w);
|
|
639
687
|
}, TOOL_TEXT_CHECK_DELAY_MS);
|
|
640
688
|
}
|
|
@@ -664,13 +712,28 @@ var AutoResumePlugin = async (ctx, options) => {
|
|
|
664
712
|
w.status = "idle";
|
|
665
713
|
resetIdleFlags(w);
|
|
666
714
|
if (!w.toolTextRecovered && w.toolTextAttempts < maxRetries) {
|
|
667
|
-
setTimeout(() => {
|
|
715
|
+
w.toolTextTimer = setTimeout(() => {
|
|
668
716
|
checkForToolCallAsText(sid, w);
|
|
669
717
|
}, TOOL_TEXT_CHECK_DELAY_MS);
|
|
670
718
|
}
|
|
671
719
|
}
|
|
672
720
|
break;
|
|
673
721
|
}
|
|
722
|
+
case "todo.updated": {
|
|
723
|
+
if (!sid)
|
|
724
|
+
break;
|
|
725
|
+
const props = ev.properties;
|
|
726
|
+
const todos = props?.todos ?? [];
|
|
727
|
+
const w = sessions.get(sid);
|
|
728
|
+
if (w) {
|
|
729
|
+
w.todos = todos.map((t) => ({
|
|
730
|
+
content: t.content ?? "",
|
|
731
|
+
status: t.status ?? "pending",
|
|
732
|
+
priority: t.priority ?? "medium"
|
|
733
|
+
}));
|
|
734
|
+
}
|
|
735
|
+
break;
|
|
736
|
+
}
|
|
674
737
|
case "session.error": {
|
|
675
738
|
const errorObj = getError(ev);
|
|
676
739
|
const errorName = errorObj?.name ?? "";
|
package/package.json
CHANGED