opencode-auto-resume 1.0.0 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +36 -14
- package/dist/index.js +209 -12433
- package/package.json +8 -3
- package/src/index.ts +0 -770
- package/tsconfig.json +0 -14
package/src/index.ts
DELETED
|
@@ -1,770 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* OpenCode Auto-Resume Plugin
|
|
3
|
-
*
|
|
4
|
-
* Detects when an LLM session stalls mid-stream and automatically
|
|
5
|
-
* sends a continuation prompt.
|
|
6
|
-
*/
|
|
7
|
-
|
|
8
|
-
import type { Plugin } from "@opencode-ai/plugin"
|
|
9
|
-
import { tool } from "@opencode-ai/plugin"
|
|
10
|
-
|
|
11
|
-
// ---------------------------------------------------------------------------
|
|
12
|
-
// Types
|
|
13
|
-
// ---------------------------------------------------------------------------
|
|
14
|
-
|
|
15
|
-
/**
 * Watchdog bookkeeping kept for a single session.
 * One record per session ID is stored in the plugin's `sessions` map.
 */
interface SessionWatch {
  // --- lifecycle -----------------------------------------------------------

  /** Last status recorded for the session; "unknown" until a status is seen. */
  status: "busy" | "idle" | "retry" | "unknown"
  /** `Date.now()` of the most recent activity recorded for the session. */
  lastActivityAt: number
  /** Timestamp when the session was last marked idle (for cleanup), else null. */
  idleSince: number | null
  /** Set when a user abort was observed; suppresses automatic recovery. */
  userCancelled: boolean

  // --- stall recovery ------------------------------------------------------

  /** Number of automatic resume attempts made so far. */
  resumeAttempts: number
  /** `Date.now()` of the last recovery prompt; 0 when none has been sent. */
  lastRetryAt: number
  /** True once the retry budget was exhausted and a warning was logged. */
  gaveUp: boolean
  /** When the orphan watch for this session started, or null if not active. */
  orphanWatchStartAt: number | null
  /** True while an abort+continue sequence is in flight for this session. */
  aborting: boolean

  // --- tool-call-as-text recovery -----------------------------------------

  /** True if we already sent a tool-call-as-text recovery for this idle cycle. */
  toolTextRecovered: boolean
  /** Tool-text recovery attempt count (separate from stall retries). */
  toolTextAttempts: number

  // --- loop detection ------------------------------------------------------

  /** Timestamps of recent continuation prompts, oldest first. */
  continueTimestamps: number[]
}
|
|
33
|
-
|
|
34
|
-
// ---------------------------------------------------------------------------
|
|
35
|
-
// Defaults
|
|
36
|
-
// ---------------------------------------------------------------------------
|
|
37
|
-
|
|
38
|
-
/** Silence (ms) on a busy session before it is treated as stalled. */
const DEFAULT_CHUNK_TIMEOUT_MS = 45 * 1_000
/** Period (ms) of the watchdog tick. */
const DEFAULT_CHECK_INTERVAL_MS = 5 * 1_000
/** Extra slack (ms) added on top of the stall / orphan timeouts. */
const DEFAULT_GRACE_PERIOD_MS = 3 * 1_000
/** Maximum automatic recovery attempts per session. */
const DEFAULT_MAX_RETRIES = 3
/** Upper bound (ms) for the exponential retry backoff. */
const DEFAULT_MAX_BACKOFF_MS = 8 * 1_000
/** First step (ms) of the exponential retry backoff. */
const DEFAULT_BASE_BACKOFF_MS = 1 * 1_000
/** How long (ms) an orphan watch waits before abort+continue. */
const DEFAULT_SUBAGENT_WAIT_MS = 15 * 1_000
/** Pause (ms) between aborting a session and sending "continue". */
const ABORT_CONTINUE_DELAY_MS = 2 * 1_000
/** Continue prompts within the loop window that count as a loop. */
const DEFAULT_LOOP_MAX_CONTINUES = 3
/** Sliding window (ms) used for loop detection: 10 minutes. */
const DEFAULT_LOOP_WINDOW_MS = 10 * 60 * 1_000

/** Wait (ms) after a session goes idle before scanning for tool-call-as-text. */
const TOOL_TEXT_CHECK_DELAY_MS = 1_500

/** Cap on the number of non-busy sessions kept in memory. */
const MAX_IDLE_SESSIONS = 50

/** Age (ms) after which an idle session is dropped from memory: 10 minutes. */
const IDLE_CLEANUP_MS = 10 * 60 * 1_000

/** Period (ms) of the session.list() discovery poll. */
const SESSION_DISCOVERY_INTERVAL_MS = 60 * 1_000

/** Prompt sent when the assistant printed a tool call as plain text. */
const TOOL_TEXT_RECOVERY_PROMPT = [
  "Your last message contained a raw tool call printed as text instead of being executed.",
  "Please use the proper tool calling mechanism to execute it.",
].join(" ")
|
|
65
|
-
|
|
66
|
-
// ---------------------------------------------------------------------------
|
|
67
|
-
// Patterns that indicate a tool call was printed as text, not executed.
|
|
68
|
-
// v8.0: Expanded to cover truncated tags, alternative formats, and partial XML.
|
|
69
|
-
// ---------------------------------------------------------------------------
|
|
70
|
-
|
|
71
|
-
/** Regexes that flag tool-call markup appearing inside plain text. */
const TOOL_TEXT_PATTERNS: readonly RegExp[] = [
  // <function ...> / <parameter ...> style tags
  /<function\s*=/i,
  /<function>/i,
  /<\/function>/i,
  /<parameter\s*=/i,
  /<parameter>/i,
  /<\/parameter>/i,
  // Other tool-call spellings
  /<tool_call[\s>]/i,
  /<\/tool_call>/i,
  /<tool[\s_]name\s*=/i,
  /<invoke\s+/i,
  // A tag that stops at the end of a line (generation cut off mid-tag)
  /<func(?:t|ti|tio|tion)?$/im,
  /<par(?:a|am|ame|amet|amete|ameter)?$/im,
  // XML elements named after common tools
  /<(?:edit|write|read|bash|grep|glob|search|replace|execute|run)\s*(?:\s[^>]*)?\s*(?:\/>|>)/i,
]

/** Tag pairs where an opening tag without its closing tag counts as a leak. */
const TRUNCATED_XML_PATTERNS: ReadonlyArray<{ open: RegExp; close: RegExp }> = [
  { open: /<function[^>]*>/i, close: /<\/function>/i },
  { open: /<parameter[^>]*>/i, close: /<\/parameter>/i },
  { open: /<tool_call[^>]*>/i, close: /<\/tool_call>/i },
]

/**
 * Decide whether `text` looks like a tool call that was printed instead of
 * executed.
 *
 * @param text Text to inspect.
 * @returns `false` for texts of 10 characters or fewer; otherwise `true` when
 *   any direct pattern matches or an opening tag has no matching closing tag.
 */
function containsToolCallAsText(text: string): boolean {
  if (text.length <= 10) return false

  // Direct hits first.
  for (const pattern of TOOL_TEXT_PATTERNS) {
    if (pattern.test(text)) return true
  }

  // Then: opening tag present, closing tag absent.
  return TRUNCATED_XML_PATTERNS.some((pair) => pair.open.test(text) && !pair.close.test(text))
}
|
|
112
|
-
|
|
113
|
-
// ---------------------------------------------------------------------------
|
|
114
|
-
// Plugin
|
|
115
|
-
// ---------------------------------------------------------------------------
|
|
116
|
-
|
|
117
|
-
export const AutoResumePlugin: Plugin = async (ctx, options) => {
|
|
118
|
-
// Tunables. Each option falls back to its DEFAULT_* constant when it is
// null or undefined; supplied values are cast, not validated.
const readNumberOption = (key: string, fallback: number): number =>
  ((options as Record<string, unknown> | undefined)?.[key] as number) ?? fallback

const chunkTimeoutMs = readNumberOption("chunkTimeoutMs", DEFAULT_CHUNK_TIMEOUT_MS)
const checkIntervalMs = readNumberOption("checkIntervalMs", DEFAULT_CHECK_INTERVAL_MS)
const gracePeriodMs = readNumberOption("gracePeriodMs", DEFAULT_GRACE_PERIOD_MS)
const maxRetries = readNumberOption("maxRetries", DEFAULT_MAX_RETRIES)
const maxBackoffMs = readNumberOption("maxBackoffMs", DEFAULT_MAX_BACKOFF_MS)
const baseBackoffMs = readNumberOption("baseBackoffMs", DEFAULT_BASE_BACKOFF_MS)
const subagentWaitMs = readNumberOption("subagentWaitMs", DEFAULT_SUBAGENT_WAIT_MS)
const loopMaxContinues = readNumberOption("loopMaxContinues", DEFAULT_LOOP_MAX_CONTINUES)
const loopWindowMs = readNumberOption("loopWindowMs", DEFAULT_LOOP_WINDOW_MS)

// Mutable state shared by the closures below.
/** Watch records keyed by session ID. */
const sessions = new Map<string, SessionWatch>()
/** Handle of the watchdog interval, or null before it is started. */
let timer: ReturnType<typeof setInterval> | null = null
/** Handle of the session-discovery interval, or null before it is started. */
let discoveryTimer: ReturnType<typeof setInterval> | null = null
/** Flips to true when the first event is handled (used for the one-off ready log). */
let initialised = false
/** Busy-session count remembered from the last idle status event. */
let prevBusyCount = 0
|
|
142
|
-
|
|
143
|
-
// -----------------------------------------------------------------
|
|
144
|
-
// Per-session hallucination loop detection (v8.0)
|
|
145
|
-
// -----------------------------------------------------------------
|
|
146
|
-
|
|
147
|
-
/**
 * Append the current time to a session's continue history, then drop the
 * leading entries that fall outside the `loopWindowMs` window.
 * Does nothing when the session is not tracked.
 */
function recordContinue(sid: string): void {
  const watch = sessions.get(sid)
  if (!watch) return

  const stamps = watch.continueTimestamps
  stamps.push(Date.now())

  // Entries are appended in time order, so only a prefix can be expired.
  const oldestAllowed = Date.now() - loopWindowMs
  const firstFresh = stamps.findIndex((stamp) => !(stamp < oldestAllowed))
  stamps.splice(0, firstFresh === -1 ? stamps.length : firstFresh)
}
|
|
156
|
-
|
|
157
|
-
/**
 * Record a continue for the session and report whether its history within
 * the loop window has reached `loopMaxContinues`.
 *
 * NOTE(review): this check is not read-only. It calls `recordContinue`, and
 * callers record again after a prompt is actually sent, so one prompt can add
 * two timestamps. Confirm whether that double counting is intended before
 * changing it, since the escalation to abort+continue currently depends on it.
 *
 * @returns `false` for untracked sessions.
 */
function isHallucinationLoop(sid: string): boolean {
  const watch = sessions.get(sid)
  if (!watch) return false

  recordContinue(sid)
  const continuesInWindow = watch.continueTimestamps.length
  return continuesInWindow >= loopMaxContinues
}
|
|
163
|
-
|
|
164
|
-
// -----------------------------------------------------------------------
|
|
165
|
-
// Logging
|
|
166
|
-
// -----------------------------------------------------------------------
|
|
167
|
-
|
|
168
|
-
/**
 * Send one entry to the host application's log under the "auto-resume"
 * service. Any failure is swallowed so that logging can never break the
 * plugin; console output is deliberately not used as a fallback.
 */
async function log(level: "debug" | "info" | "warn" | "error", msg: string) {
  const entry = { service: "auto-resume", level, message: msg }
  try {
    await ctx.client.app.log({ body: entry })
  } catch {
    // Intentionally ignored — NEVER console.log
  }
}
|
|
177
|
-
|
|
178
|
-
// -----------------------------------------------------------------------
|
|
179
|
-
// Helpers
|
|
180
|
-
// -----------------------------------------------------------------------
|
|
181
|
-
|
|
182
|
-
/**
 * Return the watch record for `sid`, creating and registering a fresh one
 * (status "unknown", all counters zeroed) when none exists yet.
 */
function ensureWatch(sid: string): SessionWatch {
  const existing = sessions.get(sid)
  if (existing) return existing

  const created: SessionWatch = {
    lastActivityAt: Date.now(),
    status: "unknown",
    userCancelled: false,
    resumeAttempts: 0,
    lastRetryAt: 0,
    gaveUp: false,
    orphanWatchStartAt: null,
    aborting: false,
    toolTextRecovered: false,
    toolTextAttempts: 0,
    continueTimestamps: [],
    idleSince: null,
  }
  sessions.set(sid, created)
  return created
}
|
|
203
|
-
|
|
204
|
-
/**
 * v8.0: Refresh the activity timestamp of the one session that emitted an
 * event. Earlier versions refreshed ALL busy sessions, which hid real stalls
 * while a subagent was active.
 *
 * Only busy, non-cancelled sessions are refreshed; untracked sessions are
 * ignored.
 */
function touchSession(sid: string) {
  const watch = sessions.get(sid)
  if (!watch) return
  if (watch.status !== "busy" || watch.userCancelled) return

  // resumeAttempts is deliberately left alone — it only resets on a new busy status.
  watch.lastActivityAt = Date.now()
}
|
|
214
|
-
|
|
215
|
-
/** Count the tracked sessions that are busy and were not cancelled by the user. */
function busyCount(): number {
  return [...sessions.values()].filter((watch) => watch.status === "busy" && !watch.userCancelled).length
}
|
|
222
|
-
|
|
223
|
-
/**
 * Return the single busy, non-cancelled session when exactly one exists;
 * otherwise (none, or more than one) return null.
 */
function getLoneBusySession(): { sid: string; w: SessionWatch } | null {
  const active: Array<{ sid: string; w: SessionWatch }> = []
  sessions.forEach((w, sid) => {
    if (w.status === "busy" && !w.userCancelled) active.push({ sid, w })
  })
  return active.length === 1 ? active[0] : null
}
|
|
234
|
-
|
|
235
|
-
/**
 * Extract the session ID carried by an event.
 *
 * Lookup order: `ev.sessionID`, `ev.properties.sessionID`,
 * `ev.properties.part.sessionID`, `ev.properties.info.sessionID`.
 * The first value that is neither null nor undefined wins.
 */
function getSid(ev: Record<string, unknown>): string | undefined {
  const props = ev.properties as Record<string, unknown> | undefined
  const part = props?.part as Record<string, unknown> | undefined
  const info = props?.info as Record<string, unknown> | undefined

  return (ev.sessionID ?? props?.sessionID ?? part?.sessionID ?? info?.sessionID) as string | undefined
}
|
|
244
|
-
|
|
245
|
-
/**
 * Extract the error object carried by an event: `ev.error` when present,
 * otherwise `ev.properties.error`.
 */
function getError(ev: Record<string, unknown>): Record<string, unknown> | undefined {
  const topLevel = ev.error as Record<string, unknown> | undefined
  const nested = (ev.properties as Record<string, unknown> | undefined)?.error as
    | Record<string, unknown>
    | undefined
  return topLevel ?? nested
}
|
|
252
|
-
|
|
253
|
-
/**
 * Extract the status object carried by an event: `ev.status` when present,
 * otherwise `ev.properties.status`.
 */
function getStatus(ev: Record<string, unknown>): Record<string, unknown> | undefined {
  const topLevel = ev.status as Record<string, unknown> | undefined
  const nested = (ev.properties as Record<string, unknown> | undefined)?.status as
    | Record<string, unknown>
    | undefined
  return topLevel ?? nested
}
|
|
260
|
-
|
|
261
|
-
/**
 * Abbreviate a session ID for log output: IDs longer than 12 characters
 * become "..." plus their last 8 characters; shorter IDs are returned as-is.
 */
function short(sid: string): string {
  if (sid.length <= 12) return sid
  return "..." + sid.slice(-8)
}
|
|
264
|
-
|
|
265
|
-
/**
 * Exponential backoff for the given attempt number:
 * `baseBackoffMs * 2^(attempt - 1)`, capped at `maxBackoffMs`.
 */
function backoffMs(attempt: number): number {
  const uncapped = baseBackoffMs * 2 ** (attempt - 1)
  return Math.min(uncapped, maxBackoffMs)
}
|
|
268
|
-
|
|
269
|
-
/**
 * v8.0: Evict stale non-busy sessions from the `sessions` map.
 *
 * Two rules apply to every session whose status is not "busy":
 *  1. age  — drop it once it has been idle for more than `IDLE_CLEANUP_MS`;
 *  2. size — if more than `MAX_IDLE_SESSIONS` remain, drop the oldest ones
 *            until the count is back at the cap.
 *
 * Sessions that never reported "idle" (status "unknown" or "retry") have
 * `idleSince === null`. Their age is measured from `lastActivityAt` instead,
 * so that both rules can still reach them. Busy sessions are never removed.
 */
function cleanupIdleSessions() {
  const now = Date.now()
  const doomed = new Set<string>()
  const idleEntries: Array<{ sid: string; idleSince: number }> = []

  for (const [sid, w] of sessions) {
    if (w.status === "busy") continue

    // Fall back to the last recorded activity when no idle timestamp exists.
    const idleSince = w.idleSince ?? w.lastActivityAt
    idleEntries.push({ sid, idleSince })

    if (now - idleSince > IDLE_CLEANUP_MS) doomed.add(sid)
  }

  // Also prune if too many idle sessions: oldest first, down to the cap.
  if (idleEntries.length > MAX_IDLE_SESSIONS) {
    idleEntries.sort((a, b) => a.idleSince - b.idleSince)
    const excess = idleEntries.length - MAX_IDLE_SESSIONS
    for (const { sid } of idleEntries.slice(0, excess)) doomed.add(sid)
  }

  for (const sid of doomed) {
    sessions.delete(sid)
  }
  if (doomed.size > 0) {
    // Fire-and-forget: log() never rejects.
    void log("debug", `Cleaned up ${doomed.size} idle session(s). Map size: ${sessions.size}`)
  }
}
|
|
308
|
-
|
|
309
|
-
// -----------------------------------------------------------------------
|
|
310
|
-
// Tool-call-as-text detection (v8.0: no busyCount guard, backoff,
|
|
311
|
-
// specific recovery prompt)
|
|
312
|
-
// -----------------------------------------------------------------------
|
|
313
|
-
|
|
314
|
-
/**
 * After a session went idle, look at the end of its transcript and, if the
 * assistant printed a tool call as plain text, send a recovery prompt.
 *
 * Guards, in order: skip when the user cancelled or a recovery was already
 * sent for this idle cycle; skip while the tool-text backoff has not elapsed;
 * skip once `maxRetries` tool-text attempts were made.
 *
 * Only the newest assistant turn is inspected: the last three messages are
 * walked from newest to oldest and the walk stops at the first message that
 * is not from the assistant. This prevents a leak that was already answered
 * (older assistant message, followed by the recovery prompt and a clean
 * reply) from triggering recovery again on the next idle.
 *
 * The role is read from `msg.role` and, failing that, from `msg.info.role`,
 * so both flat messages and `{ info, parts }` envelopes are understood.
 *
 * When a leak is found and the session is judged to be in a hallucination
 * loop, the session is aborted and resumed instead of being prompted.
 * All failures are logged and swallowed.
 */
async function checkForToolCallAsText(sid: string, w: SessionWatch) {
  if (w.userCancelled || w.toolTextRecovered) return

  // v8.0: Backoff for tool-text recovery
  if (w.toolTextAttempts > 0) {
    const elapsed = Date.now() - w.lastRetryAt
    const requiredBackoff = backoffMs(w.toolTextAttempts)
    if (elapsed < requiredBackoff) return
  }

  // v8.0: Cap tool-text attempts like regular retries
  if (w.toolTextAttempts >= maxRetries) return

  await log("debug", `${short(sid)} - checking for tool-call-as-text (attempt ${w.toolTextAttempts + 1})`)

  try {
    const response = await ctx.client.session.messages({
      path: { id: sid },
    })

    // The transcript may be the response itself, or sit under .data / .messages.
    const data = response as Record<string, unknown>
    let messages: Array<Record<string, unknown>> = []
    if (Array.isArray(data)) {
      messages = data
    } else if (Array.isArray(data.data)) {
      messages = data.data
    } else if (Array.isArray(data.messages)) {
      messages = data.messages
    }

    const recent = messages.slice(-3)

    // Newest → oldest; stop at the first non-assistant message.
    let leaked = false
    for (let i = recent.length - 1; i >= 0 && !leaked; i--) {
      const msg = recent[i]
      const info = msg.info as Record<string, unknown> | undefined
      const role = (msg.role ?? info?.role) as string | undefined
      if (role !== "assistant") break

      const parts = msg.parts as Array<Record<string, unknown>> | undefined
      if (!parts) continue

      leaked = parts.some(
        (part) => part.type === "text" && containsToolCallAsText((part.text as string) ?? "")
      )
    }
    if (!leaked) return

    w.toolTextRecovered = true
    w.toolTextAttempts++
    await log(
      "info",
      `Tool-call-as-text detected on ${short(sid)}! ` +
        `Attempt ${w.toolTextAttempts}/${maxRetries}. Sending recovery prompt...`
    )

    if (isHallucinationLoop(sid)) {
      await log("warn", `Hallucination loop detected on ${short(sid)} — aborting instead`)
      await tryAbortAndResume(sid, w)
      return
    }

    try {
      await ctx.client.session.prompt({
        path: { id: sid },
        body: { parts: [{ type: "text", text: TOOL_TEXT_RECOVERY_PROMPT }] },
      })
      recordContinue(sid)
      w.lastRetryAt = Date.now()
      await log("info", `${short(sid)} - tool-call-as-text recovery sent (attempt ${w.toolTextAttempts})`)
    } catch (err) {
      const errMsg = err instanceof Error ? err.message : String(err)
      await log("warn", `${short(sid)} - tool-call-as-text recovery failed: ${errMsg}`)
    }
  } catch (err) {
    const errMsg = err instanceof Error ? err.message : String(err)
    log("debug", `${short(sid)} - could not fetch messages: ${errMsg}`)
  }
}
|
|
391
|
-
|
|
392
|
-
// -----------------------------------------------------------------------
|
|
393
|
-
// Abort + Continue
|
|
394
|
-
// -----------------------------------------------------------------------
|
|
395
|
-
|
|
396
|
-
/**
 * Abort a session and, after a short pause, send "continue" to restart it.
 *
 * Re-entrancy is blocked with `w.aborting`: a second call while one is in
 * flight returns `false` immediately. The flag is cleared on every exit path.
 *
 * The abort request addresses the session with `{ path: { id } }`, the same
 * shape used by every other session call in this file (`messages`, `prompt`).
 *
 * @returns `true` when both the abort and the follow-up prompt succeeded;
 *   `false` when already aborting, or when either call threw.
 */
async function tryAbortAndResume(sid: string, w: SessionWatch): Promise<boolean> {
  if (w.aborting) return false
  w.aborting = true

  // Report idle time since the orphan watch began, or since last activity.
  const idleSec = Math.round((Date.now() - (w.orphanWatchStartAt ?? w.lastActivityAt)) / 1000)
  await log("info", `Abort+Resume on ${short(sid)} (${idleSec}s idle). Aborting...`)

  try {
    await ctx.client.session.abort({ path: { id: sid } })
    await log("info", `${short(sid)} - abort OK`)
  } catch (err) {
    const errMsg = err instanceof Error ? err.message : String(err)
    await log("warn", `${short(sid)} - abort failed: ${errMsg}`)
    w.aborting = false
    return false
  }

  // Give the host time to settle the abort before prompting again.
  await new Promise<void>((resolve) => setTimeout(resolve, ABORT_CONTINUE_DELAY_MS))

  // No idle event may have arrived yet; reflect the abort locally.
  if (w.status === "busy") w.status = "idle"

  try {
    await ctx.client.session.prompt({
      path: { id: sid },
      body: { parts: [{ type: "text", text: "continue" }] },
    })
    recordContinue(sid)
    await log("info", `${short(sid)} - abort+continue done`)
    w.lastRetryAt = Date.now()
    w.orphanWatchStartAt = null
    w.resumeAttempts++
    w.aborting = false
    return true
  } catch (err) {
    const errMsg = err instanceof Error ? err.message : String(err)
    await log("warn", `${short(sid)} - continue after abort failed: ${errMsg}`)
    w.aborting = false
    return false
  }
}
|
|
436
|
-
|
|
437
|
-
// -----------------------------------------------------------------------
|
|
438
|
-
// Resume: normal stall
|
|
439
|
-
// -----------------------------------------------------------------------
|
|
440
|
-
|
|
441
|
-
/**
 * Try to un-stall a session by sending it a "continue" prompt.
 *
 * Steps:
 *  1. Respect the exponential backoff since the previous retry.
 *  2. If the session looks like it is in a hallucination loop, escalate to
 *     abort+continue instead.
 *  3. Otherwise count the attempt, stamp `lastRetryAt`, and send the prompt.
 *
 * `lastRetryAt` is stamped BEFORE the first `await`. The watchdog calls this
 * function without awaiting it, so a tick that fires while the prompt is
 * still in flight would otherwise find the previous `lastRetryAt` (0 on the
 * first retry) and skip the backoff guard.
 *
 * @param reason Short label used in the log line (e.g. "Stream stall").
 * @returns `true` when a prompt (or abort+continue) was sent successfully;
 *   `false` when backing off or when the prompt failed.
 */
async function tryResume(sid: string, w: SessionWatch, reason: string): Promise<boolean> {
  const now = Date.now()
  const elapsedSinceRetry = now - w.lastRetryAt
  const requiredBackoff = backoffMs(w.resumeAttempts)
  if (w.lastRetryAt > 0 && elapsedSinceRetry < requiredBackoff) return false

  if (isHallucinationLoop(sid)) {
    await log("warn", `Hallucination loop on ${short(sid)}! Aborting...`)
    return await tryAbortAndResume(sid, w)
  }

  w.resumeAttempts++
  // Stamp the attempt synchronously so that overlapping ticks see it.
  w.lastRetryAt = now

  const idleSec = Math.round((now - w.lastActivityAt) / 1000)
  await log("info", `${reason} on ${short(sid)} (${idleSec}s, retry ${w.resumeAttempts}/${maxRetries})`)

  try {
    await ctx.client.session.prompt({
      path: { id: sid },
      body: { parts: [{ type: "text", text: "continue" }] },
    })
    recordContinue(sid)
    await log("info", `${short(sid)} - retry sent`)
    return true
  } catch (err) {
    const errMsg = err instanceof Error ? err.message : String(err)
    await log("warn", `${short(sid)} - retry failed: ${errMsg}`)
    return false
  }
}
|
|
472
|
-
|
|
473
|
-
// -----------------------------------------------------------------------
|
|
474
|
-
// Session discovery (v8.0: periodic session.list() to find missed sessions)
|
|
475
|
-
// -----------------------------------------------------------------------
|
|
476
|
-
|
|
477
|
-
/**
 * Ask the host for its session list and start tracking every session that is
 * not tracked yet. When a listed session carries a `status` value it is
 * copied onto the new watch record (and `idleSince` is set for "idle").
 * Failures are logged at debug level and otherwise ignored.
 */
async function discoverSessions() {
  try {
    const response = await ctx.client.session.list()
    const data = response as Record<string, unknown>

    // The list may be the response itself or sit under .data.
    const list: Array<Record<string, unknown>> = Array.isArray(data)
      ? data
      : Array.isArray(data.data)
        ? data.data
        : []

    for (const entry of list) {
      const sid = entry.id as string
      if (!sid || sessions.has(sid)) continue

      const watch = ensureWatch(sid)
      const status = entry.status as string | undefined
      if (status) {
        watch.status = status as SessionWatch["status"]
        if (status === "idle") watch.idleSince = Date.now()
      }
      log("debug", `Discovered session ${short(sid)} via list()`)
    }
  } catch (err) {
    const errMsg = err instanceof Error ? err.message : String(err)
    log("debug", `Session discovery failed: ${errMsg}`)
  }
}
|
|
506
|
-
|
|
507
|
-
// -----------------------------------------------------------------------
|
|
508
|
-
// Timer (v8.0: cleanup + session discovery)
|
|
509
|
-
// -----------------------------------------------------------------------
|
|
510
|
-
|
|
511
|
-
/**
 * Start the background timers (idempotent: a second call is a no-op).
 *
 *  - Watchdog interval (`checkIntervalMs`): for every busy, non-cancelled,
 *    non-aborting session, either handles an active orphan watch
 *    (abort+continue once `subagentWaitMs + gracePeriodMs` elapsed) or, when
 *    it is the only busy session, handles a stream stall (plain resume once
 *    `chunkTimeoutMs + gracePeriodMs` elapsed). Each tick ends with idle
 *    session cleanup.
 *  - Discovery interval (`SESSION_DISCOVERY_INTERVAL_MS`): polls session.list().
 *  - One initial discovery after 5 seconds.
 *
 * All three timers are `unref`'d where supported so that none of them keeps
 * the process alive on its own.
 */
function startTimer() {
  if (timer) return

  timer = setInterval(() => {
    const now = Date.now()
    const numBusy = busyCount()

    for (const [sid, w] of sessions) {
      if (w.status !== "busy") continue
      if (w.userCancelled) continue
      if (w.aborting) continue

      // Orphan watch takes precedence over normal stall detection.
      if (w.orphanWatchStartAt !== null) {
        const orphanIdle = now - w.orphanWatchStartAt
        if (orphanIdle >= subagentWaitMs + gracePeriodMs) {
          if (w.resumeAttempts < maxRetries) {
            // Fire-and-forget: the helper handles its own errors.
            void tryAbortAndResume(sid, w)
          } else if (!w.gaveUp) {
            w.gaveUp = true
            w.orphanWatchStartAt = null
            w.aborting = false
            void log("warn", `${short(sid)} - orphan retries exhausted.`)
          }
        }
        continue
      }

      // With several busy sessions, silence on one of them is not treated as a stall.
      if (numBusy > 1) continue

      const idle = now - w.lastActivityAt
      if (idle >= chunkTimeoutMs + gracePeriodMs) {
        if (w.resumeAttempts < maxRetries) {
          void tryResume(sid, w, "Stream stall")
        } else if (!w.gaveUp) {
          w.gaveUp = true
          void log("warn", `${short(sid)} - all ${maxRetries} retries exhausted.`)
        }
      }
    }

    // v8.0: Periodic cleanup
    cleanupIdleSessions()
  }, checkIntervalMs)

  if (timer.unref) timer.unref()

  // v8.0: Periodic session discovery
  discoveryTimer = setInterval(() => {
    void discoverSessions()
  }, SESSION_DISCOVERY_INTERVAL_MS)
  if (discoveryTimer.unref) discoveryTimer.unref()

  // Run initial discovery after a short delay; like the intervals, this
  // timer must not hold the process open.
  const initialDiscovery = setTimeout(discoverSessions, 5_000)
  if (initialDiscovery.unref) initialDiscovery.unref()
}
|
|
565
|
-
|
|
566
|
-
startTimer()
|
|
567
|
-
|
|
568
|
-
// -----------------------------------------------------------------------
|
|
569
|
-
// Event handler
|
|
570
|
-
// -----------------------------------------------------------------------
|
|
571
|
-
|
|
572
|
-
/**
 * Feed one host event into the watchdog state.
 *
 * The session that emitted the event (if any) is first marked as active.
 * Handled event types: "session.status", "session.created",
 * "session.updated", "session.idle", "session.error" and
 * "command.executed"; every other type is ignored.
 */
function handleEvent(ev: Record<string, unknown>) {
  const sid = getSid(ev)
  const eventType = ev.type as string

  // v8.0: Only touch the session that emitted the event
  if (sid) touchSession(sid)

  /** Queue a delayed tool-call-as-text check unless one was already sent or the cap is reached. */
  const scheduleToolTextCheck = (targetSid: string, watch: SessionWatch) => {
    if (watch.toolTextRecovered || watch.toolTextAttempts >= maxRetries) return
    setTimeout(() => {
      checkForToolCallAsText(targetSid, watch)
    }, TOOL_TEXT_CHECK_DELAY_MS)
  }

  if (eventType === "session.status") {
    if (!sid) return

    const statusType = (getStatus(ev)?.type as string) ?? "unknown"
    const w = ensureWatch(sid)
    w.status = statusType as SessionWatch["status"]

    if (statusType === "busy") {
      // A new busy status starts a clean cycle: all recovery state is reset.
      Object.assign(w, {
        lastActivityAt: Date.now(),
        userCancelled: false,
        resumeAttempts: 0,
        gaveUp: false,
        orphanWatchStartAt: null,
        aborting: false,
        toolTextRecovered: false,
        toolTextAttempts: 0,
        continueTimestamps: [],
        idleSince: null,
      })
      log("debug", `${short(sid)} -> busy (${busyCount()})`)
      return
    }

    if (statusType === "idle") {
      Object.assign(w, {
        status: "idle",
        userCancelled: false,
        aborting: false,
        idleSince: Date.now(),
      })

      // When the busy count drops from several to one, the remaining busy
      // session is put on orphan watch.
      // NOTE(review): prevBusyCount is refreshed only here, never when a
      // session turns busy — confirm the first >1 → 1 transition is detected.
      const currentBusy = busyCount()
      if (prevBusyCount > 1 && currentBusy === 1) {
        const lone = getLoneBusySession()
        if (lone && lone.w.orphanWatchStartAt === null) {
          lone.w.orphanWatchStartAt = Date.now()
          log("info", `Subagent finished, parent ${short(lone.sid)} stuck. Orphan watch (${subagentWaitMs / 1000}s).`)
        }
      }
      prevBusyCount = currentBusy
      log("debug", `${short(sid)} -> idle (${currentBusy})`)

      // v8.0: TOOL-CALL-AS-TEXT CHECK — runs regardless of busyCount
      scheduleToolTextCheck(sid, w)
      return
    }

    if (statusType === "retry") {
      touchSession(sid)
      log("debug", `${short(sid)} -> provider retry`)
    }
    return
  }

  if (eventType === "session.created") {
    if (!sid) return
    ensureWatch(sid)
    log("debug", `New session: ${short(sid)} (${sessions.size})`)
    return
  }

  if (eventType === "session.updated") {
    if (sid) ensureWatch(sid)
    return
  }

  if (eventType === "session.idle") {
    if (!sid) return
    const w = sessions.get(sid)
    if (!w) return

    Object.assign(w, {
      status: "idle",
      userCancelled: false,
      orphanWatchStartAt: null,
      aborting: false,
      idleSince: Date.now(),
    })

    // v8.0: Also check for tool-call-as-text on legacy idle event
    scheduleToolTextCheck(sid, w)
    return
  }

  if (eventType === "session.error") {
    const errorObj = getError(ev)
    const errorName = (errorObj?.name as string) ?? ""

    if (errorName === "MessageAbortedError") {
      // A user abort marks every busy session as cancelled.
      for (const w of sessions.values()) {
        if (w.status !== "busy") continue
        Object.assign(w, {
          userCancelled: true,
          status: "idle",
          orphanWatchStartAt: null,
          aborting: false,
          idleSince: Date.now(),
        })
      }
      log("info", "User abort (ESC)")
      return
    }

    if (busyCount() === 0) return

    const errorData = errorObj?.data
    const errorMessage =
      ((errorData as Record<string, unknown>)?.message as string | undefined) ?? String(errorData ?? "")
    log("debug", `Session error: ${errorName} - ${errorMessage}`)
    return
  }

  if (eventType === "command.executed") {
    // A command gives every tracked session a fresh retry budget.
    for (const w of sessions.values()) {
      Object.assign(w, {
        userCancelled: false,
        resumeAttempts: 0,
        gaveUp: false,
        orphanWatchStartAt: null,
        aborting: false,
        toolTextRecovered: false,
        toolTextAttempts: 0,
      })
    }
  }
}
|
|
705
|
-
|
|
706
|
-
// -----------------------------------------------------------------------
|
|
707
|
-
// Returned hooks
|
|
708
|
-
// -----------------------------------------------------------------------
|
|
709
|
-
|
|
710
|
-
// Hooks handed back to the host: an event listener, a config hook, and the
// manual `resume` tool.
return {
  /** Logs a one-off "ready" line on the first event, then dispatches the event. */
  event: async ({ event }) => {
    if (!initialised) {
      initialised = true
      log("info", `v8.0 ready. timeout=${chunkTimeoutMs}ms, orphan=${subagentWaitMs}ms, loop=${loopMaxContinues}x/${loopWindowMs / 1000}s`)
    }
    handleEvent(event as Record<string, unknown>)
  },

  /** Only logs that the config hook ran. */
  config: async () => {
    log("info", `v8.0 config OK`)
  },

  tool: {
    resume: tool({
      description: "Manually resume a stalled LLM session.",
      args: {
        prompt: tool.schema.string().optional().describe("Continuation prompt. Defaults to 'continue'."),
        session_id: tool.schema.string().optional().describe("Target session ID."),
      },
      /**
       * Send a continuation prompt to a session.
       *
       * Target: the `session_id` argument, else the calling tool's session,
       * else the first busy session on orphan watch, else the busy session
       * with the most recent activity. Returns a human-readable result
       * string; failures are reported in that string rather than thrown.
       */
      async execute(args, toolCtx) {
        let targetSid = (args.session_id as string) ?? toolCtx.sessionID

        if (!targetSid) {
          const busy = [...sessions].filter(([, w]) => w.status === "busy")

          const orphan = busy.find(([, w]) => w.orphanWatchStartAt !== null)

          let latest: [string, SessionWatch] | undefined
          for (const candidate of busy) {
            const last = candidate[1].lastActivityAt
            if (last > 0 && (!latest || last > latest[1].lastActivityAt)) latest = candidate
          }

          targetSid = orphan?.[0] ?? latest?.[0]
          if (!targetSid) return "No active stalled session found."
        }

        const w = sessions.get(targetSid)
        const text = (args.prompt as string) ?? "continue"
        log("info", `Manual resume on ${short(targetSid)}: "${text}"`)

        try {
          await ctx.client.session.prompt({
            path: { id: targetSid },
            body: { agent: toolCtx.agent, parts: [{ type: "text", text }] },
          })
          recordContinue(targetSid)
          if (w) {
            // A manual resume clears the automatic recovery counters.
            w.orphanWatchStartAt = null
            w.resumeAttempts = 0
            w.toolTextRecovered = false
            w.toolTextAttempts = 0
          }
          return `Resume sent to ${short(targetSid)}: "${text}"`
        } catch (err) {
          const msg = err instanceof Error ? err.message : String(err)
          return `Failed: ${msg}`
        }
      },
    }),
  },
}
|
|
768
|
-
}
|
|
769
|
-
|
|
770
|
-
export default AutoResumePlugin
|