pikiclaw 0.3.61 → 0.3.62
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -43,7 +43,7 @@ import { tmpdir } from 'node:os';
|
|
|
43
43
|
import { Q, agentLog, agentWarn, buildStreamPreviewMeta, computeContext, joinErrorMessages, emitSessionIdUpdate, normalizeClaudeModelId, pushRecentActivity, summarizeClaudeToolUse, summarizeClaudeToolResult, previewToolCallInput, previewToolCallResult, detectClaudeApiError, } from '../utils.js';
|
|
44
44
|
import { encodePathAsDirName, getHome, whichSync } from '../../core/platform.js';
|
|
45
45
|
import { stripAnsiEscapes } from '../../core/utils.js';
|
|
46
|
-
import { AGENT_STREAM_HARD_KILL_GRACE_MS } from '../../core/constants.js';
|
|
46
|
+
import { AGENT_STREAM_HARD_KILL_GRACE_MS, CLAUDE_TUI_STALL_QUIET_MS, CLAUDE_TUI_STALL_PENDING_TOOL_MS, } from '../../core/constants.js';
|
|
47
47
|
import { claudeParse, createClaudeStreamState, claudeContextWindowFromModel, claudeEffectiveContextWindow, registerClaudeBackgroundAgentLaunch, pendingClaudeBackgroundAgentCount, } from './claude.js';
|
|
48
48
|
async function loadPty() {
|
|
49
49
|
// Dynamic import keeps node-pty an optional dependency — if it's not
|
|
@@ -643,6 +643,26 @@ export function decideClaudeTuiStop(input) {
|
|
|
643
643
|
}
|
|
644
644
|
return 'terminate';
|
|
645
645
|
}
|
|
646
|
+
/**
 * Stall-watchdog verdict for a claude TUI turn: has the turn gone dead?
 *
 * Background: the claude CLI can freeze mid-turn (observed 2026-06-02 on
 * 2.1.160). After a tool_result lands, the next assistant segment never
 * starts; the process stays alive, the JSONL goes permanently quiet, no Stop
 * hook fires and no error surfaces. Without this check the IM card would
 * spin forever.
 *
 * The allowed silence depends on whether a tool is mid-execution (PreToolUse
 * seen, no PostToolUse yet). A pending tool gets the longer threshold so a
 * legitimately long foreground command is not shot, while a freeze that hits
 * mid-execution is still caught eventually — claude's own Bash timeout fires
 * PostToolUse well inside CLAUDE_TUI_STALL_PENDING_TOOL_MS.
 *
 * @param {object} input
 * @param {number} input.now - Current timestamp (ms).
 * @param {number} input.lastProgressAt - Freshest of every live signal the
 *   driver tracks (main JSONL, hook tool events, sub-agent sidecars, hook
 *   lifecycle state), in ms.
 * @param {number} input.pendingToolCount - Number of hook-reported tools
 *   still executing.
 * @param {number} [input.pendingToolMs] - Overrides
 *   CLAUDE_TUI_STALL_PENDING_TOOL_MS when not null/undefined.
 * @param {number} [input.quietMs] - Overrides CLAUDE_TUI_STALL_QUIET_MS when
 *   not null/undefined.
 * @returns {'stall'|'wait'} 'stall' once the silence strictly exceeds the
 *   applicable threshold, otherwise 'wait'.
 */
export function decideClaudeTuiStall(input) {
    const toolInFlight = input.pendingToolCount > 0;
    let limitMs;
    if (toolInFlight) {
        limitMs = input.pendingToolMs ?? CLAUDE_TUI_STALL_PENDING_TOOL_MS;
    }
    else {
        limitMs = input.quietMs ?? CLAUDE_TUI_STALL_QUIET_MS;
    }
    const silentForMs = input.now - input.lastProgressAt;
    if (silentForMs > limitMs) {
        return 'stall';
    }
    return 'wait';
}
|
|
646
666
|
// ---------------------------------------------------------------------------
|
|
647
667
|
// Main entry
|
|
648
668
|
// ---------------------------------------------------------------------------
|
|
@@ -981,6 +1001,14 @@ export async function doClaudeTuiStream(opts) {
|
|
|
981
1001
|
// Last pending-background count we logged, so the waiting state logs on
|
|
982
1002
|
// transitions instead of every 200ms poll tick.
|
|
983
1003
|
let lastLoggedPendingBg = -1;
|
|
1004
|
+
// Stall-watchdog liveness signals. Together with lastMainJsonlEventAt they
|
|
1005
|
+
// answer "is the claude process still doing anything at all?" — see
|
|
1006
|
+
// decideClaudeTuiStall for why this exists (claude CLI mid-turn freeze).
|
|
1007
|
+
let lastToolEventAt = start;
|
|
1008
|
+
let lastSidecarEventAt = 0;
|
|
1009
|
+
let stallKilled = false;
|
|
1010
|
+
/** Hook-reported tools still executing: PreToolUse seen, no PostToolUse. */
|
|
1011
|
+
const pendingHookToolIds = new Set();
|
|
984
1012
|
// Append-only tool-events log fed by PreToolUse / PostToolUse hooks. We
|
|
985
1013
|
// tail it with the same incremental reader the JSONL transcript uses, so
|
|
986
1014
|
// tool calls + plan changes surface live during the turn even while the
|
|
@@ -1004,6 +1032,18 @@ export async function doClaudeTuiStream(opts) {
|
|
|
1004
1032
|
catch {
|
|
1005
1033
|
continue;
|
|
1006
1034
|
}
|
|
1035
|
+
// Stall-watchdog bookkeeping: any hook event is proof of life, and the
|
|
1036
|
+
// Pre/Post pairing tells the watchdog whether a tool is mid-execution
|
|
1037
|
+
// (which extends the stall threshold — long foreground commands are
|
|
1038
|
+
// legitimately silent).
|
|
1039
|
+
lastToolEventAt = Date.now();
|
|
1040
|
+
const hookToolId = typeof ev?.tool_use_id === 'string' ? ev.tool_use_id : '';
|
|
1041
|
+
if (hookToolId) {
|
|
1042
|
+
if (ev?.event === 'PreToolUse')
|
|
1043
|
+
pendingHookToolIds.add(hookToolId);
|
|
1044
|
+
else if (ev?.event === 'PostToolUse')
|
|
1045
|
+
pendingHookToolIds.delete(hookToolId);
|
|
1046
|
+
}
|
|
1007
1047
|
// A Task PreToolUse and the first sub-agent tool PreToolUse can land in
|
|
1008
1048
|
// the same tick batch. If the sub-agent's hook arrives before we've
|
|
1009
1049
|
// discovered its sidecar (and thus before s.subAgentIdToParent knows
|
|
@@ -1105,6 +1145,10 @@ export async function doClaudeTuiStream(opts) {
|
|
|
1105
1145
|
any = true;
|
|
1106
1146
|
}
|
|
1107
1147
|
}
|
|
1148
|
+
// Stall-watchdog: live sub-agents count as turn progress even while the
|
|
1149
|
+
// parent thread is quietly waiting on them.
|
|
1150
|
+
if (any)
|
|
1151
|
+
lastSidecarEventAt = Date.now();
|
|
1108
1152
|
return any;
|
|
1109
1153
|
};
|
|
1110
1154
|
const tick = () => {
|
|
@@ -1248,6 +1292,35 @@ export async function doClaudeTuiStream(opts) {
|
|
|
1248
1292
|
// Continue polling so any post-Stop JSONL writes still get parsed; the
|
|
1249
1293
|
// process will exit shortly and onExit will resolve the wait.
|
|
1250
1294
|
}
|
|
1295
|
+
// Stall watchdog. claude CLI can freeze mid-turn (observed on 2.1.160):
|
|
1296
|
+
// a tool_result lands, then the next assistant segment never starts — the
|
|
1297
|
+
// process stays alive, every signal goes quiet, no Stop hook ever fires.
|
|
1298
|
+
// When ALL liveness signals have been silent past the threshold, declare
|
|
1299
|
+
// the turn stalled and SIGTERM; doClaudeWithRetry auto-resumes the session
|
|
1300
|
+
// once so the turn continues instead of spinning forever in the IM card.
|
|
1301
|
+
if (!stopHookFired && !timedOut && !interrupted && !stallKilled) {
|
|
1302
|
+
const lastProgressAt = Math.max(start, lastMainJsonlEventAt, lastToolEventAt, lastSidecarEventAt, state.stoppedAt || 0, state.promptSubmittedAt || 0);
|
|
1303
|
+
const stallDecision = decideClaudeTuiStall({
|
|
1304
|
+
now: Date.now(),
|
|
1305
|
+
lastProgressAt,
|
|
1306
|
+
pendingToolCount: pendingHookToolIds.size,
|
|
1307
|
+
});
|
|
1308
|
+
if (stallDecision === 'stall') {
|
|
1309
|
+
stallKilled = true;
|
|
1310
|
+
const quietMin = Math.round((Date.now() - lastProgressAt) / 60_000);
|
|
1311
|
+
s.stopReason = 'stalled';
|
|
1312
|
+
if (!s.errors) {
|
|
1313
|
+
s.errors = [`Claude process went silent mid-turn for ${quietMin}m (no JSONL, hook, or sub-agent events) — known claude CLI freeze. Terminated for auto-resume.`];
|
|
1314
|
+
}
|
|
1315
|
+
agentWarn(`[claude-tui] stall detected: no progress for ${quietMin}m (pendingTools=${pendingHookToolIds.size}) — terminating TUI pid=${proc.pid} for auto-resume`);
|
|
1316
|
+
pushRecentActivity(s.recentActivity, `Agent stalled (${quietMin}m silent) — restarting turn`);
|
|
1317
|
+
s.activity = s.recentActivity.join('\n');
|
|
1318
|
+
emit();
|
|
1319
|
+
killProc('SIGTERM');
|
|
1320
|
+
// Keep polling: onExit resolves the wait and the final drains pick up
|
|
1321
|
+
// whatever the dying process flushes.
|
|
1322
|
+
}
|
|
1323
|
+
}
|
|
1251
1324
|
pollHandle = setTimeout(tick, POLL_INTERVAL_MS);
|
|
1252
1325
|
};
|
|
1253
1326
|
pollHandle = setTimeout(tick, POLL_INTERVAL_MS);
|
|
@@ -2275,8 +2275,53 @@ function makeOverloadFriendlyResult(result, reason, attempts) {
|
|
|
2275
2275
|
* friendly human-readable explanation in `message` so the IM card doesn't
|
|
2276
2276
|
* dump raw "API Error: Overloaded" text on the user.
|
|
2277
2277
|
*/
|
|
2278
|
+
/**
 * Prompt sent to the resumed process during stall recovery.
 *
 * The frozen process had already accepted and partially executed the user's
 * prompt (it is in the transcript), so the resumed process must NOT be given
 * the original prompt again. It receives this explicit "pick up where you
 * left off" instruction instead.
 */
const CLAUDE_STALL_RESUME_PROMPT = [
    '[pikiclaw] The previous agent process stalled mid-turn and was restarted. ',
    'Continue the task from where it left off — do not start over or repeat work that already completed.',
].join('');
/**
 * Automatic resume budget per turn. With a budget of one, a second stall is
 * surfaced to the user rather than retried.
 */
const CLAUDE_STALL_RESUME_LIMIT = 1;
|
|
2278
2288
|
async function doClaudeWithRetry(opts) {
|
|
2279
2289
|
let lastResult = await doClaudeStreamOnce(opts);
|
|
2290
|
+
// Mid-turn stall recovery. The TUI driver SIGTERMs a frozen claude process
|
|
2291
|
+
// (stopReason 'stalled' — see decideClaudeTuiStall in claude-tui.ts) instead
|
|
2292
|
+
// of letting the IM card spin forever. Resume the same session once with a
|
|
2293
|
+
// continuation prompt so the turn picks up where the frozen process died.
|
|
2294
|
+
let stallResumes = 0;
|
|
2295
|
+
while (lastResult.stopReason === 'stalled'
|
|
2296
|
+
&& stallResumes < CLAUDE_STALL_RESUME_LIMIT
|
|
2297
|
+
&& !opts.abortSignal?.aborted) {
|
|
2298
|
+
const stalledSessionId = lastResult.sessionId || opts.sessionId;
|
|
2299
|
+
if (!stalledSessionId)
|
|
2300
|
+
break;
|
|
2301
|
+
stallResumes++;
|
|
2302
|
+
agentWarn(`[claude] turn stalled mid-flight; auto-resuming session ${stalledSessionId.slice(0, 8)} (${stallResumes}/${CLAUDE_STALL_RESUME_LIMIT})`);
|
|
2303
|
+
lastResult = await doClaudeStreamOnce({
|
|
2304
|
+
...opts,
|
|
2305
|
+
sessionId: stalledSessionId,
|
|
2306
|
+
forkOf: undefined,
|
|
2307
|
+
prompt: CLAUDE_STALL_RESUME_PROMPT,
|
|
2308
|
+
attachments: undefined,
|
|
2309
|
+
});
|
|
2310
|
+
}
|
|
2311
|
+
if (lastResult.stopReason === 'stalled') {
|
|
2312
|
+
// Still stalled after the resume budget (or no session id to resume).
|
|
2313
|
+
// Surface a self-explanatory failure instead of the raw error text.
|
|
2314
|
+
return {
|
|
2315
|
+
...lastResult,
|
|
2316
|
+
ok: false,
|
|
2317
|
+
incomplete: true,
|
|
2318
|
+
message: [
|
|
2319
|
+
'The agent process stalled mid-turn and could not be auto-recovered (known claude CLI freeze, seen on 2.1.160).',
|
|
2320
|
+
'Your session is intact — re-send your message (or say "continue") to pick up where it stopped.',
|
|
2321
|
+
'If this keeps happening, pin the claude CLI to a known-good version: npm install -g @anthropic-ai/claude-code@2.1.159',
|
|
2322
|
+
].join(' '),
|
|
2323
|
+
};
|
|
2324
|
+
}
|
|
2280
2325
|
let attempts = 0;
|
|
2281
2326
|
// Use the error text recorded by detectClaudeApiError-driven branches to
|
|
2282
2327
|
// decide retry: lastResult.error is "Anthropic API error: <reason>" on
|
package/dist/core/constants.js
CHANGED
|
@@ -287,6 +287,25 @@ export const AGENT_STREAM_HARD_KILL_GRACE_MS = 10_000;
|
|
|
287
287
|
* resumed via --resume, can see it in the transcript.
|
|
288
288
|
*/
|
|
289
289
|
export const AGENT_GRACEFUL_ABORT_GRACE_MS = 2_000;
|
|
290
|
+
/**
 * Quiet threshold for the claude-tui stall watchdog (10 minutes).
 *
 * The claude CLI is known to freeze mid-turn (observed 2026-06-02 on
 * 2.1.160): after a tool_result lands, the next assistant segment never
 * starts; the process stays alive, the JSONL goes permanently quiet and no
 * Stop hook ever fires. Once every live signal (main JSONL, hook tool events,
 * sub-agent sidecars, hook lifecycle state) has been silent for longer than
 * this, the driver SIGTERMs the PTY and the dispatch wrapper auto-resumes the
 * session once.
 *
 * The value must sit safely above the longest healthy gap between JSONL
 * events — a single max-effort inference can take a few minutes before its
 * first content block lands.
 */
export const CLAUDE_TUI_STALL_QUIET_MS = 10 * 60 * 1_000;
/**
 * Stall threshold used while a hook-reported tool is still executing, i.e.
 * PreToolUse was seen with no matching PostToolUse (30 minutes).
 *
 * Claude's own Bash timeout caps foreground commands at ~10 minutes and fires
 * PostToolUse either way, so a pending tool that stays silent this long means
 * the freeze hit mid-execution.
 */
export const CLAUDE_TUI_STALL_PENDING_TOOL_MS = 30 * 60 * 1_000;
|
|
290
309
|
/** Codex-specific grace period added to the user-configured timeout. */
|
|
291
310
|
export const CODEX_STREAM_HARD_KILL_GRACE_MS = 5_000;
|
|
292
311
|
/**
|
package/package.json
CHANGED