pikiclaw 0.3.61 → 0.3.63
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent/drivers/claude-tui.js +161 -3
- package/dist/agent/drivers/claude.js +128 -1
- package/dist/core/constants.js +42 -0
- package/package.json +1 -1
|
@@ -43,8 +43,8 @@ import { tmpdir } from 'node:os';
|
|
|
43
43
|
import { Q, agentLog, agentWarn, buildStreamPreviewMeta, computeContext, joinErrorMessages, emitSessionIdUpdate, normalizeClaudeModelId, pushRecentActivity, summarizeClaudeToolUse, summarizeClaudeToolResult, previewToolCallInput, previewToolCallResult, detectClaudeApiError, } from '../utils.js';
|
|
44
44
|
import { encodePathAsDirName, getHome, whichSync } from '../../core/platform.js';
|
|
45
45
|
import { stripAnsiEscapes } from '../../core/utils.js';
|
|
46
|
-
import { AGENT_STREAM_HARD_KILL_GRACE_MS } from '../../core/constants.js';
|
|
47
|
-
import { claudeParse, createClaudeStreamState, claudeContextWindowFromModel, claudeEffectiveContextWindow, registerClaudeBackgroundAgentLaunch, pendingClaudeBackgroundAgentCount, } from './claude.js';
|
|
46
|
+
import { AGENT_STREAM_HARD_KILL_GRACE_MS, CLAUDE_TUI_STALL_QUIET_MS, CLAUDE_TUI_STALL_PENDING_TOOL_MS, CLAUDE_TUI_STALL_PTY_DEAD_MS, CLAUDE_TUI_STOP_HOLD_QUIET_TTL_MS, } from '../../core/constants.js';
|
|
47
|
+
import { claudeParse, createClaudeStreamState, claudeContextWindowFromModel, claudeEffectiveContextWindow, registerClaudeBackgroundAgentLaunch, pendingClaudeBackgroundAgentCount, registerClaudeBackgroundBashLaunch, pendingClaudeBackgroundBashCount, extractClaudeBackgroundTaskId, } from './claude.js';
|
|
48
48
|
async function loadPty() {
|
|
49
49
|
// Dynamic import keeps node-pty an optional dependency — if it's not
|
|
50
50
|
// installed the print-mode dispatcher in claude.ts will catch the throw
|
|
@@ -391,6 +391,12 @@ export function applyHookToolEvent(ev, s) {
|
|
|
391
391
|
s.claudeToolsById.set(toolUseId, { name: toolName, summary: desc || kind || 'Sub-agent' });
|
|
392
392
|
return true;
|
|
393
393
|
}
|
|
394
|
+
// Background Bash — register like a backgrounded agent so the turn's Stop
|
|
395
|
+
// holds the PTY open until its <task-notification> lands, instead of
|
|
396
|
+
// SIGTERMing the still-running command (and its future report-back turn).
|
|
397
|
+
if (toolName === 'Bash' && ev.tool_input?.run_in_background === true) {
|
|
398
|
+
registerClaudeBackgroundBashLaunch(s, toolUseId);
|
|
399
|
+
}
|
|
394
400
|
const summary = summarizeClaudeToolUse(toolName, ev.tool_input || {});
|
|
395
401
|
pushRecentActivity(s.recentActivity, summary);
|
|
396
402
|
s.seenClaudeToolIds.add(toolUseId);
|
|
@@ -457,6 +463,14 @@ export function applyHookToolEvent(ev, s) {
|
|
|
457
463
|
s.activity = s.recentActivity.join('\n');
|
|
458
464
|
}
|
|
459
465
|
}
|
|
466
|
+
// Background Bash launch ack → map task id → tool_use for notification
|
|
467
|
+
// resolution (bash notifications usually omit <tool-use-id>).
|
|
468
|
+
if (toolName === 'Bash' && s.bgBashToolUseIds?.has(toolUseId)
|
|
469
|
+
&& !s.bgAgentCompletedToolUseIds?.has(toolUseId)) {
|
|
470
|
+
const taskId = extractClaudeBackgroundTaskId(ev.tool_response);
|
|
471
|
+
if (taskId && !s.bgTaskIdToToolUse.has(taskId))
|
|
472
|
+
s.bgTaskIdToToolUse.set(taskId, toolUseId);
|
|
473
|
+
}
|
|
460
474
|
s.seenClaudeToolResultIds.add(toolUseId);
|
|
461
475
|
return true;
|
|
462
476
|
}
|
|
@@ -630,10 +644,23 @@ const BG_RESETTLE_QUIET_MS = 30_000;
|
|
|
630
644
|
* is still expected. Hold until a fresh Stop or BG_RESETTLE_QUIET_MS of
|
|
631
645
|
* JSONL silence.
|
|
632
646
|
* - `terminate`: the Stop is the genuine end of the turn.
|
|
647
|
+
*
|
|
648
|
+
* The `hold-background` path carries a quiet-TTL: a genuinely-running
|
|
649
|
+
* background agent keeps emitting hook/sidecar/JSONL traffic, so a hold whose
|
|
650
|
+
* every channel has been silent past CLAUDE_TUI_STOP_HOLD_QUIET_TTL_MS is a
|
|
651
|
+
* phantom (lost <task-notification> / completion never observed). Releasing
|
|
652
|
+
* it as a normal Stop keeps the turn's clean semantics — letting the stall
|
|
653
|
+
* watchdog reap it instead would mislabel a finished turn 'stalled' and
|
|
654
|
+
* inject a confusing auto-resume prompt into the next turn.
|
|
633
655
|
*/
|
|
634
656
|
export function decideClaudeTuiStop(input) {
|
|
635
|
-
if (input.pendingBackgroundAgents > 0)
|
|
657
|
+
if (input.pendingBackgroundAgents > 0) {
|
|
658
|
+
const ttl = input.holdQuietTtlMs ?? CLAUDE_TUI_STOP_HOLD_QUIET_TTL_MS;
|
|
659
|
+
const lastActivityAt = Math.max(input.stoppedAt, input.lastJsonlEventAt, input.lastTaskNotificationAt, input.lastHookOrSidecarEventAt ?? 0);
|
|
660
|
+
if (input.now - lastActivityAt > ttl)
|
|
661
|
+
return 'terminate'; // 幽灵 hold:全通道静默超 TTL
|
|
636
662
|
return 'hold-background';
|
|
663
|
+
}
|
|
637
664
|
const stopIsStale = input.lastTaskNotificationAt > 0 && input.lastTaskNotificationAt >= input.stoppedAt;
|
|
638
665
|
if (stopIsStale) {
|
|
639
666
|
const quietMs = input.resettleQuietMs ?? BG_RESETTLE_QUIET_MS;
|
|
@@ -643,6 +670,40 @@ export function decideClaudeTuiStop(input) {
|
|
|
643
670
|
}
|
|
644
671
|
return 'terminate';
|
|
645
672
|
}
|
|
673
|
+
/**
 * Stall watchdog decision: has the in-flight turn gone dead?
 *
 * Background: the claude CLI has been observed to freeze mid-turn — the
 * process stays alive, the JSONL goes quiet, and no Stop hook or error ever
 * arrives — so without a watchdog the caller would wait forever.
 *
 * Two checks run in order:
 *
 * 1. Fast path (only when `lastPtyDataAt` is a positive timestamp): if the
 *    freshest of `lastPtyDataAt` and `lastProgressAt` is older than
 *    `ptyDeadMs` (default CLAUDE_TUI_STALL_PTY_DEAD_MS), report a stall
 *    right away. Passing `0`/`undefined`/`null` for `lastPtyDataAt` disarms
 *    this path.
 * 2. Slow path: compare the time since `lastProgressAt` against
 *    `pendingToolMs` (default CLAUDE_TUI_STALL_PENDING_TOOL_MS) when
 *    `pendingToolCount > 0`, otherwise against `quietMs`
 *    (default CLAUDE_TUI_STALL_QUIET_MS).
 *
 * Both comparisons are strict (`>`), so a gap exactly equal to the limit
 * still counts as waiting.
 *
 * @param {object} input
 * @param {number} input.now - Current wall-clock time in ms.
 * @param {number} input.lastProgressAt - Freshest structured liveness timestamp.
 * @param {number} input.pendingToolCount - Tools/background tasks still in flight.
 * @param {number} [input.lastPtyDataAt] - Time of the last raw PTY byte; falsy disarms the fast path.
 * @param {number} [input.ptyDeadMs] - Override for the fast-path limit.
 * @param {number} [input.pendingToolMs] - Override for the pending-tool limit.
 * @param {number} [input.quietMs] - Override for the plain quiet limit.
 * @returns {'stall' | 'wait'}
 */
export function decideClaudeTuiStall(input) {
    const { now, lastProgressAt, pendingToolCount } = input;
    const lastPtyByteAt = input.lastPtyDataAt ?? 0;

    // Fast path: the PTY and every structured signal are silent together.
    if (lastPtyByteAt > 0) {
        const fastLimitMs = input.ptyDeadMs ?? CLAUDE_TUI_STALL_PTY_DEAD_MS;
        const freshestSignalAt = Math.max(lastPtyByteAt, lastProgressAt);
        if (now - freshestSignalAt > fastLimitMs) {
            return 'stall';
        }
    }

    // Slow path: in-flight work earns the longer budget.
    let slowLimitMs;
    if (pendingToolCount > 0) {
        slowLimitMs = input.pendingToolMs ?? CLAUDE_TUI_STALL_PENDING_TOOL_MS;
    }
    else {
        slowLimitMs = input.quietMs ?? CLAUDE_TUI_STALL_QUIET_MS;
    }
    if (now - lastProgressAt > slowLimitMs) {
        return 'stall';
    }
    return 'wait';
}
|
|
646
707
|
// ---------------------------------------------------------------------------
|
|
647
708
|
// Main entry
|
|
648
709
|
// ---------------------------------------------------------------------------
|
|
@@ -914,9 +975,15 @@ export async function doClaudeTuiStream(opts) {
|
|
|
914
975
|
}
|
|
915
976
|
agentLog(`[claude-tui] pid=${proc.pid}`);
|
|
916
977
|
const dbg = process.env.PIKICLAW_CLAUDE_TUI_DEBUG === '1';
|
|
978
|
+
/** Wall-clock of the last raw PTY byte — stall watchdog fast-path signal. */
|
|
979
|
+
let lastPtyDataAt = Date.now();
|
|
917
980
|
proc.onData((data) => {
|
|
918
981
|
// We deliberately do not parse the TUI screen output. The JSONL is the
|
|
919
982
|
// canonical source of structured events. Stash bytes only when debugging.
|
|
983
|
+
// Raw byte arrival doubles as the cheapest liveness signal: a healthy TUI
|
|
984
|
+
// repaints continuously mid-turn, so PTY silence = event loop dead — feeds
|
|
985
|
+
// the stall watchdog's fast path (decideClaudeTuiStall.lastPtyDataAt).
|
|
986
|
+
lastPtyDataAt = Date.now();
|
|
920
987
|
if (dbg) {
|
|
921
988
|
try {
|
|
922
989
|
fs.appendFileSync(ptyLogPath, data);
|
|
@@ -981,6 +1048,16 @@ export async function doClaudeTuiStream(opts) {
|
|
|
981
1048
|
// Last pending-background count we logged, so the waiting state logs on
|
|
982
1049
|
// transitions instead of every 200ms poll tick.
|
|
983
1050
|
let lastLoggedPendingBg = -1;
|
|
1051
|
+
// Stall-watchdog liveness signals. Together with lastMainJsonlEventAt they
|
|
1052
|
+
// answer "is the claude process still doing anything at all?" — see
|
|
1053
|
+
// decideClaudeTuiStall for why this exists (claude CLI mid-turn freeze).
|
|
1054
|
+
let lastToolEventAt = start;
|
|
1055
|
+
let lastSidecarEventAt = 0;
|
|
1056
|
+
let stallKilled = false;
|
|
1057
|
+
/** Last state.stoppedAt for which pendingHookToolIds was reconciled. */
|
|
1058
|
+
let lastClearedStopAt = 0;
|
|
1059
|
+
/** Hook-reported tools still executing: PreToolUse seen, no PostToolUse. */
|
|
1060
|
+
const pendingHookToolIds = new Set();
|
|
984
1061
|
// Append-only tool-events log fed by PreToolUse / PostToolUse hooks. We
|
|
985
1062
|
// tail it with the same incremental reader the JSONL transcript uses, so
|
|
986
1063
|
// tool calls + plan changes surface live during the turn even while the
|
|
@@ -1004,6 +1081,18 @@ export async function doClaudeTuiStream(opts) {
|
|
|
1004
1081
|
catch {
|
|
1005
1082
|
continue;
|
|
1006
1083
|
}
|
|
1084
|
+
// Stall-watchdog bookkeeping: any hook event is proof of life, and the
|
|
1085
|
+
// Pre/Post pairing tells the watchdog whether a tool is mid-execution
|
|
1086
|
+
// (which extends the stall threshold — long foreground commands are
|
|
1087
|
+
// legitimately silent).
|
|
1088
|
+
lastToolEventAt = Date.now();
|
|
1089
|
+
const hookToolId = typeof ev?.tool_use_id === 'string' ? ev.tool_use_id : '';
|
|
1090
|
+
if (hookToolId) {
|
|
1091
|
+
if (ev?.event === 'PreToolUse')
|
|
1092
|
+
pendingHookToolIds.add(hookToolId);
|
|
1093
|
+
else if (ev?.event === 'PostToolUse')
|
|
1094
|
+
pendingHookToolIds.delete(hookToolId);
|
|
1095
|
+
}
|
|
1007
1096
|
// A Task PreToolUse and the first sub-agent tool PreToolUse can land in
|
|
1008
1097
|
// the same tick batch. If the sub-agent's hook arrives before we've
|
|
1009
1098
|
// discovered its sidecar (and thus before s.subAgentIdToParent knows
|
|
@@ -1105,6 +1194,10 @@ export async function doClaudeTuiStream(opts) {
|
|
|
1105
1194
|
any = true;
|
|
1106
1195
|
}
|
|
1107
1196
|
}
|
|
1197
|
+
// Stall-watchdog: live sub-agents count as turn progress even while the
|
|
1198
|
+
// parent thread is quietly waiting on them.
|
|
1199
|
+
if (any)
|
|
1200
|
+
lastSidecarEventAt = Date.now();
|
|
1108
1201
|
return any;
|
|
1109
1202
|
};
|
|
1110
1203
|
const tick = () => {
|
|
@@ -1220,17 +1313,40 @@ export async function doClaudeTuiStream(opts) {
|
|
|
1220
1313
|
// has reported its <task-notification> AND the latest Stop is fresher than
|
|
1221
1314
|
// the latest notification (i.e. the model's wrap-up segment finished).
|
|
1222
1315
|
if (state.stoppedAt && !stopHookFired) {
|
|
1316
|
+
// A fired Stop means no foreground tool is genuinely mid-flight any
|
|
1317
|
+
// more. Surviving entries in pendingHookToolIds are lost PostToolUse
|
|
1318
|
+
// hook events (MCP flap / hook timeout ate them) — clearing here stops
|
|
1319
|
+
// them from silently pushing the stall watchdog onto the 30-minute
|
|
1320
|
+
// pending-tool threshold for the rest of the turn.
|
|
1321
|
+
if (state.stoppedAt !== lastClearedStopAt) {
|
|
1322
|
+
lastClearedStopAt = state.stoppedAt;
|
|
1323
|
+
if (pendingHookToolIds.size) {
|
|
1324
|
+
agentWarn(`[claude-tui] Stop fired with ${pendingHookToolIds.size} unmatched PreToolUse event(s) — clearing (lost PostToolUse hooks)`);
|
|
1325
|
+
pendingHookToolIds.clear();
|
|
1326
|
+
}
|
|
1327
|
+
}
|
|
1223
1328
|
const pendingBg = pendingClaudeBackgroundAgentCount(s);
|
|
1224
1329
|
const decision = decideClaudeTuiStop({
|
|
1225
1330
|
stoppedAt: state.stoppedAt,
|
|
1226
1331
|
pendingBackgroundAgents: pendingBg,
|
|
1227
1332
|
lastTaskNotificationAt: s.lastTaskNotificationAt || 0,
|
|
1228
1333
|
lastJsonlEventAt: lastMainJsonlEventAt,
|
|
1334
|
+
lastHookOrSidecarEventAt: Math.max(lastToolEventAt, lastSidecarEventAt),
|
|
1335
|
+
// Background *Bash* is silent by nature (no sidecar/hook traffic while
|
|
1336
|
+
// it runs) — give it the long pending-tool budget; agent-only holds
|
|
1337
|
+
// keep the default TTL (live agents emit sidecar events constantly).
|
|
1338
|
+
holdQuietTtlMs: pendingClaudeBackgroundBashCount(s) > 0
|
|
1339
|
+
? CLAUDE_TUI_STALL_PENDING_TOOL_MS
|
|
1340
|
+
: undefined,
|
|
1229
1341
|
now: Date.now(),
|
|
1230
1342
|
});
|
|
1231
1343
|
if (decision === 'terminate') {
|
|
1232
1344
|
stopHookFired = true;
|
|
1233
1345
|
stopHookSeenAt = Date.now();
|
|
1346
|
+
if (pendingBg > 0) {
|
|
1347
|
+
// Phantom-hold release: the count says background agents are still pending, but every channel has been silent past the TTL.
|
|
1348
|
+
agentWarn(`[claude-tui] releasing phantom hold — ${pendingBg} background agent(s) still counted pending but every channel quiet past TTL; treating Stop as final`);
|
|
1349
|
+
}
|
|
1234
1350
|
agentLog(`[claude-tui] Stop hook fired — draining JSONL for ${POST_STOP_DRAIN_MS}ms before SIGTERM`);
|
|
1235
1351
|
}
|
|
1236
1352
|
else if (decision === 'hold-background' && pendingBg !== lastLoggedPendingBg) {
|
|
@@ -1248,6 +1364,48 @@ export async function doClaudeTuiStream(opts) {
|
|
|
1248
1364
|
// Continue polling so any post-Stop JSONL writes still get parsed; the
|
|
1249
1365
|
// process will exit shortly and onExit will resolve the wait.
|
|
1250
1366
|
}
|
|
1367
|
+
// Stall watchdog. claude CLI can freeze mid-turn (observed on 2.1.160):
|
|
1368
|
+
// a tool_result lands, then the next assistant segment never starts — the
|
|
1369
|
+
// process stays alive, every signal goes quiet, no Stop hook ever fires.
|
|
1370
|
+
// When ALL liveness signals have been silent past the threshold, declare
|
|
1371
|
+
// the turn stalled and SIGTERM; doClaudeWithRetry auto-resumes the session
|
|
1372
|
+
// once so the turn continues instead of spinning forever in the IM card.
|
|
1373
|
+
if (!stopHookFired && !timedOut && !interrupted && !stallKilled) {
|
|
1374
|
+
const lastProgressAt = Math.max(start, lastMainJsonlEventAt, lastToolEventAt, lastSidecarEventAt, state.stoppedAt || 0, state.promptSubmittedAt || 0);
|
|
1375
|
+
// Pending background work (agents + bash) extends the stall budget the
|
|
1376
|
+
// same way a pending foreground tool does: a silent 15-minute background
|
|
1377
|
+
// build must not get shot by the 10-minute quiet threshold. The PTY
|
|
1378
|
+
// fast path still catches true process freezes within minutes.
|
|
1379
|
+
const pendingBgForStall = pendingClaudeBackgroundAgentCount(s);
|
|
1380
|
+
// PTY fast path is for *mid-turn* freezes only. While the TUI idles in a
|
|
1381
|
+
// post-Stop background hold it legitimately paints nothing — a static
|
|
1382
|
+
// screen there is healthy, not frozen. Stop being the freshest signal is
|
|
1383
|
+
// exactly that hold state → disarm the fast path (0 = unavailable).
|
|
1384
|
+
const nonStopProgressAt = Math.max(start, lastMainJsonlEventAt, lastToolEventAt, lastSidecarEventAt, state.promptSubmittedAt || 0);
|
|
1385
|
+
const inPostStopHold = !!state.stoppedAt && state.stoppedAt >= nonStopProgressAt;
|
|
1386
|
+
const stallDecision = decideClaudeTuiStall({
|
|
1387
|
+
now: Date.now(),
|
|
1388
|
+
lastProgressAt,
|
|
1389
|
+
pendingToolCount: pendingHookToolIds.size + pendingBgForStall,
|
|
1390
|
+
lastPtyDataAt: inPostStopHold ? 0 : lastPtyDataAt,
|
|
1391
|
+
});
|
|
1392
|
+
if (stallDecision === 'stall') {
|
|
1393
|
+
stallKilled = true;
|
|
1394
|
+
const quietMin = Math.round((Date.now() - lastProgressAt) / 60_000);
|
|
1395
|
+
const ptyQuietS = Math.round((Date.now() - lastPtyDataAt) / 1000);
|
|
1396
|
+
s.stopReason = 'stalled';
|
|
1397
|
+
if (!s.errors) {
|
|
1398
|
+
s.errors = [`Claude process went silent mid-turn for ${quietMin}m (no JSONL, hook, or sub-agent events; PTY quiet ${ptyQuietS}s) — known claude CLI freeze. Terminated for auto-resume.`];
|
|
1399
|
+
}
|
|
1400
|
+
agentWarn(`[claude-tui] stall detected: no progress for ${quietMin}m (pendingTools=${pendingHookToolIds.size}, ptyQuiet=${ptyQuietS}s) — terminating TUI pid=${proc.pid} for auto-resume`);
|
|
1401
|
+
pushRecentActivity(s.recentActivity, `Agent stalled (${quietMin}m silent) — restarting turn`);
|
|
1402
|
+
s.activity = s.recentActivity.join('\n');
|
|
1403
|
+
emit();
|
|
1404
|
+
killProc('SIGTERM');
|
|
1405
|
+
// Keep polling: onExit resolves the wait and the final drains pick up
|
|
1406
|
+
// whatever the dying process flushes.
|
|
1407
|
+
}
|
|
1408
|
+
}
|
|
1251
1409
|
pollHandle = setTimeout(tick, POLL_INTERVAL_MS);
|
|
1252
1410
|
};
|
|
1253
1411
|
pollHandle = setTimeout(tick, POLL_INTERVAL_MS);
|
|
@@ -328,6 +328,8 @@ function ensureClaudeBgAgentState(s) {
|
|
|
328
328
|
s.bgAgentLaunchedToolUseIds = new Set();
|
|
329
329
|
if (!s.bgAgentCompletedToolUseIds)
|
|
330
330
|
s.bgAgentCompletedToolUseIds = new Set();
|
|
331
|
+
if (!s.bgBashToolUseIds)
|
|
332
|
+
s.bgBashToolUseIds = new Set();
|
|
331
333
|
if (!s.bgTaskIdToToolUse)
|
|
332
334
|
s.bgTaskIdToToolUse = new Map();
|
|
333
335
|
if (typeof s.lastTaskNotificationAt !== 'number')
|
|
@@ -341,7 +343,27 @@ export function registerClaudeBackgroundAgentLaunch(s, toolUseId) {
|
|
|
341
343
|
ensureClaudeBgAgentState(s);
|
|
342
344
|
s.bgAgentLaunchedToolUseIds.add(id);
|
|
343
345
|
}
|
|
344
|
-
/**
|
|
346
|
+
/**
 * Register a `Bash` tool_use that was started with `run_in_background: true`.
 *
 * A backgrounded Bash command shares the lifecycle of a backgrounded
 * sub-agent: it runs inside the claude process, its tool_result is only a
 * launch acknowledgement, and the real completion shows up later as a
 * `<task-notification>` that re-invokes the model in the same process.
 * Until this registration existed only Task/Agent launches were counted as
 * pending background work, so a turn that backgrounded a Bash command hit
 * Stop with pending=0 and the PTY was terminated — killing the command and
 * its future report-back turn (the "Claude background task gets killed the
 * moment the turn stops" failure).
 *
 * The id is recorded in both the shared launched-set (so it counts as
 * pending background work) and the Bash-only set (so callers can tell Bash
 * tasks apart). Blank or missing ids are ignored.
 *
 * @param {object} s - Stream state; its background-tracking sets are created on demand.
 * @param {*} toolUseId - tool_use id; coerced to a string and trimmed.
 * @returns {void}
 */
export function registerClaudeBackgroundBashLaunch(s, toolUseId) {
    const normalizedId = String(toolUseId || '').trim();
    if (normalizedId) {
        ensureClaudeBgAgentState(s);
        s.bgAgentLaunchedToolUseIds.add(normalizedId);
        s.bgBashToolUseIds.add(normalizedId);
    }
}
|
|
366
|
+
/** Launched background tasks (agents + bash) whose <task-notification> hasn't arrived yet. */
|
|
345
367
|
export function pendingClaudeBackgroundAgentCount(s) {
|
|
346
368
|
const launched = s?.bgAgentLaunchedToolUseIds;
|
|
347
369
|
if (!launched?.size)
|
|
@@ -354,6 +376,51 @@ export function pendingClaudeBackgroundAgentCount(s) {
|
|
|
354
376
|
}
|
|
355
377
|
return pending;
|
|
356
378
|
}
|
|
379
|
+
/**
 * Count background *Bash* launches that have not been marked completed.
 *
 * An id counts as pending when it is in `s.bgBashToolUseIds` but not in
 * `s.bgAgentCompletedToolUseIds`. Background commands are silent by nature
 * (unlike agents, whose sidecar JSONL keeps emitting while they run), so
 * callers use this count to choose a longer hold/stall budget.
 *
 * @param {object | null | undefined} s - Stream state; may be missing entirely.
 * @returns {number} Number of pending background Bash tasks (0 when none are tracked).
 */
export function pendingClaudeBackgroundBashCount(s) {
    const launchedBash = s?.bgBashToolUseIds;
    if (!launchedBash?.size) {
        return 0;
    }
    const finished = s?.bgAgentCompletedToolUseIds;
    return [...launchedBash].filter((toolUseId) => !finished?.has(toolUseId)).length;
}
|
|
394
|
+
/**
 * Pull the background task id out of a launch acknowledgement.
 *
 * Claude Code's backgrounded Bash tool_result reads like
 * "Command running in background with ID: bash_3 (output: …)". That id is
 * what the later <task-notification> carries (its <tool-use-id> is often
 * omitted for bash), so callers map id → tool_use from this value.
 *
 * Accepted inputs:
 *  - a string, used as-is;
 *  - an array of content blocks, of which only `{ type: 'text', text }`
 *    entries are read (joined with newlines);
 *  - any other object, searched via its JSON serialization.
 *
 * The text must mention "background" (case-insensitive) and contain a
 * standalone `ID` / `id` token, optionally followed by a colon (ASCII or
 * full-width) and an opening quote/backtick, then the id itself
 * (2–64 characters from `[A-Za-z0-9_-]`, starting with a letter or digit).
 *
 * @param {*} content - tool_result content / hook tool_response.
 * @returns {string | null} The task id, or null when none can be found.
 */
export function extractClaudeBackgroundTaskId(content) {
    let text = '';
    if (typeof content === 'string') {
        text = content;
    }
    else if (Array.isArray(content)) {
        text = content
            .filter((b) => b?.type === 'text' && typeof b.text === 'string')
            .map((b) => b.text)
            .join('\n');
    }
    else if (content && typeof content === 'object') {
        try {
            text = JSON.stringify(content);
        }
        catch {
            // Unserializable payload (e.g. cyclic) — nothing to search.
            return null;
        }
    }
    if (!text || !/background/i.test(text))
        return null;
    // The trailing \b is essential: without it the marker also matched the
    // first two letters of ordinary words ("idle", "identical", "IDE"), and
    // because the first match wins, such a word appearing before the real
    // "ID: …" yielded a bogus id made of the rest of that word.
    const m = text.match(/\b(?:ID|id)\b\s*[::]?\s*[`"']?([A-Za-z0-9][A-Za-z0-9_-]{1,63})/);
    return m ? m[1] : null;
}
|
|
357
424
|
/**
|
|
358
425
|
* Parse a `<task-notification>` wrapper out of a user event's content.
|
|
359
426
|
* Shape (observed, Claude Code 2.x):
|
|
@@ -583,6 +650,12 @@ export function claudeParse(ev, s) {
|
|
|
583
650
|
s.claudeToolsById.set(toolId, { name: toolName, summary: subAgent.description || 'Run task' });
|
|
584
651
|
continue;
|
|
585
652
|
}
|
|
653
|
+
// Background Bash — same in-process lifecycle as a backgrounded agent:
|
|
654
|
+
// launch ack now, <task-notification> later. Register so the TUI driver
|
|
655
|
+
// holds the PTY open instead of SIGTERMing the command mid-flight.
|
|
656
|
+
if (toolName === 'Bash' && block?.input?.run_in_background === true) {
|
|
657
|
+
registerClaudeBackgroundBashLaunch(s, toolId);
|
|
658
|
+
}
|
|
586
659
|
const tool = {
|
|
587
660
|
name: toolName,
|
|
588
661
|
summary: summarizeClaudeToolUse(block?.name, block?.input || {}),
|
|
@@ -667,6 +740,15 @@ export function claudeParse(ev, s) {
|
|
|
667
740
|
tool.result = previewToolCallResult(block?.content);
|
|
668
741
|
tool.status = block?.is_error ? 'failed' : 'done';
|
|
669
742
|
}
|
|
743
|
+
// Background Bash launch ack → map its task id to the tool_use so the
|
|
744
|
+
// later <task-notification> (which usually omits <tool-use-id> for bash)
|
|
745
|
+
// can resolve and decrement the pending count.
|
|
746
|
+
if (tool?.name === 'Bash' && s.bgBashToolUseIds?.has(toolId)
|
|
747
|
+
&& !s.bgAgentCompletedToolUseIds?.has(toolId)) {
|
|
748
|
+
const taskId = extractClaudeBackgroundTaskId(block?.content);
|
|
749
|
+
if (taskId && !s.bgTaskIdToToolUse.has(taskId))
|
|
750
|
+
s.bgTaskIdToToolUse.set(taskId, toolId);
|
|
751
|
+
}
|
|
670
752
|
pushRecentActivity(s.recentActivity, summarizeClaudeToolResult(tool, block, ev.tool_use_result));
|
|
671
753
|
// MCP / Skill tool_result with multimodal content — recurse for image
|
|
672
754
|
// entries so the final StreamResult carries them. Filesystem-reading
|
|
@@ -2275,8 +2357,53 @@ function makeOverloadFriendlyResult(result, reason, attempts) {
|
|
|
2275
2357
|
* friendly human-readable explanation in `message` so the IM card doesn't
|
|
2276
2358
|
* dump raw "API Error: Overloaded" text on the user.
|
|
2277
2359
|
*/
|
|
2360
|
+
/**
|
|
2361
|
+
* Continuation prompt for stall recovery. The frozen process already accepted
|
|
2362
|
+
* and partially executed the user's prompt (it sits in the transcript), so the
|
|
2363
|
+
* resumed process must NOT receive the original prompt again — it gets an
|
|
2364
|
+
* explicit "pick up where you left off" instead.
|
|
2365
|
+
*/
|
|
2366
|
+
const CLAUDE_STALL_RESUME_PROMPT = '[pikiclaw] The previous agent process stalled mid-turn and was restarted. '
|
|
2367
|
+
+ 'Continue the task from where it left off — do not start over or repeat work that already completed.';
|
|
2368
|
+
/** At most one automatic resume per turn; a second stall surfaces to the user. */
|
|
2369
|
+
const CLAUDE_STALL_RESUME_LIMIT = 1;
|
|
2278
2370
|
async function doClaudeWithRetry(opts) {
|
|
2279
2371
|
let lastResult = await doClaudeStreamOnce(opts);
|
|
2372
|
+
// Mid-turn stall recovery. The TUI driver SIGTERMs a frozen claude process
|
|
2373
|
+
// (stopReason 'stalled' — see decideClaudeTuiStall in claude-tui.ts) instead
|
|
2374
|
+
// of letting the IM card spin forever. Resume the same session once with a
|
|
2375
|
+
// continuation prompt so the turn picks up where the frozen process died.
|
|
2376
|
+
let stallResumes = 0;
|
|
2377
|
+
while (lastResult.stopReason === 'stalled'
|
|
2378
|
+
&& stallResumes < CLAUDE_STALL_RESUME_LIMIT
|
|
2379
|
+
&& !opts.abortSignal?.aborted) {
|
|
2380
|
+
const stalledSessionId = lastResult.sessionId || opts.sessionId;
|
|
2381
|
+
if (!stalledSessionId)
|
|
2382
|
+
break;
|
|
2383
|
+
stallResumes++;
|
|
2384
|
+
agentWarn(`[claude] turn stalled mid-flight; auto-resuming session ${stalledSessionId.slice(0, 8)} (${stallResumes}/${CLAUDE_STALL_RESUME_LIMIT})`);
|
|
2385
|
+
lastResult = await doClaudeStreamOnce({
|
|
2386
|
+
...opts,
|
|
2387
|
+
sessionId: stalledSessionId,
|
|
2388
|
+
forkOf: undefined,
|
|
2389
|
+
prompt: CLAUDE_STALL_RESUME_PROMPT,
|
|
2390
|
+
attachments: undefined,
|
|
2391
|
+
});
|
|
2392
|
+
}
|
|
2393
|
+
if (lastResult.stopReason === 'stalled') {
|
|
2394
|
+
// Still stalled after the resume budget (or no session id to resume).
|
|
2395
|
+
// Surface a self-explanatory failure instead of the raw error text.
|
|
2396
|
+
return {
|
|
2397
|
+
...lastResult,
|
|
2398
|
+
ok: false,
|
|
2399
|
+
incomplete: true,
|
|
2400
|
+
message: [
|
|
2401
|
+
'The agent process stalled mid-turn and could not be auto-recovered (known claude CLI freeze, seen on 2.1.160).',
|
|
2402
|
+
'Your session is intact — re-send your message (or say "continue") to pick up where it stopped.',
|
|
2403
|
+
'If this keeps happening, pin the claude CLI to a known-good version: npm install -g @anthropic-ai/claude-code@2.1.159',
|
|
2404
|
+
].join(' '),
|
|
2405
|
+
};
|
|
2406
|
+
}
|
|
2280
2407
|
let attempts = 0;
|
|
2281
2408
|
// Use the error text recorded by detectClaudeApiError-driven branches to
|
|
2282
2409
|
// decide retry: lastResult.error is "Anthropic API error: <reason>" on
|
package/dist/core/constants.js
CHANGED
|
@@ -287,6 +287,48 @@ export const AGENT_STREAM_HARD_KILL_GRACE_MS = 10_000;
|
|
|
287
287
|
* resumed via --resume, can see it in the transcript.
|
|
288
288
|
*/
|
|
289
289
|
export const AGENT_GRACEFUL_ABORT_GRACE_MS = 2_000;
|
|
290
|
+
/**
|
|
291
|
+
* claude-tui stall watchdog — claude CLI is known to freeze mid-turn (observed
|
|
292
|
+
* 2026-06-02 on 2.1.160: after a tool_result lands, the next assistant segment
|
|
293
|
+
* never starts; the process stays alive, the JSONL goes permanently quiet, no
|
|
294
|
+
* Stop hook ever fires). When every live signal (main JSONL, hook tool events,
|
|
295
|
+
* sub-agent sidecars, hook lifecycle state) is silent past the threshold the
|
|
296
|
+
* driver SIGTERMs the PTY and the dispatch wrapper auto-resumes the session
|
|
297
|
+
* once. Quiet threshold must sit safely above the longest healthy gap between
|
|
298
|
+
* JSONL events — a single max-effort inference can take a few minutes before
|
|
299
|
+
* its first content block lands.
|
|
300
|
+
*/
|
|
301
|
+
export const CLAUDE_TUI_STALL_QUIET_MS = 10 * 60_000;
|
|
302
|
+
/**
|
|
303
|
+
* Stall threshold while a hook-reported tool is still executing (PreToolUse
|
|
304
|
+
* seen, no matching PostToolUse). Claude's own Bash timeout caps foreground
|
|
305
|
+
* commands at ~10 minutes and fires PostToolUse either way, so a pending tool
|
|
306
|
+
* silent for this long means the freeze hit mid-execution.
|
|
307
|
+
*/
|
|
308
|
+
export const CLAUDE_TUI_STALL_PENDING_TOOL_MS = 30 * 60_000;
|
|
309
|
+
/**
|
|
310
|
+
* Fast-path stall: a healthy claude TUI repaints continuously while a turn is
|
|
311
|
+
* in flight (spinner frames, stream ticks, status line) — the PTY never goes
|
|
312
|
+
* byte-silent for minutes. If NO PTY output arrives for this long AND every
|
|
313
|
+
* structured signal is equally quiet, the process event loop itself is gone
|
|
314
|
+
* (the 2.1.160 mid-turn freeze: attachment lands → next API call never
|
|
315
|
+
* assembles). Declare the stall now instead of waiting out the 10/30-minute
|
|
316
|
+
* quiet thresholds — turns a 10–30 minute hang into a ~3 minute self-recovery.
|
|
317
|
+
* False-positive safe: long thinking / long Bash keep painting frames, which
|
|
318
|
+
* refreshes the PTY signal and defers this path to the slow thresholds.
|
|
319
|
+
*/
|
|
320
|
+
export const CLAUDE_TUI_STALL_PTY_DEAD_MS = 3 * 60_000;
|
|
321
|
+
/**
|
|
322
|
+
* TTL for the post-Stop `hold-background` path. The hold protects
|
|
323
|
+
* run_in_background agents living inside the claude process — but a live
|
|
324
|
+
* agent keeps emitting hook/sidecar/JSONL traffic. If the hold sees no
|
|
325
|
+
* activity on ANY channel for this long, the pending count is phantom (lost
|
|
326
|
+
* <task-notification>, agents already finished): release as a NORMAL Stop.
|
|
327
|
+
* Without this TTL the stall watchdog eventually fires instead, mislabels the
|
|
328
|
+
* cleanly-finished turn 'stalled', and injects a confusing auto-resume prompt
|
|
329
|
+
* (the "turn had clearly finished answering yet still got a Continue prompt injected" symptom).
|
|
330
|
+
*/
|
|
331
|
+
export const CLAUDE_TUI_STOP_HOLD_QUIET_TTL_MS = 10 * 60_000;
|
|
290
332
|
/** Codex-specific grace period added to the user-configured timeout. */
|
|
291
333
|
export const CODEX_STREAM_HARD_KILL_GRACE_MS = 5_000;
|
|
292
334
|
/**
|
package/package.json
CHANGED