npm - pikiclaw - Versions diffs - 0.3.61 → 0.3.63 - Mend

pikiclaw 0.3.61 → 0.3.63

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

package/dist/agent/drivers/claude-tui.js +161 -3
package/dist/agent/drivers/claude.js +128 -1
package/dist/core/constants.js +42 -0
package/package.json +1 -1

package/dist/agent/drivers/claude-tui.js CHANGED Viewed

@@ -43,8 +43,8 @@ import { tmpdir } from 'node:os';
 import { Q, agentLog, agentWarn, buildStreamPreviewMeta, computeContext, joinErrorMessages, emitSessionIdUpdate, normalizeClaudeModelId, pushRecentActivity, summarizeClaudeToolUse, summarizeClaudeToolResult, previewToolCallInput, previewToolCallResult, detectClaudeApiError, } from '../utils.js';
 import { encodePathAsDirName, getHome, whichSync } from '../../core/platform.js';
 import { stripAnsiEscapes } from '../../core/utils.js';
-import { AGENT_STREAM_HARD_KILL_GRACE_MS } from '../../core/constants.js';
-import { claudeParse, createClaudeStreamState, claudeContextWindowFromModel, claudeEffectiveContextWindow, registerClaudeBackgroundAgentLaunch, pendingClaudeBackgroundAgentCount, } from './claude.js';
+import { AGENT_STREAM_HARD_KILL_GRACE_MS, CLAUDE_TUI_STALL_QUIET_MS, CLAUDE_TUI_STALL_PENDING_TOOL_MS, CLAUDE_TUI_STALL_PTY_DEAD_MS, CLAUDE_TUI_STOP_HOLD_QUIET_TTL_MS, } from '../../core/constants.js';
+import { claudeParse, createClaudeStreamState, claudeContextWindowFromModel, claudeEffectiveContextWindow, registerClaudeBackgroundAgentLaunch, pendingClaudeBackgroundAgentCount, registerClaudeBackgroundBashLaunch, pendingClaudeBackgroundBashCount, extractClaudeBackgroundTaskId, } from './claude.js';
 async function loadPty() {
     // Dynamic import keeps node-pty an optional dependency — if it's not
     // installed the print-mode dispatcher in claude.ts will catch the throw
@@ -391,6 +391,12 @@ export function applyHookToolEvent(ev, s) {
             s.claudeToolsById.set(toolUseId, { name: toolName, summary: desc || kind || 'Sub-agent' });
             return true;
         }
+        // Background Bash — register like a backgrounded agent so the turn's Stop
+        // holds the PTY open until its <task-notification> lands, instead of
+        // SIGTERMing the still-running command (and its future report-back turn).
+        if (toolName === 'Bash' && ev.tool_input?.run_in_background === true) {
+            registerClaudeBackgroundBashLaunch(s, toolUseId);
+        }
         const summary = summarizeClaudeToolUse(toolName, ev.tool_input || {});
         pushRecentActivity(s.recentActivity, summary);
         s.seenClaudeToolIds.add(toolUseId);
@@ -457,6 +463,14 @@ export function applyHookToolEvent(ev, s) {
                 s.activity = s.recentActivity.join('\n');
             }
         }
+        // Background Bash launch ack → map task id → tool_use for notification
+        // resolution (bash notifications usually omit <tool-use-id>).
+        if (toolName === 'Bash' && s.bgBashToolUseIds?.has(toolUseId)
+            && !s.bgAgentCompletedToolUseIds?.has(toolUseId)) {
+            const taskId = extractClaudeBackgroundTaskId(ev.tool_response);
+            if (taskId && !s.bgTaskIdToToolUse.has(taskId))
+                s.bgTaskIdToToolUse.set(taskId, toolUseId);
+        }
         s.seenClaudeToolResultIds.add(toolUseId);
         return true;
     }
@@ -630,10 +644,23 @@ const BG_RESETTLE_QUIET_MS = 30_000;
  *    is still expected. Hold until a fresh Stop or BG_RESETTLE_QUIET_MS of
  *    JSONL silence.
  *  - `terminate`: the Stop is the genuine end of the turn.
+ *
+ * The `hold-background` path carries a quiet-TTL: a genuinely-running
+ * background agent keeps emitting hook/sidecar/JSONL traffic, so a hold whose
+ * every channel has been silent past CLAUDE_TUI_STOP_HOLD_QUIET_TTL_MS is a
+ * phantom (lost <task-notification> / completion never observed). Releasing
+ * it as a normal Stop keeps the turn's clean semantics — letting the stall
+ * watchdog reap it instead would mislabel a finished turn 'stalled' and
+ * inject a confusing auto-resume prompt into the next turn.
  */
 export function decideClaudeTuiStop(input) {
-    if (input.pendingBackgroundAgents > 0)
+    if (input.pendingBackgroundAgents > 0) {
+        const ttl = input.holdQuietTtlMs ?? CLAUDE_TUI_STOP_HOLD_QUIET_TTL_MS;
+        const lastActivityAt = Math.max(input.stoppedAt, input.lastJsonlEventAt, input.lastTaskNotificationAt, input.lastHookOrSidecarEventAt ?? 0);
+        if (input.now - lastActivityAt > ttl)
+            return 'terminate'; // 幽灵 hold:全通道静默超 TTL
         return 'hold-background';
+    }
     const stopIsStale = input.lastTaskNotificationAt > 0 && input.lastTaskNotificationAt >= input.stoppedAt;
     if (stopIsStale) {
         const quietMs = input.resettleQuietMs ?? BG_RESETTLE_QUIET_MS;
@@ -643,6 +670,40 @@ export function decideClaudeTuiStop(input) {
     }
     return 'terminate';
 }
+/**
+ * Decide whether the turn has gone dead. claude CLI is known to freeze
+ * mid-turn (observed 2026-06-02 on 2.1.160): after a tool_result lands the
+ * next assistant segment never starts — the process stays alive, the JSONL
+ * goes permanently quiet, no Stop hook ever fires, no error surfaces. Without
+ * a watchdog the IM card spins forever.
+ *
+ * `lastProgressAt` is the freshest of every live signal the driver tracks
+ * (main JSONL, hook tool events, sub-agent sidecars, hook lifecycle state).
+ * A pending tool (PreToolUse seen, no PostToolUse) extends the threshold:
+ * the freeze can also hit mid-execution, but a legitimately long foreground
+ * command must not get shot — claude's own Bash timeout fires PostToolUse
+ * well inside CLAUDE_TUI_STALL_PENDING_TOOL_MS.
+ *
+ * Fast path: `lastPtyDataAt` is raw PTY output (any repaint frame counts). A
+ * healthy TUI animates continuously mid-turn — spinner, stream ticks, status
+ * line — so PTY byte-silence is the cheapest possible "event loop is dead"
+ * detector. When BOTH the PTY and all structured signals have been silent
+ * past `ptyDeadMs`, declare the stall immediately instead of waiting out the
+ * 10/30-minute quiet thresholds. Long thinking and long foreground commands
+ * keep painting frames, which routes them to the slow thresholds as before.
+ *
+ * Evaluation order: the fast path is checked first and only when
+ * `lastPtyDataAt` is greater than 0; otherwise (or when the fast path does not
+ * trip) the slow threshold decides. All comparisons are strict (`>`), so a gap
+ * exactly equal to a threshold still yields 'wait'.
+ *
+ * @param {object} input
+ * @param {number} input.now - Current time in ms; the driver passes Date.now().
+ * @param {number} input.lastProgressAt - Freshest structured liveness timestamp (ms).
+ * @param {number} input.pendingToolCount - When greater than 0 the pending-tool
+ *   threshold is used instead of the quiet threshold.
+ * @param {number} [input.lastPtyDataAt] - Time of the last raw PTY output (ms).
+ *   Missing, null or 0 disables the fast path.
+ * @param {number} [input.ptyDeadMs] - Fast-path threshold override; defaults to
+ *   CLAUDE_TUI_STALL_PTY_DEAD_MS.
+ * @param {number} [input.pendingToolMs] - Pending-tool threshold override;
+ *   defaults to CLAUDE_TUI_STALL_PENDING_TOOL_MS.
+ * @param {number} [input.quietMs] - Quiet threshold override; defaults to
+ *   CLAUDE_TUI_STALL_QUIET_MS.
+ * @returns {'stall' | 'wait'} 'stall' when the applicable threshold has been
+ *   exceeded, otherwise 'wait'.
+ */
+export function decideClaudeTuiStall(input) {
+    const ptyAt = input.lastPtyDataAt ?? 0;
+    if (ptyAt > 0) {
+        const ptyDeadMs = input.ptyDeadMs ?? CLAUDE_TUI_STALL_PTY_DEAD_MS;
+        if (input.now - Math.max(ptyAt, input.lastProgressAt) > ptyDeadMs)
+            return 'stall';
+    }
+    const threshold = input.pendingToolCount > 0
+        ? (input.pendingToolMs ?? CLAUDE_TUI_STALL_PENDING_TOOL_MS)
+        : (input.quietMs ?? CLAUDE_TUI_STALL_QUIET_MS);
+    return input.now - input.lastProgressAt > threshold ? 'stall' : 'wait';
+}
 // ---------------------------------------------------------------------------
 // Main entry
 // ---------------------------------------------------------------------------
@@ -914,9 +975,15 @@ export async function doClaudeTuiStream(opts) {
     }
     agentLog(`[claude-tui] pid=${proc.pid}`);
     const dbg = process.env.PIKICLAW_CLAUDE_TUI_DEBUG === '1';
+    /** Wall-clock of the last raw PTY byte — stall watchdog fast-path signal. */
+    let lastPtyDataAt = Date.now();
     proc.onData((data) => {
         // We deliberately do not parse the TUI screen output. The JSONL is the
         // canonical source of structured events. Stash bytes only when debugging.
+        // Raw byte arrival doubles as the cheapest liveness signal: a healthy TUI
+        // repaints continuously mid-turn, so PTY silence = event loop dead — feeds
+        // the stall watchdog's fast path (decideClaudeTuiStall.lastPtyDataAt).
+        lastPtyDataAt = Date.now();
         if (dbg) {
             try {
                 fs.appendFileSync(ptyLogPath, data);
@@ -981,6 +1048,16 @@ export async function doClaudeTuiStream(opts) {
     // Last pending-background count we logged, so the waiting state logs on
     // transitions instead of every 200ms poll tick.
     let lastLoggedPendingBg = -1;
+    // Stall-watchdog liveness signals. Together with lastMainJsonlEventAt they
+    // answer "is the claude process still doing anything at all?" — see
+    // decideClaudeTuiStall for why this exists (claude CLI mid-turn freeze).
+    let lastToolEventAt = start;
+    let lastSidecarEventAt = 0;
+    let stallKilled = false;
+    /** Last state.stoppedAt for which pendingHookToolIds was reconciled. */
+    let lastClearedStopAt = 0;
+    /** Hook-reported tools still executing: PreToolUse seen, no PostToolUse. */
+    const pendingHookToolIds = new Set();
     // Append-only tool-events log fed by PreToolUse / PostToolUse hooks. We
     // tail it with the same incremental reader the JSONL transcript uses, so
     // tool calls + plan changes surface live during the turn even while the
@@ -1004,6 +1081,18 @@ export async function doClaudeTuiStream(opts) {
             catch {
                 continue;
             }
+            // Stall-watchdog bookkeeping: any hook event is proof of life, and the
+            // Pre/Post pairing tells the watchdog whether a tool is mid-execution
+            // (which extends the stall threshold — long foreground commands are
+            // legitimately silent).
+            lastToolEventAt = Date.now();
+            const hookToolId = typeof ev?.tool_use_id === 'string' ? ev.tool_use_id : '';
+            if (hookToolId) {
+                if (ev?.event === 'PreToolUse')
+                    pendingHookToolIds.add(hookToolId);
+                else if (ev?.event === 'PostToolUse')
+                    pendingHookToolIds.delete(hookToolId);
+            }
             // A Task PreToolUse and the first sub-agent tool PreToolUse can land in
             // the same tick batch. If the sub-agent's hook arrives before we've
             // discovered its sidecar (and thus before s.subAgentIdToParent knows
@@ -1105,6 +1194,10 @@ export async function doClaudeTuiStream(opts) {
                 any = true;
             }
         }
+        // Stall-watchdog: live sub-agents count as turn progress even while the
+        // parent thread is quietly waiting on them.
+        if (any)
+            lastSidecarEventAt = Date.now();
         return any;
     };
     const tick = () => {
@@ -1220,17 +1313,40 @@ export async function doClaudeTuiStream(opts) {
         // has reported its <task-notification> AND the latest Stop is fresher than
         // the latest notification (i.e. the model's wrap-up segment finished).
         if (state.stoppedAt && !stopHookFired) {
+            // A fired Stop means no foreground tool is genuinely mid-flight any
+            // more. Surviving entries in pendingHookToolIds are lost PostToolUse
+            // hook events (MCP flap / hook timeout ate them) — clearing here stops
+            // them from silently pushing the stall watchdog onto the 30-minute
+            // pending-tool threshold for the rest of the turn.
+            if (state.stoppedAt !== lastClearedStopAt) {
+                lastClearedStopAt = state.stoppedAt;
+                if (pendingHookToolIds.size) {
+                    agentWarn(`[claude-tui] Stop fired with ${pendingHookToolIds.size} unmatched PreToolUse event(s) — clearing (lost PostToolUse hooks)`);
+                    pendingHookToolIds.clear();
+                }
+            }
             const pendingBg = pendingClaudeBackgroundAgentCount(s);
             const decision = decideClaudeTuiStop({
                 stoppedAt: state.stoppedAt,
                 pendingBackgroundAgents: pendingBg,
                 lastTaskNotificationAt: s.lastTaskNotificationAt || 0,
                 lastJsonlEventAt: lastMainJsonlEventAt,
+                lastHookOrSidecarEventAt: Math.max(lastToolEventAt, lastSidecarEventAt),
+                // Background *Bash* is silent by nature (no sidecar/hook traffic while
+                // it runs) — give it the long pending-tool budget; agent-only holds
+                // keep the default TTL (live agents emit sidecar events constantly).
+                holdQuietTtlMs: pendingClaudeBackgroundBashCount(s) > 0
+                    ? CLAUDE_TUI_STALL_PENDING_TOOL_MS
+                    : undefined,
                 now: Date.now(),
             });
             if (decision === 'terminate') {
                 stopHookFired = true;
                 stopHookSeenAt = Date.now();
+                if (pendingBg > 0) {
+                    // 幽灵 hold 释放:计数说还有后台 agent,但所有通道静默已超 TTL。
+                    agentWarn(`[claude-tui] releasing phantom hold — ${pendingBg} background agent(s) still counted pending but every channel quiet past TTL; treating Stop as final`);
+                }
                 agentLog(`[claude-tui] Stop hook fired — draining JSONL for ${POST_STOP_DRAIN_MS}ms before SIGTERM`);
             }
             else if (decision === 'hold-background' && pendingBg !== lastLoggedPendingBg) {
@@ -1248,6 +1364,48 @@ export async function doClaudeTuiStream(opts) {
             // Continue polling so any post-Stop JSONL writes still get parsed; the
             // process will exit shortly and onExit will resolve the wait.
         }
+        // Stall watchdog. claude CLI can freeze mid-turn (observed on 2.1.160):
+        // a tool_result lands, then the next assistant segment never starts — the
+        // process stays alive, every signal goes quiet, no Stop hook ever fires.
+        // When ALL liveness signals have been silent past the threshold, declare
+        // the turn stalled and SIGTERM; doClaudeWithRetry auto-resumes the session
+        // once so the turn continues instead of spinning forever in the IM card.
+        if (!stopHookFired && !timedOut && !interrupted && !stallKilled) {
+            const lastProgressAt = Math.max(start, lastMainJsonlEventAt, lastToolEventAt, lastSidecarEventAt, state.stoppedAt || 0, state.promptSubmittedAt || 0);
+            // Pending background work (agents + bash) extends the stall budget the
+            // same way a pending foreground tool does: a silent 15-minute background
+            // build must not get shot by the 10-minute quiet threshold. The PTY
+            // fast path still catches true process freezes within minutes.
+            const pendingBgForStall = pendingClaudeBackgroundAgentCount(s);
+            // PTY fast path is for *mid-turn* freezes only. While the TUI idles in a
+            // post-Stop background hold it legitimately paints nothing — a static
+            // screen there is healthy, not frozen. Stop being the freshest signal is
+            // exactly that hold state → disarm the fast path (0 = unavailable).
+            const nonStopProgressAt = Math.max(start, lastMainJsonlEventAt, lastToolEventAt, lastSidecarEventAt, state.promptSubmittedAt || 0);
+            const inPostStopHold = !!state.stoppedAt && state.stoppedAt >= nonStopProgressAt;
+            const stallDecision = decideClaudeTuiStall({
+                now: Date.now(),
+                lastProgressAt,
+                pendingToolCount: pendingHookToolIds.size + pendingBgForStall,
+                lastPtyDataAt: inPostStopHold ? 0 : lastPtyDataAt,
+            });
+            if (stallDecision === 'stall') {
+                stallKilled = true;
+                const quietMin = Math.round((Date.now() - lastProgressAt) / 60_000);
+                const ptyQuietS = Math.round((Date.now() - lastPtyDataAt) / 1000);
+                s.stopReason = 'stalled';
+                if (!s.errors) {
+                    s.errors = [`Claude process went silent mid-turn for ${quietMin}m (no JSONL, hook, or sub-agent events; PTY quiet ${ptyQuietS}s) — known claude CLI freeze. Terminated for auto-resume.`];
+                }
+                agentWarn(`[claude-tui] stall detected: no progress for ${quietMin}m (pendingTools=${pendingHookToolIds.size}, ptyQuiet=${ptyQuietS}s) — terminating TUI pid=${proc.pid} for auto-resume`);
+                pushRecentActivity(s.recentActivity, `Agent stalled (${quietMin}m silent) — restarting turn`);
+                s.activity = s.recentActivity.join('\n');
+                emit();
+                killProc('SIGTERM');
+                // Keep polling: onExit resolves the wait and the final drains pick up
+                // whatever the dying process flushes.
+            }
+        }
         pollHandle = setTimeout(tick, POLL_INTERVAL_MS);
     };
     pollHandle = setTimeout(tick, POLL_INTERVAL_MS);

package/dist/agent/drivers/claude.js CHANGED Viewed

@@ -328,6 +328,8 @@ function ensureClaudeBgAgentState(s) {
         s.bgAgentLaunchedToolUseIds = new Set();
     if (!s.bgAgentCompletedToolUseIds)
         s.bgAgentCompletedToolUseIds = new Set();
+    if (!s.bgBashToolUseIds)
+        s.bgBashToolUseIds = new Set();
     if (!s.bgTaskIdToToolUse)
         s.bgTaskIdToToolUse = new Map();
     if (typeof s.lastTaskNotificationAt !== 'number')
@@ -341,7 +343,27 @@ export function registerClaudeBackgroundAgentLaunch(s, toolUseId) {
     ensureClaudeBgAgentState(s);
     s.bgAgentLaunchedToolUseIds.add(id);
 }
-/** Launched background agents whose <task-notification> hasn't arrived yet. */
+/**
+ * Record a `Bash` tool_use launched with `run_in_background: true`.
+ *
+ * Background Bash lives *inside the claude process* exactly like a
+ * backgrounded sub-agent: its tool_result is a launch ack, the real
+ * completion arrives later as a `<task-notification>` which re-invokes the
+ * model in the same process. Before this registration existed only Task/Agent
+ * launches counted as "pending background work" — a turn that backgrounded a
+ * Bash command would hit Stop, decideClaudeTuiStop saw pending=0 and
+ * terminated the PTY, killing the command and its future report-back turn
+ * (the "claude background task gets killed the moment the turn stops" failure).
+ *
+ * The id is coerced to a string and trimmed; an empty result is ignored and
+ * nothing is recorded. A non-empty id is added to BOTH
+ * `s.bgAgentLaunchedToolUseIds` (so it is counted together with background
+ * agents) and `s.bgBashToolUseIds` (so Bash launches can be told apart).
+ *
+ * @param {object} s - Stream state; the tracking collections are created on
+ *   demand by ensureClaudeBgAgentState before the id is stored.
+ * @param {*} toolUseId - tool_use id of the backgrounded Bash call.
+ * @returns {void}
+ */
+export function registerClaudeBackgroundBashLaunch(s, toolUseId) {
+    const id = String(toolUseId || '').trim();
+    if (!id)
+        return;
+    ensureClaudeBgAgentState(s);
+    s.bgAgentLaunchedToolUseIds.add(id);
+    s.bgBashToolUseIds.add(id);
+}
+/** Launched background tasks (agents + bash) whose <task-notification> hasn't arrived yet. */
 export function pendingClaudeBackgroundAgentCount(s) {
     const launched = s?.bgAgentLaunchedToolUseIds;
     if (!launched?.size)
@@ -354,6 +376,51 @@ export function pendingClaudeBackgroundAgentCount(s) {
     }
     return pending;
 }
+/**
+ * Count pending background *Bash* tasks specifically. Unlike agents (whose
+ * sidecar JSONL keeps emitting events while alive), a background command is
+ * silent by nature — callers use this to pick a longer hold/stall budget.
+ *
+ * A task is pending when its tool_use id is in `s.bgBashToolUseIds` but not in
+ * `s.bgAgentCompletedToolUseIds`. A missing completed set means every
+ * registered Bash id counts as pending.
+ *
+ * @param {object} [s] - Stream state; may be null/undefined.
+ * @returns {number} Number of pending background Bash tasks; 0 when `s` is
+ *   missing or no background Bash launch has been registered.
+ */
+export function pendingClaudeBackgroundBashCount(s) {
+    const bash = s?.bgBashToolUseIds;
+    if (!bash?.size)
+        return 0;
+    const completed = s?.bgAgentCompletedToolUseIds;
+    let pending = 0;
+    for (const id of bash) {
+        if (!completed?.has(id))
+            pending++;
+    }
+    return pending;
+}
+/**
+ * Pull the background task id out of a launch ack. Claude Code's backgrounded
+ * Bash tool_result reads like "Command running in background with ID: bash_3
+ * (output: …)" — the id is what the later <task-notification> carries (its
+ * <tool-use-id> is often omitted for bash), so mapping id → tool_use here is
+ * what lets applyClaudeTaskNotification resolve the completion.
+ *
+ * Input normalisation:
+ *  - string: used as-is;
+ *  - array: the `text` of every `{ type: 'text' }` entry, joined with newlines;
+ *  - any other object: its JSON.stringify output (null is returned if
+ *    serialisation throws);
+ *  - anything else: treated as empty.
+ *
+ * The text must contain "background" (case-insensitive), otherwise null is
+ * returned. The id is the first match of "ID" or "id", optionally followed by
+ * an ASCII or full-width colon and an opening quote/backtick, then 2–64
+ * characters from [A-Za-z0-9_-] starting with a letter or digit.
+ *
+ * NOTE(review): the pattern has no boundary after "ID"/"id", so a word that
+ * merely starts with "id" (e.g. "idle") earlier in the text would match first
+ * and the remainder of that word would be returned as the task id. Mixed-case
+ * "Id" is not matched, and a JSON key written as "id":"…" does not match either
+ * because the closing quote precedes the colon. Confirm that the ack shapes
+ * seen in practice cannot hit these cases.
+ *
+ * @param {*} content - tool_result content or hook tool_response.
+ * @returns {string | null} The extracted task id, or null when none is found.
+ */
+export function extractClaudeBackgroundTaskId(content) {
+    let text = '';
+    if (typeof content === 'string')
+        text = content;
+    else if (Array.isArray(content)) {
+        text = content
+            .filter((b) => b?.type === 'text' && typeof b.text === 'string')
+            .map((b) => b.text)
+            .join('\n');
+    }
+    else if (content && typeof content === 'object') {
+        try {
+            text = JSON.stringify(content);
+        }
+        catch {
+            return null;
+        }
+    }
+    if (!text || !/background/i.test(text))
+        return null;
+    const m = text.match(/\b(?:ID|id)\s*[:：]?\s*[`"']?([A-Za-z0-9][A-Za-z0-9_-]{1,63})/);
+    return m ? m[1] : null;
+}
 /**
  * Parse a `<task-notification>` wrapper out of a user event's content.
  * Shape (observed, Claude Code 2.x):
@@ -583,6 +650,12 @@ export function claudeParse(ev, s) {
                 s.claudeToolsById.set(toolId, { name: toolName, summary: subAgent.description || 'Run task' });
                 continue;
             }
+            // Background Bash — same in-process lifecycle as a backgrounded agent:
+            // launch ack now, <task-notification> later. Register so the TUI driver
+            // holds the PTY open instead of SIGTERMing the command mid-flight.
+            if (toolName === 'Bash' && block?.input?.run_in_background === true) {
+                registerClaudeBackgroundBashLaunch(s, toolId);
+            }
             const tool = {
                 name: toolName,
                 summary: summarizeClaudeToolUse(block?.name, block?.input || {}),
@@ -667,6 +740,15 @@ export function claudeParse(ev, s) {
                 tool.result = previewToolCallResult(block?.content);
                 tool.status = block?.is_error ? 'failed' : 'done';
             }
+            // Background Bash launch ack → map its task id to the tool_use so the
+            // later <task-notification> (which usually omits <tool-use-id> for bash)
+            // can resolve and decrement the pending count.
+            if (tool?.name === 'Bash' && s.bgBashToolUseIds?.has(toolId)
+                && !s.bgAgentCompletedToolUseIds?.has(toolId)) {
+                const taskId = extractClaudeBackgroundTaskId(block?.content);
+                if (taskId && !s.bgTaskIdToToolUse.has(taskId))
+                    s.bgTaskIdToToolUse.set(taskId, toolId);
+            }
             pushRecentActivity(s.recentActivity, summarizeClaudeToolResult(tool, block, ev.tool_use_result));
             // MCP / Skill tool_result with multimodal content — recurse for image
             // entries so the final StreamResult carries them. Filesystem-reading
@@ -2275,8 +2357,53 @@ function makeOverloadFriendlyResult(result, reason, attempts) {
  * friendly human-readable explanation in `message` so the IM card doesn't
  * dump raw "API Error: Overloaded" text on the user.
  */
+/**
+ * Continuation prompt for stall recovery. The frozen process already accepted
+ * and partially executed the user's prompt (it sits in the transcript), so the
+ * resumed process must NOT receive the original prompt again — it gets an
+ * explicit "pick up where you left off" instead.
+ *
+ * NOTE(review): these constants were inserted directly beneath an existing doc
+ * comment that talks about a friendly `message` for overloaded-API results.
+ * That comment looks like it belongs to the retry wrapper declared after these
+ * constants — confirm it has not been detached from its function.
+ */
+const CLAUDE_STALL_RESUME_PROMPT = '[pikiclaw] The previous agent process stalled mid-turn and was restarted. '
+    + 'Continue the task from where it left off — do not start over or repeat work that already completed.';
+/**
+ * At most one automatic resume per turn; a second stall surfaces to the user.
+ * Used as the upper bound of the stall-resume loop in doClaudeWithRetry.
+ */
+const CLAUDE_STALL_RESUME_LIMIT = 1;
 async function doClaudeWithRetry(opts) {
     let lastResult = await doClaudeStreamOnce(opts);
+    // Mid-turn stall recovery. The TUI driver SIGTERMs a frozen claude process
+    // (stopReason 'stalled' — see decideClaudeTuiStall in claude-tui.ts) instead
+    // of letting the IM card spin forever. Resume the same session once with a
+    // continuation prompt so the turn picks up where the frozen process died.
+    let stallResumes = 0;
+    while (lastResult.stopReason === 'stalled'
+        && stallResumes < CLAUDE_STALL_RESUME_LIMIT
+        && !opts.abortSignal?.aborted) {
+        const stalledSessionId = lastResult.sessionId || opts.sessionId;
+        if (!stalledSessionId)
+            break;
+        stallResumes++;
+        agentWarn(`[claude] turn stalled mid-flight; auto-resuming session ${stalledSessionId.slice(0, 8)} (${stallResumes}/${CLAUDE_STALL_RESUME_LIMIT})`);
+        lastResult = await doClaudeStreamOnce({
+            ...opts,
+            sessionId: stalledSessionId,
+            forkOf: undefined,
+            prompt: CLAUDE_STALL_RESUME_PROMPT,
+            attachments: undefined,
+        });
+    }
+    if (lastResult.stopReason === 'stalled') {
+        // Still stalled after the resume budget (or no session id to resume).
+        // Surface a self-explanatory failure instead of the raw error text.
+        return {
+            ...lastResult,
+            ok: false,
+            incomplete: true,
+            message: [
+                'The agent process stalled mid-turn and could not be auto-recovered (known claude CLI freeze, seen on 2.1.160).',
+                'Your session is intact — re-send your message (or say "continue") to pick up where it stopped.',
+                'If this keeps happening, pin the claude CLI to a known-good version: npm install -g @anthropic-ai/claude-code@2.1.159',
+            ].join(' '),
+        };
+    }
     let attempts = 0;
     // Use the error text recorded by detectClaudeApiError-driven branches to
     // decide retry: lastResult.error is "Anthropic API error: <reason>" on

package/dist/core/constants.js CHANGED Viewed

@@ -287,6 +287,48 @@ export const AGENT_STREAM_HARD_KILL_GRACE_MS = 10_000;
  * resumed via --resume, can see it in the transcript.
  */
 export const AGENT_GRACEFUL_ABORT_GRACE_MS = 2_000;
+/**
+ * claude-tui stall watchdog — claude CLI is known to freeze mid-turn (observed
+ * 2026-06-02 on 2.1.160: after a tool_result lands, the next assistant segment
+ * never starts; the process stays alive, the JSONL goes permanently quiet, no
+ * Stop hook ever fires). When every live signal (main JSONL, hook tool events,
+ * sub-agent sidecars, hook lifecycle state) is silent past the threshold the
+ * driver SIGTERMs the PTY and the dispatch wrapper auto-resumes the session
+ * once. Quiet threshold must sit safely above the longest healthy gap between
+ * JSONL events — a single max-effort inference can take a few minutes before
+ * its first content block lands.
+ *
+ * Value: 10 minutes, in milliseconds. Applies when no tool or background task
+ * is counted as pending.
+ */
+export const CLAUDE_TUI_STALL_QUIET_MS = 10 * 60_000;
+/**
+ * Stall threshold while a hook-reported tool is still executing (PreToolUse
+ * seen, no matching PostToolUse). Claude's own Bash timeout caps foreground
+ * commands at ~10 minutes and fires PostToolUse either way, so a pending tool
+ * silent for this long means the freeze hit mid-execution.
+ *
+ * Value: 30 minutes, in milliseconds. The TUI driver also counts pending
+ * background tasks towards this threshold, and reuses the same value as the
+ * post-Stop hold TTL while a background Bash task is pending.
+ */
+export const CLAUDE_TUI_STALL_PENDING_TOOL_MS = 30 * 60_000;
+/**
+ * Fast-path stall: a healthy claude TUI repaints continuously while a turn is
+ * in flight (spinner frames, stream ticks, status line) — the PTY never goes
+ * byte-silent for minutes. If NO PTY output arrives for this long AND every
+ * structured signal is equally quiet, the process event loop itself is gone
+ * (the 2.1.160 mid-turn freeze: attachment lands → next API call never
+ * assembles). Declare the stall now instead of waiting out the 10/30-minute
+ * quiet thresholds — turns a 10–30 minute hang into a ~3 minute self-heal.
+ * False-positive safe: long thinking / long Bash keep painting frames, which
+ * refreshes the PTY signal and defers this path to the slow thresholds.
+ *
+ * Value: 3 minutes, in milliseconds.
+ */
+export const CLAUDE_TUI_STALL_PTY_DEAD_MS = 3 * 60_000;
+/**
+ * TTL for the post-Stop `hold-background` path. The hold protects
+ * run_in_background agents living inside the claude process — but a live
+ * agent keeps emitting hook/sidecar/JSONL traffic. If the hold sees no
+ * activity on ANY channel for this long, the pending count is phantom (lost
+ * <task-notification>, agents already finished): release as a NORMAL Stop.
+ * Without this TTL the stall watchdog eventually fires instead, mislabels the
+ * cleanly-finished turn 'stalled', and injects a confusing auto-resume prompt
+ * (the "turn had clearly finished answering yet still got a Continue prompt
+ * injected" symptom).
+ *
+ * Value: 10 minutes, in milliseconds. This is the default only: the TUI driver
+ * passes CLAUDE_TUI_STALL_PENDING_TOOL_MS instead while a background Bash task
+ * is pending.
+ */
+export const CLAUDE_TUI_STOP_HOLD_QUIET_TTL_MS = 10 * 60_000;
 /** Codex-specific grace period added to the user-configured timeout. */
 export const CODEX_STREAM_HARD_KILL_GRACE_MS = 5_000;
 /**

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "pikiclaw",
-  "version": "0.3.61",
+  "version": "0.3.63",
   "description": "Put the world's smartest AI agents in your pocket. Command local Claude & Gemini via IM. | 让最好用的 IM 变成你电脑上的顶级 Agent 控制台",
   "type": "module",
   "bin": {