pikiclaw 0.3.61 → 0.3.62

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -43,7 +43,7 @@ import { tmpdir } from 'node:os';
43
43
  import { Q, agentLog, agentWarn, buildStreamPreviewMeta, computeContext, joinErrorMessages, emitSessionIdUpdate, normalizeClaudeModelId, pushRecentActivity, summarizeClaudeToolUse, summarizeClaudeToolResult, previewToolCallInput, previewToolCallResult, detectClaudeApiError, } from '../utils.js';
44
44
  import { encodePathAsDirName, getHome, whichSync } from '../../core/platform.js';
45
45
  import { stripAnsiEscapes } from '../../core/utils.js';
46
- import { AGENT_STREAM_HARD_KILL_GRACE_MS } from '../../core/constants.js';
46
+ import { AGENT_STREAM_HARD_KILL_GRACE_MS, CLAUDE_TUI_STALL_QUIET_MS, CLAUDE_TUI_STALL_PENDING_TOOL_MS, } from '../../core/constants.js';
47
47
  import { claudeParse, createClaudeStreamState, claudeContextWindowFromModel, claudeEffectiveContextWindow, registerClaudeBackgroundAgentLaunch, pendingClaudeBackgroundAgentCount, } from './claude.js';
48
48
  async function loadPty() {
49
49
  // Dynamic import keeps node-pty an optional dependency — if it's not
@@ -643,6 +643,26 @@ export function decideClaudeTuiStop(input) {
643
643
  }
644
644
  return 'terminate';
645
645
  }
646
+ /**
647
+ * Decide whether the turn has gone dead. claude CLI is known to freeze
648
+ * mid-turn (observed 2026-06-02 on 2.1.160): after a tool_result lands the
649
+ * next assistant segment never starts — the process stays alive, the JSONL
650
+ * goes permanently quiet, no Stop hook ever fires, no error surfaces. Without
651
+ * a watchdog the IM card spins forever.
652
+ *
653
+ * `lastProgressAt` is the freshest of every live signal the driver tracks
654
+ * (main JSONL, hook tool events, sub-agent sidecars, hook lifecycle state).
655
+ * A pending tool (PreToolUse seen, no PostToolUse) extends the threshold:
656
+ * the freeze can also hit mid-execution, but a legitimately long foreground
657
+ * command must not get shot — claude's own Bash timeout fires PostToolUse
658
+ * well inside CLAUDE_TUI_STALL_PENDING_TOOL_MS.
659
+ */
660
+ export function decideClaudeTuiStall(input) {
661
+ const threshold = input.pendingToolCount > 0
662
+ ? (input.pendingToolMs ?? CLAUDE_TUI_STALL_PENDING_TOOL_MS)
663
+ : (input.quietMs ?? CLAUDE_TUI_STALL_QUIET_MS);
664
+ return input.now - input.lastProgressAt > threshold ? 'stall' : 'wait';
665
+ }
646
666
  // ---------------------------------------------------------------------------
647
667
  // Main entry
648
668
  // ---------------------------------------------------------------------------
@@ -981,6 +1001,14 @@ export async function doClaudeTuiStream(opts) {
981
1001
  // Last pending-background count we logged, so the waiting state logs on
982
1002
  // transitions instead of every 200ms poll tick.
983
1003
  let lastLoggedPendingBg = -1;
1004
+ // Stall-watchdog liveness signals. Together with lastMainJsonlEventAt they
1005
+ // answer "is the claude process still doing anything at all?" — see
1006
+ // decideClaudeTuiStall for why this exists (claude CLI mid-turn freeze).
1007
+ let lastToolEventAt = start;
1008
+ let lastSidecarEventAt = 0;
1009
+ let stallKilled = false;
1010
+ /** Hook-reported tools still executing: PreToolUse seen, no PostToolUse. */
1011
+ const pendingHookToolIds = new Set();
984
1012
  // Append-only tool-events log fed by PreToolUse / PostToolUse hooks. We
985
1013
  // tail it with the same incremental reader the JSONL transcript uses, so
986
1014
  // tool calls + plan changes surface live during the turn even while the
@@ -1004,6 +1032,18 @@ export async function doClaudeTuiStream(opts) {
1004
1032
  catch {
1005
1033
  continue;
1006
1034
  }
1035
+ // Stall-watchdog bookkeeping: any hook event is proof of life, and the
1036
+ // Pre/Post pairing tells the watchdog whether a tool is mid-execution
1037
+ // (which extends the stall threshold — long foreground commands are
1038
+ // legitimately silent).
1039
+ lastToolEventAt = Date.now();
1040
+ const hookToolId = typeof ev?.tool_use_id === 'string' ? ev.tool_use_id : '';
1041
+ if (hookToolId) {
1042
+ if (ev?.event === 'PreToolUse')
1043
+ pendingHookToolIds.add(hookToolId);
1044
+ else if (ev?.event === 'PostToolUse')
1045
+ pendingHookToolIds.delete(hookToolId);
1046
+ }
1007
1047
  // A Task PreToolUse and the first sub-agent tool PreToolUse can land in
1008
1048
  // the same tick batch. If the sub-agent's hook arrives before we've
1009
1049
  // discovered its sidecar (and thus before s.subAgentIdToParent knows
@@ -1105,6 +1145,10 @@ export async function doClaudeTuiStream(opts) {
1105
1145
  any = true;
1106
1146
  }
1107
1147
  }
1148
+ // Stall-watchdog: live sub-agents count as turn progress even while the
1149
+ // parent thread is quietly waiting on them.
1150
+ if (any)
1151
+ lastSidecarEventAt = Date.now();
1108
1152
  return any;
1109
1153
  };
1110
1154
  const tick = () => {
@@ -1248,6 +1292,35 @@ export async function doClaudeTuiStream(opts) {
1248
1292
  // Continue polling so any post-Stop JSONL writes still get parsed; the
1249
1293
  // process will exit shortly and onExit will resolve the wait.
1250
1294
  }
1295
+ // Stall watchdog. claude CLI can freeze mid-turn (observed on 2.1.160):
1296
+ // a tool_result lands, then the next assistant segment never starts — the
1297
+ // process stays alive, every signal goes quiet, no Stop hook ever fires.
1298
+ // When ALL liveness signals have been silent past the threshold, declare
1299
+ // the turn stalled and SIGTERM; doClaudeWithRetry auto-resumes the session
1300
+ // once so the turn continues instead of spinning forever in the IM card.
1301
+ if (!stopHookFired && !timedOut && !interrupted && !stallKilled) {
1302
+ const lastProgressAt = Math.max(start, lastMainJsonlEventAt, lastToolEventAt, lastSidecarEventAt, state.stoppedAt || 0, state.promptSubmittedAt || 0);
1303
+ const stallDecision = decideClaudeTuiStall({
1304
+ now: Date.now(),
1305
+ lastProgressAt,
1306
+ pendingToolCount: pendingHookToolIds.size,
1307
+ });
1308
+ if (stallDecision === 'stall') {
1309
+ stallKilled = true;
1310
+ const quietMin = Math.round((Date.now() - lastProgressAt) / 60_000);
1311
+ s.stopReason = 'stalled';
1312
+ if (!s.errors) {
1313
+ s.errors = [`Claude process went silent mid-turn for ${quietMin}m (no JSONL, hook, or sub-agent events) — known claude CLI freeze. Terminated for auto-resume.`];
1314
+ }
1315
+ agentWarn(`[claude-tui] stall detected: no progress for ${quietMin}m (pendingTools=${pendingHookToolIds.size}) — terminating TUI pid=${proc.pid} for auto-resume`);
1316
+ pushRecentActivity(s.recentActivity, `Agent stalled (${quietMin}m silent) — restarting turn`);
1317
+ s.activity = s.recentActivity.join('\n');
1318
+ emit();
1319
+ killProc('SIGTERM');
1320
+ // Keep polling: onExit resolves the wait and the final drains pick up
1321
+ // whatever the dying process flushes.
1322
+ }
1323
+ }
1251
1324
  pollHandle = setTimeout(tick, POLL_INTERVAL_MS);
1252
1325
  };
1253
1326
  pollHandle = setTimeout(tick, POLL_INTERVAL_MS);
@@ -2275,8 +2275,53 @@ function makeOverloadFriendlyResult(result, reason, attempts) {
2275
2275
  * friendly human-readable explanation in `message` so the IM card doesn't
2276
2276
  * dump raw "API Error: Overloaded" text on the user.
2277
2277
  */
2278
+ /**
2279
+ * Continuation prompt for stall recovery. The frozen process already accepted
2280
+ * and partially executed the user's prompt (it sits in the transcript), so the
2281
+ * resumed process must NOT receive the original prompt again — it gets an
2282
+ * explicit "pick up where you left off" instead.
2283
+ */
2284
+ const CLAUDE_STALL_RESUME_PROMPT = '[pikiclaw] The previous agent process stalled mid-turn and was restarted. '
2285
+ + 'Continue the task from where it left off — do not start over or repeat work that already completed.';
2286
+ /** At most one automatic resume per turn; a second stall surfaces to the user. */
2287
+ const CLAUDE_STALL_RESUME_LIMIT = 1;
2278
2288
  async function doClaudeWithRetry(opts) {
2279
2289
  let lastResult = await doClaudeStreamOnce(opts);
2290
+ // Mid-turn stall recovery. The TUI driver SIGTERMs a frozen claude process
2291
+ // (stopReason 'stalled' — see decideClaudeTuiStall in claude-tui.ts) instead
2292
+ // of letting the IM card spin forever. Resume the same session once with a
2293
+ // continuation prompt so the turn picks up where the frozen process died.
2294
+ let stallResumes = 0;
2295
+ while (lastResult.stopReason === 'stalled'
2296
+ && stallResumes < CLAUDE_STALL_RESUME_LIMIT
2297
+ && !opts.abortSignal?.aborted) {
2298
+ const stalledSessionId = lastResult.sessionId || opts.sessionId;
2299
+ if (!stalledSessionId)
2300
+ break;
2301
+ stallResumes++;
2302
+ agentWarn(`[claude] turn stalled mid-flight; auto-resuming session ${stalledSessionId.slice(0, 8)} (${stallResumes}/${CLAUDE_STALL_RESUME_LIMIT})`);
2303
+ lastResult = await doClaudeStreamOnce({
2304
+ ...opts,
2305
+ sessionId: stalledSessionId,
2306
+ forkOf: undefined,
2307
+ prompt: CLAUDE_STALL_RESUME_PROMPT,
2308
+ attachments: undefined,
2309
+ });
2310
+ }
2311
+ if (lastResult.stopReason === 'stalled') {
2312
+ // Still stalled after the resume budget (or no session id to resume).
2313
+ // Surface a self-explanatory failure instead of the raw error text.
2314
+ return {
2315
+ ...lastResult,
2316
+ ok: false,
2317
+ incomplete: true,
2318
+ message: [
2319
+ 'The agent process stalled mid-turn and could not be auto-recovered (known claude CLI freeze, seen on 2.1.160).',
2320
+ 'Your session is intact — re-send your message (or say "continue") to pick up where it stopped.',
2321
+ 'If this keeps happening, pin the claude CLI to a known-good version: npm install -g @anthropic-ai/claude-code@2.1.159',
2322
+ ].join(' '),
2323
+ };
2324
+ }
2280
2325
  let attempts = 0;
2281
2326
  // Use the error text recorded by detectClaudeApiError-driven branches to
2282
2327
  // decide retry: lastResult.error is "Anthropic API error: <reason>" on
@@ -287,6 +287,25 @@ export const AGENT_STREAM_HARD_KILL_GRACE_MS = 10_000;
287
287
  * resumed via --resume, can see it in the transcript.
288
288
  */
289
289
  export const AGENT_GRACEFUL_ABORT_GRACE_MS = 2_000;
290
+ /**
291
+ * claude-tui stall watchdog — claude CLI is known to freeze mid-turn (observed
292
+ * 2026-06-02 on 2.1.160: after a tool_result lands, the next assistant segment
293
+ * never starts; the process stays alive, the JSONL goes permanently quiet, no
294
+ * Stop hook ever fires). When every live signal (main JSONL, hook tool events,
295
+ * sub-agent sidecars, hook lifecycle state) is silent past the threshold the
296
+ * driver SIGTERMs the PTY and the dispatch wrapper auto-resumes the session
297
+ * once. Quiet threshold must sit safely above the longest healthy gap between
298
+ * JSONL events — a single max-effort inference can take a few minutes before
299
+ * its first content block lands.
300
+ */
301
+ export const CLAUDE_TUI_STALL_QUIET_MS = 10 * 60_000;
302
+ /**
303
+ * Stall threshold while a hook-reported tool is still executing (PreToolUse
304
+ * seen, no matching PostToolUse). Claude's own Bash timeout caps foreground
305
+ * commands at ~10 minutes and fires PostToolUse either way, so a pending tool
306
+ * silent for this long means the freeze hit mid-execution.
307
+ */
308
+ export const CLAUDE_TUI_STALL_PENDING_TOOL_MS = 30 * 60_000;
290
309
  /** Codex-specific grace period added to the user-configured timeout. */
291
310
  export const CODEX_STREAM_HARD_KILL_GRACE_MS = 5_000;
292
311
  /**
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pikiclaw",
3
- "version": "0.3.61",
3
+ "version": "0.3.62",
4
4
  "description": "Put the world's smartest AI agents in your pocket. Command local Claude & Gemini via IM. | 让最好用的 IM 变成你电脑上的顶级 Agent 控制台",
5
5
  "type": "module",
6
6
  "bin": {