@visorcraft/idlehands 1.0.5 → 1.0.8

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/dist/agent.js CHANGED
@@ -1766,7 +1766,12 @@ export async function createSession(opts) {
1766
1766
  let noToolTurns = 0;
1767
1767
  const NO_TOOL_REPROMPT_THRESHOLD = 2;
1768
1768
  let repromptUsed = false;
1769
- let blockedPackageInstallAttempts = 0;
1769
+ // Track blocked command loops by exact reason+command signature.
1770
+ const blockedExecAttemptsBySig = new Map();
1771
+ // Keep a lightweight breadcrumb for diagnostics on partial failures.
1772
+ let lastSuccessfulTestRun = null;
1773
+ // One-time nudge to prevent post-success churn after green test runs.
1774
+ let finalizeAfterTestsNudgeUsed = false;
1770
1775
  const maybeInjectVaultContext = async () => {
1771
1776
  if (!vault || vaultMode !== 'passive')
1772
1777
  return;
@@ -2363,7 +2368,25 @@ export async function createSession(opts) {
2363
2368
  const value = await builtInFn(ctx, args);
2364
2369
  content = typeof value === 'string' ? value : JSON.stringify(value);
2365
2370
  if (name === 'exec') {
2366
- blockedPackageInstallAttempts = 0;
2371
+ // Successful exec clears blocked-loop counters.
2372
+ blockedExecAttemptsBySig.clear();
2373
+ // Capture successful test runs for better partial-failure diagnostics.
2374
+ try {
2375
+ const parsed = JSON.parse(content);
2376
+ const cmd = String(args?.command ?? '');
2377
+ const out = String(parsed?.out ?? '');
2378
+ const rc = Number(parsed?.rc ?? NaN);
2379
+ const looksLikeTest = /(^|\s)(node\s+--test|npm\s+test|pnpm\s+test|yarn\s+test|pytest|go\s+test|cargo\s+test|ctest)(\s|$)/i.test(cmd);
2380
+ if (looksLikeTest && Number.isFinite(rc) && rc === 0) {
2381
+ lastSuccessfulTestRun = {
2382
+ command: cmd,
2383
+ outputPreview: out.slice(0, 400),
2384
+ };
2385
+ }
2386
+ }
2387
+ catch {
2388
+ // Ignore parse issues; non-JSON exec output is tolerated.
2389
+ }
2367
2390
  }
2368
2391
  }
2369
2392
  else if (isLspTool && lspManager) {
@@ -2466,14 +2489,32 @@ export async function createSession(opts) {
2466
2489
  if (e instanceof AgentLoopBreak)
2467
2490
  throw e;
2468
2491
  const msg = e?.message ?? String(e);
2469
- // Fast-fail package-install bypass loops in non-yolo modes.
2492
+ // Fast-fail repeated blocked command loops with accurate reason labeling.
2470
2493
  // Applies to direct exec attempts and spawn_task delegation attempts.
2471
- if ((tc.function.name === 'exec' || tc.function.name === 'spawn_task') &&
2472
- /package install\/remove.*(?:blocked|restricted)|without --no-confirm\/--yolo/i.test(msg)) {
2473
- blockedPackageInstallAttempts += 1;
2474
- if (blockedPackageInstallAttempts >= 2) {
2475
- throw new AgentLoopBreak(`${tc.function.name}: repeated blocked package-install attempts in current approval mode. ` +
2476
- 'Do not retry or delegate this. Continue with a zero-dependency path, or ask the user to restart with --no-confirm/--yolo.');
2494
+ if (tc.function.name === 'exec' || tc.function.name === 'spawn_task') {
2495
+ const blockedMatch = msg.match(/^exec:\s*blocked\s*\(([^)]+)\)\s*without --no-confirm\/--yolo:\s*(.*)$/i)
2496
+ || msg.match(/^(spawn_task):\s*blocked\s*—\s*(.*)$/i);
2497
+ if (blockedMatch) {
2498
+ const reason = (blockedMatch[1] || blockedMatch[2] || 'blocked command').trim();
2499
+ let parsedArgs = {};
2500
+ try {
2501
+ parsedArgs = JSON.parse(tc.function.arguments ?? '{}');
2502
+ }
2503
+ catch { }
2504
+ const cmd = tc.function.name === 'exec'
2505
+ ? String(parsedArgs?.command ?? '')
2506
+ : String(parsedArgs?.task ?? '');
2507
+ const normalizedReason = reason.toLowerCase();
2508
+ const aggregateByReason = normalizedReason.includes('package install/remove');
2509
+ const sig = aggregateByReason
2510
+ ? `${tc.function.name}|${reason}`
2511
+ : `${tc.function.name}|${reason}|${cmd}`;
2512
+ const count = (blockedExecAttemptsBySig.get(sig) ?? 0) + 1;
2513
+ blockedExecAttemptsBySig.set(sig, count);
2514
+ if (count >= 2) {
2515
+ throw new AgentLoopBreak(`${tc.function.name}: repeated blocked command attempts (${reason}) in current approval mode. ` +
2516
+ 'Do not retry the same blocked command. Choose a safe alternative, skip cleanup, or ask the user to restart with --no-confirm/--yolo.');
2517
+ }
2477
2518
  }
2478
2519
  }
2479
2520
  // Hook: onToolResult for errors (Phase 8.5)
@@ -2541,6 +2582,16 @@ export async function createSession(opts) {
2541
2582
  for (const r of results) {
2542
2583
  messages.push({ role: 'tool', tool_call_id: r.id, content: r.content });
2543
2584
  }
2585
+ // If tests are green and we've already made edits, nudge for final summary
2586
+ // once to avoid extra non-essential demo/cleanup turns.
2587
+ if (!finalizeAfterTestsNudgeUsed && lastSuccessfulTestRun && mutationVersion > 0) {
2588
+ finalizeAfterTestsNudgeUsed = true;
2589
+ messages.push({
2590
+ role: 'user',
2591
+ content: '[system] Tests passed successfully. If the requested work is complete, provide the final summary now and stop. ' +
2592
+ 'Only continue with additional commands if the user explicitly requested extra demos/cleanup.',
2593
+ });
2594
+ }
2544
2595
  // ── Escalating cumulative read budget (§ anti-scan guardrails) ──
2545
2596
  // Warn zone: append warnings to each read result when approaching the hard cap
2546
2597
  if (cumulativeReadOnlyCalls > READ_BUDGET_WARN && cumulativeReadOnlyCalls <= READ_BUDGET_HARD) {
@@ -2646,7 +2697,10 @@ export async function createSession(opts) {
2646
2697
  return { text: assistantText, turns, toolCalls };
2647
2698
  }
2648
2699
  const reason = `max iterations exceeded (${maxIters})`;
2649
- throw new Error(reason);
2700
+ const diag = lastSuccessfulTestRun
2701
+ ? ` Last successful test run: ${lastSuccessfulTestRun.command}`
2702
+ : '';
2703
+ throw new Error(reason + diag);
2650
2704
  }
2651
2705
  catch (e) {
2652
2706
  // Some code paths (or upstream libs) may incorrectly throw `undefined`.
@@ -2669,6 +2723,10 @@ export async function createSession(opts) {
2669
2723
  throw err;
2670
2724
  }
2671
2725
  await persistFailure(e, `ask turn ${turns}`);
2726
+ const lastTestCmd = lastSuccessfulTestRun?.command;
2727
+ if (e instanceof AgentLoopBreak && lastTestCmd) {
2728
+ e.message += `\n[diagnostic] last successful test run: ${lastTestCmd}`;
2729
+ }
2672
2730
  // Never rethrow undefined; normalize to Error for debuggability.
2673
2731
  if (e === undefined) {
2674
2732
  throw new Error('BUG: threw undefined (normalized at ask() boundary)');