@visorcraft/idlehands 1.1.3 → 1.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/agent.js CHANGED
@@ -14,7 +14,8 @@ import { MCPManager } from './mcp.js';
14
14
  import { LspManager, detectInstalledLspServers } from './lsp.js';
15
15
  import fs from 'node:fs/promises';
16
16
  import path from 'node:path';
17
- import { stateDir } from './utils.js';
17
+ import { spawnSync } from 'node:child_process';
18
+ import { stateDir, BASH_PATH as BASH } from './utils.js';
18
19
  function makeAbortController() {
19
20
  // Node 24: AbortController is global.
20
21
  return new AbortController();
@@ -115,6 +116,66 @@ function toolResultSummary(name, args, content, success) {
115
116
  return content.slice(0, 80);
116
117
  }
117
118
  }
119
+ const CACHED_EXEC_OBSERVATION_HINT = '[idlehands hint] Reused cached output for repeated read-only exec call (unchanged observation).';
120
/**
 * Extract the shell command from a tool-call signature of the form
 * `exec:<raw JSON arguments>`.
 *
 * @param {string} sig - Tool-call signature (`<toolName>:<raw JSON arguments>`).
 * @returns {string} The `command` field of the JSON arguments, or `''` when the
 *   signature is not an exec signature, the arguments are not valid JSON, or
 *   `command` is not a string.
 */
function execCommandFromSig(sig) {
    const prefix = 'exec:';
    if (!sig.startsWith(prefix)) {
        return '';
    }
    let execArgs;
    try {
        execArgs = JSON.parse(sig.slice(prefix.length));
    }
    catch {
        // Malformed argument JSON: there is no command to report.
        return '';
    }
    const candidate = execArgs?.command;
    return typeof candidate === 'string' ? candidate : '';
}
132
/**
 * Heuristically decide whether a shell command only observes state, so its
 * output may be cached and replayed instead of re-running the command.
 *
 * The check is deliberately conservative: anything that looks like it could
 * write (redirects, well-known mutators, in-place editors, `tee`, mutating
 * git verbs/flags, `find` actions that delete or write files) yields `false`.
 * Only an allow-list of inspection commands yields `true`.
 *
 * @param {unknown} command - Raw command line; coerced to a string.
 * @returns {boolean} `true` when the command looks read-only.
 */
function looksLikeReadOnlyExecCommand(command) {
    // Lower-cased for matching, so upper-case flags such as `-D` are seen as `-d`.
    const cmd = String(command || '').trim().toLowerCase();
    if (!cmd)
        return false;
    // Shell redirects are likely writes.
    if (/(^|\s)(?:>>?|<<?)\s*/.test(cmd))
        return false;
    // Redirects glued to a word (`cat a>b`, `ls 2>err.log`) are writes too.
    // Descriptor duplication (`2>&1`) and `/dev/null` sinks are harmless, so
    // they are removed before looking for a remaining `word>target`.
    const withoutHarmlessRedirects = cmd.replace(/\d*>&\d+|&?\d*>>?\s*\/dev\/null\b/g, ' ');
    if (/[\w.\/)\]}~*]>>?\s*[\w.\/~$]/.test(withoutHarmlessRedirects))
        return false;
    // Obvious mutators.
    if (/\b(?:rm|mv|cp|touch|mkdir|rmdir|chmod|chown|truncate|dd)\b/.test(cmd))
        return false;
    if (/\b(?:sed|perl)\b[^\n]*\s-i\b/.test(cmd))
        return false;
    if (/\btee\b/.test(cmd))
        return false;
    // `find` actions that delete matches or write their output to a file.
    if (/\bfind\b[^\n|;&]*\s-(?:delete|fprint0?|fprintf|fls)\b/.test(cmd))
        return false;
    // Git: allow common read-only subcommands, block mutating verbs.
    if (/\bgit\b/.test(cmd)) {
        if (/\bgit\b[^\n|;&]*\b(?:add|am|apply|bisect|checkout|switch|clean|clone|commit|fetch|merge|pull|push|rebase|reset|revert|stash)\b/.test(cmd)) {
            return false;
        }
        // `git branch` / `git tag` only list by default; these flags make them
        // delete, rename, copy, create or reconfigure refs.
        if (/\bgit\b[^\n|;&]*\bbranch\b[^\n|;&]*\s-{1,2}(?:d|delete|m|move|c|copy|f|force|u|set-upstream-to|set-upstream|unset-upstream|edit-description|t|track)\b/.test(cmd)) {
            return false;
        }
        if (/\bgit\b[^\n|;&]*\btag\b[^\n|;&]*\s-{1,2}(?:d|delete|a|annotate|s|sign|f|force|file|m|message|u|local-user|e|edit)\b/.test(cmd)) {
            return false;
        }
        if (/\bgit\b[^\n|;&]*\b(?:log|show|status|diff|rev-parse|branch(?:\s+--list)?|tag(?:\s+--list)?|ls-files|grep)\b/.test(cmd)) {
            return true;
        }
    }
    if (/^\s*(?:grep|rg|ag|ack|find|ls|cat|head|tail|wc|stat)\b/.test(cmd))
        return true;
    if (/\|\s*(?:grep|rg|ag|ack)\b/.test(cmd))
        return true;
    return false;
}
161
/**
 * Mark a cached exec result so the model can tell it was replayed rather than
 * re-executed.
 *
 * When `content` is a JSON object, the hint is appended to its `out` field and
 * `cached_observation: true` is set. Any other content (plain text, or JSON
 * that is not a plain object, such as an array or a primitive) gets the hint
 * appended as a trailing text line. Content that already carries the hint is
 * returned unchanged, so the function is idempotent.
 *
 * @param {string} content - Serialized tool result previously cached.
 * @returns {string} The content with the cached-observation hint attached;
 *   falsy input is returned as-is.
 */
function withCachedExecObservationHint(content) {
    if (!content)
        return content;
    const appendAsText = (text) => text.includes(CACHED_EXEC_OBSERVATION_HINT)
        ? text
        : `${text}\n${CACHED_EXEC_OBSERVATION_HINT}`;
    let parsed;
    try {
        parsed = JSON.parse(content);
    }
    catch {
        return appendAsText(content);
    }
    // Only a plain object can carry the extra fields through JSON.stringify;
    // properties set on arrays are silently dropped and primitives cannot hold any.
    const isPlainObject = parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed);
    if (!isPlainObject)
        return appendAsText(content);
    const out = typeof parsed.out === 'string' ? parsed.out : '';
    if (out.includes(CACHED_EXEC_OBSERVATION_HINT))
        return content;
    parsed.out = out ? `${out}\n${CACHED_EXEC_OBSERVATION_HINT}` : CACHED_EXEC_OBSERVATION_HINT;
    parsed.cached_observation = true;
    return JSON.stringify(parsed);
}
118
179
  /** Errors that should break the outer agent loop, not be caught by per-tool handlers */
119
180
  class AgentLoopBreak extends Error {
120
181
  constructor(message) {
@@ -766,6 +827,123 @@ function userDisallowsDelegation(content) {
766
827
  /\b(?:spawn[_\-\s]?task|sub[\-\s]?agents?|delegate|delegation)\b[^\n.]{0,50}\b(?:do not|don't|dont|not allowed|forbidden|no)\b/.test(text);
767
828
  return negationNearDelegation;
768
829
  }
830
/**
 * Build the vault keys under which review artifacts for a project are stored.
 *
 * @param {string} projectDir - Project directory passed to `projectIndexKeys`.
 * @returns {{projectId: string, latestKey: string, byIdPrefix: string}}
 *   The project id, the key holding the most recent review, and the prefix
 *   used for per-review keys (the review id is appended to it).
 */
function reviewArtifactKeys(projectDir) {
    const projectId = projectIndexKeys(projectDir).projectId;
    const latestKey = `artifact:review:latest:${projectId}`;
    const byIdPrefix = `artifact:review:item:${projectId}:`;
    return { projectId, latestKey, byIdPrefix };
}
838
/**
 * Heuristically detect whether a user instruction asks for a code review.
 *
 * Matches the `/review` slash command, explicit phrases such as "code review"
 * or "review the diff", and any text that mentions "review" together with a
 * code-related noun (code, repo, diff, changes, pull request, pr).
 *
 * @param {string} text - User instruction text.
 * @returns {boolean} `true` when the text looks like a review request.
 */
function looksLikeCodeReviewRequest(text) {
    const lowered = text.toLowerCase();
    if (lowered.trim() === '') {
        return false;
    }
    const explicitPatterns = [
        /^\s*\/review\b/,
        /\b(?:code\s+review|security\s+review|review\s+the\s+(?:code|diff|changes|repo|repository|pr)|audit\s+the\s+code)\b/,
    ];
    if (explicitPatterns.some((pattern) => pattern.test(lowered))) {
        return true;
    }
    // Looser fallback: "review" plus any code-related noun anywhere in the text.
    const mentionsReview = /\breview\b/.test(lowered);
    return mentionsReview && /\b(?:code|repo|repository|diff|changes|pull\s*request|pr)\b/.test(lowered);
}
848
/**
 * Heuristically detect whether a user instruction asks to replay a previously
 * stored review rather than to run a new one.
 *
 * Matches `/review print|show|replay|latest|last|full`, the literal override
 * phrase "print stale review anyway", and display verbs (print, show, give, …)
 * combined with "review" plus "again/back", "full/entire/…", or
 * "last/latest/previous".
 *
 * @param {string} text - User instruction text.
 * @returns {boolean} `true` when the text looks like a retrieval request.
 */
function looksLikeReviewRetrievalRequest(text) {
    const lowered = text.toLowerCase();
    if (lowered.trim() === '') {
        return false;
    }
    if (/^\s*\/review\s+(?:print|show|replay|latest|last|full)\b/.test(lowered)) {
        return true;
    }
    // Every remaining pattern requires the word "review" somewhere.
    if (!/\breview\b/.test(lowered)) {
        return false;
    }
    const retrievalChecks = [
        (s) => /\bprint\s+stale\s+review\s+anyway\b/.test(s),
        (s) => /\b(?:print|show|display|repeat|paste|send|output|give)\b[^\n.]{0,80}\breview\b[^\n.]{0,40}\b(?:again|back)\b/.test(s),
        (s) => /\b(?:print|show|display|repeat|paste|send|output|give)\b[^\n.]{0,80}\b(?:full|entire|complete|whole)\b[^\n.]{0,80}\breview\b/.test(s),
        (s) => /\b(?:full|entire|complete|whole)\b[^\n.]{0,30}\bcode\s+review\b/.test(s)
            && /\b(?:print|show|display|repeat|paste|send|output|give)\b/.test(s),
        (s) => /\b(?:print|show|display|repeat|paste|send|output|give)\b[^\n.]{0,80}\b(?:last|latest|previous)\b[^\n.]{0,40}\breview\b/.test(s),
    ];
    return retrievalChecks.some((check) => check(lowered));
}
868
/**
 * Detect whether a retrieval request explicitly accepts a stale stored review.
 *
 * Matches the literal phrase "print stale review anyway", or an override word
 * (force, override, ignore, anyway, still) combined with "stale/old/previous"
 * and "review" in either order.
 *
 * @param {string} text - User instruction text.
 * @returns {boolean} `true` when the user opted in to a stale artifact.
 */
function retrievalAllowsStaleArtifact(text) {
    const lowered = text.toLowerCase();
    if (lowered.trim() === '') {
        return false;
    }
    const overridePatterns = [
        /\bprint\s+stale\s+review\s+anyway\b/,
        /\b(?:force|override|ignore)\b[^\n.]{0,80}\b(?:stale|old|previous)\b[^\n.]{0,80}\breview\b/,
        /\b(?:stale|old|previous)\b[^\n.]{0,80}\breview\b[^\n.]{0,80}\b(?:anyway|still|force|override|ignore)\b/,
    ];
    return overridePatterns.some((pattern) => pattern.test(lowered));
}
880
/**
 * Normalize the configured stale-review policy.
 *
 * @param {unknown} raw - Configured value; only strings are considered.
 * @returns {'block' | 'warn'} `'block'` when the value is "block" (ignoring
 *   case and surrounding whitespace); `'warn'` for everything else.
 */
function parseReviewArtifactStalePolicy(raw) {
    if (typeof raw !== 'string') {
        return 'warn';
    }
    return raw.toLowerCase().trim() === 'block' ? 'block' : 'warn';
}
886
/**
 * Parse and validate a serialized code-review artifact.
 *
 * Required fields: `kind === 'code_review'`; non-empty strings `id`,
 * `createdAt`, `projectId`, `projectDir`; strings (possibly empty) `model`,
 * `prompt`, `content`. Optional fields `gitHead` (string) and `gitDirty`
 * (boolean) are dropped when they have the wrong type, so downstream staleness
 * checks never see malformed values while the review content stays retrievable.
 *
 * @param {string} raw - JSON text as stored.
 * @returns {object | null} The validated artifact, or `null` when the input is
 *   not valid JSON or fails validation.
 */
function parseReviewArtifact(raw) {
    let parsed;
    try {
        parsed = JSON.parse(raw);
    }
    catch {
        return null;
    }
    if (!parsed || typeof parsed !== 'object')
        return null;
    if (parsed.kind !== 'code_review')
        return null;
    const nonEmptyStringFields = ['id', 'createdAt', 'projectId', 'projectDir'];
    const stringFields = ['model', 'prompt', 'content'];
    const hasNonEmptyStrings = nonEmptyStringFields.every((field) => typeof parsed[field] === 'string' && parsed[field] !== '');
    if (!hasNonEmptyStrings)
        return null;
    if (!stringFields.every((field) => typeof parsed[field] === 'string'))
        return null;
    // Optional git metadata: discard wrong-typed values instead of rejecting
    // the artifact, so a corrupted field cannot crash callers that format it.
    if (typeof parsed.gitHead !== 'string')
        delete parsed.gitHead;
    if (typeof parsed.gitDirty !== 'boolean')
        delete parsed.gitDirty;
    return parsed;
}
913
/**
 * Resolve the current commit of the git work tree at `cwd`.
 *
 * Runs two synchronous `git rev-parse` probes through the shell at `BASH`
 * (invoked with `-lc`), each limited to a one-second timeout: first to confirm
 * that `cwd` is inside a work tree, then to read `HEAD`.
 *
 * @param {string} cwd - Directory in which to run git.
 * @returns {string | undefined} The trimmed output of `git rev-parse HEAD`, or
 *   `undefined` when `cwd` is not inside a work tree, a probe exits non-zero,
 *   or the output is empty.
 */
function gitHead(cwd) {
    const runInShell = (script) => spawnSync(BASH, ['-lc', script], {
        cwd,
        encoding: 'utf8',
        timeout: 1000,
    });
    const trimmedStdout = (result) => String(result.stdout || '').trim();
    const workTreeProbe = runInShell('git rev-parse --is-inside-work-tree');
    if (workTreeProbe.status !== 0 || !trimmedStdout(workTreeProbe).startsWith('true')) {
        return undefined;
    }
    const headProbe = runInShell('git rev-parse HEAD');
    if (headProbe.status !== 0) {
        return undefined;
    }
    const sha = trimmedStdout(headProbe);
    return sha || undefined;
}
931
/**
 * Abbreviate a commit hash for display.
 *
 * @param {unknown} sha - Commit hash; anything that is not a non-empty string
 *   (missing value, or a malformed value read back from storage) is treated
 *   as unknown instead of throwing.
 * @returns {string} The first 8 characters of `sha`, or `'unknown'`.
 */
function shortSha(sha) {
    if (typeof sha !== 'string' || !sha)
        return 'unknown';
    return sha.slice(0, 8);
}
936
/**
 * Explain why a stored review no longer matches the repository state.
 *
 * Both the current HEAD and the dirty state are probed up front. The artifact
 * is stale when its recorded commit differs from the current one (only when
 * both are known), or when it was recorded on a clean tree and the tree is
 * now dirty.
 *
 * @param {{gitHead?: string, gitDirty?: boolean}} artifact - Stored review artifact.
 * @param {string} cwd - Project directory to inspect.
 * @returns {string} A human-readable reason, or `''` when not stale.
 */
function reviewArtifactStaleReason(artifact, cwd) {
    const headNow = gitHead(cwd);
    const dirtyNow = isGitDirty(cwd);
    const recordedHead = artifact.gitHead;
    const headMoved = Boolean(recordedHead && headNow && recordedHead !== headNow);
    if (headMoved) {
        return `Stored review was generated at commit ${shortSha(recordedHead)}; repository is now at ${shortSha(headNow)}.`;
    }
    const becameDirty = artifact.gitDirty === false && Boolean(dirtyNow);
    return becameDirty
        ? 'Stored review was generated on a clean tree; working tree now has uncommitted changes.'
        : '';
}
769
947
  function supportsVisionModel(model, modelMeta, harness) {
770
948
  if (typeof harness.supportsVision === 'boolean')
771
949
  return harness.supportsVision;
@@ -917,7 +1095,11 @@ export async function createSession(opts) {
917
1095
  mcpTools: mcpToolsLoaded ? (mcpManager?.getEnabledToolSchemas() ?? []) : [],
918
1096
  allowSpawnTask: spawnTaskEnabled,
919
1097
  });
920
- const vault = vaultEnabled ? (opts.runtime?.vault ?? new VaultStore()) : undefined;
1098
+ const vault = vaultEnabled
1099
+ ? (opts.runtime?.vault ?? new VaultStore({
1100
+ immutableReviewArtifactsPerProject: cfg?.trifecta?.vault?.immutable_review_artifacts_per_project,
1101
+ }))
1102
+ : undefined;
921
1103
  if (vault) {
922
1104
  // Scope vault entries by project directory to prevent cross-project context leaks
923
1105
  vault.setProjectDir(cfg.dir ?? process.cwd());
@@ -1825,6 +2007,88 @@ export async function createSession(opts) {
1825
2007
  const hookObj = typeof hooks === 'function' ? { onToken: hooks } : hooks ?? {};
1826
2008
  let turns = 0;
1827
2009
  let toolCalls = 0;
2010
+ const rawInstructionText = userContentToText(instruction).trim();
2011
+ const projectDir = cfg.dir ?? process.cwd();
2012
+ const reviewKeys = reviewArtifactKeys(projectDir);
2013
+ const retrievalRequested = looksLikeReviewRetrievalRequest(rawInstructionText);
2014
+ const shouldPersistReviewArtifact = looksLikeCodeReviewRequest(rawInstructionText) && !retrievalRequested;
2015
+ if (retrievalRequested) {
2016
+ const latest = vault
2017
+ ? await vault.getLatestByKey(reviewKeys.latestKey, 'system').catch(() => null)
2018
+ : null;
2019
+ const parsedArtifact = latest?.value ? parseReviewArtifact(latest.value) : null;
2020
+ const artifact = parsedArtifact && parsedArtifact.projectId === reviewKeys.projectId
2021
+ ? parsedArtifact
2022
+ : null;
2023
+ if (artifact?.content?.trim()) {
2024
+ const stale = reviewArtifactStaleReason(artifact, projectDir);
2025
+ const stalePolicy = parseReviewArtifactStalePolicy(cfg?.trifecta?.vault?.stale_policy);
2026
+ if (stale && stalePolicy === 'block' && !retrievalAllowsStaleArtifact(rawInstructionText)) {
2027
+ const blocked = `Stored review is stale and retrieval policy is set to block. ${stale}\n` +
2028
+ 'Reply with "print stale review anyway" to override, or request a fresh review.';
2029
+ messages.push({ role: 'assistant', content: blocked });
2030
+ hookObj.onToken?.(blocked);
2031
+ await hookObj.onTurnEnd?.({
2032
+ turn: turns,
2033
+ toolCalls,
2034
+ promptTokens: cumulativeUsage.prompt,
2035
+ completionTokens: cumulativeUsage.completion,
2036
+ });
2037
+ return { text: blocked, turns, toolCalls };
2038
+ }
2039
+ const text = stale
2040
+ ? `${artifact.content}\n\n[artifact note] ${stale}`
2041
+ : artifact.content;
2042
+ messages.push({ role: 'assistant', content: text });
2043
+ hookObj.onToken?.(text);
2044
+ await hookObj.onTurnEnd?.({
2045
+ turn: turns,
2046
+ toolCalls,
2047
+ promptTokens: cumulativeUsage.prompt,
2048
+ completionTokens: cumulativeUsage.completion,
2049
+ });
2050
+ return { text, turns, toolCalls };
2051
+ }
2052
+ const miss = 'No stored full code review found yet. Ask me to run a code review first, then I can replay it verbatim.';
2053
+ messages.push({ role: 'assistant', content: miss });
2054
+ hookObj.onToken?.(miss);
2055
+ await hookObj.onTurnEnd?.({
2056
+ turn: turns,
2057
+ toolCalls,
2058
+ promptTokens: cumulativeUsage.prompt,
2059
+ completionTokens: cumulativeUsage.completion,
2060
+ });
2061
+ return { text: miss, turns, toolCalls };
2062
+ }
2063
/**
 * Store the final assistant text of a code-review request as a review
 * artifact in the vault, under both the project's "latest" key and a
 * per-review key. Does nothing when there is no vault, the request was not
 * classified as a review, or the text is blank. Storage failures are ignored
 * (best effort).
 *
 * @param {string} finalText - Final assistant message for the turn.
 * @returns {Promise<void>}
 */
const persistReviewArtifact = async (finalText) => {
    const persistenceEnabled = Boolean(vault) && shouldPersistReviewArtifact;
    if (!persistenceEnabled) {
        return;
    }
    const reviewBody = finalText.trim();
    if (reviewBody === '') {
        return;
    }
    const createdAt = new Date().toISOString();
    const timePart = Date.now().toString(36);
    const randomPart = Math.random().toString(36).slice(2, 8);
    const record = {
        id: `review-${timePart}-${randomPart}`,
        kind: 'code_review',
        createdAt,
        model,
        projectId: reviewKeys.projectId,
        projectDir,
        // Only the first 2000 characters of the instruction are kept.
        prompt: rawInstructionText.slice(0, 2000),
        content: reviewBody,
        gitHead: gitHead(projectDir),
        gitDirty: isGitDirty(projectDir),
    };
    const itemKey = `${reviewKeys.byIdPrefix}${record.id}`;
    try {
        const serialized = JSON.stringify(record);
        await vault.upsertNote(reviewKeys.latestKey, serialized, 'system');
        await vault.upsertNote(itemKey, serialized, 'system');
    }
    catch {
        // best effort only
    }
};
1828
2092
  // Read-only tool call budgets (§ anti-scan guardrails)
1829
2093
  const READ_ONLY_PER_TURN_CAP = 6;
1830
2094
  const READ_BUDGET_WARN = 15;
@@ -1855,6 +2119,10 @@ export async function createSession(opts) {
1855
2119
  let repromptUsed = false;
1856
2120
  // Track blocked command loops by exact reason+command signature.
1857
2121
  const blockedExecAttemptsBySig = new Map();
2122
+ // Cache successful read-only exec observations by exact signature.
2123
+ const execObservationCacheBySig = new Map();
2124
+ // Prevent repeating the same "stop rerunning" reminder every turn.
2125
+ const readOnlyExecHintedSigs = new Set();
1858
2126
  // Keep a lightweight breadcrumb for diagnostics on partial failures.
1859
2127
  let lastSuccessfulTestRun = null;
1860
2128
  // One-time nudge to prevent post-success churn after green test runs.
@@ -2236,6 +2504,9 @@ export async function createSession(opts) {
2236
2504
  const sig = `${tc.function.name}:${tc.function.arguments ?? '{}'}`;
2237
2505
  turnSigs.add(sig);
2238
2506
  }
2507
+ // Repeated read-only exec calls can be served from cache instead of hard-breaking.
2508
+ const repeatedReadOnlyExecSigs = new Set();
2509
+ const readOnlyExecTurnHints = [];
2239
2510
  // Track whether a mutation happened since a given signature was last seen.
2240
2511
  // (Tool-loop is single-threaded across turns; this is safe to keep in-memory.)
2241
2512
  for (const sig of turnSigs) {
@@ -2257,6 +2528,17 @@ export async function createSession(opts) {
2257
2528
  await injectVaultContext().catch(() => { });
2258
2529
  }
2259
2530
  if (count >= loopThreshold) {
2531
+ const command = execCommandFromSig(sig);
2532
+ const canReuseReadOnlyObservation = looksLikeReadOnlyExecCommand(command) &&
2533
+ execObservationCacheBySig.has(sig);
2534
+ if (canReuseReadOnlyObservation) {
2535
+ repeatedReadOnlyExecSigs.add(sig);
2536
+ if (!readOnlyExecHintedSigs.has(sig)) {
2537
+ readOnlyExecHintedSigs.add(sig);
2538
+ readOnlyExecTurnHints.push(command || 'exec command');
2539
+ }
2540
+ continue;
2541
+ }
2260
2542
  const args = sig.slice(toolName.length + 1);
2261
2543
  const argsPreview = args.length > 220 ? args.slice(0, 220) + '…' : args;
2262
2544
  throw new Error(`tool ${toolName}: identical call repeated ${loopThreshold}x across turns; breaking loop. ` +
@@ -2425,72 +2707,88 @@ export async function createSession(opts) {
2425
2707
  return { id: callId, content: '[skipped by user: step mode]' };
2426
2708
  }
2427
2709
  }
2710
+ const sig = `${name}:${rawArgs || '{}'}`;
2428
2711
  let content = '';
2429
- if (isSpawnTask) {
2430
- content = await runSpawnTask(args);
2712
+ let reusedCachedReadOnlyExec = false;
2713
+ if (name === 'exec' && repeatedReadOnlyExecSigs.has(sig)) {
2714
+ const cached = execObservationCacheBySig.get(sig);
2715
+ if (cached) {
2716
+ content = withCachedExecObservationHint(cached);
2717
+ reusedCachedReadOnlyExec = true;
2718
+ }
2431
2719
  }
2432
- else if (builtInFn) {
2433
- const value = await builtInFn(ctx, args);
2434
- content = typeof value === 'string' ? value : JSON.stringify(value);
2435
- if (name === 'exec') {
2436
- // Successful exec clears blocked-loop counters.
2437
- blockedExecAttemptsBySig.clear();
2438
- // Capture successful test runs for better partial-failure diagnostics.
2439
- try {
2440
- const parsed = JSON.parse(content);
2720
+ if (!reusedCachedReadOnlyExec) {
2721
+ if (isSpawnTask) {
2722
+ content = await runSpawnTask(args);
2723
+ }
2724
+ else if (builtInFn) {
2725
+ const value = await builtInFn(ctx, args);
2726
+ content = typeof value === 'string' ? value : JSON.stringify(value);
2727
+ if (name === 'exec') {
2728
+ // Successful exec clears blocked-loop counters.
2729
+ blockedExecAttemptsBySig.clear();
2441
2730
  const cmd = String(args?.command ?? '');
2442
- const out = String(parsed?.out ?? '');
2443
- const rc = Number(parsed?.rc ?? NaN);
2444
- const looksLikeTest = /(^|\s)(node\s+--test|npm\s+test|pnpm\s+test|yarn\s+test|pytest|go\s+test|cargo\s+test|ctest)(\s|$)/i.test(cmd);
2445
- if (looksLikeTest && Number.isFinite(rc) && rc === 0) {
2446
- lastSuccessfulTestRun = {
2447
- command: cmd,
2448
- outputPreview: out.slice(0, 400),
2449
- };
2731
+ if (looksLikeReadOnlyExecCommand(cmd)) {
2732
+ execObservationCacheBySig.set(sig, content);
2733
+ }
2734
+ // Capture successful test runs for better partial-failure diagnostics.
2735
+ try {
2736
+ const parsed = JSON.parse(content);
2737
+ const out = String(parsed?.out ?? '');
2738
+ const rc = Number(parsed?.rc ?? NaN);
2739
+ const looksLikeTest = /(^|\s)(node\s+--test|npm\s+test|pnpm\s+test|yarn\s+test|pytest|go\s+test|cargo\s+test|ctest)(\s|$)/i.test(cmd);
2740
+ if (looksLikeTest && Number.isFinite(rc) && rc === 0) {
2741
+ lastSuccessfulTestRun = {
2742
+ command: cmd,
2743
+ outputPreview: out.slice(0, 400),
2744
+ };
2745
+ }
2746
+ }
2747
+ catch {
2748
+ // Ignore parse issues; non-JSON exec output is tolerated.
2450
2749
  }
2451
2750
  }
2452
- catch {
2453
- // Ignore parse issues; non-JSON exec output is tolerated.
2454
- }
2455
- }
2456
- }
2457
- else if (isLspTool && lspManager) {
2458
- // LSP tool dispatch
2459
- if (name === 'lsp_diagnostics') {
2460
- content = await lspManager.getDiagnostics(typeof args.path === 'string' ? args.path : undefined, typeof args.severity === 'number' ? args.severity : undefined);
2461
- }
2462
- else if (name === 'lsp_symbols') {
2463
- content = await buildLspLensSymbolOutput(String(args.path ?? ''));
2464
2751
  }
2465
- else if (name === 'lsp_hover') {
2466
- content = await lspManager.getHover(String(args.path ?? ''), Number(args.line ?? 0), Number(args.character ?? 0));
2467
- }
2468
- else if (name === 'lsp_definition') {
2469
- content = await lspManager.getDefinition(String(args.path ?? ''), Number(args.line ?? 0), Number(args.character ?? 0));
2470
- }
2471
- else if (name === 'lsp_references') {
2472
- content = await lspManager.getReferences(String(args.path ?? ''), Number(args.line ?? 0), Number(args.character ?? 0), typeof args.max_results === 'number' ? args.max_results : 50);
2473
- }
2474
- }
2475
- else {
2476
- if (mcpManager == null) {
2477
- throw new Error(`unknown tool: ${name}`);
2752
+ else if (isLspTool && lspManager) {
2753
+ // LSP tool dispatch
2754
+ if (name === 'lsp_diagnostics') {
2755
+ content = await lspManager.getDiagnostics(typeof args.path === 'string' ? args.path : undefined, typeof args.severity === 'number' ? args.severity : undefined);
2756
+ }
2757
+ else if (name === 'lsp_symbols') {
2758
+ content = await buildLspLensSymbolOutput(String(args.path ?? ''));
2759
+ }
2760
+ else if (name === 'lsp_hover') {
2761
+ content = await lspManager.getHover(String(args.path ?? ''), Number(args.line ?? 0), Number(args.character ?? 0));
2762
+ }
2763
+ else if (name === 'lsp_definition') {
2764
+ content = await lspManager.getDefinition(String(args.path ?? ''), Number(args.line ?? 0), Number(args.character ?? 0));
2765
+ }
2766
+ else if (name === 'lsp_references') {
2767
+ content = await lspManager.getReferences(String(args.path ?? ''), Number(args.line ?? 0), Number(args.character ?? 0), typeof args.max_results === 'number' ? args.max_results : 50);
2768
+ }
2478
2769
  }
2479
- const mcpReadOnly = isReadOnlyToolDynamic(name);
2480
- if (!cfg.step_mode && !ctx.noConfirm && !mcpReadOnly) {
2481
- const prompt = `Execute MCP tool '${name}'? [Y/n]`;
2482
- const ok = confirmBridge ? await confirmBridge(prompt, { tool: name, args }) : true;
2483
- if (!ok) {
2484
- return { id: callId, content: '[skipped by user: approval]' };
2770
+ else {
2771
+ if (mcpManager == null) {
2772
+ throw new Error(`unknown tool: ${name}`);
2773
+ }
2774
+ const mcpReadOnly = isReadOnlyToolDynamic(name);
2775
+ if (!cfg.step_mode && !ctx.noConfirm && !mcpReadOnly) {
2776
+ const prompt = `Execute MCP tool '${name}'? [Y/n]`;
2777
+ const ok = confirmBridge ? await confirmBridge(prompt, { tool: name, args }) : true;
2778
+ if (!ok) {
2779
+ return { id: callId, content: '[skipped by user: approval]' };
2780
+ }
2485
2781
  }
2782
+ const callArgs = args && typeof args === 'object' && !Array.isArray(args)
2783
+ ? args
2784
+ : {};
2785
+ content = await mcpManager.callTool(name, callArgs);
2486
2786
  }
2487
- const callArgs = args && typeof args === 'object' && !Array.isArray(args)
2488
- ? args
2489
- : {};
2490
- content = await mcpManager.callTool(name, callArgs);
2491
2787
  }
2492
2788
  // Hook: onToolResult (Phase 8.5 + Phase 7 rich display)
2493
- const summary = toolResultSummary(name, args, content, true);
2789
+ const summary = reusedCachedReadOnlyExec
2790
+ ? 'cached read-only exec observation (unchanged)'
2791
+ : toolResultSummary(name, args, content, true);
2494
2792
  const resultEvent = { id: callId, name, success: true, summary, result: content };
2495
2793
  // Phase 7: populate rich display fields
2496
2794
  if (name === 'exec') {
@@ -2647,6 +2945,18 @@ export async function createSession(opts) {
2647
2945
  for (const r of results) {
2648
2946
  messages.push({ role: 'tool', tool_call_id: r.id, content: r.content });
2649
2947
  }
2948
+ if (readOnlyExecTurnHints.length) {
2949
+ const previews = readOnlyExecTurnHints
2950
+ .slice(0, 2)
2951
+ .map((cmd) => cmd.length > 140 ? `${cmd.slice(0, 140)}…` : cmd)
2952
+ .join(' | ');
2953
+ messages.push({
2954
+ role: 'user',
2955
+ content: '[system] You repeated an identical read-only exec command with unchanged arguments. ' +
2956
+ `Idle Hands reused cached observation output instead of rerunning it (${previews}). ` +
2957
+ 'Do not call the same read-only command again unless files/history changed; proceed with analysis or final answer.',
2958
+ });
2959
+ }
2650
2960
  // If tests are green and we've already made edits, nudge for final summary
2651
2961
  // once to avoid extra non-essential demo/cleanup turns.
2652
2962
  if (!finalizeAfterTestsNudgeUsed && lastSuccessfulTestRun && mutationVersion > 0) {
@@ -2747,6 +3057,7 @@ export async function createSession(opts) {
2747
3057
  noToolTurns = 0;
2748
3058
  // final assistant message
2749
3059
  messages.push({ role: 'assistant', content: assistantText });
3060
+ await persistReviewArtifact(assistantText).catch(() => { });
2750
3061
  await hookObj.onTurnEnd?.({
2751
3062
  turn: turns,
2752
3063
  toolCalls,