@visorcraft/idlehands 1.1.5 → 1.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. package/dist/agent.js +167 -71
  2. package/dist/agent.js.map +1 -1
  3. package/dist/bot/commands.js +2 -7
  4. package/dist/bot/commands.js.map +1 -1
  5. package/dist/bot/discord.js +12 -17
  6. package/dist/bot/discord.js.map +1 -1
  7. package/dist/bot/telegram.js +8 -11
  8. package/dist/bot/telegram.js.map +1 -1
  9. package/dist/cli/args.js +4 -1
  10. package/dist/cli/args.js.map +1 -1
  11. package/dist/cli/commands/session.js +110 -1
  12. package/dist/cli/commands/session.js.map +1 -1
  13. package/dist/cli/setup.js +22 -2
  14. package/dist/cli/setup.js.map +1 -1
  15. package/dist/client.js +51 -3
  16. package/dist/client.js.map +1 -1
  17. package/dist/config.js +79 -0
  18. package/dist/config.js.map +1 -1
  19. package/dist/hooks/index.js +5 -0
  20. package/dist/hooks/index.js.map +1 -0
  21. package/dist/hooks/loader.js +58 -0
  22. package/dist/hooks/loader.js.map +1 -0
  23. package/dist/hooks/manager.js +175 -0
  24. package/dist/hooks/manager.js.map +1 -0
  25. package/dist/hooks/plugins/example-console.js +24 -0
  26. package/dist/hooks/plugins/example-console.js.map +1 -0
  27. package/dist/hooks/scaffold.js +53 -0
  28. package/dist/hooks/scaffold.js.map +1 -0
  29. package/dist/hooks/types.js +8 -0
  30. package/dist/hooks/types.js.map +1 -0
  31. package/dist/index.js +3 -0
  32. package/dist/index.js.map +1 -1
  33. package/dist/model-customization.js +48 -0
  34. package/dist/model-customization.js.map +1 -0
  35. package/dist/tui/controller.js +336 -16
  36. package/dist/tui/controller.js.map +1 -1
  37. package/dist/tui/keymap.js +15 -0
  38. package/dist/tui/keymap.js.map +1 -1
  39. package/dist/tui/render.js +100 -1
  40. package/dist/tui/render.js.map +1 -1
  41. package/dist/tui/state.js +69 -1
  42. package/dist/tui/state.js.map +1 -1
  43. package/dist/watchdog.js +11 -0
  44. package/dist/watchdog.js.map +1 -1
  45. package/package.json +1 -1
package/dist/agent.js CHANGED
@@ -2,6 +2,8 @@ import { OpenAIClient } from './client.js';
2
2
  import { enforceContextBudget, stripThinking, estimateTokensFromMessages, estimateToolSchemaTokens } from './history.js';
3
3
  import * as tools from './tools.js';
4
4
  import { selectHarness } from './harnesses.js';
5
+ import { BASE_MAX_TOKENS, deriveContextWindow, deriveGenerationParams, supportsVisionModel } from './model-customization.js';
6
+ import { HookManager, loadHookPlugins } from './hooks/index.js';
5
7
  import { checkExecSafety, checkPathSafety } from './safety.js';
6
8
  import { loadProjectContext } from './context.js';
7
9
  import { loadGitContext, isGitDirty, stashWorkingTree } from './git.js';
@@ -176,6 +178,16 @@ function withCachedExecObservationHint(content) {
176
178
  return `${content}\n${CACHED_EXEC_OBSERVATION_HINT}`;
177
179
  }
178
180
  }
181
+ function readOnlyExecCacheable(content) {
182
+ try {
183
+ const parsed = JSON.parse(content);
184
+ const rc = Number(parsed?.rc ?? NaN);
185
+ return Number.isFinite(rc) && rc === 0;
186
+ }
187
+ catch {
188
+ return false;
189
+ }
190
+ }
179
191
  /** Errors that should break the outer agent loop, not be caught by per-tool handlers */
180
192
  class AgentLoopBreak extends Error {
181
193
  constructor(message) {
@@ -944,26 +956,6 @@ function reviewArtifactStaleReason(artifact, cwd) {
944
956
  }
945
957
  return '';
946
958
  }
947
- function supportsVisionModel(model, modelMeta, harness) {
948
- if (typeof harness.supportsVision === 'boolean')
949
- return harness.supportsVision;
950
- if (typeof modelMeta?.vision === 'boolean')
951
- return modelMeta.vision;
952
- const inputModalities = modelMeta?.input_modalities;
953
- if (Array.isArray(inputModalities) && inputModalities.some((m) => String(m).toLowerCase().includes('image'))) {
954
- return true;
955
- }
956
- const modalities = modelMeta?.modalities;
957
- if (Array.isArray(modalities) && modalities.some((m) => String(m).toLowerCase().includes('image'))) {
958
- return true;
959
- }
960
- const id = model.toLowerCase();
961
- if (/(vision|multimodal|\bvl\b|llava|qwen2\.5-vl|gpt-4o|gemini|claude-3)/i.test(id))
962
- return true;
963
- if (harness.id.includes('vision') || harness.id.includes('vl'))
964
- return true;
965
- return false;
966
- }
967
959
  function normalizeModelsResponse(raw) {
968
960
  if (Array.isArray(raw)) {
969
961
  return {
@@ -998,6 +990,15 @@ export async function createSession(opts) {
998
990
  if (typeof cfg.response_timeout === 'number' && cfg.response_timeout > 0) {
999
991
  client.setResponseTimeout(cfg.response_timeout);
1000
992
  }
993
+ if (typeof client.setConnectionTimeout === 'function' && typeof cfg.connection_timeout === 'number' && cfg.connection_timeout > 0) {
994
+ client.setConnectionTimeout(cfg.connection_timeout);
995
+ }
996
+ if (typeof client.setInitialConnectionCheck === 'function' && typeof cfg.initial_connection_check === 'boolean') {
997
+ client.setInitialConnectionCheck(cfg.initial_connection_check);
998
+ }
999
+ if (typeof client.setInitialConnectionProbeTimeout === 'function' && typeof cfg.initial_connection_timeout === 'number' && cfg.initial_connection_timeout > 0) {
1000
+ client.setInitialConnectionProbeTimeout(cfg.initial_connection_timeout);
1001
+ }
1001
1002
  // Health check + model list (cheap, avoids wasting GPU on chat warmups if unreachable)
1002
1003
  let modelsList = normalizeModelsResponse(await client.models().catch(() => null));
1003
1004
  let model = cfg.model && cfg.model.trim().length
@@ -1007,9 +1008,50 @@ export async function createSession(opts) {
1007
1008
  // Try to derive context window from /v1/models (if provided by server).
1008
1009
  const explicitContextWindow = cfg.context_window != null;
1009
1010
  const modelMeta = modelsList?.data?.find((m) => m.id === model);
1010
- const derivedCtx = (modelMeta?.context_window ?? modelMeta?.context_length ?? modelMeta?.max_context_length);
1011
- let contextWindow = cfg.context_window ?? derivedCtx ?? 131072;
1011
+ let contextWindow = deriveContextWindow({
1012
+ explicitContextWindow,
1013
+ configuredContextWindow: cfg.context_window,
1014
+ modelMeta,
1015
+ });
1012
1016
  let supportsVision = supportsVisionModel(model, modelMeta, harness);
1017
+ const sessionId = `session-${Date.now().toString(36)}-${Math.random().toString(36).slice(2, 10)}`;
1018
+ const hookCfg = cfg.hooks ?? {};
1019
+ const hookManager = opts.runtime?.hookManager ?? new HookManager({
1020
+ enabled: hookCfg.enabled !== false,
1021
+ strict: hookCfg.strict === true,
1022
+ warnMs: hookCfg.warn_ms,
1023
+ allowedCapabilities: Array.isArray(hookCfg.allow_capabilities) ? hookCfg.allow_capabilities : undefined,
1024
+ context: () => ({
1025
+ sessionId,
1026
+ cwd: cfg.dir ?? process.cwd(),
1027
+ model,
1028
+ harness: harness.id,
1029
+ endpoint: cfg.endpoint,
1030
+ }),
1031
+ });
1032
+ const emitDetached = (promise, eventName) => {
1033
+ void promise.catch((error) => {
1034
+ if (!process.env.IDLEHANDS_QUIET_WARNINGS) {
1035
+ console.warn(`[hooks] async ${eventName} dispatch failed: ${error?.message ?? String(error)}`);
1036
+ }
1037
+ });
1038
+ };
1039
+ if (!opts.runtime?.hookManager && hookManager.isEnabled()) {
1040
+ const loadedPlugins = await loadHookPlugins({
1041
+ pluginPaths: Array.isArray(hookCfg.plugin_paths) ? hookCfg.plugin_paths : [],
1042
+ cwd: cfg.dir ?? process.cwd(),
1043
+ strict: hookCfg.strict === true,
1044
+ });
1045
+ for (const loaded of loadedPlugins) {
1046
+ await hookManager.registerPlugin(loaded.plugin, loaded.path);
1047
+ }
1048
+ }
1049
+ await hookManager.emit('session_start', {
1050
+ model,
1051
+ harness: harness.id,
1052
+ endpoint: cfg.endpoint,
1053
+ cwd: cfg.dir ?? process.cwd(),
1054
+ });
1013
1055
  if (!cfg.i_know_what_im_doing && contextWindow > 131072) {
1014
1056
  console.warn('[warn] context_window is above 131072; this can increase memory usage and hurt throughput. Use --i-know-what-im-doing to proceed.');
1015
1057
  }
@@ -1018,13 +1060,13 @@ export async function createSession(opts) {
1018
1060
  // whether the harness wants a higher value — harness.defaults.max_tokens wins
1019
1061
  // when it's larger than the base default (16384), unless the user explicitly
1020
1062
  // configured a value in their config file or CLI.
1021
- const BASE_MAX_TOKENS = 16384;
1022
- let maxTokens = cfg.max_tokens ?? BASE_MAX_TOKENS;
1023
- if (maxTokens === BASE_MAX_TOKENS && harness.defaults?.max_tokens && harness.defaults.max_tokens > BASE_MAX_TOKENS) {
1024
- maxTokens = harness.defaults.max_tokens;
1025
- }
1026
- let temperature = cfg.temperature ?? harness.defaults?.temperature ?? 0.2;
1027
- let topP = cfg.top_p ?? harness.defaults?.top_p ?? 0.95;
1063
+ let { maxTokens, temperature, topP } = deriveGenerationParams({
1064
+ harness,
1065
+ configuredMaxTokens: cfg.max_tokens,
1066
+ configuredTemperature: cfg.temperature,
1067
+ configuredTopP: cfg.top_p,
1068
+ baseMaxTokens: BASE_MAX_TOKENS,
1069
+ });
1028
1070
  const harnessVaultMode = harness.defaults?.trifecta?.vaultMode || 'off';
1029
1071
  const vaultMode = (cfg.trifecta?.vault?.mode || harnessVaultMode);
1030
1072
  const vaultEnabled = cfg.trifecta?.enabled !== false && cfg.trifecta?.vault?.enabled !== false;
@@ -1282,6 +1324,7 @@ export async function createSession(opts) {
1282
1324
  ];
1283
1325
  sessionMetaPending = sessionMeta;
1284
1326
  lastEditedPath = undefined;
1327
+ initialConnectionProbeDone = false;
1285
1328
  mcpToolsLoaded = !mcpLazySchemaMode;
1286
1329
  };
1287
1330
  const restore = (next) => {
@@ -1304,6 +1347,7 @@ export async function createSession(opts) {
1304
1347
  };
1305
1348
  let reqCounter = 0;
1306
1349
  let inFlight = null;
1350
+ let initialConnectionProbeDone = false;
1307
1351
  let lastEditedPath;
1308
1352
  // Plan mode state (Phase 8)
1309
1353
  let planSteps = [];
@@ -1804,22 +1848,29 @@ export async function createSession(opts) {
1804
1848
  return fresh.data.map((m) => m.id).filter(Boolean);
1805
1849
  };
1806
1850
  const setModel = (name) => {
1851
+ const previousModel = model;
1807
1852
  model = name;
1808
1853
  harness = selectHarness(model, cfg.harness && cfg.harness.trim() ? cfg.harness.trim() : undefined);
1809
1854
  const nextMeta = modelsList?.data?.find((m) => m.id === model);
1810
1855
  supportsVision = supportsVisionModel(model, nextMeta, harness);
1811
- if (!explicitContextWindow) {
1812
- const derived = asNumber(nextMeta?.context_window, nextMeta?.context_length, nextMeta?.max_context_length);
1813
- if (derived && derived > 0) {
1814
- contextWindow = derived;
1815
- }
1816
- }
1817
- maxTokens = cfg.max_tokens ?? BASE_MAX_TOKENS;
1818
- if (maxTokens === BASE_MAX_TOKENS && harness.defaults?.max_tokens && harness.defaults.max_tokens > BASE_MAX_TOKENS) {
1819
- maxTokens = harness.defaults.max_tokens;
1820
- }
1821
- temperature = cfg.temperature ?? harness.defaults?.temperature ?? 0.2;
1822
- topP = cfg.top_p ?? harness.defaults?.top_p ?? 0.95;
1856
+ contextWindow = deriveContextWindow({
1857
+ explicitContextWindow,
1858
+ configuredContextWindow: cfg.context_window,
1859
+ previousContextWindow: contextWindow,
1860
+ modelMeta: nextMeta,
1861
+ });
1862
+ ({ maxTokens, temperature, topP } = deriveGenerationParams({
1863
+ harness,
1864
+ configuredMaxTokens: cfg.max_tokens,
1865
+ configuredTemperature: cfg.temperature,
1866
+ configuredTopP: cfg.top_p,
1867
+ baseMaxTokens: BASE_MAX_TOKENS,
1868
+ }));
1869
+ emitDetached(hookManager.emit('model_changed', {
1870
+ previousModel,
1871
+ nextModel: model,
1872
+ harness: harness.id,
1873
+ }), 'model_changed');
1823
1874
  };
1824
1875
  const setEndpoint = async (endpoint, modelName) => {
1825
1876
  const normalized = endpoint.replace(/\/+$/, '');
@@ -2007,11 +2058,35 @@ export async function createSession(opts) {
2007
2058
  const hookObj = typeof hooks === 'function' ? { onToken: hooks } : hooks ?? {};
2008
2059
  let turns = 0;
2009
2060
  let toolCalls = 0;
2061
+ const askId = `ask-${Date.now().toString(36)}-${Math.random().toString(36).slice(2, 8)}`;
2062
+ const emitToolCall = async (call) => {
2063
+ hookObj.onToolCall?.(call);
2064
+ await hookManager.emit('tool_call', { askId, turn: turns, call });
2065
+ };
2066
+ const emitToolResult = async (result) => {
2067
+ await hookObj.onToolResult?.(result);
2068
+ await hookManager.emit('tool_result', { askId, turn: turns, result });
2069
+ };
2070
+ const emitTurnEnd = async (stats) => {
2071
+ await hookObj.onTurnEnd?.(stats);
2072
+ await hookManager.emit('turn_end', { askId, stats });
2073
+ };
2074
+ const finalizeAsk = async (text) => {
2075
+ await hookManager.emit('ask_end', { askId, text, turns, toolCalls });
2076
+ return { text, turns, toolCalls };
2077
+ };
2010
2078
  const rawInstructionText = userContentToText(instruction).trim();
2079
+ await hookManager.emit('ask_start', { askId, instruction: rawInstructionText });
2011
2080
  const projectDir = cfg.dir ?? process.cwd();
2012
2081
  const reviewKeys = reviewArtifactKeys(projectDir);
2013
2082
  const retrievalRequested = looksLikeReviewRetrievalRequest(rawInstructionText);
2014
2083
  const shouldPersistReviewArtifact = looksLikeCodeReviewRequest(rawInstructionText) && !retrievalRequested;
2084
+ if (!retrievalRequested && cfg.initial_connection_check !== false && !initialConnectionProbeDone) {
2085
+ if (typeof client.probeConnection === 'function') {
2086
+ await client.probeConnection();
2087
+ initialConnectionProbeDone = true;
2088
+ }
2089
+ }
2015
2090
  if (retrievalRequested) {
2016
2091
  const latest = vault
2017
2092
  ? await vault.getLatestByKey(reviewKeys.latestKey, 'system').catch(() => null)
@@ -2028,37 +2103,37 @@ export async function createSession(opts) {
2028
2103
  'Reply with "print stale review anyway" to override, or request a fresh review.';
2029
2104
  messages.push({ role: 'assistant', content: blocked });
2030
2105
  hookObj.onToken?.(blocked);
2031
- await hookObj.onTurnEnd?.({
2106
+ await emitTurnEnd({
2032
2107
  turn: turns,
2033
2108
  toolCalls,
2034
2109
  promptTokens: cumulativeUsage.prompt,
2035
2110
  completionTokens: cumulativeUsage.completion,
2036
2111
  });
2037
- return { text: blocked, turns, toolCalls };
2112
+ return await finalizeAsk(blocked);
2038
2113
  }
2039
2114
  const text = stale
2040
2115
  ? `${artifact.content}\n\n[artifact note] ${stale}`
2041
2116
  : artifact.content;
2042
2117
  messages.push({ role: 'assistant', content: text });
2043
2118
  hookObj.onToken?.(text);
2044
- await hookObj.onTurnEnd?.({
2119
+ await emitTurnEnd({
2045
2120
  turn: turns,
2046
2121
  toolCalls,
2047
2122
  promptTokens: cumulativeUsage.prompt,
2048
2123
  completionTokens: cumulativeUsage.completion,
2049
2124
  });
2050
- return { text, turns, toolCalls };
2125
+ return await finalizeAsk(text);
2051
2126
  }
2052
2127
  const miss = 'No stored full code review found yet. Ask me to run a code review first, then I can replay it verbatim.';
2053
2128
  messages.push({ role: 'assistant', content: miss });
2054
2129
  hookObj.onToken?.(miss);
2055
- await hookObj.onTurnEnd?.({
2130
+ await emitTurnEnd({
2056
2131
  turn: turns,
2057
2132
  toolCalls,
2058
2133
  promptTokens: cumulativeUsage.prompt,
2059
2134
  completionTokens: cumulativeUsage.completion,
2060
2135
  });
2061
- return { text: miss, turns, toolCalls };
2136
+ return await finalizeAsk(miss);
2062
2137
  }
2063
2138
  const persistReviewArtifact = async (finalText) => {
2064
2139
  if (!vault || !shouldPersistReviewArtifact)
@@ -2198,6 +2273,7 @@ export async function createSession(opts) {
2198
2273
  if (inFlight?.signal?.aborted)
2199
2274
  break;
2200
2275
  turns++;
2276
+ await hookManager.emit('turn_start', { askId, turn: turns });
2201
2277
  const wallElapsed = (Date.now() - wallStart) / 1000;
2202
2278
  if (wallElapsed > cfg.timeout) {
2203
2279
  throw new Error(`session timeout exceeded (${cfg.timeout}s) after ${wallElapsed.toFixed(1)}s`);
@@ -2238,9 +2314,9 @@ export async function createSession(opts) {
2238
2314
  const callerSignal = hookObj.signal;
2239
2315
  const onCallerAbort = () => ac.abort();
2240
2316
  callerSignal?.addEventListener('abort', onCallerAbort, { once: true });
2241
- // Per-request timeout: the lesser of response_timeout (default 300s) or the remaining session wall time.
2317
+ // Per-request timeout: the lesser of response_timeout (default 600s) or the remaining session wall time.
2242
2318
  // This prevents a single slow request from consuming the entire session budget.
2243
- const perReqCap = cfg.response_timeout && cfg.response_timeout > 0 ? cfg.response_timeout : 300;
2319
+ const perReqCap = cfg.response_timeout && cfg.response_timeout > 0 ? cfg.response_timeout : 600;
2244
2320
  const wallRemaining = Math.max(0, cfg.timeout - (Date.now() - wallStart) / 1000);
2245
2321
  const reqTimeout = Math.min(perReqCap, Math.max(10, wallRemaining));
2246
2322
  const timer = setTimeout(() => ac.abort(), reqTimeout * 1000);
@@ -2394,7 +2470,7 @@ export async function createSession(opts) {
2394
2470
  role: 'user',
2395
2471
  content: '[system] Your previous response was empty (no text, no tool calls). Continue by either calling a tool with valid JSON arguments or giving a final answer.',
2396
2472
  });
2397
- await hookObj.onTurnEnd?.({
2473
+ await emitTurnEnd({
2398
2474
  turn: turns,
2399
2475
  toolCalls,
2400
2476
  promptTokens: cumulativeUsage.prompt,
@@ -2643,8 +2719,8 @@ export async function createSession(opts) {
2643
2719
  const searchTerm = typeof args.search === 'string' ? args.search : '';
2644
2720
  // Fix 1: Hard cumulative budget — refuse reads past hard cap
2645
2721
  if (cumulativeReadOnlyCalls > READ_BUDGET_HARD) {
2646
- hookObj.onToolCall?.({ id: callId, name, args });
2647
- hookObj.onToolResult?.({ id: callId, name, success: false, summary: 'read budget exhausted', result: '' });
2722
+ await emitToolCall({ id: callId, name, args });
2723
+ await emitToolResult({ id: callId, name, success: false, summary: 'read budget exhausted', result: '' });
2648
2724
  return { id: callId, content: `STOP: Read budget exhausted (${cumulativeReadOnlyCalls}/${READ_BUDGET_HARD} calls). Do NOT read more files. Use search_files or exec: grep -rn "pattern" path/ to find what you need.` };
2649
2725
  }
2650
2726
  // Fix 2: Directory scan detection — counts unique files per dir (re-reads are OK)
@@ -2659,8 +2735,8 @@ export async function createSession(opts) {
2659
2735
  blockedDirs.add(parentDir);
2660
2736
  }
2661
2737
  if (blockedDirs.has(parentDir) && uniqueCount > 8) {
2662
- hookObj.onToolCall?.({ id: callId, name, args });
2663
- hookObj.onToolResult?.({ id: callId, name, success: false, summary: 'dir scan blocked', result: '' });
2738
+ await emitToolCall({ id: callId, name, args });
2739
+ await emitToolResult({ id: callId, name, success: false, summary: 'dir scan blocked', result: '' });
2664
2740
  return { id: callId, content: `STOP: Directory scan detected — you've read ${uniqueCount} unique files from ${parentDir}/. Use search_files(pattern, '${parentDir}') or exec: grep -rn "pattern" ${parentDir}/ instead of reading files individually.` };
2665
2741
  }
2666
2742
  }
@@ -2671,8 +2747,8 @@ export async function createSession(opts) {
2671
2747
  searchTermFiles.set(key, new Set());
2672
2748
  searchTermFiles.get(key).add(filePath);
2673
2749
  if (searchTermFiles.get(key).size >= 3) {
2674
- hookObj.onToolCall?.({ id: callId, name, args });
2675
- hookObj.onToolResult?.({ id: callId, name, success: false, summary: 'use search_files', result: '' });
2750
+ await emitToolCall({ id: callId, name, args });
2751
+ await emitToolResult({ id: callId, name, success: false, summary: 'use search_files', result: '' });
2676
2752
  return { id: callId, content: `STOP: You've searched ${searchTermFiles.get(key).size} files for "${searchTerm}" one at a time. This is what search_files does in one call. Use: search_files(pattern="${searchTerm}", path=".") or exec: grep -rn "${searchTerm}" .` };
2677
2753
  }
2678
2754
  }
@@ -2694,12 +2770,12 @@ export async function createSession(opts) {
2694
2770
  // Notify via confirmProvider.showBlocked if available
2695
2771
  opts.confirmProvider?.showBlocked?.({ tool: name, args, reason: `plan mode: ${summary}` });
2696
2772
  // Hook: onToolCall + onToolResult for plan-blocked actions
2697
- hookObj.onToolCall?.({ id: callId, name, args });
2698
- hookObj.onToolResult?.({ id: callId, name, success: true, summary: `⏸ ${summary} (blocked)`, result: blockedMsg });
2773
+ await emitToolCall({ id: callId, name, args });
2774
+ await emitToolResult({ id: callId, name, success: true, summary: `⏸ ${summary} (blocked)`, result: blockedMsg });
2699
2775
  return { id: callId, content: blockedMsg };
2700
2776
  }
2701
2777
  // Hook: onToolCall (Phase 8.5)
2702
- hookObj.onToolCall?.({ id: callId, name, args });
2778
+ await emitToolCall({ id: callId, name, args });
2703
2779
  if (cfg.step_mode) {
2704
2780
  const stepPrompt = `Step mode: execute ${name}(${JSON.stringify(args).slice(0, 200)}) ? [Y/n]`;
2705
2781
  const ok = confirmBridge ? await confirmBridge(stepPrompt, { tool: name, args }) : true;
@@ -2728,7 +2804,7 @@ export async function createSession(opts) {
2728
2804
  // Successful exec clears blocked-loop counters.
2729
2805
  blockedExecAttemptsBySig.clear();
2730
2806
  const cmd = String(args?.command ?? '');
2731
- if (looksLikeReadOnlyExecCommand(cmd)) {
2807
+ if (looksLikeReadOnlyExecCommand(cmd) && readOnlyExecCacheable(content)) {
2732
2808
  execObservationCacheBySig.set(sig, content);
2733
2809
  }
2734
2810
  // Capture successful test runs for better partial-failure diagnostics.
@@ -2820,7 +2896,7 @@ export async function createSession(opts) {
2820
2896
  }
2821
2897
  catch { }
2822
2898
  }
2823
- hookObj.onToolResult?.(resultEvent);
2899
+ await emitToolResult(resultEvent);
2824
2900
  // Proactive LSP diagnostics after file mutations
2825
2901
  if (lspManager?.hasServers() && lspCfg?.proactive_diagnostics !== false) {
2826
2902
  if (FILE_MUTATION_TOOL_SET.has(name)) {
@@ -2848,7 +2924,7 @@ export async function createSession(opts) {
2848
2924
  };
2849
2925
  const results = [];
2850
2926
  // Helper: catch tool errors but re-throw AgentLoopBreak (those must break the outer loop)
2851
- const catchToolError = (e, tc) => {
2927
+ const catchToolError = async (e, tc) => {
2852
2928
  if (e instanceof AgentLoopBreak)
2853
2929
  throw e;
2854
2930
  const msg = e?.message ?? String(e);
@@ -2882,7 +2958,7 @@ export async function createSession(opts) {
2882
2958
  }
2883
2959
  // Hook: onToolResult for errors (Phase 8.5)
2884
2960
  const callId = resolveCallId(tc);
2885
- hookObj.onToolResult?.({ id: callId, name: tc.function.name, success: false, summary: msg || 'unknown error', result: `ERROR: ${msg || 'unknown error'}` });
2961
+ await emitToolResult({ id: callId, name: tc.function.name, success: false, summary: msg || 'unknown error', result: `ERROR: ${msg || 'unknown error'}` });
2886
2962
  // Never return undefined error text; it makes bench failures impossible to debug.
2887
2963
  return { id: callId, content: `ERROR: ${msg || 'unknown tool error'}` };
2888
2964
  };
@@ -2921,7 +2997,7 @@ export async function createSession(opts) {
2921
2997
  results.push(await runOne(tc));
2922
2998
  }
2923
2999
  catch (e) {
2924
- results.push(catchToolError(e, tc));
3000
+ results.push(await catchToolError(e, tc));
2925
3001
  }
2926
3002
  }
2927
3003
  }
@@ -2935,7 +3011,7 @@ export async function createSession(opts) {
2935
3011
  results.push(await runOne(tc));
2936
3012
  }
2937
3013
  catch (e) {
2938
- results.push(catchToolError(e, tc));
3014
+ results.push(await catchToolError(e, tc));
2939
3015
  }
2940
3016
  }
2941
3017
  }
@@ -2977,7 +3053,7 @@ export async function createSession(opts) {
2977
3053
  });
2978
3054
  }
2979
3055
  // Hook: onTurnEnd (Phase 8.5)
2980
- await hookObj.onTurnEnd?.({
3056
+ await emitTurnEnd({
2981
3057
  turn: turns,
2982
3058
  toolCalls,
2983
3059
  promptTokens: cumulativeUsage.prompt,
@@ -3020,7 +3096,7 @@ export async function createSession(opts) {
3020
3096
  `Original task:\n${clippedReminder}\n\n` +
3021
3097
  `Call the needed tools directly. If everything is truly complete, provide the final answer.`
3022
3098
  });
3023
- await hookObj.onTurnEnd?.({
3099
+ await emitTurnEnd({
3024
3100
  turn: turns,
3025
3101
  toolCalls,
3026
3102
  promptTokens: cumulativeUsage.prompt,
@@ -3040,7 +3116,7 @@ export async function createSession(opts) {
3040
3116
  role: 'user',
3041
3117
  content: '[system] Continue executing the task. Use tools now (do not just narrate plans). If complete, give the final answer.'
3042
3118
  });
3043
- await hookObj.onTurnEnd?.({
3119
+ await emitTurnEnd({
3044
3120
  turn: turns,
3045
3121
  toolCalls,
3046
3122
  promptTokens: cumulativeUsage.prompt,
@@ -3058,7 +3134,7 @@ export async function createSession(opts) {
3058
3134
  // final assistant message
3059
3135
  messages.push({ role: 'assistant', content: assistantText });
3060
3136
  await persistReviewArtifact(assistantText).catch(() => { });
3061
- await hookObj.onTurnEnd?.({
3137
+ await emitTurnEnd({
3062
3138
  turn: turns,
3063
3139
  toolCalls,
3064
3140
  promptTokens: cumulativeUsage.prompt,
@@ -3070,7 +3146,7 @@ export async function createSession(opts) {
3070
3146
  ppTps,
3071
3147
  tgTps,
3072
3148
  });
3073
- return { text: assistantText, turns, toolCalls };
3149
+ return await finalizeAsk(assistantText);
3074
3150
  }
3075
3151
  const reason = `max iterations exceeded (${maxIters})`;
3076
3152
  const diag = lastSuccessfulTestRun
@@ -3096,6 +3172,12 @@ export async function createSession(opts) {
3096
3172
  })();
3097
3173
  const err = new Error(`BUG: threw undefined in agent.ask() (turn=${turns}). lastMsg=${lastMsg?.role ?? 'unknown'}:${lastMsgPreview}`);
3098
3174
  await persistFailure(err, `ask turn ${turns}`);
3175
+ await hookManager.emit('ask_error', {
3176
+ askId,
3177
+ error: err.message,
3178
+ turns,
3179
+ toolCalls,
3180
+ });
3099
3181
  throw err;
3100
3182
  }
3101
3183
  await persistFailure(e, `ask turn ${turns}`);
@@ -3105,8 +3187,21 @@ export async function createSession(opts) {
3105
3187
  }
3106
3188
  // Never rethrow undefined; normalize to Error for debuggability.
3107
3189
  if (e === undefined) {
3108
- throw new Error('BUG: threw undefined (normalized at ask() boundary)');
3190
+ const normalized = new Error('BUG: threw undefined (normalized at ask() boundary)');
3191
+ await hookManager.emit('ask_error', {
3192
+ askId,
3193
+ error: normalized.message,
3194
+ turns,
3195
+ toolCalls,
3196
+ });
3197
+ throw normalized;
3109
3198
  }
3199
+ await hookManager.emit('ask_error', {
3200
+ askId,
3201
+ error: e instanceof Error ? e.message : String(e),
3202
+ turns,
3203
+ toolCalls,
3204
+ });
3110
3205
  throw e;
3111
3206
  }
3112
3207
  };
@@ -3153,6 +3248,7 @@ export async function createSession(opts) {
3153
3248
  replay,
3154
3249
  vault,
3155
3250
  lens,
3251
+ hookManager,
3156
3252
  get lastEditedPath() {
3157
3253
  return lastEditedPath;
3158
3254
  },