@visorcraft/idlehands 2.2.4 → 2.2.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. package/dist/agent/capture.js +98 -0
  2. package/dist/agent/capture.js.map +1 -0
  3. package/dist/agent/client-pool.js +115 -0
  4. package/dist/agent/client-pool.js.map +1 -0
  5. package/dist/agent/conversation-branch.js +50 -0
  6. package/dist/agent/conversation-branch.js.map +1 -0
  7. package/dist/agent/tools-schema.js +16 -3
  8. package/dist/agent/tools-schema.js.map +1 -1
  9. package/dist/agent.js +465 -56
  10. package/dist/agent.js.map +1 -1
  11. package/dist/anton/verifier-utils.js +75 -3
  12. package/dist/anton/verifier-utils.js.map +1 -1
  13. package/dist/anton/verifier.js +2 -1
  14. package/dist/anton/verifier.js.map +1 -1
  15. package/dist/bot/anton-run.js +1 -0
  16. package/dist/bot/anton-run.js.map +1 -1
  17. package/dist/bot/basic-commands.js +10 -1
  18. package/dist/bot/basic-commands.js.map +1 -1
  19. package/dist/bot/budget-command.js +74 -0
  20. package/dist/bot/budget-command.js.map +1 -0
  21. package/dist/bot/capture-commands.js +82 -0
  22. package/dist/bot/capture-commands.js.map +1 -0
  23. package/dist/bot/command-logic.js +6 -1
  24. package/dist/bot/command-logic.js.map +1 -1
  25. package/dist/bot/commands.js +157 -1
  26. package/dist/bot/commands.js.map +1 -1
  27. package/dist/bot/cost-command.js +80 -0
  28. package/dist/bot/cost-command.js.map +1 -0
  29. package/dist/bot/diff-command.js +48 -0
  30. package/dist/bot/diff-command.js.map +1 -0
  31. package/dist/bot/discord-commands.js +152 -18
  32. package/dist/bot/discord-commands.js.map +1 -1
  33. package/dist/bot/discord.js +86 -6
  34. package/dist/bot/discord.js.map +1 -1
  35. package/dist/bot/rollback-command.js +33 -0
  36. package/dist/bot/rollback-command.js.map +1 -0
  37. package/dist/bot/runtime-model-picker.js +77 -0
  38. package/dist/bot/runtime-model-picker.js.map +1 -0
  39. package/dist/bot/session-settings.js +28 -30
  40. package/dist/bot/session-settings.js.map +1 -1
  41. package/dist/bot/telegram-commands.js +161 -36
  42. package/dist/bot/telegram-commands.js.map +1 -1
  43. package/dist/bot/telegram.js +13 -1
  44. package/dist/bot/telegram.js.map +1 -1
  45. package/dist/bot/ux/events.js.map +1 -1
  46. package/dist/bot/ux/progress-to-events.js +11 -0
  47. package/dist/bot/ux/progress-to-events.js.map +1 -1
  48. package/dist/cli/commands/anton.js +3 -0
  49. package/dist/cli/commands/anton.js.map +1 -1
  50. package/dist/cli/commands/editing.js +37 -2
  51. package/dist/cli/commands/editing.js.map +1 -1
  52. package/dist/cli/commands/session.js +1 -1
  53. package/dist/cli/commands/session.js.map +1 -1
  54. package/dist/config.js +161 -0
  55. package/dist/config.js.map +1 -1
  56. package/dist/progress/turn-progress.js +203 -129
  57. package/dist/progress/turn-progress.js.map +1 -1
  58. package/dist/routing/hysteresis.js +69 -0
  59. package/dist/routing/hysteresis.js.map +1 -0
  60. package/dist/routing/mode.js +32 -0
  61. package/dist/routing/mode.js.map +1 -0
  62. package/dist/routing/turn-router.js +128 -0
  63. package/dist/routing/turn-router.js.map +1 -0
  64. package/package.json +1 -1
package/dist/agent.js CHANGED
@@ -14,7 +14,11 @@ import { buildDefaultSystemPrompt } from './agent/prompt-builder.js';
14
14
  import { LeakDetector } from './security/leak-detector.js';
15
15
  import { PromptGuard } from './security/prompt-guard.js';
16
16
  import { ResponseCache } from './agent/response-cache.js';
17
+ import { resilientCall } from './agent/resilient-provider.js';
17
18
  import { ToolLoopGuard } from './agent/tool-loop-guard.js';
19
+ import { CaptureManager } from './agent/capture.js';
20
+ import { ClientPool } from './agent/client-pool.js';
21
+ import { ConversationBranch } from './agent/conversation-branch.js';
18
22
  import { isLspTool, isMutationTool, isReadOnlyTool, planModeSummary } from './agent/tool-policy.js';
19
23
  import { buildToolsSchema } from './agent/tools-schema.js';
20
24
  import { OpenAIClient } from './client.js';
@@ -31,6 +35,8 @@ import { MCPManager } from './mcp.js';
31
35
  import { BASE_MAX_TOKENS, deriveContextWindow, deriveGenerationParams, supportsVisionModel, } from './model-customization.js';
32
36
  import { ReplayStore } from './replay.js';
33
37
  import { checkExecSafety, checkPathSafety } from './safety.js';
38
+ import { decideTurnRoute } from './routing/turn-router.js';
39
+ import { RouteHysteresis } from './routing/hysteresis.js';
34
40
  import { normalizeApprovalMode } from './shared/config-utils.js';
35
41
  import { collectSnapshot } from './sys/context.js';
36
42
  import { ToolError, ValidationError } from './tools/tool-error.js';
@@ -217,13 +223,19 @@ export async function createSession(opts) {
217
223
  const mcpHasEnabledTools = (mcpManager?.listTools().length ?? 0) > 0;
218
224
  const mcpLazySchemaMode = Boolean(mcpManager && mcpHasEnabledTools);
219
225
  let mcpToolsLoaded = !mcpLazySchemaMode;
220
- const getToolsSchema = () => buildToolsSchema({
226
+ const routeHysteresis = new RouteHysteresis({
227
+ minDwell: cfg.routing?.hysteresisMinDwell ?? 2,
228
+ enabled: cfg.routing?.hysteresis !== false,
229
+ });
230
+ const conversationBranch = new ConversationBranch();
231
+ const getToolsSchema = (slimFast) => buildToolsSchema({
221
232
  activeVaultTools,
222
233
  passiveVault: !activeVaultTools && vaultEnabled && vaultMode === 'passive',
223
234
  sysMode: cfg.mode === 'sys',
224
235
  lspTools: lspManager?.hasServers() === true,
225
236
  mcpTools: mcpToolsLoaded ? (mcpManager?.getEnabledToolSchemas() ?? []) : [],
226
237
  allowSpawnTask: spawnTaskEnabled,
238
+ slimFast,
227
239
  });
228
240
  const vault = vaultEnabled
229
241
  ? (opts.runtime?.vault ??
@@ -392,6 +404,27 @@ export async function createSession(opts) {
392
404
  console.warn(`[warn] sys-eager snapshot failed: ${e?.message ?? e}`);
393
405
  }
394
406
  }
407
+ const buildCompactSessionMeta = () => {
408
+ const caps = [];
409
+ if (vaultEnabled)
410
+ caps.push('vault');
411
+ if (lspManager?.hasServers())
412
+ caps.push('lsp');
413
+ if (mcpManager)
414
+ caps.push('mcp');
415
+ if (spawnTaskEnabled)
416
+ caps.push('subagents');
417
+ const lines = [
418
+ `[cwd: ${cfg.dir}]`,
419
+ `[harness: ${harness.id}]`,
420
+ '[fast-lane prelude: concise response by default; ask for details if needed.]',
421
+ caps.length ? `[optional capabilities: ${caps.join(', ')}]` : '',
422
+ ].filter(Boolean);
423
+ const maxChars = cfg.routing?.fastCompactPreludeMaxChars ?? 320;
424
+ const joined = lines.join('\n');
425
+ return joined.length > maxChars ? `${joined.slice(0, maxChars - 1)}…` : joined;
426
+ };
427
+ const compactSessionMeta = buildCompactSessionMeta();
395
428
  const defaultSystemPromptBase = SYSTEM_PROMPT;
396
429
  let activeSystemPromptBase = (cfg.system_prompt_override ?? '').trim() || defaultSystemPromptBase;
397
430
  let systemPromptOverridden = (cfg.system_prompt_override ?? '').trim().length > 0;
@@ -429,6 +462,8 @@ export async function createSession(opts) {
429
462
  lastEditedPath = undefined;
430
463
  initialConnectionProbeDone = false;
431
464
  mcpToolsLoaded = !mcpLazySchemaMode;
465
+ routeHysteresis.reset();
466
+ conversationBranch.reset();
432
467
  };
433
468
  const restore = (next) => {
434
469
  if (!Array.isArray(next) || next.length < 2) {
@@ -1056,6 +1091,7 @@ export async function createSession(opts) {
1056
1091
  const ppSamples = [];
1057
1092
  const tgSamples = [];
1058
1093
  let lastTurnMetrics;
1094
+ let lastTurnDebug;
1059
1095
  let lastServerHealth;
1060
1096
  let lastToolLoopStats = {
1061
1097
  totalHistory: 0,
@@ -1074,29 +1110,114 @@ export async function createSession(opts) {
1074
1110
  },
1075
1111
  };
1076
1112
  let lastModelsProbeMs = 0;
1077
- const capturesDir = path.join(stateDir(), 'captures');
1078
- let captureEnabled = false;
1079
- let capturePath;
1080
- let lastCaptureRecord = null;
1081
- const defaultCapturePath = () => {
1082
- const stamp = new Date().toISOString().replace(/[:.]/g, '-');
1083
- return path.join(capturesDir, `${stamp}.jsonl`);
1113
+ const capture = new CaptureManager(stateDir());
1114
+ const normalizeEndpoint = (endpoint) => endpoint.trim().replace(/\/+$/, '');
1115
+ const clientPool = new ClientPool({
1116
+ primary: client,
1117
+ primaryEndpoint: cfg.endpoint,
1118
+ apiKey: opts.apiKey,
1119
+ cfg,
1120
+ capture,
1121
+ ClientCtor: OpenAIClient,
1122
+ });
1123
+ // Thin wrapper used by setEndpoint when primary client is replaced.
1124
+ const applyClientRuntimeOptions = (target) => {
1125
+ if (typeof target.setVerbose === 'function')
1126
+ target.setVerbose(cfg.verbose);
1127
+ if (typeof cfg.response_timeout === 'number' && cfg.response_timeout > 0)
1128
+ target.setResponseTimeout(cfg.response_timeout);
1129
+ if (typeof target.setConnectionTimeout === 'function' && typeof cfg.connection_timeout === 'number' && cfg.connection_timeout > 0)
1130
+ target.setConnectionTimeout(cfg.connection_timeout);
1131
+ if (typeof target.setInitialConnectionCheck === 'function' && typeof cfg.initial_connection_check === 'boolean')
1132
+ target.setInitialConnectionCheck(cfg.initial_connection_check);
1133
+ if (typeof target.setInitialConnectionProbeTimeout === 'function' && typeof cfg.initial_connection_timeout === 'number' && cfg.initial_connection_timeout > 0)
1134
+ target.setInitialConnectionProbeTimeout(cfg.initial_connection_timeout);
1084
1135
  };
1085
- const appendCaptureRecord = async (record, outPath) => {
1086
- await fs.mkdir(path.dirname(outPath), { recursive: true });
1087
- await fs.appendFile(outPath, JSON.stringify(record) + '\n', 'utf8');
1136
+ const attachCaptureHook = (target) => {
1137
+ if (typeof target.setExchangeHook !== 'function')
1138
+ return;
1139
+ target.setExchangeHook(capture.createExchangeHook());
1088
1140
  };
1089
- const wireCaptureHook = () => {
1090
- if (typeof client.setExchangeHook !== 'function')
1141
+ const getClientForEndpoint = (endpoint) => clientPool.getForEndpoint(endpoint);
1142
+ let runtimeRoutingModules = null;
1143
+ let runtimeRoutingUnavailable = false;
1144
+ let runtimeModelIdsCache = null;
1145
+ const loadRuntimeRoutingModules = async () => {
1146
+ if (runtimeRoutingUnavailable)
1147
+ return null;
1148
+ if (runtimeRoutingModules)
1149
+ return runtimeRoutingModules;
1150
+ try {
1151
+ const [planner, executor, store] = await Promise.all([
1152
+ import('./runtime/planner.js'),
1153
+ import('./runtime/executor.js'),
1154
+ import('./runtime/store.js'),
1155
+ ]);
1156
+ runtimeRoutingModules = { planner, executor, store };
1157
+ return runtimeRoutingModules;
1158
+ }
1159
+ catch {
1160
+ runtimeRoutingUnavailable = true;
1161
+ return null;
1162
+ }
1163
+ };
1164
+ const loadRuntimeModelIds = async () => {
1165
+ if (runtimeModelIdsCache)
1166
+ return runtimeModelIdsCache;
1167
+ const mods = await loadRuntimeRoutingModules();
1168
+ if (!mods) {
1169
+ runtimeModelIdsCache = new Set();
1170
+ return runtimeModelIdsCache;
1171
+ }
1172
+ try {
1173
+ const runtimes = await mods.store.loadRuntimes();
1174
+ runtimeModelIdsCache = new Set(runtimes.models.filter((m) => m.enabled !== false).map((m) => m.id));
1175
+ return runtimeModelIdsCache;
1176
+ }
1177
+ catch {
1178
+ runtimeModelIdsCache = new Set();
1179
+ return runtimeModelIdsCache;
1180
+ }
1181
+ };
1182
+ const ensureRuntimeModelActive = async (runtimeModelId) => {
1183
+ const mods = await loadRuntimeRoutingModules();
1184
+ if (!mods)
1185
+ throw new Error('Runtime routing is unavailable in this build/environment');
1186
+ const runtimes = await mods.store.loadRuntimes();
1187
+ runtimeModelIdsCache = new Set(runtimes.models.filter((m) => m.enabled !== false).map((m) => m.id));
1188
+ const modelExists = runtimes.models.some((m) => m.enabled !== false && m.id === runtimeModelId);
1189
+ if (!modelExists) {
1190
+ throw new Error(`Runtime model not found or disabled: ${runtimeModelId}`);
1191
+ }
1192
+ let active = await mods.executor.loadActiveRuntime();
1193
+ if (active?.healthy && active.modelId === runtimeModelId && active.endpoint) {
1194
+ if (normalizeEndpoint(active.endpoint) !== normalizeEndpoint(cfg.endpoint)) {
1195
+ await setEndpoint(active.endpoint);
1196
+ }
1091
1197
  return;
1092
- client.setExchangeHook(async (record) => {
1093
- lastCaptureRecord = record;
1094
- if (!captureEnabled)
1095
- return;
1096
- const target = capturePath || defaultCapturePath();
1097
- capturePath = target;
1098
- await appendCaptureRecord(record, target);
1099
- });
1198
+ }
1199
+ const planResult = mods.planner.plan({ modelId: runtimeModelId, mode: 'live' }, runtimes, active);
1200
+ if (!planResult.ok) {
1201
+ throw new Error(`Runtime switch plan failed for ${runtimeModelId}: ${planResult.reason}`);
1202
+ }
1203
+ if (!planResult.reuse) {
1204
+ const execResult = await mods.executor.execute(planResult, {
1205
+ confirm: async () => false,
1206
+ });
1207
+ if (!execResult.ok) {
1208
+ throw new Error(`Runtime switch failed for ${runtimeModelId}: ${execResult.error ?? 'unknown error'}`);
1209
+ }
1210
+ }
1211
+ active = await mods.executor.loadActiveRuntime();
1212
+ if (!active?.endpoint || active.healthy !== true) {
1213
+ throw new Error(`Runtime did not become healthy for ${runtimeModelId}`);
1214
+ }
1215
+ if (normalizeEndpoint(active.endpoint) !== normalizeEndpoint(cfg.endpoint)) {
1216
+ await setEndpoint(active.endpoint);
1217
+ }
1218
+ };
1219
+ const wireCaptureHook = () => {
1220
+ attachCaptureHook(client);
1100
1221
  };
1101
1222
  wireCaptureHook();
1102
1223
  const replayEnabled = cfg.trifecta?.enabled !== false && cfg.trifecta?.replay?.enabled !== false;
@@ -1228,9 +1349,9 @@ export async function createSession(opts) {
1228
1349
  else {
1229
1350
  client = new OpenAIClient(normalized, opts.apiKey, cfg.verbose);
1230
1351
  }
1231
- if (typeof client.setVerbose === 'function') {
1232
- client.setVerbose(cfg.verbose);
1233
- }
1352
+ applyClientRuntimeOptions(client);
1353
+ clientPool.setPrimary(client);
1354
+ clientPool.reset();
1234
1355
  wireCaptureHook();
1235
1356
  modelsList = normalizeModelsResponse(await client.models());
1236
1357
  const chosen = modelName?.trim()
@@ -1239,25 +1360,12 @@ export async function createSession(opts) {
1239
1360
  (await autoPickModel(client, modelsList)));
1240
1361
  setModel(chosen);
1241
1362
  };
1242
- const captureOn = async (filePath) => {
1243
- const target = filePath?.trim() ? path.resolve(filePath) : defaultCapturePath();
1244
- await fs.mkdir(path.dirname(target), { recursive: true });
1245
- await fs.appendFile(target, '', 'utf8');
1246
- captureEnabled = true;
1247
- capturePath = target;
1248
- return target;
1249
- };
1250
- const captureOff = () => {
1251
- captureEnabled = false;
1252
- };
1253
- const captureLast = async (filePath) => {
1254
- if (!lastCaptureRecord) {
1255
- throw new Error('No captured request/response pair is available yet.');
1256
- }
1257
- const target = filePath?.trim() ? path.resolve(filePath) : capturePath || defaultCapturePath();
1258
- await appendCaptureRecord(lastCaptureRecord, target);
1259
- return target;
1260
- };
1363
+ const captureOn = (filePath) => capture.on(filePath);
1364
+ const captureOff = () => capture.off();
1365
+ const captureSetRedact = (enabled) => capture.setRedact(enabled);
1366
+ const captureGetRedact = () => capture.getRedact();
1367
+ const captureOpen = () => capture.open();
1368
+ const captureLast = (filePath) => capture.last(filePath);
1261
1369
  const listMcpServers = () => {
1262
1370
  return mcpManager?.listServers() ?? [];
1263
1371
  };
@@ -1288,6 +1396,7 @@ export async function createSession(opts) {
1288
1396
  const close = async () => {
1289
1397
  await mcpManager?.close().catch(() => { });
1290
1398
  await lspManager?.close().catch(() => { });
1399
+ await clientPool.closeAll();
1291
1400
  vault?.close();
1292
1401
  lens?.close();
1293
1402
  };
@@ -1388,16 +1497,38 @@ export async function createSession(opts) {
1388
1497
  : cfg.max_iterations;
1389
1498
  const wallStart = Date.now();
1390
1499
  const delegationForbiddenByUser = userDisallowsDelegation(instruction);
1500
+ const rawInstructionText = userContentToText(instruction).trim();
1501
+ // Route early so first-turn prelude/tool choices can adapt.
1502
+ const turnRoute = decideTurnRoute(cfg, rawInstructionText, model);
1503
+ // Apply hysteresis to suppress rapid lane thrashing in auto mode.
1504
+ const hysteresisResult = routeHysteresis.apply(turnRoute.selectedMode, turnRoute.selectedModeSource);
1505
+ if (hysteresisResult.suppressed) {
1506
+ // Override the selected mode with the hysteresis-stabilized lane.
1507
+ turnRoute.selectedMode = hysteresisResult.lane;
1508
+ turnRoute.selectedModeSource = 'hysteresis';
1509
+ }
1510
+ const routeFastByAuto = turnRoute.requestedMode === 'auto' &&
1511
+ turnRoute.selectedMode === 'fast' &&
1512
+ turnRoute.selectedModeSource !== 'override';
1513
+ const compactPreludeEnabled = cfg.routing?.fastCompactPrelude !== false;
1514
+ // Never use compact prelude when the harness injected format reminders
1515
+ // (e.g. tool_calls format for nemotron) — those are critical for correctness.
1516
+ const hasHarnessInjection = sessionMetaPending
1517
+ ? sessionMetaPending.includes('Use the tool_calls mechanism') ||
1518
+ sessionMetaPending.includes('[Format reminder]')
1519
+ : false;
1520
+ const useCompactPrelude = Boolean(sessionMetaPending && compactPreludeEnabled && routeFastByAuto && !hasHarnessInjection);
1391
1521
  // Prepend session meta to the first user instruction (§9b: variable context
1392
1522
  // goes in first user message, not system prompt, to preserve KV cache).
1393
1523
  // This avoids two consecutive user messages without an assistant response.
1394
1524
  let userContent = instruction;
1395
1525
  if (sessionMetaPending) {
1526
+ const prelude = useCompactPrelude ? compactSessionMeta : sessionMetaPending;
1396
1527
  if (typeof instruction === 'string') {
1397
- userContent = `${sessionMetaPending}\n\n${instruction}`;
1528
+ userContent = `${prelude}\n\n${instruction}`;
1398
1529
  }
1399
1530
  else {
1400
- userContent = [{ type: 'text', text: sessionMetaPending }, ...instruction];
1531
+ userContent = [{ type: 'text', text: prelude }, ...instruction];
1401
1532
  }
1402
1533
  sessionMetaPending = null;
1403
1534
  }
@@ -1436,6 +1567,8 @@ export async function createSession(opts) {
1436
1567
  // Vault search is best-effort; don't fail the turn
1437
1568
  }
1438
1569
  }
1570
+ // Save rollback checkpoint before this turn (captures pre-turn state).
1571
+ conversationBranch.checkpoint(messages.length, typeof instruction === 'string' ? instruction : '[multimodal]');
1439
1572
  messages.push({ role: 'user', content: userContent });
1440
1573
  const hookObj = typeof hooks === 'function' ? { onToken: hooks } : (hooks ?? {});
1441
1574
  let turns = 0;
@@ -1561,7 +1694,6 @@ export async function createSession(opts) {
1561
1694
  }
1562
1695
  return { text: finalText, turns, toolCalls };
1563
1696
  };
1564
- const rawInstructionText = userContentToText(instruction).trim();
1565
1697
  lastAskInstructionText = rawInstructionText;
1566
1698
  lastCompactionReminderObjective = '';
1567
1699
  if (hooksEnabled)
@@ -1575,6 +1707,9 @@ export async function createSession(opts) {
1575
1707
  if (typeof client.probeConnection === 'function') {
1576
1708
  await client.probeConnection();
1577
1709
  initialConnectionProbeDone = true;
1710
+ if (typeof client.getEndpoint === 'function') {
1711
+ clientPool.markProbed(client.getEndpoint());
1712
+ }
1578
1713
  }
1579
1714
  }
1580
1715
  if (retrievalRequested) {
@@ -1621,6 +1756,54 @@ export async function createSession(opts) {
1621
1756
  });
1622
1757
  return await finalizeAsk(miss);
1623
1758
  }
1759
+ const primaryRoute = turnRoute.providerTargets[0];
1760
+ const runtimeModelIds = await loadRuntimeModelIds();
1761
+ const routeRuntimeFallbackModels = (primaryRoute?.fallbackModels ?? []).filter((m) => runtimeModelIds.has(m));
1762
+ const apiProviderTargets = turnRoute.providerTargets.map((target) => ({
1763
+ ...target,
1764
+ fallbackModels: (target.fallbackModels ?? []).filter((m) => !runtimeModelIds.has(m)),
1765
+ }));
1766
+ const routeApiFallbackModels = apiProviderTargets[0]?.fallbackModels ?? [];
1767
+ const primaryUsesRuntimeModel = !!primaryRoute?.model && runtimeModelIds.has(primaryRoute.model);
1768
+ const fastLaneToolless = cfg.routing?.fastLaneToolless !== false &&
1769
+ routeFastByAuto &&
1770
+ turnRoute.classificationHint === 'fast';
1771
+ // Fast-lane slim tools: on subsequent turns of a fast-route ask, include only
1772
+ // read-only / lightweight tools to reduce per-turn token overhead (~40-50%).
1773
+ // Only active when the classifier explicitly said 'fast' (not heuristic/fallback).
1774
+ const fastLaneSlimTools = cfg.routing?.fastLaneSlimTools !== false &&
1775
+ routeFastByAuto &&
1776
+ turnRoute.classificationHint === 'fast';
1777
+ // Non-runtime route models can be selected directly in-session.
1778
+ if (!primaryUsesRuntimeModel && primaryRoute?.model && primaryRoute.model !== model) {
1779
+ setModel(primaryRoute.model);
1780
+ }
1781
+ if (cfg.verbose) {
1782
+ const routeParts = [
1783
+ `requested=${turnRoute.requestedMode}`,
1784
+ `selected=${turnRoute.selectedMode}`,
1785
+ `source=${turnRoute.selectedModeSource}`,
1786
+ `hint=${turnRoute.classificationHint ?? 'none'}`,
1787
+ `provider=${primaryRoute?.name ?? 'default'}`,
1788
+ `model=${primaryRoute?.model ?? model}`,
1789
+ ];
1790
+ if (turnRoute.heuristicDecision)
1791
+ routeParts.push(`heuristic=${turnRoute.heuristicDecision}`);
1792
+ if (primaryUsesRuntimeModel) {
1793
+ const runtimeChain = [primaryRoute?.model, ...routeRuntimeFallbackModels]
1794
+ .filter(Boolean)
1795
+ .join(' -> ');
1796
+ routeParts.push(`runtime_chain=${runtimeChain || 'none'}`);
1797
+ }
1798
+ else if (routeApiFallbackModels.length) {
1799
+ routeParts.push(`api_fallbacks=${routeApiFallbackModels.join(',')}`);
1800
+ }
1801
+ if (useCompactPrelude)
1802
+ routeParts.push('compact_prelude=on');
1803
+ if (fastLaneToolless)
1804
+ routeParts.push('fast_toolless=on');
1805
+ console.error(`[routing] ${routeParts.join(' ')}`);
1806
+ }
1624
1807
  const persistReviewArtifact = async (finalText) => {
1625
1808
  if (!vault || !shouldPersistReviewArtifact)
1626
1809
  return;
@@ -2018,10 +2201,38 @@ export async function createSession(opts) {
2018
2201
  let resp;
2019
2202
  try {
2020
2203
  try {
2021
- const toolsForTurn = cfg.no_tools || forceToollessRecoveryTurn
2204
+ // turns is 1-indexed (incremented at loop top), so first iteration = 1.
2205
+ const forceToollessByRouting = fastLaneToolless && turns === 1;
2206
+ // On fast-lane subsequent turns, slim the schema to read-only tools.
2207
+ const useSlimFast = !forceToollessByRouting && fastLaneSlimTools && turns > 1;
2208
+ const toolsForTurn = cfg.no_tools || forceToollessRecoveryTurn || forceToollessByRouting
2022
2209
  ? []
2023
- : getToolsSchema().filter((t) => !suppressedTools.has(t.function.name));
2024
- const toolChoiceForTurn = cfg.no_tools || forceToollessRecoveryTurn ? 'none' : 'auto';
2210
+ : getToolsSchema(useSlimFast).filter((t) => !suppressedTools.has(t.function.name));
2211
+ const toolChoiceForTurn = cfg.no_tools || forceToollessRecoveryTurn || forceToollessByRouting ? 'none' : 'auto';
2212
+ const promptBytesEstimate = Buffer.byteLength(JSON.stringify(messages), 'utf8');
2213
+ const toolSchemaBytesEstimate = toolsForTurn.length
2214
+ ? Buffer.byteLength(JSON.stringify(toolsForTurn), 'utf8')
2215
+ : 0;
2216
+ const toolSchemaTokenEstimate = estimateToolSchemaTokens(toolsForTurn);
2217
+ lastTurnDebug = {
2218
+ requestedMode: turnRoute.requestedMode,
2219
+ selectedMode: turnRoute.selectedMode,
2220
+ selectedModeSource: turnRoute.selectedModeSource,
2221
+ classificationHint: turnRoute.classificationHint,
2222
+ provider: primaryRoute?.name ?? 'default',
2223
+ model: primaryRoute?.model ?? model,
2224
+ runtimeRoute: primaryUsesRuntimeModel,
2225
+ compactPrelude: useCompactPrelude,
2226
+ fastLaneToolless,
2227
+ fastLaneSlimTools: useSlimFast,
2228
+ promptBytes: promptBytesEstimate,
2229
+ toolSchemaBytes: toolSchemaBytesEstimate,
2230
+ toolSchemaTokens: toolSchemaTokenEstimate,
2231
+ toolCount: toolsForTurn.length,
2232
+ };
2233
+ if (cfg.verbose) {
2234
+ console.error(`[turn-debug] prompt_bytes=${promptBytesEstimate} tools=${toolsForTurn.length} tool_schema_bytes=${toolSchemaBytesEstimate} tool_schema_tokens~=${toolSchemaTokenEstimate}`);
2235
+ }
2025
2236
  // ── Response cache: check for cached response ──────────────
2026
2237
  // Only cache tool-less turns (final answers, explanations) since
2027
2238
  // tool-calling turns have side effects that shouldn't be replayed.
@@ -2045,8 +2256,7 @@ export async function createSession(opts) {
2045
2256
  }
2046
2257
  }
2047
2258
  if (!resp) {
2048
- resp = await client.chatStream({
2049
- model,
2259
+ const chatOptsBase = {
2050
2260
  messages,
2051
2261
  tools: toolsForTurn,
2052
2262
  tool_choice: toolChoiceForTurn,
@@ -2055,9 +2265,10 @@ export async function createSession(opts) {
2055
2265
  max_tokens: maxTokens,
2056
2266
  extra: {
2057
2267
  cache_prompt: cfg.cache_prompt ?? true,
2058
- // Speculative decoding: draft model params for llama-server
2059
2268
  ...(cfg.draft_model ? { draft_model: cfg.draft_model } : {}),
2060
- ...(cfg.draft_n ? { speculative: { n: cfg.draft_n, p_min: cfg.draft_p_min ?? 0.5 } } : {}),
2269
+ ...(cfg.draft_n
2270
+ ? { speculative: { n: cfg.draft_n, p_min: cfg.draft_p_min ?? 0.5 } }
2271
+ : {}),
2061
2272
  ...(frequencyPenalty && { frequency_penalty: frequencyPenalty }),
2062
2273
  ...(presencePenalty && { presence_penalty: presencePenalty }),
2063
2274
  },
@@ -2065,7 +2276,98 @@ export async function createSession(opts) {
2065
2276
  requestId: `r${reqCounter}`,
2066
2277
  onToken: hookObj.onToken,
2067
2278
  onFirstDelta,
2068
- });
2279
+ };
2280
+ if (primaryUsesRuntimeModel && primaryRoute?.model) {
2281
+ // Runtime-native routing: lane model/fallbacks reference runtime model IDs.
2282
+ const runtimePrimaryModel = primaryRoute.model;
2283
+ const runtimeFallbackMap = {};
2284
+ if (routeRuntimeFallbackModels.length > 0) {
2285
+ runtimeFallbackMap[runtimePrimaryModel] = routeRuntimeFallbackModels;
2286
+ }
2287
+ resp = await resilientCall([
2288
+ {
2289
+ name: 'runtime-router',
2290
+ execute: async (runtimeModelId) => {
2291
+ await ensureRuntimeModelActive(runtimeModelId);
2292
+ const runtimeClient = getClientForEndpoint();
2293
+ const runtimeModel = model;
2294
+ return runtimeClient.chatStream({ ...chatOptsBase, model: runtimeModel });
2295
+ },
2296
+ },
2297
+ ], runtimePrimaryModel, {
2298
+ maxRetries: 0,
2299
+ modelFallbacks: runtimeFallbackMap,
2300
+ onRetry: (info) => {
2301
+ if (cfg.verbose) {
2302
+ console.error(`[routing] runtime-fallback: model=${info.model} attempt=${info.attempt}/${info.maxAttempts} reason=${info.reason}`);
2303
+ }
2304
+ },
2305
+ });
2306
+ }
2307
+ else {
2308
+ const isLikelyAuthError = (errMsg) => {
2309
+ const lower = errMsg.toLowerCase();
2310
+ return (lower.includes('refresh_token_reused') ||
2311
+ lower.includes('missing bearer') ||
2312
+ lower.includes('missing api key') ||
2313
+ lower.includes('invalid api key') ||
2314
+ lower.includes('authentication failed') ||
2315
+ lower.includes('unauthorized') ||
2316
+ lower.includes('forbidden') ||
2317
+ lower.includes('invalid token'));
2318
+ };
2319
+ const providerFailures = [];
2320
+ for (const target of apiProviderTargets.length
2321
+ ? apiProviderTargets
2322
+ : [{
2323
+ name: primaryRoute?.name ?? 'default',
2324
+ endpoint: primaryRoute?.endpoint,
2325
+ model: primaryRoute?.model ?? model,
2326
+ fallbackModels: routeApiFallbackModels,
2327
+ }]) {
2328
+ const routeEndpoint = target.endpoint;
2329
+ const activeClient = getClientForEndpoint(routeEndpoint);
2330
+ if (routeEndpoint) {
2331
+ await clientPool.probeIfNeeded(routeEndpoint);
2332
+ }
2333
+ const routeModel = target.model || model;
2334
+ const modelFallbackMap = {};
2335
+ if (target.fallbackModels?.length) {
2336
+ modelFallbackMap[routeModel] = target.fallbackModels;
2337
+ }
2338
+ try {
2339
+ resp = await resilientCall([
2340
+ {
2341
+ name: target.name ?? 'default',
2342
+ execute: (m) => activeClient.chatStream({ ...chatOptsBase, model: m }),
2343
+ },
2344
+ ], routeModel, {
2345
+ maxRetries: 0,
2346
+ modelFallbacks: modelFallbackMap,
2347
+ onRetry: (info) => {
2348
+ if (cfg.verbose) {
2349
+ console.error(`[routing] retry: provider=${info.provider} model=${info.model} attempt=${info.attempt}/${info.maxAttempts} reason=${info.reason}`);
2350
+ }
2351
+ },
2352
+ });
2353
+ break;
2354
+ }
2355
+ catch (providerErr) {
2356
+ const errMsg = String(providerErr?.message ?? providerErr ?? 'unknown error');
2357
+ const compactErr = errMsg.replace(/\s+/g, ' ').trim();
2358
+ providerFailures.push(`${target.name}: ${compactErr}`);
2359
+ if (cfg.verbose && isLikelyAuthError(errMsg)) {
2360
+ console.warn(`[routing] auth/provider failure on ${target.name}; trying next provider fallback`);
2361
+ }
2362
+ if (isContextWindowExceededError(providerErr)) {
2363
+ throw providerErr;
2364
+ }
2365
+ }
2366
+ }
2367
+ if (!resp) {
2368
+ throw new Error(`All routed providers failed for this turn. ${providerFailures.join(' | ')}`);
2369
+ }
2370
+ }
2069
2371
  } // end if (!resp) — cache miss path
2070
2372
  // Successful response resets overflow recovery budget.
2071
2373
  overflowCompactionAttempts = 0;
@@ -2835,6 +3137,7 @@ export async function createSession(opts) {
2835
3137
  let content = '';
2836
3138
  let reusedCachedReadOnlyExec = false;
2837
3139
  let reusedCachedReadTool = false;
3140
+ let toolFallbackNote = null;
2838
3141
  if (name === 'exec' && repeatedReadOnlyExecSigs.has(sig)) {
2839
3142
  const cached = execObservationCacheBySig.get(sig);
2840
3143
  if (cached) {
@@ -2868,7 +3171,92 @@ export async function createSession(opts) {
2868
3171
  toolName: name,
2869
3172
  onToolStream: emitToolStream,
2870
3173
  };
2871
- const value = await builtInFn(callCtx, args);
3174
+ let value;
3175
+ try {
3176
+ value = await builtInFn(callCtx, args);
3177
+ }
3178
+ catch (err) {
3179
+ const msg = String(err?.message ?? err ?? '');
3180
+ // Fallback #1: edit_file mismatch -> targeted edit_range based on closest-match hint.
3181
+ const isEditMismatch = name === 'edit_file' && /edit_file:\s*old_text not found/i.test(msg);
3182
+ if (isEditMismatch && typeof args?.path === 'string') {
3183
+ const best = msg.match(/Closest match at line\s+(\d+)\s*\((\d+)% similarity\)/i);
3184
+ const bestLine = best ? Number.parseInt(best[1], 10) : NaN;
3185
+ const similarity = best ? Number.parseInt(best[2], 10) : NaN;
3186
+ const oldTextForRange = String(args?.old_text ?? '');
3187
+ const oldLineCount = Math.max(1, oldTextForRange.split(/\r?\n/).length);
3188
+ const endLine = Number.isFinite(bestLine)
3189
+ ? bestLine + oldLineCount - 1
3190
+ : Number.NaN;
3191
+ const editRangeFn = tools['edit_range'];
3192
+ if (editRangeFn &&
3193
+ Number.isFinite(bestLine) &&
3194
+ Number.isFinite(endLine) &&
3195
+ Number.isFinite(similarity) &&
3196
+ similarity >= 70) {
3197
+ const fallbackArgs = {
3198
+ path: args.path,
3199
+ start_line: bestLine,
3200
+ end_line: endLine,
3201
+ replacement: args.new_text,
3202
+ };
3203
+ if (cfg.verbose) {
3204
+ console.warn(`[edit_file] auto-fallback to edit_range at ${bestLine}-${endLine} (${similarity}% similarity)`);
3205
+ }
3206
+ value = await editRangeFn(callCtx, fallbackArgs);
3207
+ args = fallbackArgs;
3208
+ toolFallbackNote = 'auto edit_range fallback';
3209
+ }
3210
+ else {
3211
+ throw err;
3212
+ }
3213
+ }
3214
+ else {
3215
+ const isWriteRefusal = name === 'write_file' &&
3216
+ !args?.overwrite &&
3217
+ !args?.force &&
3218
+ /write_file:\s*refusing to overwrite existing non-empty file/i.test(msg);
3219
+ if (!isWriteRefusal)
3220
+ throw err;
3221
+ // Fallback #2 (preferred): rewrite existing file via edit_range first.
3222
+ const editRangeFn = tools['edit_range'];
3223
+ let usedEditRangeFallback = false;
3224
+ if (editRangeFn && typeof args?.path === 'string') {
3225
+ try {
3226
+ const absWritePath = args.path.startsWith('/')
3227
+ ? args.path
3228
+ : path.resolve(projectDir, args.path);
3229
+ const curText = await fs.readFile(absWritePath, 'utf8');
3230
+ const totalLines = Math.max(1, curText.split(/\r?\n/).length);
3231
+ const fallbackArgs = {
3232
+ path: args.path,
3233
+ start_line: 1,
3234
+ end_line: totalLines,
3235
+ replacement: args.content,
3236
+ };
3237
+ if (cfg.verbose) {
3238
+ console.warn(`[write_file] auto-fallback to edit_range for existing file (${totalLines} lines)`);
3239
+ }
3240
+ value = await editRangeFn(callCtx, fallbackArgs);
3241
+ args = fallbackArgs;
3242
+ toolFallbackNote = 'auto edit_range fallback';
3243
+ usedEditRangeFallback = true;
3244
+ }
3245
+ catch {
3246
+ // fall through to explicit overwrite retry below
3247
+ }
3248
+ }
3249
+ if (!usedEditRangeFallback) {
3250
+ const retryArgs = { ...args, overwrite: true };
3251
+ if (cfg.verbose) {
3252
+ console.warn('[write_file] auto-retrying with overwrite=true after explicit overwrite refusal');
3253
+ }
3254
+ value = await builtInFn(callCtx, retryArgs);
3255
+ args = retryArgs;
3256
+ toolFallbackNote = 'auto overwrite fallback';
3257
+ }
3258
+ }
3259
+ }
2872
3260
  content = typeof value === 'string' ? value : JSON.stringify(value);
2873
3261
  if (READ_FILE_CACHE_TOOLS.has(name) &&
2874
3262
  typeof content === 'string' &&
@@ -2954,6 +3342,9 @@ export async function createSession(opts) {
2954
3342
  let summary = reusedCachedReadOnlyExec
2955
3343
  ? 'cached read-only exec observation (unchanged)'
2956
3344
  : toolResultSummary(name, args, content, true);
3345
+ if (toolFallbackNote) {
3346
+ summary = `${summary} (${toolFallbackNote})`;
3347
+ }
2957
3348
  const resultEvent = {
2958
3349
  id: callId,
2959
3350
  name,
@@ -3527,17 +3918,32 @@ export async function createSession(opts) {
3527
3918
  return currentContextTokens > 0 ? currentContextTokens : estimateTokensFromMessages(messages);
3528
3919
  },
3529
3920
  ask,
3921
+ rollback: () => {
3922
+ const cp = conversationBranch.rollback();
3923
+ if (!cp)
3924
+ return null;
3925
+ const removed = messages.length - cp.messageCount;
3926
+ messages.length = cp.messageCount;
3927
+ return { preview: cp.preview, removedMessages: removed };
3928
+ },
3929
+ listCheckpoints: () => conversationBranch.list(), // Pass-through: returns whatever conversationBranch.list() yields, unmodified.
3530
3930
  setModel,
3531
3931
  setEndpoint,
3532
3932
  listModels,
3533
3933
  refreshServerHealth,
3534
3934
  getPerfSummary,
3535
3935
  getToolLoopStats: () => lastToolLoopStats, // Returns the current value of `lastToolLoopStats` from the enclosing scope (no copy is made).
3936
+ get lastAskInstructionText() { // Accessor: re-reads the enclosing-scope `lastAskInstructionText` on every property access.
3937
+ return lastAskInstructionText;
3938
+ },
3536
3939
  captureOn,
3537
3940
  captureOff,
3538
3941
  captureLast,
3942
+ captureSetRedact,
3943
+ captureGetRedact,
3944
+ captureOpen,
3539
3945
  get capturePath() { // Accessor for the capture path; this diff switches it from the standalone `capturePath` variable to the `path` property of `capture`.
3540
- return capturePath;
3946
+ return capture.path;
3541
3947
  },
3542
3948
  getSystemPrompt: () => messages[0]?.role === 'system' ? String(messages[0].content) : activeSystemPromptBase, // Stringified content of messages[0] when its role is 'system'; otherwise falls back to activeSystemPromptBase.
3543
3949
  setSystemPrompt,
@@ -3564,6 +3970,9 @@ export async function createSession(opts) {
3564
3970
  get lastTurnMetrics() { // Accessor: re-reads the enclosing-scope `lastTurnMetrics` on every property access.
3565
3971
  return lastTurnMetrics;
3566
3972
  },
3973
+ get lastTurnDebug() { // Accessor: re-reads the enclosing-scope `lastTurnDebug` on every property access.
3974
+ return lastTurnDebug;
3975
+ },
3567
3976
  get lastServerHealth() { // Accessor: re-reads the enclosing-scope `lastServerHealth` on every property access.
3568
3977
  return lastServerHealth;
3569
3978
  },