@visorcraft/idlehands 2.2.5 → 2.2.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39):
  1. package/dist/agent/capture.js +98 -0
  2. package/dist/agent/capture.js.map +1 -0
  3. package/dist/agent/client-pool.js +115 -0
  4. package/dist/agent/client-pool.js.map +1 -0
  5. package/dist/agent/conversation-branch.js +50 -0
  6. package/dist/agent/conversation-branch.js.map +1 -0
  7. package/dist/agent/tools-schema.js +16 -3
  8. package/dist/agent/tools-schema.js.map +1 -1
  9. package/dist/agent.js +300 -118
  10. package/dist/agent.js.map +1 -1
  11. package/dist/bot/basic-commands.js +8 -0
  12. package/dist/bot/basic-commands.js.map +1 -1
  13. package/dist/bot/budget-command.js +74 -0
  14. package/dist/bot/budget-command.js.map +1 -0
  15. package/dist/bot/capture-commands.js +82 -0
  16. package/dist/bot/capture-commands.js.map +1 -0
  17. package/dist/bot/command-logic.js +5 -0
  18. package/dist/bot/command-logic.js.map +1 -1
  19. package/dist/bot/commands.js +79 -1
  20. package/dist/bot/commands.js.map +1 -1
  21. package/dist/bot/cost-command.js +80 -0
  22. package/dist/bot/cost-command.js.map +1 -0
  23. package/dist/bot/diff-command.js +48 -0
  24. package/dist/bot/diff-command.js.map +1 -0
  25. package/dist/bot/discord-commands.js +32 -1
  26. package/dist/bot/discord-commands.js.map +1 -1
  27. package/dist/bot/rollback-command.js +33 -0
  28. package/dist/bot/rollback-command.js.map +1 -0
  29. package/dist/bot/telegram.js +8 -1
  30. package/dist/bot/telegram.js.map +1 -1
  31. package/dist/cli/commands/editing.js +11 -2
  32. package/dist/cli/commands/editing.js.map +1 -1
  33. package/dist/config.js +27 -0
  34. package/dist/config.js.map +1 -1
  35. package/dist/progress/turn-progress.js +203 -129
  36. package/dist/progress/turn-progress.js.map +1 -1
  37. package/dist/routing/hysteresis.js +69 -0
  38. package/dist/routing/hysteresis.js.map +1 -0
  39. package/package.json +1 -1
package/dist/agent.js CHANGED
@@ -16,6 +16,9 @@ import { PromptGuard } from './security/prompt-guard.js';
16
16
  import { ResponseCache } from './agent/response-cache.js';
17
17
  import { resilientCall } from './agent/resilient-provider.js';
18
18
  import { ToolLoopGuard } from './agent/tool-loop-guard.js';
19
+ import { CaptureManager } from './agent/capture.js';
20
+ import { ClientPool } from './agent/client-pool.js';
21
+ import { ConversationBranch } from './agent/conversation-branch.js';
19
22
  import { isLspTool, isMutationTool, isReadOnlyTool, planModeSummary } from './agent/tool-policy.js';
20
23
  import { buildToolsSchema } from './agent/tools-schema.js';
21
24
  import { OpenAIClient } from './client.js';
@@ -33,6 +36,7 @@ import { BASE_MAX_TOKENS, deriveContextWindow, deriveGenerationParams, supportsV
33
36
  import { ReplayStore } from './replay.js';
34
37
  import { checkExecSafety, checkPathSafety } from './safety.js';
35
38
  import { decideTurnRoute } from './routing/turn-router.js';
39
+ import { RouteHysteresis } from './routing/hysteresis.js';
36
40
  import { normalizeApprovalMode } from './shared/config-utils.js';
37
41
  import { collectSnapshot } from './sys/context.js';
38
42
  import { ToolError, ValidationError } from './tools/tool-error.js';
@@ -219,13 +223,19 @@ export async function createSession(opts) {
219
223
  const mcpHasEnabledTools = (mcpManager?.listTools().length ?? 0) > 0;
220
224
  const mcpLazySchemaMode = Boolean(mcpManager && mcpHasEnabledTools);
221
225
  let mcpToolsLoaded = !mcpLazySchemaMode;
222
- const getToolsSchema = () => buildToolsSchema({
226
+ const routeHysteresis = new RouteHysteresis({
227
+ minDwell: cfg.routing?.hysteresisMinDwell ?? 2,
228
+ enabled: cfg.routing?.hysteresis !== false,
229
+ });
230
+ const conversationBranch = new ConversationBranch();
231
+ const getToolsSchema = (slimFast) => buildToolsSchema({
223
232
  activeVaultTools,
224
233
  passiveVault: !activeVaultTools && vaultEnabled && vaultMode === 'passive',
225
234
  sysMode: cfg.mode === 'sys',
226
235
  lspTools: lspManager?.hasServers() === true,
227
236
  mcpTools: mcpToolsLoaded ? (mcpManager?.getEnabledToolSchemas() ?? []) : [],
228
237
  allowSpawnTask: spawnTaskEnabled,
238
+ slimFast,
229
239
  });
230
240
  const vault = vaultEnabled
231
241
  ? (opts.runtime?.vault ??
@@ -394,6 +404,27 @@ export async function createSession(opts) {
394
404
  console.warn(`[warn] sys-eager snapshot failed: ${e?.message ?? e}`);
395
405
  }
396
406
  }
407
+ const buildCompactSessionMeta = () => {
408
+ const caps = [];
409
+ if (vaultEnabled)
410
+ caps.push('vault');
411
+ if (lspManager?.hasServers())
412
+ caps.push('lsp');
413
+ if (mcpManager)
414
+ caps.push('mcp');
415
+ if (spawnTaskEnabled)
416
+ caps.push('subagents');
417
+ const lines = [
418
+ `[cwd: ${cfg.dir}]`,
419
+ `[harness: ${harness.id}]`,
420
+ '[fast-lane prelude: concise response by default; ask for details if needed.]',
421
+ caps.length ? `[optional capabilities: ${caps.join(', ')}]` : '',
422
+ ].filter(Boolean);
423
+ const maxChars = cfg.routing?.fastCompactPreludeMaxChars ?? 320;
424
+ const joined = lines.join('\n');
425
+ return joined.length > maxChars ? `${joined.slice(0, maxChars - 1)}…` : joined;
426
+ };
427
+ const compactSessionMeta = buildCompactSessionMeta();
397
428
  const defaultSystemPromptBase = SYSTEM_PROMPT;
398
429
  let activeSystemPromptBase = (cfg.system_prompt_override ?? '').trim() || defaultSystemPromptBase;
399
430
  let systemPromptOverridden = (cfg.system_prompt_override ?? '').trim().length > 0;
@@ -431,6 +462,8 @@ export async function createSession(opts) {
431
462
  lastEditedPath = undefined;
432
463
  initialConnectionProbeDone = false;
433
464
  mcpToolsLoaded = !mcpLazySchemaMode;
465
+ routeHysteresis.reset();
466
+ conversationBranch.reset();
434
467
  };
435
468
  const restore = (next) => {
436
469
  if (!Array.isArray(next) || next.length < 2) {
@@ -1058,6 +1091,7 @@ export async function createSession(opts) {
1058
1091
  const ppSamples = [];
1059
1092
  const tgSamples = [];
1060
1093
  let lastTurnMetrics;
1094
+ let lastTurnDebug;
1061
1095
  let lastServerHealth;
1062
1096
  let lastToolLoopStats = {
1063
1097
  totalHistory: 0,
@@ -1076,70 +1110,35 @@ export async function createSession(opts) {
1076
1110
  },
1077
1111
  };
1078
1112
  let lastModelsProbeMs = 0;
1079
- const capturesDir = path.join(stateDir(), 'captures');
1080
- let captureEnabled = false;
1081
- let capturePath;
1082
- let lastCaptureRecord = null;
1083
- const routedClients = new Map();
1084
- const probedEndpoints = new Set();
1113
+ const capture = new CaptureManager(stateDir());
1085
1114
  const normalizeEndpoint = (endpoint) => endpoint.trim().replace(/\/+$/, '');
1086
- const defaultCapturePath = () => {
1087
- const stamp = new Date().toISOString().replace(/[:.]/g, '-');
1088
- return path.join(capturesDir, `${stamp}.jsonl`);
1089
- };
1090
- const appendCaptureRecord = async (record, outPath) => {
1091
- await fs.mkdir(path.dirname(outPath), { recursive: true });
1092
- await fs.appendFile(outPath, JSON.stringify(record) + '\n', 'utf8');
1093
- };
1115
+ const clientPool = new ClientPool({
1116
+ primary: client,
1117
+ primaryEndpoint: cfg.endpoint,
1118
+ apiKey: opts.apiKey,
1119
+ cfg,
1120
+ capture,
1121
+ ClientCtor: OpenAIClient,
1122
+ });
1123
+ // Thin wrapper used by setEndpoint when primary client is replaced.
1094
1124
  const applyClientRuntimeOptions = (target) => {
1095
- if (typeof target.setVerbose === 'function') {
1125
+ if (typeof target.setVerbose === 'function')
1096
1126
  target.setVerbose(cfg.verbose);
1097
- }
1098
- if (typeof cfg.response_timeout === 'number' && cfg.response_timeout > 0) {
1127
+ if (typeof cfg.response_timeout === 'number' && cfg.response_timeout > 0)
1099
1128
  target.setResponseTimeout(cfg.response_timeout);
1100
- }
1101
- if (typeof target.setConnectionTimeout === 'function' &&
1102
- typeof cfg.connection_timeout === 'number' &&
1103
- cfg.connection_timeout > 0) {
1129
+ if (typeof target.setConnectionTimeout === 'function' && typeof cfg.connection_timeout === 'number' && cfg.connection_timeout > 0)
1104
1130
  target.setConnectionTimeout(cfg.connection_timeout);
1105
- }
1106
- if (typeof target.setInitialConnectionCheck === 'function' &&
1107
- typeof cfg.initial_connection_check === 'boolean') {
1131
+ if (typeof target.setInitialConnectionCheck === 'function' && typeof cfg.initial_connection_check === 'boolean')
1108
1132
  target.setInitialConnectionCheck(cfg.initial_connection_check);
1109
- }
1110
- if (typeof target.setInitialConnectionProbeTimeout === 'function' &&
1111
- typeof cfg.initial_connection_timeout === 'number' &&
1112
- cfg.initial_connection_timeout > 0) {
1133
+ if (typeof target.setInitialConnectionProbeTimeout === 'function' && typeof cfg.initial_connection_timeout === 'number' && cfg.initial_connection_timeout > 0)
1113
1134
  target.setInitialConnectionProbeTimeout(cfg.initial_connection_timeout);
1114
- }
1115
1135
  };
1116
1136
  const attachCaptureHook = (target) => {
1117
1137
  if (typeof target.setExchangeHook !== 'function')
1118
1138
  return;
1119
- target.setExchangeHook(async (record) => {
1120
- lastCaptureRecord = record;
1121
- if (!captureEnabled)
1122
- return;
1123
- const outFile = capturePath || defaultCapturePath();
1124
- capturePath = outFile;
1125
- await appendCaptureRecord(record, outFile);
1126
- });
1127
- };
1128
- const getClientForEndpoint = (endpoint) => {
1129
- if (!endpoint)
1130
- return client;
1131
- const normalized = normalizeEndpoint(endpoint);
1132
- if (!normalized || normalized === normalizeEndpoint(cfg.endpoint))
1133
- return client;
1134
- const existing = routedClients.get(normalized);
1135
- if (existing)
1136
- return existing;
1137
- const routed = new OpenAIClient(normalized, opts.apiKey, cfg.verbose);
1138
- applyClientRuntimeOptions(routed);
1139
- attachCaptureHook(routed);
1140
- routedClients.set(normalized, routed);
1141
- return routed;
1139
+ target.setExchangeHook(capture.createExchangeHook());
1142
1140
  };
1141
+ const getClientForEndpoint = (endpoint) => clientPool.getForEndpoint(endpoint);
1143
1142
  let runtimeRoutingModules = null;
1144
1143
  let runtimeRoutingUnavailable = false;
1145
1144
  let runtimeModelIdsCache = null;
@@ -1351,8 +1350,8 @@ export async function createSession(opts) {
1351
1350
  client = new OpenAIClient(normalized, opts.apiKey, cfg.verbose);
1352
1351
  }
1353
1352
  applyClientRuntimeOptions(client);
1354
- routedClients.clear();
1355
- probedEndpoints.clear();
1353
+ clientPool.setPrimary(client);
1354
+ clientPool.reset();
1356
1355
  wireCaptureHook();
1357
1356
  modelsList = normalizeModelsResponse(await client.models());
1358
1357
  const chosen = modelName?.trim()
@@ -1361,25 +1360,12 @@ export async function createSession(opts) {
1361
1360
  (await autoPickModel(client, modelsList)));
1362
1361
  setModel(chosen);
1363
1362
  };
1364
- const captureOn = async (filePath) => {
1365
- const target = filePath?.trim() ? path.resolve(filePath) : defaultCapturePath();
1366
- await fs.mkdir(path.dirname(target), { recursive: true });
1367
- await fs.appendFile(target, '', 'utf8');
1368
- captureEnabled = true;
1369
- capturePath = target;
1370
- return target;
1371
- };
1372
- const captureOff = () => {
1373
- captureEnabled = false;
1374
- };
1375
- const captureLast = async (filePath) => {
1376
- if (!lastCaptureRecord) {
1377
- throw new Error('No captured request/response pair is available yet.');
1378
- }
1379
- const target = filePath?.trim() ? path.resolve(filePath) : capturePath || defaultCapturePath();
1380
- await appendCaptureRecord(lastCaptureRecord, target);
1381
- return target;
1382
- };
1363
+ const captureOn = (filePath) => capture.on(filePath);
1364
+ const captureOff = () => capture.off();
1365
+ const captureSetRedact = (enabled) => capture.setRedact(enabled);
1366
+ const captureGetRedact = () => capture.getRedact();
1367
+ const captureOpen = () => capture.open();
1368
+ const captureLast = (filePath) => capture.last(filePath);
1383
1369
  const listMcpServers = () => {
1384
1370
  return mcpManager?.listServers() ?? [];
1385
1371
  };
@@ -1410,7 +1396,7 @@ export async function createSession(opts) {
1410
1396
  const close = async () => {
1411
1397
  await mcpManager?.close().catch(() => { });
1412
1398
  await lspManager?.close().catch(() => { });
1413
- routedClients.clear();
1399
+ await clientPool.closeAll();
1414
1400
  vault?.close();
1415
1401
  lens?.close();
1416
1402
  };
@@ -1511,16 +1497,38 @@ export async function createSession(opts) {
1511
1497
  : cfg.max_iterations;
1512
1498
  const wallStart = Date.now();
1513
1499
  const delegationForbiddenByUser = userDisallowsDelegation(instruction);
1500
+ const rawInstructionText = userContentToText(instruction).trim();
1501
+ // Route early so first-turn prelude/tool choices can adapt.
1502
+ const turnRoute = decideTurnRoute(cfg, rawInstructionText, model);
1503
+ // Apply hysteresis to suppress rapid lane thrashing in auto mode.
1504
+ const hysteresisResult = routeHysteresis.apply(turnRoute.selectedMode, turnRoute.selectedModeSource);
1505
+ if (hysteresisResult.suppressed) {
1506
+ // Override the selected mode with the hysteresis-stabilized lane.
1507
+ turnRoute.selectedMode = hysteresisResult.lane;
1508
+ turnRoute.selectedModeSource = 'hysteresis';
1509
+ }
1510
+ const routeFastByAuto = turnRoute.requestedMode === 'auto' &&
1511
+ turnRoute.selectedMode === 'fast' &&
1512
+ turnRoute.selectedModeSource !== 'override';
1513
+ const compactPreludeEnabled = cfg.routing?.fastCompactPrelude !== false;
1514
+ // Never use compact prelude when the harness injected format reminders
1515
+ // (e.g. tool_calls format for nemotron) — those are critical for correctness.
1516
+ const hasHarnessInjection = sessionMetaPending
1517
+ ? sessionMetaPending.includes('Use the tool_calls mechanism') ||
1518
+ sessionMetaPending.includes('[Format reminder]')
1519
+ : false;
1520
+ const useCompactPrelude = Boolean(sessionMetaPending && compactPreludeEnabled && routeFastByAuto && !hasHarnessInjection);
1514
1521
  // Prepend session meta to the first user instruction (§9b: variable context
1515
1522
  // goes in first user message, not system prompt, to preserve KV cache).
1516
1523
  // This avoids two consecutive user messages without an assistant response.
1517
1524
  let userContent = instruction;
1518
1525
  if (sessionMetaPending) {
1526
+ const prelude = useCompactPrelude ? compactSessionMeta : sessionMetaPending;
1519
1527
  if (typeof instruction === 'string') {
1520
- userContent = `${sessionMetaPending}\n\n${instruction}`;
1528
+ userContent = `${prelude}\n\n${instruction}`;
1521
1529
  }
1522
1530
  else {
1523
- userContent = [{ type: 'text', text: sessionMetaPending }, ...instruction];
1531
+ userContent = [{ type: 'text', text: prelude }, ...instruction];
1524
1532
  }
1525
1533
  sessionMetaPending = null;
1526
1534
  }
@@ -1559,6 +1567,8 @@ export async function createSession(opts) {
1559
1567
  // Vault search is best-effort; don't fail the turn
1560
1568
  }
1561
1569
  }
1570
+ // Save rollback checkpoint before this turn (captures pre-turn state).
1571
+ conversationBranch.checkpoint(messages.length, typeof instruction === 'string' ? instruction : '[multimodal]');
1562
1572
  messages.push({ role: 'user', content: userContent });
1563
1573
  const hookObj = typeof hooks === 'function' ? { onToken: hooks } : (hooks ?? {});
1564
1574
  let turns = 0;
@@ -1684,7 +1694,6 @@ export async function createSession(opts) {
1684
1694
  }
1685
1695
  return { text: finalText, turns, toolCalls };
1686
1696
  };
1687
- const rawInstructionText = userContentToText(instruction).trim();
1688
1697
  lastAskInstructionText = rawInstructionText;
1689
1698
  lastCompactionReminderObjective = '';
1690
1699
  if (hooksEnabled)
@@ -1699,7 +1708,7 @@ export async function createSession(opts) {
1699
1708
  await client.probeConnection();
1700
1709
  initialConnectionProbeDone = true;
1701
1710
  if (typeof client.getEndpoint === 'function') {
1702
- probedEndpoints.add(normalizeEndpoint(client.getEndpoint()));
1711
+ clientPool.markProbed(client.getEndpoint());
1703
1712
  }
1704
1713
  }
1705
1714
  }
@@ -1747,12 +1756,24 @@ export async function createSession(opts) {
1747
1756
  });
1748
1757
  return await finalizeAsk(miss);
1749
1758
  }
1750
- const turnRoute = decideTurnRoute(cfg, rawInstructionText, model);
1751
1759
  const primaryRoute = turnRoute.providerTargets[0];
1752
1760
  const runtimeModelIds = await loadRuntimeModelIds();
1753
1761
  const routeRuntimeFallbackModels = (primaryRoute?.fallbackModels ?? []).filter((m) => runtimeModelIds.has(m));
1754
- const routeApiFallbackModels = (primaryRoute?.fallbackModels ?? []).filter((m) => !runtimeModelIds.has(m));
1762
+ const apiProviderTargets = turnRoute.providerTargets.map((target) => ({
1763
+ ...target,
1764
+ fallbackModels: (target.fallbackModels ?? []).filter((m) => !runtimeModelIds.has(m)),
1765
+ }));
1766
+ const routeApiFallbackModels = apiProviderTargets[0]?.fallbackModels ?? [];
1755
1767
  const primaryUsesRuntimeModel = !!primaryRoute?.model && runtimeModelIds.has(primaryRoute.model);
1768
+ const fastLaneToolless = cfg.routing?.fastLaneToolless !== false &&
1769
+ routeFastByAuto &&
1770
+ turnRoute.classificationHint === 'fast';
1771
+ // Fast-lane slim tools: on subsequent turns of a fast-route ask, include only
1772
+ // read-only / lightweight tools to reduce per-turn token overhead (~40-50%).
1773
+ // Only active when the classifier explicitly said 'fast' (not heuristic/fallback).
1774
+ const fastLaneSlimTools = cfg.routing?.fastLaneSlimTools !== false &&
1775
+ routeFastByAuto &&
1776
+ turnRoute.classificationHint === 'fast';
1756
1777
  // Non-runtime route models can be selected directly in-session.
1757
1778
  if (!primaryUsesRuntimeModel && primaryRoute?.model && primaryRoute.model !== model) {
1758
1779
  setModel(primaryRoute.model);
@@ -1777,6 +1798,10 @@ export async function createSession(opts) {
1777
1798
  else if (routeApiFallbackModels.length) {
1778
1799
  routeParts.push(`api_fallbacks=${routeApiFallbackModels.join(',')}`);
1779
1800
  }
1801
+ if (useCompactPrelude)
1802
+ routeParts.push('compact_prelude=on');
1803
+ if (fastLaneToolless)
1804
+ routeParts.push('fast_toolless=on');
1780
1805
  console.error(`[routing] ${routeParts.join(' ')}`);
1781
1806
  }
1782
1807
  const persistReviewArtifact = async (finalText) => {
@@ -2176,10 +2201,38 @@ export async function createSession(opts) {
2176
2201
  let resp;
2177
2202
  try {
2178
2203
  try {
2179
- const toolsForTurn = cfg.no_tools || forceToollessRecoveryTurn
2204
+ // turns is 1-indexed (incremented at loop top), so first iteration = 1.
2205
+ const forceToollessByRouting = fastLaneToolless && turns === 1;
2206
+ // On fast-lane subsequent turns, slim the schema to read-only tools.
2207
+ const useSlimFast = !forceToollessByRouting && fastLaneSlimTools && turns > 1;
2208
+ const toolsForTurn = cfg.no_tools || forceToollessRecoveryTurn || forceToollessByRouting
2180
2209
  ? []
2181
- : getToolsSchema().filter((t) => !suppressedTools.has(t.function.name));
2182
- const toolChoiceForTurn = cfg.no_tools || forceToollessRecoveryTurn ? 'none' : 'auto';
2210
+ : getToolsSchema(useSlimFast).filter((t) => !suppressedTools.has(t.function.name));
2211
+ const toolChoiceForTurn = cfg.no_tools || forceToollessRecoveryTurn || forceToollessByRouting ? 'none' : 'auto';
2212
+ const promptBytesEstimate = Buffer.byteLength(JSON.stringify(messages), 'utf8');
2213
+ const toolSchemaBytesEstimate = toolsForTurn.length
2214
+ ? Buffer.byteLength(JSON.stringify(toolsForTurn), 'utf8')
2215
+ : 0;
2216
+ const toolSchemaTokenEstimate = estimateToolSchemaTokens(toolsForTurn);
2217
+ lastTurnDebug = {
2218
+ requestedMode: turnRoute.requestedMode,
2219
+ selectedMode: turnRoute.selectedMode,
2220
+ selectedModeSource: turnRoute.selectedModeSource,
2221
+ classificationHint: turnRoute.classificationHint,
2222
+ provider: primaryRoute?.name ?? 'default',
2223
+ model: primaryRoute?.model ?? model,
2224
+ runtimeRoute: primaryUsesRuntimeModel,
2225
+ compactPrelude: useCompactPrelude,
2226
+ fastLaneToolless,
2227
+ fastLaneSlimTools: useSlimFast,
2228
+ promptBytes: promptBytesEstimate,
2229
+ toolSchemaBytes: toolSchemaBytesEstimate,
2230
+ toolSchemaTokens: toolSchemaTokenEstimate,
2231
+ toolCount: toolsForTurn.length,
2232
+ };
2233
+ if (cfg.verbose) {
2234
+ console.error(`[turn-debug] prompt_bytes=${promptBytesEstimate} tools=${toolsForTurn.length} tool_schema_bytes=${toolSchemaBytesEstimate} tool_schema_tokens~=${toolSchemaTokenEstimate}`);
2235
+ }
2183
2236
  // ── Response cache: check for cached response ──────────────
2184
2237
  // Only cache tool-less turns (final answers, explanations) since
2185
2238
  // tool-calling turns have side effects that shouldn't be replayed.
@@ -2252,42 +2305,67 @@ export async function createSession(opts) {
2252
2305
  });
2253
2306
  }
2254
2307
  else {
2255
- const routeEndpoint = primaryRoute?.endpoint;
2256
- const activeClient = getClientForEndpoint(routeEndpoint);
2257
- const endpointKey = routeEndpoint ? normalizeEndpoint(routeEndpoint) : undefined;
2258
- if (endpointKey && !probedEndpoints.has(endpointKey)) {
2259
- if (typeof activeClient.probeConnection === 'function') {
2260
- try {
2261
- await activeClient.probeConnection();
2308
+ const isLikelyAuthError = (errMsg) => {
2309
+ const lower = errMsg.toLowerCase();
2310
+ return (lower.includes('refresh_token_reused') ||
2311
+ lower.includes('missing bearer') ||
2312
+ lower.includes('missing api key') ||
2313
+ lower.includes('invalid api key') ||
2314
+ lower.includes('authentication failed') ||
2315
+ lower.includes('unauthorized') ||
2316
+ lower.includes('forbidden') ||
2317
+ lower.includes('invalid token'));
2318
+ };
2319
+ const providerFailures = [];
2320
+ for (const target of apiProviderTargets.length
2321
+ ? apiProviderTargets
2322
+ : [{
2323
+ name: primaryRoute?.name ?? 'default',
2324
+ endpoint: primaryRoute?.endpoint,
2325
+ model: primaryRoute?.model ?? model,
2326
+ fallbackModels: routeApiFallbackModels,
2327
+ }]) {
2328
+ const routeEndpoint = target.endpoint;
2329
+ const activeClient = getClientForEndpoint(routeEndpoint);
2330
+ if (routeEndpoint) {
2331
+ await clientPool.probeIfNeeded(routeEndpoint);
2332
+ }
2333
+ const routeModel = target.model || model;
2334
+ const modelFallbackMap = {};
2335
+ if (target.fallbackModels?.length) {
2336
+ modelFallbackMap[routeModel] = target.fallbackModels;
2337
+ }
2338
+ try {
2339
+ resp = await resilientCall([
2340
+ {
2341
+ name: target.name ?? 'default',
2342
+ execute: (m) => activeClient.chatStream({ ...chatOptsBase, model: m }),
2343
+ },
2344
+ ], routeModel, {
2345
+ maxRetries: 0,
2346
+ modelFallbacks: modelFallbackMap,
2347
+ onRetry: (info) => {
2348
+ if (cfg.verbose) {
2349
+ console.error(`[routing] retry: provider=${info.provider} model=${info.model} attempt=${info.attempt}/${info.maxAttempts} reason=${info.reason}`);
2350
+ }
2351
+ },
2352
+ });
2353
+ break;
2354
+ }
2355
+ catch (providerErr) {
2356
+ const errMsg = String(providerErr?.message ?? providerErr ?? 'unknown error');
2357
+ const compactErr = errMsg.replace(/\s+/g, ' ').trim();
2358
+ providerFailures.push(`${target.name}: ${compactErr}`);
2359
+ if (cfg.verbose && isLikelyAuthError(errMsg)) {
2360
+ console.warn(`[routing] auth/provider failure on ${target.name}; trying next provider fallback`);
2262
2361
  }
2263
- catch {
2264
- // best-effort: if probe fails we still try the call
2362
+ if (isContextWindowExceededError(providerErr)) {
2363
+ throw providerErr;
2265
2364
  }
2266
- probedEndpoints.add(endpointKey);
2267
2365
  }
2268
2366
  }
2269
- const routeModel = primaryRoute?.model ?? model;
2270
- if (routeApiFallbackModels.length > 0) {
2271
- const modelFallbackMap = {
2272
- [routeModel]: routeApiFallbackModels,
2273
- };
2274
- resp = await resilientCall([
2275
- {
2276
- name: primaryRoute?.name ?? 'default',
2277
- execute: (m) => activeClient.chatStream({ ...chatOptsBase, model: m }),
2278
- },
2279
- ], routeModel, {
2280
- maxRetries: 1,
2281
- modelFallbacks: modelFallbackMap,
2282
- onRetry: (info) => {
2283
- if (cfg.verbose) {
2284
- console.error(`[routing] retry: provider=${info.provider} model=${info.model} attempt=${info.attempt}/${info.maxAttempts} reason=${info.reason}`);
2285
- }
2286
- },
2287
- });
2288
- }
2289
- else {
2290
- resp = await activeClient.chatStream({ ...chatOptsBase, model: routeModel });
2367
+ if (!resp) {
2368
+ throw new Error(`All routed providers failed for this turn. ${providerFailures.join(' | ')}`);
2291
2369
  }
2292
2370
  }
2293
2371
  } // end if (!resp) — cache miss path
@@ -3059,6 +3137,7 @@ export async function createSession(opts) {
3059
3137
  let content = '';
3060
3138
  let reusedCachedReadOnlyExec = false;
3061
3139
  let reusedCachedReadTool = false;
3140
+ let toolFallbackNote = null;
3062
3141
  if (name === 'exec' && repeatedReadOnlyExecSigs.has(sig)) {
3063
3142
  const cached = execObservationCacheBySig.get(sig);
3064
3143
  if (cached) {
@@ -3092,7 +3171,92 @@ export async function createSession(opts) {
3092
3171
  toolName: name,
3093
3172
  onToolStream: emitToolStream,
3094
3173
  };
3095
- const value = await builtInFn(callCtx, args);
3174
+ let value;
3175
+ try {
3176
+ value = await builtInFn(callCtx, args);
3177
+ }
3178
+ catch (err) {
3179
+ const msg = String(err?.message ?? err ?? '');
3180
+ // Fallback #1: edit_file mismatch -> targeted edit_range based on closest-match hint.
3181
+ const isEditMismatch = name === 'edit_file' && /edit_file:\s*old_text not found/i.test(msg);
3182
+ if (isEditMismatch && typeof args?.path === 'string') {
3183
+ const best = msg.match(/Closest match at line\s+(\d+)\s*\((\d+)% similarity\)/i);
3184
+ const bestLine = best ? Number.parseInt(best[1], 10) : NaN;
3185
+ const similarity = best ? Number.parseInt(best[2], 10) : NaN;
3186
+ const oldTextForRange = String(args?.old_text ?? '');
3187
+ const oldLineCount = Math.max(1, oldTextForRange.split(/\r?\n/).length);
3188
+ const endLine = Number.isFinite(bestLine)
3189
+ ? bestLine + oldLineCount - 1
3190
+ : Number.NaN;
3191
+ const editRangeFn = tools['edit_range'];
3192
+ if (editRangeFn &&
3193
+ Number.isFinite(bestLine) &&
3194
+ Number.isFinite(endLine) &&
3195
+ Number.isFinite(similarity) &&
3196
+ similarity >= 70) {
3197
+ const fallbackArgs = {
3198
+ path: args.path,
3199
+ start_line: bestLine,
3200
+ end_line: endLine,
3201
+ replacement: args.new_text,
3202
+ };
3203
+ if (cfg.verbose) {
3204
+ console.warn(`[edit_file] auto-fallback to edit_range at ${bestLine}-${endLine} (${similarity}% similarity)`);
3205
+ }
3206
+ value = await editRangeFn(callCtx, fallbackArgs);
3207
+ args = fallbackArgs;
3208
+ toolFallbackNote = 'auto edit_range fallback';
3209
+ }
3210
+ else {
3211
+ throw err;
3212
+ }
3213
+ }
3214
+ else {
3215
+ const isWriteRefusal = name === 'write_file' &&
3216
+ !args?.overwrite &&
3217
+ !args?.force &&
3218
+ /write_file:\s*refusing to overwrite existing non-empty file/i.test(msg);
3219
+ if (!isWriteRefusal)
3220
+ throw err;
3221
+ // Fallback #2 (preferred): rewrite existing file via edit_range first.
3222
+ const editRangeFn = tools['edit_range'];
3223
+ let usedEditRangeFallback = false;
3224
+ if (editRangeFn && typeof args?.path === 'string') {
3225
+ try {
3226
+ const absWritePath = args.path.startsWith('/')
3227
+ ? args.path
3228
+ : path.resolve(projectDir, args.path);
3229
+ const curText = await fs.readFile(absWritePath, 'utf8');
3230
+ const totalLines = Math.max(1, curText.split(/\r?\n/).length);
3231
+ const fallbackArgs = {
3232
+ path: args.path,
3233
+ start_line: 1,
3234
+ end_line: totalLines,
3235
+ replacement: args.content,
3236
+ };
3237
+ if (cfg.verbose) {
3238
+ console.warn(`[write_file] auto-fallback to edit_range for existing file (${totalLines} lines)`);
3239
+ }
3240
+ value = await editRangeFn(callCtx, fallbackArgs);
3241
+ args = fallbackArgs;
3242
+ toolFallbackNote = 'auto edit_range fallback';
3243
+ usedEditRangeFallback = true;
3244
+ }
3245
+ catch {
3246
+ // fall through to explicit overwrite retry below
3247
+ }
3248
+ }
3249
+ if (!usedEditRangeFallback) {
3250
+ const retryArgs = { ...args, overwrite: true };
3251
+ if (cfg.verbose) {
3252
+ console.warn('[write_file] auto-retrying with overwrite=true after explicit overwrite refusal');
3253
+ }
3254
+ value = await builtInFn(callCtx, retryArgs);
3255
+ args = retryArgs;
3256
+ toolFallbackNote = 'auto overwrite fallback';
3257
+ }
3258
+ }
3259
+ }
3096
3260
  content = typeof value === 'string' ? value : JSON.stringify(value);
3097
3261
  if (READ_FILE_CACHE_TOOLS.has(name) &&
3098
3262
  typeof content === 'string' &&
@@ -3178,6 +3342,9 @@ export async function createSession(opts) {
3178
3342
  let summary = reusedCachedReadOnlyExec
3179
3343
  ? 'cached read-only exec observation (unchanged)'
3180
3344
  : toolResultSummary(name, args, content, true);
3345
+ if (toolFallbackNote) {
3346
+ summary = `${summary} (${toolFallbackNote})`;
3347
+ }
3181
3348
  const resultEvent = {
3182
3349
  id: callId,
3183
3350
  name,
@@ -3751,6 +3918,15 @@ export async function createSession(opts) {
3751
3918
  return currentContextTokens > 0 ? currentContextTokens : estimateTokensFromMessages(messages);
3752
3919
  },
3753
3920
  ask,
3921
+ rollback: () => {
3922
+ const cp = conversationBranch.rollback();
3923
+ if (!cp)
3924
+ return null;
3925
+ const removed = messages.length - cp.messageCount;
3926
+ messages.length = cp.messageCount;
3927
+ return { preview: cp.preview, removedMessages: removed };
3928
+ },
3929
+ listCheckpoints: () => conversationBranch.list(),
3754
3930
  setModel,
3755
3931
  setEndpoint,
3756
3932
  listModels,
@@ -3763,8 +3939,11 @@ export async function createSession(opts) {
3763
3939
  captureOn,
3764
3940
  captureOff,
3765
3941
  captureLast,
3942
+ captureSetRedact,
3943
+ captureGetRedact,
3944
+ captureOpen,
3766
3945
  get capturePath() {
3767
- return capturePath;
3946
+ return capture.path;
3768
3947
  },
3769
3948
  getSystemPrompt: () => messages[0]?.role === 'system' ? String(messages[0].content) : activeSystemPromptBase,
3770
3949
  setSystemPrompt,
@@ -3791,6 +3970,9 @@ export async function createSession(opts) {
3791
3970
  get lastTurnMetrics() {
3792
3971
  return lastTurnMetrics;
3793
3972
  },
3973
+ get lastTurnDebug() {
3974
+ return lastTurnDebug;
3975
+ },
3794
3976
  get lastServerHealth() {
3795
3977
  return lastServerHealth;
3796
3978
  },