@visorcraft/idlehands 2.3.2 → 2.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/agent.js CHANGED
@@ -2,13 +2,13 @@ import fs from 'node:fs/promises';
2
2
  import path from 'node:path';
3
3
  import { DEFAULT_SUB_AGENT_RESULT_TOKEN_CAP, DEFAULT_SUB_AGENT_SYSTEM_PROMPT, MCP_TOOLS_REQUEST_TOKEN, } from './agent/constants.js';
4
4
  import { AgentLoopBreak } from './agent/errors.js';
5
- import { execRcShouldSignalFailure, looksLikeReadOnlyExecCommand, detectSedAsRead, extractGrepPattern, detectCatHeadTailAsRead, extractTestFilter, extractGrepTargetFile, extractLogFilePath, readOnlyExecCacheable, withCachedExecObservationHint, withReplayedExecHint, } from './agent/exec-helpers.js';
5
+ import { execRcShouldSignalFailure, looksLikeReadOnlyExecCommand, detectSedAsRead, extractGrepPattern, detectCatHeadTailAsRead, extractFilePathFromReadCommand, extractTestFilter, extractGrepTargetFile, extractLogFilePath, readOnlyExecCacheable, withCachedExecObservationHint, withReplayedExecHint, } from './agent/exec-helpers.js';
6
6
  import { generateMinimalDiff, toolResultSummary, execCommandFromSig, formatDurationMs, looksLikePlanningNarration, capTextByApproxTokens, sanitizePathsInMessage, digestToolResult, } from './agent/formatting.js';
7
7
  import { autoPickModel } from './agent/model-pick.js';
8
8
  import { reviewArtifactKeys, looksLikeCodeReviewRequest, looksLikeReviewRetrievalRequest, retrievalAllowsStaleArtifact, parseReviewArtifactStalePolicy, parseReviewArtifact, reviewArtifactStaleReason, gitHead, normalizeModelsResponse, } from './agent/review-artifact.js';
9
9
  import { capApprovalMode, ensureInformativeAssistantText, isContextWindowExceededError, makeAbortController, userContentToText, userDisallowsDelegation, } from './agent/session-utils.js';
10
10
  import { buildSubAgentContextBlock, extractLensBody } from './agent/subagent-context.js';
11
- import { parseToolCallsFromContent, getMissingRequiredParams, getArgValidationIssues, stripMarkdownFences, parseJsonArgs, } from './agent/tool-calls.js';
11
+ import { parseToolCallsFromContent, getMissingRequiredParams, getArgValidationIssues, stripUnknownArgs, stripMarkdownFences, parseJsonArgs, } from './agent/tool-calls.js';
12
12
  import { resolveToolAlias } from './agent/tool-name-alias.js';
13
13
  import { buildDefaultSystemPrompt } from './agent/prompt-builder.js';
14
14
  import { LeakDetector } from './security/leak-detector.js';
@@ -25,7 +25,7 @@ import { OpenAIClient } from './client.js';
25
25
  import { loadProjectContext } from './context.js';
26
26
  import { loadGitContext, isGitDirty, stashWorkingTree } from './git.js';
27
27
  import { selectHarness } from './harnesses.js';
28
- import { enforceContextBudget, stripThinking, estimateTokensFromMessages, estimateToolSchemaTokens, } from './history.js';
28
+ import { enforceContextBudget, stripThinking, estimateTokensFromMessages, estimateToolSchemaTokens, rollingCompressToolResults, } from './history.js';
29
29
  import { truncateToolResultContent } from './agent/context-budget.js';
30
30
  import { HookManager, loadHookPlugins } from './hooks/index.js';
31
31
  import { projectIndexKeys, parseIndexMeta, isFreshIndex, indexSummaryLine } from './indexer.js';
@@ -2034,6 +2034,13 @@ export async function createSession(opts) {
2034
2034
  let repromptUsed = false;
2035
2035
  let readBudgetWarned = false;
2036
2036
  let noToolNudgeUsed = false;
2037
+ // ── Edited paths tracking (#4) ──
2038
+ // Track files that have been successfully edited, for compression of prior reads.
2039
+ const editedPaths = new Set();
2040
+ // ── Stagnation detection (#3) ──
2041
+ // Track turns where no novel action (new signature) occurs.
2042
+ let stagnantTurns = 0;
2043
+ let stagnationWarned = false;
2037
2044
  // ── Per-file mutation spiral detection ──
2038
2045
  // Track how many times the same file is mutated within a single ask().
2039
2046
  // When a file is edited too many times it usually means the model is in a
@@ -2081,7 +2088,8 @@ export async function createSession(opts) {
2081
2088
  });
2082
2089
  const toolLoopWarningKeys = new Set();
2083
2090
  let forceToollessRecoveryTurn = false;
2084
- let toollessRecoveryUsed = false;
2091
+ let toollessRecoveryCount = 0;
2092
+ const MAX_TOOLLESS_RECOVERIES = 3;
2085
2093
  const streamedToolCallPreviews = new Set();
2086
2094
  const streamedToolCallPreviewScores = new Map();
2087
2095
  // ── Security: credential leak detection + prompt injection guard ──
@@ -2222,7 +2230,22 @@ export async function createSession(opts) {
2222
2230
  await maybeAutoDetectModelChange();
2223
2231
  const compactionStartMs = Date.now();
2224
2232
  await runCompactionWithLock('auto context-budget compaction', async () => {
2225
- const beforeMsgs = messages;
2233
+ let beforeMsgs = messages;
2234
+ // Rolling compression: shrink old read_file/read_files/exec results
2235
+ const rolling = rollingCompressToolResults({
2236
+ messages: beforeMsgs,
2237
+ freshCount: cfg.rolling_compress_fresh_count ?? cfg.compact_min_tail ?? 12,
2238
+ maxChars: cfg.rolling_compress_max_chars ?? 1500,
2239
+ toolNameByCallId,
2240
+ toolArgsByCallId,
2241
+ editedPaths,
2242
+ });
2243
+ if (rolling.compressedCount > 0) {
2244
+ beforeMsgs = rolling.messages;
2245
+ if (cfg.verbose) {
2246
+ console.error(`[rolling-compress] ${rolling.compressedCount} results, ~${Math.ceil(rolling.charsSaved / 4)} tokens freed`);
2247
+ }
2248
+ }
2226
2249
  const beforeTokens = estimateTokensCached(beforeMsgs);
2227
2250
  const compacted = enforceContextBudget({
2228
2251
  messages: beforeMsgs,
@@ -2960,6 +2983,26 @@ export async function createSession(opts) {
2960
2983
  criticalLoopSigs.add(detected.signature);
2961
2984
  }
2962
2985
  }
2986
+ // ── Stagnation detection (#3): check for novel actions ──
2987
+ {
2988
+ const hasNovelAction = [...turnSigs].some(sig => !sigCounts.has(sig));
2989
+ if (hasNovelAction) {
2990
+ stagnantTurns = 0;
2991
+ stagnationWarned = false;
2992
+ }
2993
+ else {
2994
+ stagnantTurns++;
2995
+ }
2996
+ if (stagnantTurns >= 3 && totalToolCallsThisAsk >= 10 && !stagnationWarned) {
2997
+ stagnationWarned = true;
2998
+ messages.push({
2999
+ role: 'system',
3000
+ content: '[stagnation detected] You have repeated the same actions for 3 turns with no new progress. ' +
3001
+ 'STOP and reassess your approach. Try a different strategy, or if you are stuck, ' +
3002
+ 'summarize what you have tried and ask for guidance.',
3003
+ });
3004
+ }
3005
+ }
2963
3006
  // Track whether a mutation happened since a given signature was last seen.
2964
3007
  // (Tool-loop is single-threaded across turns; this is safe to keep in-memory.)
2965
3008
  for (const sig of turnSigs) {
@@ -3082,13 +3125,11 @@ export async function createSession(opts) {
3082
3125
  content: `[tool-loop critical] ${toolName} repeated ${consec}x unchanged. Tools disabled next turn; use existing results.`,
3083
3126
  });
3084
3127
  }
3085
- // At consec >= 3: poison the result (don't execute, return error).
3086
- // At consec >= 4: also suppress the tool from the schema entirely.
3128
+ // At consec >= 3: poison this specific signature (don't execute, return error).
3129
+ // The tool itself stays in the schema so the model can call it with
3130
+ // different arguments (e.g. read a different file or different offset).
3087
3131
  if (consec >= 3) {
3088
3132
  poisonedToolSigs.add(sig);
3089
- if (consec >= 4) {
3090
- suppressedTools.add(toolName);
3091
- }
3092
3133
  continue;
3093
3134
  }
3094
3135
  continue;
@@ -3132,13 +3173,45 @@ export async function createSession(opts) {
3132
3173
  // Update consecutive tracking: save this turn's signatures for next turn comparison.
3133
3174
  lastTurnSigs = turnSigs;
3134
3175
  if (shouldForceToollessRecovery) {
3135
- if (!toollessRecoveryUsed) {
3136
- console.error(`[tool-loop] Disabling tools for one recovery turn (turn=${turns})`);
3176
+ if (toollessRecoveryCount < MAX_TOOLLESS_RECOVERIES) {
3177
+ toollessRecoveryCount++;
3178
+ console.error(`[tool-loop] Recovery turn ${toollessRecoveryCount}/${MAX_TOOLLESS_RECOVERIES}` +
3179
+ ` \u2014 disabling tools (turn=${turns})`);
3137
3180
  forceToollessRecoveryTurn = true;
3138
- toollessRecoveryUsed = true;
3181
+ // Reset loop state so the model gets a genuine fresh start after reflection.
3182
+ // Without this, it immediately re-hits the same thresholds on the next turn.
3183
+ consecutiveCounts.clear();
3184
+ suppressedTools.clear();
3185
+ // Escalating recovery messages — more urgent with each attempt
3186
+ let recoveryContent;
3187
+ if (toollessRecoveryCount === 1) {
3188
+ recoveryContent =
3189
+ `[system] \u{1F6D1} Tool loop detected (recovery ${toollessRecoveryCount}/${MAX_TOOLLESS_RECOVERIES}). ` +
3190
+ `Tools are disabled for this turn. Before your next tool call, explain:\n` +
3191
+ `1. What you were trying to accomplish\n` +
3192
+ `2. Why your previous approach was not working\n` +
3193
+ `3. What different approach you will take next`;
3194
+ }
3195
+ else if (toollessRecoveryCount === 2) {
3196
+ recoveryContent =
3197
+ `[system] \u{1F6D1} Tool loop detected again (recovery ${toollessRecoveryCount}/${MAX_TOOLLESS_RECOVERIES}). ` +
3198
+ `You have already failed to break out of a loop once. ` +
3199
+ `You MUST take a fundamentally different approach:\n` +
3200
+ `- If you were editing a file repeatedly, try a completely different fix\n` +
3201
+ `- If you were reading the same file, use the content you already have\n` +
3202
+ `- If you were searching for something and not finding it, it may not exist\n` +
3203
+ `- Consider whether the task can be completed with what you already know`;
3204
+ }
3205
+ else {
3206
+ recoveryContent =
3207
+ `[system] \u{1F6D1} FINAL recovery attempt (${toollessRecoveryCount}/${MAX_TOOLLESS_RECOVERIES}). ` +
3208
+ `If you loop again, the session will be terminated.\n` +
3209
+ `Summarize what you know and either complete the task with what you have, ` +
3210
+ `or explain clearly what is blocking you so the user can intervene.`;
3211
+ }
3139
3212
  messages.push({
3140
3213
  role: 'user',
3141
- content: '[system] 🛑 Tool loop detected. Tools disabled for this turn. Analyze the situation using existing results and explain what went wrong before continuing.',
3214
+ content: recoveryContent,
3142
3215
  });
3143
3216
  await emitTurnEnd({
3144
3217
  turn: turns,
@@ -3154,8 +3227,10 @@ export async function createSession(opts) {
3154
3227
  });
3155
3228
  continue;
3156
3229
  }
3157
- console.error(`[tool-loop] Recovery failed model resumed looping after tools-disabled turn (turn=${turns})`);
3158
- throw new AgentLoopBreak('critical tool-loop persisted after one tools-disabled recovery turn. Stopping to avoid infinite loop.');
3230
+ console.error(`[tool-loop] Recovery failed \u2014 model resumed looping after ` +
3231
+ `${MAX_TOOLLESS_RECOVERIES} recovery turns (turn=${turns})`);
3232
+ throw new AgentLoopBreak(`critical tool-loop persisted after ${MAX_TOOLLESS_RECOVERIES} recovery turns. ` +
3233
+ `Stopping to avoid infinite loop.`);
3159
3234
  }
3160
3235
  const runOne = async (tc) => {
3161
3236
  // Resolve tool name aliases (bash→exec, file_read→read_file, etc.)
@@ -3214,10 +3289,23 @@ export async function createSession(opts) {
3214
3289
  toolLoopGuard.registerCall(name, parsedArgs, callId);
3215
3290
  toolArgsByCallId.set(callId, parsedArgs);
3216
3291
  recordToolUsageForHints(name, parsedArgs);
3292
+ // Auto-strip unknown parameters before validation (#1).
3293
+ // This prevents hard errors from unknown keys while still
3294
+ // reporting them as notes in the tool result.
3295
+ let strippedArgKeys = [];
3296
+ if (builtInFn || isSpawnTask) {
3297
+ const strip = stripUnknownArgs(name, args);
3298
+ if (strip.stripped.length > 0) {
3299
+ strippedArgKeys = strip.stripped;
3300
+ // Mutate args in-place so downstream code sees cleaned args
3301
+ for (const k of strip.stripped) {
3302
+ delete args[k];
3303
+ }
3304
+ }
3305
+ }
3217
3306
  // Pre-dispatch argument validation.
3218
3307
  // - Required params
3219
3308
  // - Type/range/enums
3220
- // - Unknown properties
3221
3309
  if (builtInFn || isSpawnTask) {
3222
3310
  const missing = getMissingRequiredParams(name, args);
3223
3311
  if (missing.length) {
@@ -3277,6 +3365,30 @@ export async function createSession(opts) {
3277
3365
  // Detect cat/head/tail used as a substitute for read_file
3278
3366
  const catRedirect = detectCatHeadTailAsRead(args.command);
3279
3367
  if (catRedirect) {
3368
+ // Before returning a bare STOP, check if we have cached content
3369
+ // for the target file. When read_file is poisoned for this path
3370
+ // (deadlock scenario), serve the cached content so the model can
3371
+ // make progress instead of looping on STOP messages.
3372
+ const catReadPath = extractFilePathFromReadCommand(args.command);
3373
+ if (catReadPath) {
3374
+ const cachedContent = await toolLoopGuard.getFileContentCache('read_file', { path: catReadPath }, ctx.cwd);
3375
+ if (cachedContent) {
3376
+ await emitToolCall(callId, name, args);
3377
+ await emitToolResult({
3378
+ id: callId,
3379
+ name,
3380
+ success: true,
3381
+ summary: 'served cached file content (read_file redirect)',
3382
+ result: '',
3383
+ });
3384
+ return {
3385
+ id: callId,
3386
+ content: '[system] Use read_file instead of shell commands for reading files. ' +
3387
+ 'Here is the cached content you already have:\n\n' +
3388
+ cachedContent,
3389
+ };
3390
+ }
3391
+ }
3280
3392
  await emitToolCall(callId, name, args);
3281
3393
  await emitToolResult({
3282
3394
  id: callId,
@@ -3777,6 +3889,10 @@ export async function createSession(opts) {
3777
3889
  if (!toolSuccess && name === 'exec' && typeof resultEvent.execRc === 'number') {
3778
3890
  resultEvent.summary = `rc=${resultEvent.execRc} (command failed)`;
3779
3891
  }
3892
+ // Append note about stripped unknown parameters (#1)
3893
+ if (strippedArgKeys.length > 0) {
3894
+ content += `\n[note: unknown parameters ${strippedArgKeys.join(", ")} were ignored]`;
3895
+ }
3780
3896
  await emitToolResult(resultEvent);
3781
3897
  // Proactive LSP diagnostics after file mutations
3782
3898
  if (lspManager?.hasServers() && lspCfg?.proactive_diagnostics !== false) {
@@ -3818,6 +3934,13 @@ export async function createSession(opts) {
3818
3934
  totalToolCallsThisAsk++;
3819
3935
  if (isMutationTool(name) && toolSuccess) {
3820
3936
  totalEditsThisAsk++;
3937
+ // Track edited paths for acted-on read compression (#4)
3938
+ if (typeof args.path === 'string') {
3939
+ const absEditedPath = args.path.startsWith('/')
3940
+ ? args.path
3941
+ : path.resolve(projectDir, args.path);
3942
+ editedPaths.add(absEditedPath);
3943
+ }
3821
3944
  }
3822
3945
  // ── Per-file mutation spiral detection ──
3823
3946
  // Track edits to the same file. If the model keeps editing the same file
@@ -3987,12 +4110,30 @@ export async function createSession(opts) {
3987
4110
  }
3988
4111
  }
3989
4112
  if (harness.toolCalls.parallelCalls) {
3990
- // Models that support parallel calls: read-only in parallel, mutations sequential
4113
+ // Models that support parallel calls: read-only in parallel, mutations by file (#7)
3991
4114
  const readonly = toolCallsArr.filter((tc) => isReadOnlyToolDynamic(tc.function.name));
3992
4115
  const others = toolCallsArr.filter((tc) => !isReadOnlyToolDynamic(tc.function.name));
3993
4116
  const ro = await Promise.all(readonly.map((tc) => runOne(tc).catch((e) => catchToolError(e, tc))));
3994
4117
  results.push(...ro);
4118
+ // Group mutations by target file path for parallel execution (#7).
4119
+ // Mutations targeting the same file run sequentially within their group.
4120
+ // Mutations targeting different files run in parallel.
4121
+ // Non-file-targeting tools (exec, apply_patch) run sequentially first.
4122
+ const sequential = [];
4123
+ const byFile = new Map();
3995
4124
  for (const tc of others) {
4125
+ const target = getMutationTargetPath(tc, projectDir);
4126
+ if (!target) {
4127
+ sequential.push(tc);
4128
+ }
4129
+ else {
4130
+ const group = byFile.get(target) ?? [];
4131
+ group.push(tc);
4132
+ byFile.set(target, group);
4133
+ }
4134
+ }
4135
+ // Run sequential group first (exec, apply_patch, etc.)
4136
+ for (const tc of sequential) {
3996
4137
  if (ac.signal.aborted)
3997
4138
  break;
3998
4139
  try {
@@ -4000,12 +4141,31 @@ export async function createSession(opts) {
4000
4141
  }
4001
4142
  catch (e) {
4002
4143
  results.push(await catchToolError(e, tc));
4003
- if (isMutationTool(tc.function.name)) {
4004
- // Fail-fast: after mutating tool failure, stop the remaining batch.
4144
+ if (isMutationTool(tc.function.name))
4005
4145
  break;
4006
- }
4007
4146
  }
4008
4147
  }
4148
+ // Run file-mutation groups in parallel
4149
+ if (byFile.size > 0 && !ac.signal.aborted) {
4150
+ const groupResults = await Promise.all([...byFile.values()].map(async (group) => {
4151
+ const groupRes = [];
4152
+ for (const tc of group) {
4153
+ if (ac.signal.aborted)
4154
+ break;
4155
+ try {
4156
+ groupRes.push(await runOne(tc));
4157
+ }
4158
+ catch (e) {
4159
+ groupRes.push(await catchToolError(e, tc));
4160
+ // Fail-fast within this file's group
4161
+ break;
4162
+ }
4163
+ }
4164
+ return groupRes;
4165
+ }));
4166
+ for (const gr of groupResults)
4167
+ results.push(...gr);
4168
+ }
4009
4169
  }
4010
4170
  else {
4011
4171
  // Models with parallelCalls=false: run ALL calls sequentially (§4i).
@@ -4408,6 +4568,27 @@ export async function createSession(opts) {
4408
4568
  compactHistory,
4409
4569
  };
4410
4570
  }
/**
 * Extract the absolute, normalized file path targeted by a mutation tool call (#7).
 *
 * The returned string is used as a grouping key: calls that yield the same key
 * are run sequentially, calls with different keys may run in parallel. The key
 * must therefore be canonical — two spellings of the same file
 * (`/proj/a.js`, `/proj/./a.js`, `a.js`) have to map to the same string.
 *
 * @param {{ function?: { name?: string, arguments?: string } }} tc
 *   Raw tool call; `function.arguments` is expected to be a JSON string.
 * @param {string} projectDir
 *   Directory that relative `path` arguments are resolved against.
 * @returns {string | null}
 *   Normalized absolute path, or `null` for `apply_patch` / `exec`, for
 *   unparsable arguments, and for calls without a non-empty string `path`.
 */
function getMutationTargetPath(tc, projectDir) {
    const name = tc.function?.name ?? '';
    // apply_patch can target multiple files and exec has no single target —
    // both are reported as "no target" so the caller keeps them sequential.
    if (name === 'apply_patch' || name === 'exec')
        return null;
    let raw;
    try {
        const args = JSON.parse(tc.function?.arguments ?? '{}');
        // `args` may legitimately parse to null or a primitive; treat that as "no path".
        raw = typeof args?.path === 'string' ? args.path.trim() : undefined;
    }
    catch {
        // Malformed JSON arguments: no reliable target, fall back to sequential handling.
        return null;
    }
    if (!raw)
        return null;
    // path.resolve ignores projectDir when `raw` is already absolute and always
    // normalizes `.` / `..` / duplicate separators, so absolute and relative
    // spellings of the same file collapse to one key.
    return path.resolve(projectDir, raw);
}
4411
4592
  export async function runAgent(opts) {
4412
4593
  const session = await createSession({
4413
4594
  config: opts.config,