@visorcraft/idlehands 2.2.25 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/agent.js CHANGED
@@ -2,7 +2,7 @@ import fs from 'node:fs/promises';
2
2
  import path from 'node:path';
3
3
  import { DEFAULT_SUB_AGENT_RESULT_TOKEN_CAP, DEFAULT_SUB_AGENT_SYSTEM_PROMPT, MCP_TOOLS_REQUEST_TOKEN, } from './agent/constants.js';
4
4
  import { AgentLoopBreak } from './agent/errors.js';
5
- import { execRcShouldSignalFailure, looksLikeReadOnlyExecCommand, readOnlyExecCacheable, withCachedExecObservationHint, withReplayedExecHint, } from './agent/exec-helpers.js';
5
+ import { execRcShouldSignalFailure, looksLikeReadOnlyExecCommand, detectSedAsRead, extractGrepPattern, detectCatHeadTailAsRead, extractTestFilter, extractGrepTargetFile, extractLogFilePath, readOnlyExecCacheable, withCachedExecObservationHint, withReplayedExecHint, } from './agent/exec-helpers.js';
6
6
  import { generateMinimalDiff, toolResultSummary, execCommandFromSig, formatDurationMs, looksLikePlanningNarration, capTextByApproxTokens, sanitizePathsInMessage, digestToolResult, } from './agent/formatting.js';
7
7
  import { autoPickModel } from './agent/model-pick.js';
8
8
  import { reviewArtifactKeys, looksLikeCodeReviewRequest, looksLikeReviewRetrievalRequest, retrievalAllowsStaleArtifact, parseReviewArtifactStalePolicy, parseReviewArtifact, reviewArtifactStaleReason, gitHead, normalizeModelsResponse, } from './agent/review-artifact.js';
@@ -236,6 +236,7 @@ export async function createSession(opts) {
236
236
  lspTools: lspManager?.hasServers() === true,
237
237
  mcpTools: mcpToolsLoaded ? (mcpManager?.getEnabledToolSchemas() ?? []) : [],
238
238
  allowSpawnTask: spawnTaskEnabled,
239
+ maxReadLines: cfg.max_read_lines,
239
240
  slimFast,
240
241
  });
241
242
  const collectToolContext = () => {
@@ -1984,8 +1985,12 @@ export async function createSession(opts) {
1984
1985
  };
1985
1986
  // Read-only tool call budgets (§ anti-scan guardrails)
1986
1987
  const READ_ONLY_PER_TURN_CAP = 6;
1987
- const READ_BUDGET_WARN = 15;
1988
1988
  const READ_BUDGET_HARD = harness.quirks.readBudget ?? 20;
1989
+ // Warn before hard-stop; scales for smaller custom budgets (e.g. 12 for qwen3-coder).
1990
+ const READ_BUDGET_WARN = READ_BUDGET_HARD > 1 ? Math.max(1, Math.min(15, READ_BUDGET_HARD - 2)) : 0;
1991
+ // Only count file-reading tools toward the cumulative read budget.
1992
+ // search_files is intentionally excluded so the model can still narrow scope.
1993
+ const isBudgetedReadTool = (toolName) => toolName === 'read_file' || toolName === 'read_files' || toolName === 'list_dir';
1989
1994
  let cumulativeReadOnlyCalls = 0;
1990
1995
  // Directory scan detection: track unique file paths per parent dir.
1991
1996
  // Only counts distinct files (re-reads of the same file after editing are normal).
@@ -1993,6 +1998,20 @@ export async function createSession(opts) {
1993
1998
  const blockedDirs = new Set();
1994
1999
  // Same-search detection: track search= params across read_file calls
1995
2000
  const searchTermFiles = new Map(); // search term → set of file paths
2001
+ // Widening grep pattern detection: track grep patterns across exec calls
2002
+ const grepPatternPaths = new Map(); // grep pattern → set of paths searched
2003
+ // Analysis paralysis: track total tool calls vs edits to detect stalling
2004
+ let totalToolCallsThisAsk = 0;
2005
+ let totalEditsThisAsk = 0;
2006
+ let analysisParalysisWarned = false;
2007
+ // Same-file search_files repetition: track search_files calls per target file
2008
+ const searchFilesPerTarget = new Map(); // file path → search call count
2009
+ // Same-file grep repetition: track how many different grep patterns hit the same file
2010
+ const grepTargetFileCounts = new Map(); // file path → distinct grep call count
2011
+ // Log-tail spiral detection: track tail/grep calls on the same log file
2012
+ const logFileTailCounts = new Map(); // log file path → tail call count
2013
+ // Per-filter test run tracking: count test runs by filter name
2014
+ const testRunCountsByFilter = new Map(); // filter name → run count
1996
2015
  // identical tool call signature counts across this ask() run
1997
2016
  const sigCounts = new Map();
1998
2017
  const toolNameByCallId = new Map();
@@ -2857,6 +2876,7 @@ export async function createSession(opts) {
2857
2876
  lastEditedPath = absPath;
2858
2877
  mutationVersion++;
2859
2878
  suppressedTools.clear(); // file changed, re-enable all tools
2879
+ toolLoopGuard.invalidateFileContentCache(absPath);
2860
2880
  },
2861
2881
  });
2862
2882
  // Tool-call argument parsing and validation logic
@@ -3215,6 +3235,94 @@ export async function createSession(opts) {
3215
3235
  throw new Error(`exec: ${reason} — command: ${args.command}`);
3216
3236
  }
3217
3237
  }
3238
+ // ── Exec anti-pattern detection: sed-as-read and widening grep ──
3239
+ if (name === 'exec' && typeof args.command === 'string') {
3240
+ // Detect sed -n 'N,Mp' used as a substitute for read_file
3241
+ const sedRedirect = detectSedAsRead(args.command);
3242
+ if (sedRedirect) {
3243
+ await emitToolCall(callId, name, args);
3244
+ await emitToolResult({
3245
+ id: callId,
3246
+ name,
3247
+ success: false,
3248
+ summary: 'use read_file instead of sed',
3249
+ result: '',
3250
+ });
3251
+ return { id: callId, content: sedRedirect };
3252
+ }
3253
+ // Track widening grep patterns (same search string, expanding paths)
3254
+ const grepInfo = extractGrepPattern(args.command);
3255
+ if (grepInfo) {
3256
+ const key = grepInfo.pattern.toLowerCase();
3257
+ if (!grepPatternPaths.has(key))
3258
+ grepPatternPaths.set(key, new Set());
3259
+ for (const p of grepInfo.paths)
3260
+ grepPatternPaths.get(key).add(p);
3261
+ if (grepPatternPaths.get(key).size >= 3) {
3262
+ messages.push({
3263
+ role: 'user',
3264
+ content: `[system] You have searched for "${grepInfo.pattern}" across ${grepPatternPaths.get(key).size} different paths. ` +
3265
+ `Start with the broadest scope next time: search_files({ pattern: "${grepInfo.pattern}", path: "." })`,
3266
+ });
3267
+ }
3268
+ }
3269
+ // Detect cat/head/tail used as a substitute for read_file
3270
+ const catRedirect = detectCatHeadTailAsRead(args.command);
3271
+ if (catRedirect) {
3272
+ await emitToolCall(callId, name, args);
3273
+ await emitToolResult({
3274
+ id: callId,
3275
+ name,
3276
+ success: false,
3277
+ summary: 'use read_file instead',
3278
+ result: '',
3279
+ });
3280
+ return { id: callId, content: catRedirect };
3281
+ }
3282
+ // Log-tail spiral detection: track repeated tail/grep on same log file
3283
+ const logPath = extractLogFilePath(args.command);
3284
+ if (logPath) {
3285
+ const count = (logFileTailCounts.get(logPath) ?? 0) + 1;
3286
+ logFileTailCounts.set(logPath, count);
3287
+ if (count >= 4) {
3288
+ messages.push({
3289
+ role: 'user',
3290
+ content: `[system] You have read ${logPath} ${count} times. Stop tailing the log — ` +
3291
+ `review the error messages you already have and fix the root cause. ` +
3292
+ `If the error is unclear, read the relevant source file instead.`,
3293
+ });
3294
+ }
3295
+ }
3296
+ // Per-filter test run tracking
3297
+ const testFilter = extractTestFilter(args.command);
3298
+ if (testFilter) {
3299
+ const count = (testRunCountsByFilter.get(testFilter) ?? 0) + 1;
3300
+ testRunCountsByFilter.set(testFilter, count);
3301
+ if (count >= 5) {
3302
+ messages.push({
3303
+ role: 'user',
3304
+ content: `[system] You have run the test "${testFilter}" ${count} times. ` +
3305
+ `STOP re-running the same failing test. Step back, analyze the error message, ` +
3306
+ `and fix the root cause before running the test again.`,
3307
+ });
3308
+ }
3309
+ }
3310
+ // Same-file grep thrashing: track grep calls targeting a single file
3311
+ const grepTargetFile = extractGrepTargetFile(args.command);
3312
+ if (grepTargetFile) {
3313
+ const count = (grepTargetFileCounts.get(grepTargetFile) ?? 0) + 1;
3314
+ grepTargetFileCounts.set(grepTargetFile, count);
3315
+ if (count >= 4) {
3316
+ const basename = grepTargetFile.split('/').pop() || grepTargetFile;
3317
+ messages.push({
3318
+ role: 'user',
3319
+ content: `[system] You have run ${count} separate grep commands on ${basename}. ` +
3320
+ `STOP grepping the same file repeatedly. Use read_file to read the whole file once: ` +
3321
+ `read_file({ path: "${grepTargetFile}" })`,
3322
+ });
3323
+ }
3324
+ }
3325
+ }
3218
3326
  if (isMutationTool(name) && typeof args.path === 'string') {
3219
3327
  const absPath = args.path.startsWith('/')
3220
3328
  ? args.path
@@ -3236,9 +3344,25 @@ export async function createSession(opts) {
3236
3344
  }
3237
3345
  }
3238
3346
  // ── Anti-scan: read_file guardrails (Fix 1/2/3) ──
3239
- if (name === 'read_file' || name === 'read_files') {
3240
- const filePath = typeof args.path === 'string' ? args.path : '';
3241
- const searchTerm = typeof args.search === 'string' ? args.search : '';
3347
+ // Same-file search_files repetition: if searching the same file 4+ times, tell the model to stop
3348
+ if (name === 'search_files') {
3349
+ const searchPath = typeof args.path === 'string' ? args.path : '';
3350
+ // Only track when targeting a specific file (has extension), not a directory
3351
+ if (searchPath && searchPath.includes('.')) {
3352
+ const count = (searchFilesPerTarget.get(searchPath) ?? 0) + 1;
3353
+ searchFilesPerTarget.set(searchPath, count);
3354
+ if (count >= 4) {
3355
+ const basename = searchPath.split('/').pop() || searchPath;
3356
+ messages.push({
3357
+ role: 'user',
3358
+ content: `[system] You have called search_files on ${basename} ${count} times with different patterns. ` +
3359
+ `STOP searching this file repeatedly. You already have enough information from previous reads and searches. ` +
3360
+ `Proceed to make your edit or tell the user what you need.`,
3361
+ });
3362
+ }
3363
+ }
3364
+ }
3365
+ if (isBudgetedReadTool(name)) {
3242
3366
  // Fix 1: Hard cumulative budget — refuse reads once hard cap is reached.
3243
3367
  // Count only actual executed read-only calls (not cache replays), so this check
3244
3368
  // blocks the next call exactly at the configured cap.
@@ -3253,9 +3377,13 @@ export async function createSession(opts) {
3253
3377
  });
3254
3378
  return {
3255
3379
  id: callId,
3256
- content: `STOP: Read budget exhausted (${cumulativeReadOnlyCalls}/${READ_BUDGET_HARD} calls). Do NOT read more files. Use search_files or exec: grep -rn "pattern" path/ to find what you need.`,
3380
+ content: `STOP: Read budget exhausted (${cumulativeReadOnlyCalls}/${READ_BUDGET_HARD} calls). Do NOT read more files. Use search_files(pattern, path) to find what you need.`,
3257
3381
  };
3258
3382
  }
3383
+ }
3384
+ if (name === 'read_file' || name === 'read_files') {
3385
+ const filePath = typeof args.path === 'string' ? args.path : '';
3386
+ const searchTerm = typeof args.search === 'string' ? args.search : '';
3259
3387
  // Fix 2: Directory scan detection — counts unique files per dir (re-reads are OK)
3260
3388
  if (filePath) {
3261
3389
  const absFilePath = filePath.startsWith('/')
@@ -3280,7 +3408,7 @@ export async function createSession(opts) {
3280
3408
  });
3281
3409
  return {
3282
3410
  id: callId,
3283
- content: `STOP: Directory scan detected — you've read ${uniqueCount} unique files from ${parentDir}/. Use search_files(pattern, '${parentDir}') or exec: grep -rn "pattern" ${parentDir}/ instead of reading files individually.`,
3411
+ content: `STOP: Directory scan detected — you've read ${uniqueCount} unique files from ${parentDir}/. Use search_files(pattern, '${parentDir}') instead of reading files individually.`,
3284
3412
  };
3285
3413
  }
3286
3414
  }
@@ -3301,7 +3429,7 @@ export async function createSession(opts) {
3301
3429
  });
3302
3430
  return {
3303
3431
  id: callId,
3304
- content: `STOP: You've searched ${searchTermFiles.get(key).size} files for "${searchTerm}" one at a time. This is what search_files does in one call. Use: search_files(pattern="${searchTerm}", path=".") or exec: grep -rn "${searchTerm}" .`,
3432
+ content: `STOP: You've searched ${searchTermFiles.get(key).size} files for "${searchTerm}" one at a time. This is what search_files does in one call. Use: search_files(pattern="${searchTerm}", path=".")`,
3305
3433
  };
3306
3434
  }
3307
3435
  }
@@ -3374,6 +3502,16 @@ export async function createSession(opts) {
3374
3502
  reusedCachedReadOnlyExec = true; // skip re-execution below
3375
3503
  }
3376
3504
  }
3505
+ // Per-file content cache: catches non-consecutive re-reads of unchanged files.
3506
+ // This fires even when the consecutive-repeat detector misses (interleaved calls).
3507
+ if (name === 'read_file' && !reusedCachedReadOnlyExec && !reusedCachedReadTool) {
3508
+ const fileReplay = await toolLoopGuard.getFileContentCache(name, args, ctx.cwd);
3509
+ if (fileReplay) {
3510
+ content = fileReplay;
3511
+ reusedCachedReadTool = true;
3512
+ // Cache hit — do NOT count toward read budget (no new tokens consumed)
3513
+ }
3514
+ }
3377
3515
  if (READ_FILE_CACHE_TOOLS.has(name) && repeatedReadFileSigs.has(sig)) {
3378
3516
  const replay = await toolLoopGuard.getReadCacheReplay(name, args, ctx.cwd);
3379
3517
  if (replay) {
@@ -3485,6 +3623,11 @@ export async function createSession(opts) {
3485
3623
  const baseCwd = typeof args?.cwd === 'string' ? String(args.cwd) : ctx.cwd;
3486
3624
  await toolLoopGuard.storeReadCache(name, args, baseCwd, content);
3487
3625
  }
3626
+ // Store in per-file content cache for non-consecutive re-read detection
3627
+ if (name === 'read_file' && typeof content === 'string' && !content.startsWith('ERROR:')) {
3628
+ const baseCwd = typeof args?.cwd === 'string' ? String(args.cwd) : ctx.cwd;
3629
+ await toolLoopGuard.storeFileContentCache(name, args, baseCwd, content);
3630
+ }
3488
3631
  if (name === 'exec') {
3489
3632
  // Successful exec clears blocked-loop counters.
3490
3633
  blockedExecAttemptsBySig.clear();
@@ -3658,13 +3801,16 @@ export async function createSession(opts) {
3658
3801
  toolCallId: callId,
3659
3802
  result: content,
3660
3803
  });
3661
- // Count only actual read-only executions toward cumulative read budget.
3804
+ // Count only actual file-read executions toward cumulative read budget.
3662
3805
  // Cached/replayed read observations should not consume budget.
3663
- if (isReadOnlyToolDynamic(name) &&
3664
- !reusedCachedReadTool &&
3665
- !reusedCachedReadOnlyExec) {
3806
+ if (isBudgetedReadTool(name) && !reusedCachedReadTool && !reusedCachedReadOnlyExec) {
3666
3807
  cumulativeReadOnlyCalls += 1;
3667
3808
  }
3809
+ // Track total tool calls and edits for analysis paralysis detection
3810
+ totalToolCallsThisAsk++;
3811
+ if (isMutationTool(name) && toolSuccess) {
3812
+ totalEditsThisAsk++;
3813
+ }
3668
3814
  // ── Per-file mutation spiral detection ──
3669
3815
  // Track edits to the same file. If the model keeps editing the same file
3670
3816
  // over and over, it's likely in an edit→break→read→edit corruption spiral.
@@ -3825,7 +3971,7 @@ export async function createSession(opts) {
3825
3971
  const callId = resolveCallId(tc);
3826
3972
  results.push({
3827
3973
  id: callId,
3828
- content: `STOP: Per-turn read limit (${READ_ONLY_PER_TURN_CAP}). Use search_files or exec with grep instead of reading files one by one.`,
3974
+ content: `STOP: Per-turn read limit (${READ_ONLY_PER_TURN_CAP}). Use search_files(pattern, path) instead of reading files one by one.`,
3829
3975
  });
3830
3976
  }
3831
3977
  if (cfg.verbose) {
@@ -3941,14 +4087,28 @@ export async function createSession(opts) {
3941
4087
  // ── Escalating cumulative read budget (§ anti-scan guardrails) ──
3942
4088
  // Warn zone: append warnings to each read result when approaching the hard cap
3943
4089
  if (!readBudgetWarned &&
3944
- cumulativeReadOnlyCalls > READ_BUDGET_WARN &&
3945
- cumulativeReadOnlyCalls <= READ_BUDGET_HARD) {
4090
+ READ_BUDGET_WARN > 0 &&
4091
+ cumulativeReadOnlyCalls >= READ_BUDGET_WARN &&
4092
+ cumulativeReadOnlyCalls < READ_BUDGET_HARD) {
3946
4093
  readBudgetWarned = true;
3947
4094
  messages.push({
3948
4095
  role: 'user',
3949
4096
  content: `[system] Read budget: ${cumulativeReadOnlyCalls}/${READ_BUDGET_HARD}. Use search_files instead of reading files individually.`,
3950
4097
  });
3951
4098
  }
4099
+ // Analysis paralysis: if the model has made 25+ tool calls with zero edits,
4100
+ // it's stuck in a read/search loop and needs to be forced into action.
4101
+ if (!analysisParalysisWarned &&
4102
+ totalToolCallsThisAsk >= 25 &&
4103
+ totalEditsThisAsk === 0) {
4104
+ analysisParalysisWarned = true;
4105
+ messages.push({
4106
+ role: 'user',
4107
+ content: `[system] CRITICAL: You have made ${totalToolCallsThisAsk} tool calls without producing a single edit. ` +
4108
+ `You are stuck in analysis paralysis. STOP reading and searching. ` +
4109
+ `You have enough information. Make your edit NOW or explain to the user what is blocking you.`,
4110
+ });
4111
+ }
3952
4112
  // One bounded automatic repair attempt for invalid tool args.
3953
4113
  if (invalidArgsThisTurn && toolRepairAttempts < MAX_TOOL_REPAIR_ATTEMPTS) {
3954
4114
  toolRepairAttempts++;