@visorcraft/idlehands 2.2.25 → 2.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent/exec-helpers.js +203 -0
- package/dist/agent/exec-helpers.js.map +1 -1
- package/dist/agent/prompt-builder.js +7 -2
- package/dist/agent/prompt-builder.js.map +1 -1
- package/dist/agent/tool-loop-guard.js +77 -0
- package/dist/agent/tool-loop-guard.js.map +1 -1
- package/dist/agent/tools-schema.js +4 -2
- package/dist/agent/tools-schema.js.map +1 -1
- package/dist/agent.js +175 -15
- package/dist/agent.js.map +1 -1
- package/dist/anton/controller.js +50 -29
- package/dist/anton/controller.js.map +1 -1
- package/dist/anton/reporter.js +15 -0
- package/dist/anton/reporter.js.map +1 -1
- package/dist/bot/anton-run.js +3 -3
- package/dist/bot/anton-run.js.map +1 -1
- package/dist/cli/commands/anton.js +3 -3
- package/dist/cli/commands/anton.js.map +1 -1
- package/dist/harnesses.js +2 -2
- package/dist/harnesses.js.map +1 -1
- package/dist/runtime/planner.js +11 -0
- package/dist/runtime/planner.js.map +1 -1
- package/package.json +1 -1
package/dist/agent.js
CHANGED
|
@@ -2,7 +2,7 @@ import fs from 'node:fs/promises';
|
|
|
2
2
|
import path from 'node:path';
|
|
3
3
|
import { DEFAULT_SUB_AGENT_RESULT_TOKEN_CAP, DEFAULT_SUB_AGENT_SYSTEM_PROMPT, MCP_TOOLS_REQUEST_TOKEN, } from './agent/constants.js';
|
|
4
4
|
import { AgentLoopBreak } from './agent/errors.js';
|
|
5
|
-
import { execRcShouldSignalFailure, looksLikeReadOnlyExecCommand, readOnlyExecCacheable, withCachedExecObservationHint, withReplayedExecHint, } from './agent/exec-helpers.js';
|
|
5
|
+
import { execRcShouldSignalFailure, looksLikeReadOnlyExecCommand, detectSedAsRead, extractGrepPattern, detectCatHeadTailAsRead, extractTestFilter, extractGrepTargetFile, extractLogFilePath, readOnlyExecCacheable, withCachedExecObservationHint, withReplayedExecHint, } from './agent/exec-helpers.js';
|
|
6
6
|
import { generateMinimalDiff, toolResultSummary, execCommandFromSig, formatDurationMs, looksLikePlanningNarration, capTextByApproxTokens, sanitizePathsInMessage, digestToolResult, } from './agent/formatting.js';
|
|
7
7
|
import { autoPickModel } from './agent/model-pick.js';
|
|
8
8
|
import { reviewArtifactKeys, looksLikeCodeReviewRequest, looksLikeReviewRetrievalRequest, retrievalAllowsStaleArtifact, parseReviewArtifactStalePolicy, parseReviewArtifact, reviewArtifactStaleReason, gitHead, normalizeModelsResponse, } from './agent/review-artifact.js';
|
|
@@ -236,6 +236,7 @@ export async function createSession(opts) {
|
|
|
236
236
|
lspTools: lspManager?.hasServers() === true,
|
|
237
237
|
mcpTools: mcpToolsLoaded ? (mcpManager?.getEnabledToolSchemas() ?? []) : [],
|
|
238
238
|
allowSpawnTask: spawnTaskEnabled,
|
|
239
|
+
maxReadLines: cfg.max_read_lines,
|
|
239
240
|
slimFast,
|
|
240
241
|
});
|
|
241
242
|
const collectToolContext = () => {
|
|
@@ -1984,8 +1985,12 @@ export async function createSession(opts) {
|
|
|
1984
1985
|
};
|
|
1985
1986
|
// Read-only tool call budgets (§ anti-scan guardrails)
|
|
1986
1987
|
const READ_ONLY_PER_TURN_CAP = 6;
|
|
1987
|
-
const READ_BUDGET_WARN = 15;
|
|
1988
1988
|
const READ_BUDGET_HARD = harness.quirks.readBudget ?? 20;
|
|
1989
|
+
// Warn before hard-stop; scales for smaller custom budgets (e.g. 12 for qwen3-coder).
|
|
1990
|
+
const READ_BUDGET_WARN = READ_BUDGET_HARD > 1 ? Math.max(1, Math.min(15, READ_BUDGET_HARD - 2)) : 0;
|
|
1991
|
+
// Only count file-reading tools toward the cumulative read budget.
|
|
1992
|
+
// search_files is intentionally excluded so the model can still narrow scope.
|
|
1993
|
+
/**
 * Report whether a tool's executions are charged against the cumulative read budget.
 *
 * @param {string} toolName - Tool name taken from the tool call.
 * @returns {boolean} true for read_file, read_files and list_dir; false for anything else.
 */
const isBudgetedReadTool = (toolName) => {
    switch (toolName) {
        case 'read_file':
        case 'read_files':
        case 'list_dir':
            return true;
        default:
            return false;
    }
};
|
|
1989
1994
|
let cumulativeReadOnlyCalls = 0;
|
|
1990
1995
|
// Directory scan detection: track unique file paths per parent dir.
|
|
1991
1996
|
// Only counts distinct files (re-reads of the same file after editing are normal).
|
|
@@ -1993,6 +1998,20 @@ export async function createSession(opts) {
|
|
|
1993
1998
|
const blockedDirs = new Set();
|
|
1994
1999
|
// Same-search detection: track search= params across read_file calls
|
|
1995
2000
|
const searchTermFiles = new Map(); // search term → set of file paths
|
|
2001
|
+
// Widening grep pattern detection: track grep patterns across exec calls
|
|
2002
|
+
const grepPatternPaths = new Map(); // grep pattern → set of paths searched
|
|
2003
|
+
// Analysis paralysis: track total tool calls vs edits to detect stalling
|
|
2004
|
+
let totalToolCallsThisAsk = 0;
|
|
2005
|
+
let totalEditsThisAsk = 0;
|
|
2006
|
+
let analysisParalysisWarned = false;
|
|
2007
|
+
// Same-file search_files repetition: track search_files calls per target file
|
|
2008
|
+
const searchFilesPerTarget = new Map(); // file path → search call count
|
|
2009
|
+
// Same-file grep repetition: track how many different grep patterns hit the same file
|
|
2010
|
+
const grepTargetFileCounts = new Map(); // file path → distinct grep call count
|
|
2011
|
+
// Log-tail spiral detection: track tail/grep calls on the same log file
|
|
2012
|
+
const logFileTailCounts = new Map(); // log file path → tail call count
|
|
2013
|
+
// Per-filter test run tracking: count test runs by filter name
|
|
2014
|
+
const testRunCountsByFilter = new Map(); // filter name → run count
|
|
1996
2015
|
// identical tool call signature counts across this ask() run
|
|
1997
2016
|
const sigCounts = new Map();
|
|
1998
2017
|
const toolNameByCallId = new Map();
|
|
@@ -2857,6 +2876,7 @@ export async function createSession(opts) {
|
|
|
2857
2876
|
lastEditedPath = absPath;
|
|
2858
2877
|
mutationVersion++;
|
|
2859
2878
|
suppressedTools.clear(); // file changed, re-enable all tools
|
|
2879
|
+
toolLoopGuard.invalidateFileContentCache(absPath);
|
|
2860
2880
|
},
|
|
2861
2881
|
});
|
|
2862
2882
|
// Tool-call argument parsing and validation logic
|
|
@@ -3215,6 +3235,94 @@ export async function createSession(opts) {
|
|
|
3215
3235
|
throw new Error(`exec: ${reason} — command: ${args.command}`);
|
|
3216
3236
|
}
|
|
3217
3237
|
}
|
|
3238
|
+
// ── Exec anti-pattern detection: sed-as-read and widening grep ──
|
|
3239
|
+
if (name === 'exec' && typeof args.command === 'string') {
|
|
3240
|
+
// Detect sed -n 'N,Mp' used as a substitute for read_file
|
|
3241
|
+
const sedRedirect = detectSedAsRead(args.command);
|
|
3242
|
+
if (sedRedirect) {
|
|
3243
|
+
await emitToolCall(callId, name, args);
|
|
3244
|
+
await emitToolResult({
|
|
3245
|
+
id: callId,
|
|
3246
|
+
name,
|
|
3247
|
+
success: false,
|
|
3248
|
+
summary: 'use read_file instead of sed',
|
|
3249
|
+
result: '',
|
|
3250
|
+
});
|
|
3251
|
+
return { id: callId, content: sedRedirect };
|
|
3252
|
+
}
|
|
3253
|
+
// Track widening grep patterns (same search string, expanding paths)
|
|
3254
|
+
const grepInfo = extractGrepPattern(args.command);
|
|
3255
|
+
if (grepInfo) {
|
|
3256
|
+
const key = grepInfo.pattern.toLowerCase();
|
|
3257
|
+
if (!grepPatternPaths.has(key))
|
|
3258
|
+
grepPatternPaths.set(key, new Set());
|
|
3259
|
+
for (const p of grepInfo.paths)
|
|
3260
|
+
grepPatternPaths.get(key).add(p);
|
|
3261
|
+
if (grepPatternPaths.get(key).size >= 3) {
|
|
3262
|
+
messages.push({
|
|
3263
|
+
role: 'user',
|
|
3264
|
+
content: `[system] You have searched for "${grepInfo.pattern}" across ${grepPatternPaths.get(key).size} different paths. ` +
|
|
3265
|
+
`Start with the broadest scope next time: search_files({ pattern: "${grepInfo.pattern}", path: "." })`,
|
|
3266
|
+
});
|
|
3267
|
+
}
|
|
3268
|
+
}
|
|
3269
|
+
// Detect cat/head/tail used as a substitute for read_file
|
|
3270
|
+
const catRedirect = detectCatHeadTailAsRead(args.command);
|
|
3271
|
+
if (catRedirect) {
|
|
3272
|
+
await emitToolCall(callId, name, args);
|
|
3273
|
+
await emitToolResult({
|
|
3274
|
+
id: callId,
|
|
3275
|
+
name,
|
|
3276
|
+
success: false,
|
|
3277
|
+
summary: 'use read_file instead',
|
|
3278
|
+
result: '',
|
|
3279
|
+
});
|
|
3280
|
+
return { id: callId, content: catRedirect };
|
|
3281
|
+
}
|
|
3282
|
+
// Log-tail spiral detection: track repeated tail/grep on same log file
|
|
3283
|
+
const logPath = extractLogFilePath(args.command);
|
|
3284
|
+
if (logPath) {
|
|
3285
|
+
const count = (logFileTailCounts.get(logPath) ?? 0) + 1;
|
|
3286
|
+
logFileTailCounts.set(logPath, count);
|
|
3287
|
+
if (count >= 4) {
|
|
3288
|
+
messages.push({
|
|
3289
|
+
role: 'user',
|
|
3290
|
+
content: `[system] You have read ${logPath} ${count} times. Stop tailing the log — ` +
|
|
3291
|
+
`review the error messages you already have and fix the root cause. ` +
|
|
3292
|
+
`If the error is unclear, read the relevant source file instead.`,
|
|
3293
|
+
});
|
|
3294
|
+
}
|
|
3295
|
+
}
|
|
3296
|
+
// Per-filter test run tracking
|
|
3297
|
+
const testFilter = extractTestFilter(args.command);
|
|
3298
|
+
if (testFilter) {
|
|
3299
|
+
const count = (testRunCountsByFilter.get(testFilter) ?? 0) + 1;
|
|
3300
|
+
testRunCountsByFilter.set(testFilter, count);
|
|
3301
|
+
if (count >= 5) {
|
|
3302
|
+
messages.push({
|
|
3303
|
+
role: 'user',
|
|
3304
|
+
content: `[system] You have run the test "${testFilter}" ${count} times. ` +
|
|
3305
|
+
`STOP re-running the same failing test. Step back, analyze the error message, ` +
|
|
3306
|
+
`and fix the root cause before running the test again.`,
|
|
3307
|
+
});
|
|
3308
|
+
}
|
|
3309
|
+
}
|
|
3310
|
+
// Same-file grep thrashing: track grep calls targeting a single file
|
|
3311
|
+
const grepTargetFile = extractGrepTargetFile(args.command);
|
|
3312
|
+
if (grepTargetFile) {
|
|
3313
|
+
const count = (grepTargetFileCounts.get(grepTargetFile) ?? 0) + 1;
|
|
3314
|
+
grepTargetFileCounts.set(grepTargetFile, count);
|
|
3315
|
+
if (count >= 4) {
|
|
3316
|
+
const basename = grepTargetFile.split('/').pop() || grepTargetFile;
|
|
3317
|
+
messages.push({
|
|
3318
|
+
role: 'user',
|
|
3319
|
+
content: `[system] You have run ${count} separate grep commands on ${basename}. ` +
|
|
3320
|
+
`STOP grepping the same file repeatedly. Use read_file to read the whole file once: ` +
|
|
3321
|
+
`read_file({ path: "${grepTargetFile}" })`,
|
|
3322
|
+
});
|
|
3323
|
+
}
|
|
3324
|
+
}
|
|
3325
|
+
}
|
|
3218
3326
|
if (isMutationTool(name) && typeof args.path === 'string') {
|
|
3219
3327
|
const absPath = args.path.startsWith('/')
|
|
3220
3328
|
? args.path
|
|
@@ -3236,9 +3344,25 @@ export async function createSession(opts) {
|
|
|
3236
3344
|
}
|
|
3237
3345
|
}
|
|
3238
3346
|
// ── Anti-scan: read_file guardrails (Fix 1/2/3) ──
|
|
3239
|
-
if
|
|
3240
|
-
|
|
3241
|
-
const
|
|
3347
|
+
// Same-file search_files repetition: if searching the same file 4+ times, tell the model to stop
|
|
3348
|
+
if (name === 'search_files') {
    // Same-file search_files repetition guard: count calls per target file and,
    // from the 4th call on the same file onward, tell the model to stop searching it.
    const searchPath = typeof args.path === 'string' ? args.path : '';
    // Decide file-vs-directory from the final path segment only. Checking the
    // whole path for a dot also matches ".", "..", "./src" and "../lib", which
    // would report repeated project-wide searches as hammering a single "file".
    // This remains a heuristic: a directory whose own name contains a dot
    // (e.g. "v1.2") is still treated as a file target.
    const targetName = path.basename(searchPath);
    const looksLikeFile = targetName !== '.' && targetName !== '..' && targetName.includes('.');
    if (looksLikeFile) {
        const count = (searchFilesPerTarget.get(searchPath) ?? 0) + 1;
        searchFilesPerTarget.set(searchPath, count);
        if (count >= 4) {
            messages.push({
                role: 'user',
                content: `[system] You have called search_files on ${targetName} ${count} times with different patterns. ` +
                    `STOP searching this file repeatedly. You already have enough information from previous reads and searches. ` +
                    `Proceed to make your edit or tell the user what you need.`,
            });
        }
    }
}
|
|
3365
|
+
if (isBudgetedReadTool(name)) {
|
|
3242
3366
|
// Fix 1: Hard cumulative budget — refuse reads once hard cap is reached.
|
|
3243
3367
|
// Count only actual executed read-only calls (not cache replays), so this check
|
|
3244
3368
|
// blocks the next call exactly at the configured cap.
|
|
@@ -3253,9 +3377,13 @@ export async function createSession(opts) {
|
|
|
3253
3377
|
});
|
|
3254
3378
|
return {
|
|
3255
3379
|
id: callId,
|
|
3256
|
-
content: `STOP: Read budget exhausted (${cumulativeReadOnlyCalls}/${READ_BUDGET_HARD} calls). Do NOT read more files. Use search_files
|
|
3380
|
+
content: `STOP: Read budget exhausted (${cumulativeReadOnlyCalls}/${READ_BUDGET_HARD} calls). Do NOT read more files. Use search_files(pattern, path) to find what you need.`,
|
|
3257
3381
|
};
|
|
3258
3382
|
}
|
|
3383
|
+
}
|
|
3384
|
+
if (name === 'read_file' || name === 'read_files') {
|
|
3385
|
+
const filePath = typeof args.path === 'string' ? args.path : '';
|
|
3386
|
+
const searchTerm = typeof args.search === 'string' ? args.search : '';
|
|
3259
3387
|
// Fix 2: Directory scan detection — counts unique files per dir (re-reads are OK)
|
|
3260
3388
|
if (filePath) {
|
|
3261
3389
|
const absFilePath = filePath.startsWith('/')
|
|
@@ -3280,7 +3408,7 @@ export async function createSession(opts) {
|
|
|
3280
3408
|
});
|
|
3281
3409
|
return {
|
|
3282
3410
|
id: callId,
|
|
3283
|
-
content: `STOP: Directory scan detected — you've read ${uniqueCount} unique files from ${parentDir}/. Use search_files(pattern, '${parentDir}')
|
|
3411
|
+
content: `STOP: Directory scan detected — you've read ${uniqueCount} unique files from ${parentDir}/. Use search_files(pattern, '${parentDir}') instead of reading files individually.`,
|
|
3284
3412
|
};
|
|
3285
3413
|
}
|
|
3286
3414
|
}
|
|
@@ -3301,7 +3429,7 @@ export async function createSession(opts) {
|
|
|
3301
3429
|
});
|
|
3302
3430
|
return {
|
|
3303
3431
|
id: callId,
|
|
3304
|
-
content: `STOP: You've searched ${searchTermFiles.get(key).size} files for "${searchTerm}" one at a time. This is what search_files does in one call. Use: search_files(pattern="${searchTerm}", path=".")
|
|
3432
|
+
content: `STOP: You've searched ${searchTermFiles.get(key).size} files for "${searchTerm}" one at a time. This is what search_files does in one call. Use: search_files(pattern="${searchTerm}", path=".")`,
|
|
3305
3433
|
};
|
|
3306
3434
|
}
|
|
3307
3435
|
}
|
|
@@ -3374,6 +3502,16 @@ export async function createSession(opts) {
|
|
|
3374
3502
|
reusedCachedReadOnlyExec = true; // skip re-execution below
|
|
3375
3503
|
}
|
|
3376
3504
|
}
|
|
3505
|
+
// Per-file content cache: catches non-consecutive re-reads of unchanged files.
|
|
3506
|
+
// This fires even when the consecutive-repeat detector misses (interleaved calls).
|
|
3507
|
+
// Consult the per-file content cache for read_file calls that no earlier replay
// path has already satisfied in this call.
if (name === 'read_file' && !(reusedCachedReadOnlyExec || reusedCachedReadTool)) {
    const cachedContent = await toolLoopGuard.getFileContentCache(name, args, ctx.cwd);
    if (cachedContent) {
        // Flagging the call as a cached read keeps it out of the cumulative
        // read budget, which only counts non-replayed executions.
        reusedCachedReadTool = true;
        content = cachedContent;
    }
}
|
|
3377
3515
|
if (READ_FILE_CACHE_TOOLS.has(name) && repeatedReadFileSigs.has(sig)) {
|
|
3378
3516
|
const replay = await toolLoopGuard.getReadCacheReplay(name, args, ctx.cwd);
|
|
3379
3517
|
if (replay) {
|
|
@@ -3485,6 +3623,11 @@ export async function createSession(opts) {
|
|
|
3485
3623
|
const baseCwd = typeof args?.cwd === 'string' ? String(args.cwd) : ctx.cwd;
|
|
3486
3624
|
await toolLoopGuard.storeReadCache(name, args, baseCwd, content);
|
|
3487
3625
|
}
|
|
3626
|
+
// Store in per-file content cache for non-consecutive re-read detection
|
|
3627
|
+
// Record string read_file results in the per-file content cache, skipping
// results whose text starts with the 'ERROR:' marker.
if (name === 'read_file' && typeof content === 'string') {
    if (!content.startsWith('ERROR:')) {
        // Prefer an explicit string cwd from the call arguments; otherwise use the session cwd.
        const cacheCwd = typeof args?.cwd === 'string' ? String(args.cwd) : ctx.cwd;
        await toolLoopGuard.storeFileContentCache(name, args, cacheCwd, content);
    }
}
|
|
3488
3631
|
if (name === 'exec') {
|
|
3489
3632
|
// Successful exec clears blocked-loop counters.
|
|
3490
3633
|
blockedExecAttemptsBySig.clear();
|
|
@@ -3658,13 +3801,16 @@ export async function createSession(opts) {
|
|
|
3658
3801
|
toolCallId: callId,
|
|
3659
3802
|
result: content,
|
|
3660
3803
|
});
|
|
3661
|
-
// Count only actual read
|
|
3804
|
+
// Count only actual file-read executions toward cumulative read budget.
|
|
3662
3805
|
// Cached/replayed read observations should not consume budget.
|
|
3663
|
-
if (
|
|
3664
|
-
!reusedCachedReadTool &&
|
|
3665
|
-
!reusedCachedReadOnlyExec) {
|
|
3806
|
+
if (isBudgetedReadTool(name) && !reusedCachedReadTool && !reusedCachedReadOnlyExec) {
|
|
3666
3807
|
cumulativeReadOnlyCalls += 1;
|
|
3667
3808
|
}
|
|
3809
|
+
// Track total tool calls and edits for analysis paralysis detection
|
|
3810
|
+
totalToolCallsThisAsk++;
|
|
3811
|
+
if (isMutationTool(name) && toolSuccess) {
|
|
3812
|
+
totalEditsThisAsk++;
|
|
3813
|
+
}
|
|
3668
3814
|
// ── Per-file mutation spiral detection ──
|
|
3669
3815
|
// Track edits to the same file. If the model keeps editing the same file
|
|
3670
3816
|
// over and over, it's likely in an edit→break→read→edit corruption spiral.
|
|
@@ -3825,7 +3971,7 @@ export async function createSession(opts) {
|
|
|
3825
3971
|
const callId = resolveCallId(tc);
|
|
3826
3972
|
results.push({
|
|
3827
3973
|
id: callId,
|
|
3828
|
-
content: `STOP: Per-turn read limit (${READ_ONLY_PER_TURN_CAP}). Use search_files
|
|
3974
|
+
content: `STOP: Per-turn read limit (${READ_ONLY_PER_TURN_CAP}). Use search_files(pattern, path) instead of reading files one by one.`,
|
|
3829
3975
|
});
|
|
3830
3976
|
}
|
|
3831
3977
|
if (cfg.verbose) {
|
|
@@ -3941,14 +4087,28 @@ export async function createSession(opts) {
|
|
|
3941
4087
|
// ── Escalating cumulative read budget (§ anti-scan guardrails) ──
|
|
3942
4088
|
// Warn zone: append warnings to each read result when approaching the hard cap
|
|
3943
4089
|
if (!readBudgetWarned &&
|
|
3944
|
-
|
|
3945
|
-
cumulativeReadOnlyCalls
|
|
4090
|
+
READ_BUDGET_WARN > 0 &&
|
|
4091
|
+
cumulativeReadOnlyCalls >= READ_BUDGET_WARN &&
|
|
4092
|
+
cumulativeReadOnlyCalls < READ_BUDGET_HARD) {
|
|
3946
4093
|
readBudgetWarned = true;
|
|
3947
4094
|
messages.push({
|
|
3948
4095
|
role: 'user',
|
|
3949
4096
|
content: `[system] Read budget: ${cumulativeReadOnlyCalls}/${READ_BUDGET_HARD}. Use search_files instead of reading files individually.`,
|
|
3950
4097
|
});
|
|
3951
4098
|
}
|
|
4099
|
+
// Analysis paralysis: if the model has made 25+ tool calls with zero edits,
|
|
4100
|
+
// it's stuck in a read/search loop and needs to be forced into action.
|
|
4101
|
+
// One-shot analysis-paralysis warning: fires the first time the ask reaches
// 25 or more tool calls while the edit counter is still zero.
const stalledWithoutEdits = totalToolCallsThisAsk >= 25 && totalEditsThisAsk === 0;
if (stalledWithoutEdits && !analysisParalysisWarned) {
    // Latch the flag so the warning is pushed at most once.
    analysisParalysisWarned = true;
    const paralysisNotice = [
        `[system] CRITICAL: You have made ${totalToolCallsThisAsk} tool calls without producing a single edit. `,
        `You are stuck in analysis paralysis. STOP reading and searching. `,
        `You have enough information. Make your edit NOW or explain to the user what is blocking you.`,
    ].join('');
    messages.push({ role: 'user', content: paralysisNotice });
}
|
|
3952
4112
|
// One bounded automatic repair attempt for invalid tool args.
|
|
3953
4113
|
if (invalidArgsThisTurn && toolRepairAttempts < MAX_TOOL_REPAIR_ATTEMPTS) {
|
|
3954
4114
|
toolRepairAttempts++;
|