npm - @visorcraft/idlehands - Versions diffs - 2.2.25 → 2.3.0 - Mend

@visorcraft/idlehands 2.2.25 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23)

package/dist/agent/exec-helpers.js +203 -0
package/dist/agent/exec-helpers.js.map +1 -1
package/dist/agent/prompt-builder.js +7 -2
package/dist/agent/prompt-builder.js.map +1 -1
package/dist/agent/tool-loop-guard.js +77 -0
package/dist/agent/tool-loop-guard.js.map +1 -1
package/dist/agent/tools-schema.js +4 -2
package/dist/agent/tools-schema.js.map +1 -1
package/dist/agent.js +175 -15
package/dist/agent.js.map +1 -1
package/dist/anton/controller.js +50 -29
package/dist/anton/controller.js.map +1 -1
package/dist/anton/reporter.js +15 -0
package/dist/anton/reporter.js.map +1 -1
package/dist/bot/anton-run.js +3 -3
package/dist/bot/anton-run.js.map +1 -1
package/dist/cli/commands/anton.js +3 -3
package/dist/cli/commands/anton.js.map +1 -1
package/dist/harnesses.js +2 -2
package/dist/harnesses.js.map +1 -1
package/dist/runtime/planner.js +11 -0
package/dist/runtime/planner.js.map +1 -1
package/package.json +1 -1

package/dist/agent.js CHANGED

@@ -2,7 +2,7 @@ import fs from 'node:fs/promises';
 import path from 'node:path';
 import { DEFAULT_SUB_AGENT_RESULT_TOKEN_CAP, DEFAULT_SUB_AGENT_SYSTEM_PROMPT, MCP_TOOLS_REQUEST_TOKEN, } from './agent/constants.js';
 import { AgentLoopBreak } from './agent/errors.js';
-import { execRcShouldSignalFailure, looksLikeReadOnlyExecCommand, readOnlyExecCacheable, withCachedExecObservationHint, withReplayedExecHint, } from './agent/exec-helpers.js';
+import { execRcShouldSignalFailure, looksLikeReadOnlyExecCommand, detectSedAsRead, extractGrepPattern, detectCatHeadTailAsRead, extractTestFilter, extractGrepTargetFile, extractLogFilePath, readOnlyExecCacheable, withCachedExecObservationHint, withReplayedExecHint, } from './agent/exec-helpers.js';
 import { generateMinimalDiff, toolResultSummary, execCommandFromSig, formatDurationMs, looksLikePlanningNarration, capTextByApproxTokens, sanitizePathsInMessage, digestToolResult, } from './agent/formatting.js';
 import { autoPickModel } from './agent/model-pick.js';
 import { reviewArtifactKeys, looksLikeCodeReviewRequest, looksLikeReviewRetrievalRequest, retrievalAllowsStaleArtifact, parseReviewArtifactStalePolicy, parseReviewArtifact, reviewArtifactStaleReason, gitHead, normalizeModelsResponse, } from './agent/review-artifact.js';
@@ -236,6 +236,7 @@ export async function createSession(opts) {
         lspTools: lspManager?.hasServers() === true,
         mcpTools: mcpToolsLoaded ? (mcpManager?.getEnabledToolSchemas() ?? []) : [],
         allowSpawnTask: spawnTaskEnabled,
+        maxReadLines: cfg.max_read_lines,
         slimFast,
     });
     const collectToolContext = () => {
@@ -1984,8 +1985,12 @@ export async function createSession(opts) {
         };
         // Read-only tool call budgets (§ anti-scan guardrails)
         const READ_ONLY_PER_TURN_CAP = 6;
-        const READ_BUDGET_WARN = 15;
         const READ_BUDGET_HARD = harness.quirks.readBudget ?? 20;
+        // Warn before hard-stop; scales for smaller custom budgets (e.g. 12 for qwen3-coder).
+        const READ_BUDGET_WARN = READ_BUDGET_HARD > 1 ? Math.max(1, Math.min(15, READ_BUDGET_HARD - 2)) : 0;
+        // Only count file-reading tools toward the cumulative read budget.
+        // search_files is intentionally excluded so the model can still narrow scope.
+        const isBudgetedReadTool = (toolName) => toolName === 'read_file' || toolName === 'read_files' || toolName === 'list_dir';
         let cumulativeReadOnlyCalls = 0;
         // Directory scan detection: track unique file paths per parent dir.
         // Only counts distinct files (re-reads of the same file after editing are normal).
@@ -1993,6 +1998,20 @@ export async function createSession(opts) {
         const blockedDirs = new Set();
         // Same-search detection: track search= params across read_file calls
         const searchTermFiles = new Map(); // search term → set of file paths
+        // Widening grep pattern detection: track grep patterns across exec calls
+        const grepPatternPaths = new Map(); // grep pattern → set of paths searched
+        // Analysis paralysis: track total tool calls vs edits to detect stalling
+        let totalToolCallsThisAsk = 0;
+        let totalEditsThisAsk = 0;
+        let analysisParalysisWarned = false;
+        // Same-file search_files repetition: track search_files calls per target file
+        const searchFilesPerTarget = new Map(); // file path → search call count
+        // Same-file grep repetition: track how many different grep patterns hit the same file
+        const grepTargetFileCounts = new Map(); // file path → distinct grep call count
+        // Log-tail spiral detection: track tail/grep calls on the same log file
+        const logFileTailCounts = new Map(); // log file path → tail call count
+        // Per-filter test run tracking: count test runs by filter name
+        const testRunCountsByFilter = new Map(); // filter name → run count
         // identical tool call signature counts across this ask() run
         const sigCounts = new Map();
         const toolNameByCallId = new Map();
@@ -2857,6 +2876,7 @@ export async function createSession(opts) {
                             lastEditedPath = absPath;
                             mutationVersion++;
                             suppressedTools.clear(); // file changed, re-enable all tools
+                            toolLoopGuard.invalidateFileContentCache(absPath);
                         },
                     });
                     // Tool-call argument parsing and validation logic
@@ -3215,6 +3235,94 @@ export async function createSession(opts) {
                                 throw new Error(`exec: ${reason} — command: ${args.command}`);
                             }
                         }
+                        // ── Exec anti-pattern detection: sed-as-read and widening grep ──
+                        if (name === 'exec' && typeof args.command === 'string') {
+                            // Detect sed -n 'N,Mp' used as a substitute for read_file
+                            const sedRedirect = detectSedAsRead(args.command);
+                            if (sedRedirect) {
+                                await emitToolCall(callId, name, args);
+                                await emitToolResult({
+                                    id: callId,
+                                    name,
+                                    success: false,
+                                    summary: 'use read_file instead of sed',
+                                    result: '',
+                                });
+                                return { id: callId, content: sedRedirect };
+                            }
+                            // Track widening grep patterns (same search string, expanding paths)
+                            const grepInfo = extractGrepPattern(args.command);
+                            if (grepInfo) {
+                                const key = grepInfo.pattern.toLowerCase();
+                                if (!grepPatternPaths.has(key))
+                                    grepPatternPaths.set(key, new Set());
+                                for (const p of grepInfo.paths)
+                                    grepPatternPaths.get(key).add(p);
+                                if (grepPatternPaths.get(key).size >= 3) {
+                                    messages.push({
+                                        role: 'user',
+                                        content: `[system] You have searched for "${grepInfo.pattern}" across ${grepPatternPaths.get(key).size} different paths. ` +
+                                            `Start with the broadest scope next time: search_files({ pattern: "${grepInfo.pattern}", path: "." })`,
+                                    });
+                                }
+                            }
+                            // Detect cat/head/tail used as a substitute for read_file
+                            const catRedirect = detectCatHeadTailAsRead(args.command);
+                            if (catRedirect) {
+                                await emitToolCall(callId, name, args);
+                                await emitToolResult({
+                                    id: callId,
+                                    name,
+                                    success: false,
+                                    summary: 'use read_file instead',
+                                    result: '',
+                                });
+                                return { id: callId, content: catRedirect };
+                            }
+                            // Log-tail spiral detection: track repeated tail/grep on same log file
+                            const logPath = extractLogFilePath(args.command);
+                            if (logPath) {
+                                const count = (logFileTailCounts.get(logPath) ?? 0) + 1;
+                                logFileTailCounts.set(logPath, count);
+                                if (count >= 4) {
+                                    messages.push({
+                                        role: 'user',
+                                        content: `[system] You have read ${logPath} ${count} times. Stop tailing the log — ` +
+                                            `review the error messages you already have and fix the root cause. ` +
+                                            `If the error is unclear, read the relevant source file instead.`,
+                                    });
+                                }
+                            }
+                            // Per-filter test run tracking
+                            const testFilter = extractTestFilter(args.command);
+                            if (testFilter) {
+                                const count = (testRunCountsByFilter.get(testFilter) ?? 0) + 1;
+                                testRunCountsByFilter.set(testFilter, count);
+                                if (count >= 5) {
+                                    messages.push({
+                                        role: 'user',
+                                        content: `[system] You have run the test "${testFilter}" ${count} times. ` +
+                                            `STOP re-running the same failing test. Step back, analyze the error message, ` +
+                                            `and fix the root cause before running the test again.`,
+                                    });
+                                }
+                            }
+                            // Same-file grep thrashing: track grep calls targeting a single file
+                            const grepTargetFile = extractGrepTargetFile(args.command);
+                            if (grepTargetFile) {
+                                const count = (grepTargetFileCounts.get(grepTargetFile) ?? 0) + 1;
+                                grepTargetFileCounts.set(grepTargetFile, count);
+                                if (count >= 4) {
+                                    const basename = grepTargetFile.split('/').pop() || grepTargetFile;
+                                    messages.push({
+                                        role: 'user',
+                                        content: `[system] You have run ${count} separate grep commands on ${basename}. ` +
+                                            `STOP grepping the same file repeatedly. Use read_file to read the whole file once: ` +
+                                            `read_file({ path: "${grepTargetFile}" })`,
+                                    });
+                                }
+                            }
+                        }
                         if (isMutationTool(name) && typeof args.path === 'string') {
                             const absPath = args.path.startsWith('/')
                                 ? args.path
@@ -3236,9 +3344,25 @@ export async function createSession(opts) {
                             }
                         }
                         // ── Anti-scan: read_file guardrails (Fix 1/2/3) ──
-                        if (name === 'read_file' || name === 'read_files') {
-                            const filePath = typeof args.path === 'string' ? args.path : '';
-                            const searchTerm = typeof args.search === 'string' ? args.search : '';
+                        // Same-file search_files repetition: if searching the same file 4+ times, tell the model to stop
+                        if (name === 'search_files') {
+                            const searchPath = typeof args.path === 'string' ? args.path : '';
+                            // Only track when targeting a specific file (has extension), not a directory
+                            if (searchPath && searchPath.includes('.')) {
+                                const count = (searchFilesPerTarget.get(searchPath) ?? 0) + 1;
+                                searchFilesPerTarget.set(searchPath, count);
+                                if (count >= 4) {
+                                    const basename = searchPath.split('/').pop() || searchPath;
+                                    messages.push({
+                                        role: 'user',
+                                        content: `[system] You have called search_files on ${basename} ${count} times with different patterns. ` +
+                                            `STOP searching this file repeatedly. You already have enough information from previous reads and searches. ` +
+                                            `Proceed to make your edit or tell the user what you need.`,
+                                    });
+                                }
+                            }
+                        }
+                        if (isBudgetedReadTool(name)) {
                             // Fix 1: Hard cumulative budget — refuse reads once hard cap is reached.
                             // Count only actual executed read-only calls (not cache replays), so this check
                             // blocks the next call exactly at the configured cap.
@@ -3253,9 +3377,13 @@ export async function createSession(opts) {
                                 });
                                 return {
                                     id: callId,
-                                    content: `STOP: Read budget exhausted (${cumulativeReadOnlyCalls}/${READ_BUDGET_HARD} calls). Do NOT read more files. Use search_files or exec: grep -rn "pattern" path/ to find what you need.`,
+                                    content: `STOP: Read budget exhausted (${cumulativeReadOnlyCalls}/${READ_BUDGET_HARD} calls). Do NOT read more files. Use search_files(pattern, path) to find what you need.`,
                                 };
                             }
+                        }
+                        if (name === 'read_file' || name === 'read_files') {
+                            const filePath = typeof args.path === 'string' ? args.path : '';
+                            const searchTerm = typeof args.search === 'string' ? args.search : '';
                             // Fix 2: Directory scan detection — counts unique files per dir (re-reads are OK)
                             if (filePath) {
                                 const absFilePath = filePath.startsWith('/')
@@ -3280,7 +3408,7 @@ export async function createSession(opts) {
                                     });
                                     return {
                                         id: callId,
-                                        content: `STOP: Directory scan detected — you've read ${uniqueCount} unique files from ${parentDir}/. Use search_files(pattern, '${parentDir}') or exec: grep -rn "pattern" ${parentDir}/ instead of reading files individually.`,
+                                        content: `STOP: Directory scan detected — you've read ${uniqueCount} unique files from ${parentDir}/. Use search_files(pattern, '${parentDir}') instead of reading files individually.`,
                                     };
                                 }
                             }
@@ -3301,7 +3429,7 @@ export async function createSession(opts) {
                                     });
                                     return {
                                         id: callId,
-                                        content: `STOP: You've searched ${searchTermFiles.get(key).size} files for "${searchTerm}" one at a time. This is what search_files does in one call. Use: search_files(pattern="${searchTerm}", path=".") or exec: grep -rn "${searchTerm}" .`,
+                                        content: `STOP: You've searched ${searchTermFiles.get(key).size} files for "${searchTerm}" one at a time. This is what search_files does in one call. Use: search_files(pattern="${searchTerm}", path=".")`,
                                     };
                                 }
                             }
@@ -3374,6 +3502,16 @@ export async function createSession(opts) {
                                 reusedCachedReadOnlyExec = true; // skip re-execution below
                             }
                         }
+                        // Per-file content cache: catches non-consecutive re-reads of unchanged files.
+                        // This fires even when the consecutive-repeat detector misses (interleaved calls).
+                        if (name === 'read_file' && !reusedCachedReadOnlyExec && !reusedCachedReadTool) {
+                            const fileReplay = await toolLoopGuard.getFileContentCache(name, args, ctx.cwd);
+                            if (fileReplay) {
+                                content = fileReplay;
+                                reusedCachedReadTool = true;
+                                // Cache hit — do NOT count toward read budget (no new tokens consumed)
+                            }
+                        }
                         if (READ_FILE_CACHE_TOOLS.has(name) && repeatedReadFileSigs.has(sig)) {
                             const replay = await toolLoopGuard.getReadCacheReplay(name, args, ctx.cwd);
                             if (replay) {
@@ -3485,6 +3623,11 @@ export async function createSession(opts) {
                                     const baseCwd = typeof args?.cwd === 'string' ? String(args.cwd) : ctx.cwd;
                                     await toolLoopGuard.storeReadCache(name, args, baseCwd, content);
                                 }
+                                // Store in per-file content cache for non-consecutive re-read detection
+                                if (name === 'read_file' && typeof content === 'string' && !content.startsWith('ERROR:')) {
+                                    const baseCwd = typeof args?.cwd === 'string' ? String(args.cwd) : ctx.cwd;
+                                    await toolLoopGuard.storeFileContentCache(name, args, baseCwd, content);
+                                }
                                 if (name === 'exec') {
                                     // Successful exec clears blocked-loop counters.
                                     blockedExecAttemptsBySig.clear();
@@ -3658,13 +3801,16 @@ export async function createSession(opts) {
                             toolCallId: callId,
                             result: content,
                         });
-                        // Count only actual read-only executions toward cumulative read budget.
+                        // Count only actual file-read executions toward cumulative read budget.
                         // Cached/replayed read observations should not consume budget.
-                        if (isReadOnlyToolDynamic(name) &&
-                            !reusedCachedReadTool &&
-                            !reusedCachedReadOnlyExec) {
+                        if (isBudgetedReadTool(name) && !reusedCachedReadTool && !reusedCachedReadOnlyExec) {
                             cumulativeReadOnlyCalls += 1;
                         }
+                        // Track total tool calls and edits for analysis paralysis detection
+                        totalToolCallsThisAsk++;
+                        if (isMutationTool(name) && toolSuccess) {
+                            totalEditsThisAsk++;
+                        }
                         // ── Per-file mutation spiral detection ──
                         // Track edits to the same file. If the model keeps editing the same file
                         // over and over, it's likely in an edit→break→read→edit corruption spiral.
@@ -3825,7 +3971,7 @@ export async function createSession(opts) {
                             const callId = resolveCallId(tc);
                             results.push({
                                 id: callId,
-                                content: `STOP: Per-turn read limit (${READ_ONLY_PER_TURN_CAP}). Use search_files or exec with grep instead of reading files one by one.`,
+                                content: `STOP: Per-turn read limit (${READ_ONLY_PER_TURN_CAP}). Use search_files(pattern, path) instead of reading files one by one.`,
                             });
                         }
                         if (cfg.verbose) {
@@ -3941,14 +4087,28 @@ export async function createSession(opts) {
                     // ── Escalating cumulative read budget (§ anti-scan guardrails) ──
                     // Warn zone: append warnings to each read result when approaching the hard cap
                     if (!readBudgetWarned &&
-                        cumulativeReadOnlyCalls > READ_BUDGET_WARN &&
-                        cumulativeReadOnlyCalls <= READ_BUDGET_HARD) {
+                        READ_BUDGET_WARN > 0 &&
+                        cumulativeReadOnlyCalls >= READ_BUDGET_WARN &&
+                        cumulativeReadOnlyCalls < READ_BUDGET_HARD) {
                         readBudgetWarned = true;
                         messages.push({
                             role: 'user',
                             content: `[system] Read budget: ${cumulativeReadOnlyCalls}/${READ_BUDGET_HARD}. Use search_files instead of reading files individually.`,
                         });
                     }
+                    // Analysis paralysis: if the model has made 25+ tool calls with zero edits,
+                    // it's stuck in a read/search loop and needs to be forced into action.
+                    if (!analysisParalysisWarned &&
+                        totalToolCallsThisAsk >= 25 &&
+                        totalEditsThisAsk === 0) {
+                        analysisParalysisWarned = true;
+                        messages.push({
+                            role: 'user',
+                            content: `[system] CRITICAL: You have made ${totalToolCallsThisAsk} tool calls without producing a single edit. ` +
+                                `You are stuck in analysis paralysis. STOP reading and searching. ` +
+                                `You have enough information. Make your edit NOW or explain to the user what is blocking you.`,
+                        });
+                    }
                     // One bounded automatic repair attempt for invalid tool args.
                     if (invalidArgsThisTurn && toolRepairAttempts < MAX_TOOL_REPAIR_ATTEMPTS) {
                         toolRepairAttempts++;