npm - @burtson-labs/agent-core - Versions diffs - 1.6.16 → 1.6.18 - Mend

@burtson-labs/agent-core 1.6.16 → 1.6.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (93)

package/README.md +2 -0
package/dist/index.d.ts +3 -1
package/dist/index.d.ts.map +1 -1
package/dist/index.js +8 -1
package/dist/index.js.map +1 -1
package/dist/mcp/activation.js +16 -8
package/dist/mcp/activation.js.map +1 -1
package/dist/mcp/clientPool.js +40 -22
package/dist/mcp/clientPool.js.map +1 -1
package/dist/mcp/server.js +16 -10
package/dist/mcp/server.js.map +1 -1
package/dist/mcp/toolAdapter.js +21 -11
package/dist/mcp/toolAdapter.js.map +1 -1
package/dist/providers/deterministic-provider.d.ts +1 -1
package/dist/providers/deterministic-provider.d.ts.map +1 -1
package/dist/runtime/AgentRuntime.d.ts +2 -2
package/dist/runtime/AgentRuntime.d.ts.map +1 -1
package/dist/security/secretPatterns.js +4 -2
package/dist/security/secretPatterns.js.map +1 -1
package/dist/telemetry/otlpExporter.d.ts +69 -0
package/dist/telemetry/otlpExporter.d.ts.map +1 -0
package/dist/telemetry/otlpExporter.js +321 -0
package/dist/telemetry/otlpExporter.js.map +1 -0
package/dist/tools/ask-user-tool.js +8 -4
package/dist/tools/ask-user-tool.js.map +1 -1
package/dist/tools/compactMessages.js +6 -3
package/dist/tools/compactMessages.js.map +1 -1
package/dist/tools/core-tools.js +151 -81
package/dist/tools/core-tools.js.map +1 -1
package/dist/tools/git-tools.js +22 -11
package/dist/tools/git-tools.js.map +1 -1
package/dist/tools/language-adapters.d.ts +1 -1
package/dist/tools/language-adapters.d.ts.map +1 -1
package/dist/tools/language-adapters.js +36 -18
package/dist/tools/language-adapters.js.map +1 -1
package/dist/tools/loop/finalAnswerNudges.js +12 -6
package/dist/tools/loop/finalAnswerNudges.js.map +1 -1
package/dist/tools/loop/goalAnchor.d.ts.map +1 -1
package/dist/tools/loop/goalAnchor.js +2 -1
package/dist/tools/loop/goalAnchor.js.map +1 -1
package/dist/tools/loop/llmStream.js +11 -8
package/dist/tools/loop/llmStream.js.map +1 -1
package/dist/tools/loop/loopShared.d.ts +20 -0
package/dist/tools/loop/loopShared.d.ts.map +1 -0
package/dist/tools/loop/loopShared.js +105 -0
package/dist/tools/loop/loopShared.js.map +1 -0
package/dist/tools/loop/parallelExecute.d.ts +1 -1
package/dist/tools/loop/parallelExecute.js +2 -1
package/dist/tools/loop/parallelExecute.js.map +1 -1
package/dist/tools/loop/singleToolExecute.js +8 -4
package/dist/tools/loop/singleToolExecute.js.map +1 -1
package/dist/tools/loop/turnSetup.js +9 -6
package/dist/tools/loop/turnSetup.js.map +1 -1
package/dist/tools/ocr.d.ts.map +1 -1
package/dist/tools/ocr.js +7 -5
package/dist/tools/ocr.js.map +1 -1
package/dist/tools/post-edit-checks.js +25 -13
package/dist/tools/post-edit-checks.js.map +1 -1
package/dist/tools/skill-loader.d.ts +1 -1
package/dist/tools/skill-loader.d.ts.map +1 -1
package/dist/tools/skill-loader.js +14 -7
package/dist/tools/skill-loader.js.map +1 -1
package/dist/tools/skill-registry.js +2 -1
package/dist/tools/skill-registry.js.map +1 -1
package/dist/tools/skills/mail-search-skill.js +16 -9
package/dist/tools/skills/mail-search-skill.js.map +1 -1
package/dist/tools/skills/plan-skill.js +4 -2
package/dist/tools/skills/plan-skill.js.map +1 -1
package/dist/tools/skills/semantic-search-skill.js +12 -6
package/dist/tools/skills/semantic-search-skill.js.map +1 -1
package/dist/tools/skills/test-gen-skill.js +8 -4
package/dist/tools/skills/test-gen-skill.js.map +1 -1
package/dist/tools/tool-registry.d.ts +17 -0
package/dist/tools/tool-registry.d.ts.map +1 -1
package/dist/tools/tool-registry.js +110 -30
package/dist/tools/tool-registry.js.map +1 -1
package/dist/tools/tool-use-loop.d.ts +16 -8
package/dist/tools/tool-use-loop.d.ts.map +1 -1
package/dist/tools/tool-use-loop.js +144 -160
package/dist/tools/tool-use-loop.js.map +1 -1
package/dist/tools/tool-use-parser.d.ts +33 -0
package/dist/tools/tool-use-parser.d.ts.map +1 -1
package/dist/tools/tool-use-parser.js +105 -28
package/dist/tools/tool-use-parser.js.map +1 -1
package/dist/tools/toolAvailabilityDetector.d.ts +0 -24
package/dist/tools/toolAvailabilityDetector.d.ts.map +1 -1
package/dist/tools/toolAvailabilityDetector.js +26 -12
package/dist/tools/toolAvailabilityDetector.js.map +1 -1
package/dist/tools/unified-patch.js +16 -8
package/dist/tools/unified-patch.js.map +1 -1
package/dist/utils/event-emitter.d.ts +1 -1
package/dist/utils/event-emitter.d.ts.map +1 -1
package/package.json +20 -1

package/dist/tools/tool-use-loop.js CHANGED

@@ -18,12 +18,7 @@
  * the host should use the Ollama `tools: [...]` field instead.
  */
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.ToolUseLoop = void 0;
-exports.sleep = sleep;
-exports.isRetryableLlmError = isRetryableLlmError;
-exports.tagRetryableLlmError = tagRetryableLlmError;
-exports.summarizeLlmError = summarizeLlmError;
-exports.isContinuationPrompt = isContinuationPrompt;
+exports.ToolUseLoop = exports.isContinuationPrompt = exports.summarizeLlmError = exports.tagRetryableLlmError = exports.isRetryableLlmError = exports.sleep = void 0;
 exports.isNoticingPrompt = isNoticingPrompt;
 exports.createToolUseLoop = createToolUseLoop;
 const tool_use_parser_1 = require("./tool-use-parser");
@@ -36,87 +31,16 @@ const parallelExecute_1 = require("./loop/parallelExecute");
 const goalAnchor_1 = require("./loop/goalAnchor");
 const finalAnswerNudges_1 = require("./loop/finalAnswerNudges");
 const toolAvailabilityDetector_1 = require("./toolAvailabilityDetector");
+const loopShared_1 = require("./loop/loopShared");
+Object.defineProperty(exports, "sleep", { enumerable: true, get: function () { return loopShared_1.sleep; } });
+Object.defineProperty(exports, "isRetryableLlmError", { enumerable: true, get: function () { return loopShared_1.isRetryableLlmError; } });
+Object.defineProperty(exports, "tagRetryableLlmError", { enumerable: true, get: function () { return loopShared_1.tagRetryableLlmError; } });
+Object.defineProperty(exports, "summarizeLlmError", { enumerable: true, get: function () { return loopShared_1.summarizeLlmError; } });
+Object.defineProperty(exports, "isContinuationPrompt", { enumerable: true, get: function () { return loopShared_1.isContinuationPrompt; } });
 const FILE_EDIT_TOOL_NAMES = new Set(['write_file', 'apply_edit', 'replace_range', 'apply_patch']);
 function isFileEditTool(name) {
     return FILE_EDIT_TOOL_NAMES.has(name);
 }
-function sleep(ms) {
-    return new Promise((resolve) => setTimeout(resolve, ms));
-}
-function getErrorCode(error) {
-    return typeof error === 'object' && error !== null && 'code' in error
-        ? String(error.code ?? '')
-        : undefined;
-}
-function getErrorMessage(error) {
-    return error instanceof Error ? error.message : String(error);
-}
-function isRetryableLlmError(error) {
-    const code = getErrorCode(error);
-    if (code === 'USER_ABORT')
-        return false;
-    const message = getErrorMessage(error);
-    if (/\b429\b|rate limit/i.test(message))
-        return false;
-    return (code === 'WATCHDOG' ||
-        /\b5\d\d\b/.test(message) ||
-        /Upstream model request failed/i.test(message) ||
-        /ECONNREFUSED|ECONNRESET|ETIMEDOUT|EAI_AGAIN|socket hang up|fetch failed|network error|terminated|UND_ERR/i.test(message));
-}
-function tagRetryableLlmError(error) {
-    if (error instanceof Error) {
-        const tagged = error;
-        if (!tagged.code)
-            tagged.code = 'UPSTREAM_MODEL';
-    }
-}
-function summarizeLlmError(error) {
-    const message = getErrorMessage(error).replace(/\s+/g, ' ').trim();
-    return message.length > 180 ? `${message.slice(0, 177)}...` : message;
-}
-/**
- * Detects "keep going" / "continue" / "yes" style prompts that
- * carry no real goal content. The goal-anchor block uses the most recent
- * user message as the recall text; when that text is "good lets keep
- * going" the anchor degenerates into "remind yourself to keep going",
- * which gives the model nothing to anchor on after 20 iterations of
- * drift. Real on a 60-iteration linter-fix
- * turn: every anchor injection cited "good lets keep going" as the
- * goal. Detector lets callers walk back to a prior substantive prompt
- * instead.
- *
- * Length cap (60 chars) + normalized-phrase match keeps false positives
- * down — a sentence like "keep going on the auth refactor for the
- * user-service" is longer than 60 chars and reads as a real goal, so it
- * stays a goal.
- */
-const CONTINUATION_PROMPT_PHRASES = new Set([
-    'continue', 'keep going', 'go on', 'proceed', 'next', 'more',
-    'please continue', 'carry on', 'finish', 'finish it', 'finish up', 'wrap up', 'wrap it up',
-    'good', 'great', 'nice', 'cool', 'sweet', 'perfect', 'ok', 'okay', 'k', 'yes', 'y', 'yep', 'yeah', 'ack', 'done',
-    "let's continue", 'lets continue', "let's keep going", 'lets keep going',
-    'good keep going', 'good lets keep going', "good let's keep going",
-    'good continue', 'ok continue', 'okay continue'
-]);
-function isContinuationPrompt(text) {
-    const trimmed = text.trim();
-    if (trimmed.length === 0 || trimmed.length > 60)
-        return false;
-    // Normalize: lowercase, drop non-word/space punctuation, collapse whitespace.
-    const norm = trimmed
-        .toLowerCase()
-        .replace(/[^\w\s']/g, ' ')
-        .replace(/\s+/g, ' ')
-        .trim();
-    if (CONTINUATION_PROMPT_PHRASES.has(norm))
-        return true;
-    // Permit "please <phrase>" and "<phrase> please" wrappings.
-    for (const phrase of CONTINUATION_PROMPT_PHRASES) {
-        if (norm === `please ${phrase}` || norm === `${phrase} please`)
-            return true;
-    }
-    return false;
-}
 /**
  * "Noticing prompt" detector. Catches user messages that are asking
  * about state ("are we using these?", "did you update X?", "where's
@@ -124,7 +48,7 @@ function isContinuationPrompt(text) {
  * work. These signal that the user spotted a gap in the prior turn
  * and wants the agent to address it — NOT continue the prior plan.
  *
- * Real failure mode captured 2026-05-25 on a Portfolio React refactor:
+ * Real failure mode captured 2026-05-25 on a local React refactor:
  * user asked "I dont think we actually are using these new files are
  * we?" after the agent wrote data files but never wired them into
  * App.jsx. Bandit read the question as a generic "keep going" prompt,
@@ -139,8 +63,9 @@ function isContinuationPrompt(text) {
  */
 function isNoticingPrompt(text) {
     const trimmed = (text || '').trim();
-    if (trimmed.length === 0 || trimmed.length > 220)
+    if (trimmed.length === 0 || trimmed.length > 220) {
         return false;
+    }
     const norm = trimmed.toLowerCase().replace(/[^\w\s'?-]/g, ' ').replace(/\s+/g, ' ').trim();
     // Stems that introduce a noticing/clarifying question. Anchored to
     // the start of the message so a paragraph mentioning "are we"
@@ -163,8 +88,9 @@ function isNoticingPrompt(text) {
         /^wait\b/, // "wait — what about Y?"
         /^(?:i'?m|am\s+i)\s+(?:missing|seeing|reading)\b/,
     ];
-    if (!STEMS.some((re) => re.test(norm)))
+    if (!STEMS.some((re) => re.test(norm))) {
         return false;
+    }
     // Has to contain a question mark OR a concern modal. Lots of false
     // matches without — e.g. "are we" mid-sentence in a feature request.
     const hasQuestion = trimmed.includes('?');
@@ -257,10 +183,17 @@ class ToolUseLoop {
         // explicit "this is a recovery attempt — answer the original goal"
         // framing succeeds. Last resort before terminal throw.
         let finalAnchorRetryUsed = false;
-        const textToolBlock = this.registry.buildSystemPromptBlock();
+        const textToolBlock = effectiveOptions.compactToolBlock
+            ? this.registry.buildCompactSystemPromptBlock()
+            : this.registry.buildSystemPromptBlock();
+        // Lowercased registered tool names — used by the narrated-call
+        // detector to anchor on "I call <real tool>" with near-zero false
+        // positives.
+        const registeredToolNames = new Set(this.registry.getAll().map(t => t.name.toLowerCase()));
         const buildFullSystemPrompt = (useNativeTools) => {
-            if (useNativeTools)
+            if (useNativeTools) {
                 return systemPrompt ?? '';
+            }
             return systemPrompt
                 ? `${systemPrompt}\n\n${textToolBlock}`
                 : textToolBlock;
@@ -278,7 +211,7 @@ class ToolUseLoop {
         // window and the model can drift to a related-but-different topic.
         // Walks back through continuation tokens ("keep going", "yes") to
         // the most recent SUBSTANTIVE prompt. See loop/turnSetup.ts.
-        let { originalGoal, priorUserPromptCount } = (0, turnSetup_1.resolveTurnGoal)({ seedMessages });
+        const { originalGoal, priorUserPromptCount } = (0, turnSetup_1.resolveTurnGoal)({ seedMessages });
         // Track the iteration we last anchored on rather than a boolean
         // so we can re-fire when the model pivots AGAIN later in a long
         // turn. -1 means "never anchored." Re-fire is gated by the
@@ -287,8 +220,9 @@ class ToolUseLoop {
         // continued without resolution for several more iterations.
         let lastGoalAnchorIteration = -1;
         for (const msg of seedMessages) {
-            if (msg.role === 'system')
+            if (msg.role === 'system') {
                 continue;
+            }
             messages.push(msg);
         }
         // Noticing-prompt pivot hint. When the most-recent user message
@@ -305,7 +239,7 @@ class ToolUseLoop {
             });
             messages.push({
                 role: 'user',
-                content: '[Reading-comprehension note for the assistant: the user\'s last message above is a noticing / clarifying question — they spotted a possible gap from prior turns and are asking you to confirm or correct, NOT to continue any prior plan. Before you take any new action, identify what gap the question points at and address it directly. If the question is "are we using X?" the correct first move is to verify whether X is actually being used (read the consumer file, grep for the import, check the call site) and answer honestly — yes/no with evidence. Do NOT create more new artifacts unless the user explicitly says to.]'
+                content: tool_use_parser_1.AUTOMATED_NUDGE_PREFIX + '[Reading-comprehension note for the assistant: the user\'s last message above is a noticing / clarifying question — they spotted a possible gap from prior turns and are asking you to confirm or correct, NOT to continue any prior plan. Before you take any new action, identify what gap the question points at and address it directly. If the question is "are we using X?" the correct first move is to verify whether X is actually being used (read the consumer file, grep for the import, check the call site) and answer honestly — yes/no with evidence. Do NOT create more new artifacts unless the user explicitly says to.]'
             });
         }
         let iterations = 0;
@@ -334,7 +268,7 @@ class ToolUseLoop {
         // recovery, etc.) each have their own caps, but they can chain — a
         // model can spin through 6+ no-tool-call responses because
         // thinking-off recovery resets consecutiveEmptyRetries=0. Captured
-        // 2026-05-26 in Mark's Portfolio session (turn-2026-05-26T02-30-37):
+        // 2026-05-26 in a real CLI session (turn-2026-05-26T02-30-37):
         // model emitted 6 sequential reasoning-only responses inside
         // iteration 4 before the loop finally terminated with a useless
         // final answer ("I need to stop wrapping tool calls in reasoning
@@ -687,7 +621,7 @@ class ToolUseLoop {
                     // current pace and burn the extension too.
                     messages.push({
                         role: 'user',
-                        content: `You've been making good progress and the iteration budget has been extended by ${CAP_EXTENSION_SIZE} (new limit: ${max}). Keep going, but tighten up: prefer batched edits over single-line ones, and start wrapping up when you have a complete answer rather than running to the new cap. This is the ${iterationCapExtensions === 1 ? 'first' : 'second'} of at most ${MAX_CAP_EXTENSIONS} extensions for this turn.`
+                        content: tool_use_parser_1.AUTOMATED_NUDGE_PREFIX + `You've been making good progress and the iteration budget has been extended by ${CAP_EXTENSION_SIZE} (new limit: ${max}). Keep going, but tighten up: prefer batched edits over single-line ones, and start wrapping up when you have a complete answer rather than running to the new cap. This is the ${iterationCapExtensions === 1 ? 'first' : 'second'} of at most ${MAX_CAP_EXTENSIONS} extensions for this turn.`
                     });
                 }
                 else {
@@ -698,7 +632,7 @@ class ToolUseLoop {
                     // vs edit) reflects what the user actually asked for.
                     messages.push({
                         role: 'user',
-                        content: `${goalRecallBlock}` +
+                        content: tool_use_parser_1.AUTOMATED_NUDGE_PREFIX + `${goalRecallBlock}` +
                             `You have reached the tool-use iteration limit (${max}). Stop calling tools. Produce a final answer with three short sections, in this exact shape:\n` +
                             '\n' +
                             wrapUpBody +
@@ -712,7 +646,7 @@ class ToolUseLoop {
                 emit('tool_loop:total_tool_cap', { iteration: iterations, totalToolsExecuted });
                 messages.push({
                     role: 'user',
-                    content: `${goalRecallBlock}` +
+                    content: tool_use_parser_1.AUTOMATED_NUDGE_PREFIX + `${goalRecallBlock}` +
                         `You have executed ${totalToolsExecuted} tool calls this turn — the per-turn cap (${maxTotalTools}) has been reached. Stop calling tools. Produce a final answer with three short sections:\n` +
                         '\n' +
                         wrapUpBody +
@@ -813,7 +747,7 @@ class ToolUseLoop {
                     break;
                 }
                 catch (error) {
-                    if (nativeTools && nativeToolFailureFallback && !nativeFallbackUsed && isRetryableLlmError(error) && !signal?.aborted) {
+                    if (nativeTools && nativeToolFailureFallback && !nativeFallbackUsed && (0, loopShared_1.isRetryableLlmError)(error) && !signal?.aborted) {
                         nativeFallbackUsed = true;
                         nativeTools = false;
                         nativeSchemas = undefined;
@@ -839,7 +773,7 @@ class ToolUseLoop {
                         // visible markup.
                         messages.push({
                             role: 'user',
-                            content: `[Provider error mid-turn — tool channel switched.] The previous attempt failed with: ${summarizeLlmError(error)}. ` +
+                            content: tool_use_parser_1.AUTOMATED_NUDGE_PREFIX + `[Provider error mid-turn — tool channel switched.] The previous attempt failed with: ${(0, loopShared_1.summarizeLlmError)(error)}. ` +
                                 `I retried with the text-based tool-call channel. ` +
                                 `Re-emit your pending action using the text envelope: ` +
                                 `<tool_call>{"name":"...","params":{...}}</tool_call> outside of any reasoning block. ` +
@@ -848,7 +782,7 @@ class ToolUseLoop {
                         });
                         emit('tool_loop:native_tool_fallback', {
                             iteration: iterations,
-                            reason: summarizeLlmError(error)
+                            reason: (0, loopShared_1.summarizeLlmError)(error)
                         });
                         continue;
                     }
@@ -861,13 +795,13 @@ class ToolUseLoop {
                     // this attempt, any further failure on text is genuinely
                     // terminal — the user has been waiting > 30 s and a clean
                     // error is more helpful than another silent retry.
-                    if (nativeFallbackUsed && !textFallbackRetryUsed && isRetryableLlmError(error) && !signal?.aborted) {
+                    if (nativeFallbackUsed && !textFallbackRetryUsed && (0, loopShared_1.isRetryableLlmError)(error) && !signal?.aborted) {
                         textFallbackRetryUsed = true;
                         emit('tool_loop:text_fallback_retry', {
                             iteration: iterations,
-                            reason: summarizeLlmError(error)
+                            reason: (0, loopShared_1.summarizeLlmError)(error)
                         });
-                        await sleep(2400);
+                        await (0, loopShared_1.sleep)(2400);
                         continue;
                     }
                     // Last-resort final-anchor retry. By this point we've spent
@@ -884,21 +818,21 @@ class ToolUseLoop {
                     if (!finalAnchorRetryUsed
                         && textFallbackRetryUsed
                         && originalGoal.trim().length > 0
-                        && isRetryableLlmError(error)
+                        && (0, loopShared_1.isRetryableLlmError)(error)
                         && !signal?.aborted) {
                         finalAnchorRetryUsed = true;
                         messages.push({
                             role: 'user',
-                            content: `[Recovery attempt — previous channel attempts hit ${summarizeLlmError(error)}. ` +
+                            content: tool_use_parser_1.AUTOMATED_NUDGE_PREFIX + `[Recovery attempt — previous channel attempts hit ${(0, loopShared_1.summarizeLlmError)(error)}. ` +
                                 `Discarding any partial tool_call or reasoning state from those attempts. ` +
                                 `Original user goal restated as a fresh anchor:]\n\n${originalGoal.trim()}`
                         });
                         emit('tool_loop:final_anchor_retry', {
                             iteration: iterations,
-                            reason: summarizeLlmError(error),
+                            reason: (0, loopShared_1.summarizeLlmError)(error),
                             goalPreview: originalGoal.slice(0, 120)
                         });
-                        await sleep(3600);
+                        await (0, loopShared_1.sleep)(3600);
                         continue;
                     }
                     throw error;
@@ -928,7 +862,7 @@ class ToolUseLoop {
             // have their own caps, but they chain — thinking-off recovery
             // resets consecutiveEmptyRetries=0, parse-retry has its own
             // counter, and the model can move between failure modes faster
-            // than any one detector can give up. Mark Portfolio session
+            // than any one detector can give up. Real CLI session
             // 2026-05-26 turn-02-30-37: 6 sequential reasoning-only
             // responses inside one iteration before the loop terminated
             // silently. This counter increments on EVERY response without
@@ -969,9 +903,9 @@ class ToolUseLoop {
                 // Also reset the prefill-recovery one-shot. The recovery budget
                 // is "per stretch of failures," not "once per turn" — without
                 // this reset, a long refactor that recovers from one prefill
-                // stall and then hits another (Mark, gregoryhite-site
-                // 2026-06-02T23-56-38: 26 iterations, prefill burned at iter 25,
-                // iter 26 stalled again with no recovery left) falls straight
+                // stall and then hits another (observed in a real run: 26
+                // iterations, prefill burned at iter 25, iter 26 stalled again
+                // with no recovery left) falls straight
                 // through to the terminal "Bandit stalled" fallback even though
                 // every other detector still has budget. The hard cap on
                 // noToolCallAttemptsThisTurn (5) bounds the total stuck
@@ -1003,7 +937,7 @@ class ToolUseLoop {
                 messages.push({ role: 'assistant', content: scrubbed });
                 messages.push({
                     role: 'user',
-                    content: 'You emitted a `<tool_result>` envelope in your response. Those envelopes are SYSTEM output — they appear BETWEEN your turns, never inside your own message. If you meant to invoke a tool, emit a single `<tool_call>{"name":"...","params":{...}}</tool_call>` and wait for the real result. If the task is complete, give a plain-prose final answer with no XML envelopes. Retry now.'
+                    content: tool_use_parser_1.AUTOMATED_NUDGE_PREFIX + 'You emitted a `<tool_result>` envelope in your response. Those envelopes are SYSTEM output — they appear BETWEEN your turns, never inside your own message. If you meant to invoke a tool, emit a single `<tool_call>{"name":"...","params":{...}}</tool_call>` and wait for the real result. If the task is complete, give a plain-prose final answer with no XML envelopes. Retry now.'
                 });
                 continue;
             }
@@ -1034,7 +968,7 @@ class ToolUseLoop {
                 messages.push({ role: 'assistant', content: scrubbed });
                 messages.push({
                     role: 'user',
-                    content: 'You emitted ` ```bandit-tl` (or `bandit-run` / `bandit-subagent`) fenced JSON in your response. Those fences are emitted by the EXTENSION HOST to log real tool execution — you CANNOT produce them. They show up in your context because the host logged actual tool calls, not because you can fabricate them. To actually run a tool, emit `<tool_call>{"name":"...","params":{...}}</tool_call>` and wait for the real result. Your fake fences mean NO work has happened this turn. You have TWO options for your retry, and ONLY two: (a) Emit a real `<tool_call>{"name":"...","params":{...}}</tool_call>` envelope NOW to actually do the work, then wait for the real result. (b) Honestly state "I have not [action] yet" and STOP. Do NOT claim completion. You MUST NOT claim you have fixed / eliminated / resolved / removed / cleaned / verified anything. No "successfully [verb]" phrasing. No numbered lists of "Step 1: I did X" actions. No "the project is now in a healthy state." Until a real `<tool_call>` lands on disk and returns a real tool-result, nothing has changed. Lying about completion is the worst failure mode. Retry now.'
+                    content: tool_use_parser_1.AUTOMATED_NUDGE_PREFIX + 'You emitted ` ```bandit-tl` (or `bandit-run` / `bandit-subagent`) fenced JSON in your response. Those fences are emitted by the EXTENSION HOST to log real tool execution — you CANNOT produce them. They show up in your context because the host logged actual tool calls, not because you can fabricate them. To actually run a tool, emit `<tool_call>{"name":"...","params":{...}}</tool_call>` and wait for the real result. Your fake fences mean NO work has happened this turn. You have TWO options for your retry, and ONLY two: (a) Emit a real `<tool_call>{"name":"...","params":{...}}</tool_call>` envelope NOW to actually do the work, then wait for the real result. (b) Honestly state "I have not [action] yet" and STOP. Do NOT claim completion. You MUST NOT claim you have fixed / eliminated / resolved / removed / cleaned / verified anything. No "successfully [verb]" phrasing. No numbered lists of "Step 1: I did X" actions. No "the project is now in a healthy state." Until a real `<tool_call>` lands on disk and returns a real tool-result, nothing has changed. Lying about completion is the worst failure mode. Retry now.'
                 });
                 continue;
             }
@@ -1056,7 +990,10 @@ class ToolUseLoop {
                 && !(0, tool_use_parser_1.hasToolCalls)(response)
                 && toolAbsenceCorrectionsFired < TOOL_ABSENCE_CORRECTION_CAP) {
                 const registeredNames = this.registry.getAll().map((t) => t.name);
-                const absence = (0, toolAvailabilityDetector_1.detectFalseToolAbsence)(response, registeredNames);
+                // Reasoning channels MUST be stripped before prose-matching:
+                // reasoning narrates tool usage by name and false-positives the
+                // absence phrases (see toolAvailabilityDetector.ts header).
+                const absence = (0, toolAvailabilityDetector_1.detectFalseToolAbsence)((0, tool_use_parser_1.stripReasoningChannels)(response), registeredNames);
                 if (absence.detected) {
                     toolAbsenceCorrectionsFired++;
                     emit('tool_loop:false_tool_absence', {
@@ -1088,7 +1025,7 @@ class ToolUseLoop {
                 messages.push({ role: 'assistant', content: response });
                 messages.push({
                     role: 'user',
-                    content: 'The previous tool call returned an error and you produced no follow-up tool_call. ' +
+                    content: tool_use_parser_1.AUTOMATED_NUDGE_PREFIX + 'The previous tool call returned an error and you produced no follow-up tool_call. ' +
                         'Do NOT silently abandon the request — the user expects you to either retry with corrected parameters OR state explicitly which precondition failed and why you cannot proceed. ' +
                         'Choose one: (a) emit a corrected `<tool_call>{"name":"...","params":{...}}</tool_call>` now, fixing the param shape or value the error pointed at; ' +
                         '(b) give a one-line final answer naming the exact precondition you lack (e.g. "I cannot trash message X because the message id is unknown — please provide it"). ' +
@@ -1107,12 +1044,12 @@ class ToolUseLoop {
             // without emitting an actual tool_call. Visually the user sees a
             // wall of reasoning text and nothing happens. Strip the reasoning
             // fences before checking emptiness so the same nudge fires.
-            const stripped = response
-                .replace(/<think\b[\s\S]*?<\/think\s*>/gi, '')
-                .replace(/<think\b[\s\S]*$/i, '')
-                .replace(/```bandit-reasoning\b[\s\S]*?```/gi, '')
-                .replace(/```bandit-reasoning\b[\s\S]*$/i, '')
-                .trim();
+            // Strip reasoning channels AND stray fence scaffolding (a bare
+            // leading ``` opener that wraps the reasoning) so the
+            // reasoning-only check isn't fooled into seeing the orphan ``` as
+            // a real answer — which let a "reasoning + no tool call" turn end
+            // with no answer (real CLI run, 2026-06-15).
+            const stripped = (0, tool_use_parser_1.stripToAnswerContent)(response);
             const reasoningOnly = !stripped && response.trim().length > 0;
             // "Narrated but didn't act" detector. Some models (notably ones
             // post-trained for a different tool-call envelope, e.g. OpenAI
@@ -1128,8 +1065,8 @@ class ToolUseLoop {
             // in the model's final clause, not an earlier "I have already
             // searched the file" preamble before a real answer.
             //
-            // Captured 2026-05-25 (Mark, Portfolio IDE session): model emitted
-            // "I'll redesign the portfolio... Let me rewrite both files." with
+            // Captured 2026-05-25 (real IDE session): model emitted
+            // "I'll redesign the page... Let me rewrite both files." with
             // NO tool_call and the turn closed as a final answer because
             // neither `redesign` nor `rewrite` was on the list. A long
             // session ended with zero work shipped. Missing a verb here =
@@ -1137,10 +1074,13 @@ class ToolUseLoop {
             const NARRATE_VERB_RE = /\b(use|uses|used|using|call|calls|called|calling|invoke|invokes|invoked|invoking|execute|executes|executed|executing|run|runs|running|ran|search|searches|searched|searching|look|looks|looked|looking|read|reads|reading|check|checks|checked|checking|find|finds|finding|found|list|lists|listed|listing|fetch|fetches|fetched|fetching|grep|greps|grepped|grepping|explore|explores|explored|exploring|locate|locates|located|locating|plan|plans|planned|planning|start|starts|started|starting|begin|begins|began|beginning|create|creates|created|creating|write|writes|wrote|writing|rewrite|rewrites|rewrote|rewriting|rewritten|build|builds|built|building|rebuild|rebuilds|rebuilt|rebuilding|update|updates|updated|updating|implement|implements|implemented|implementing|refactor|refactors|refactored|refactoring|redesign|redesigns|redesigned|redesigning|design|designs|designed|designing|generate|generates|generated|generating|scaffold|scaffolds|scaffolded|scaffolding|set\s+up|setting\s+up|tackle|tackles|tackled|tackling|do|does|did|doing|make|makes|made|making|batch|batches|batched|batching|execute|prepare|prepares|prepared|preparing|draft|drafts|drafted|drafting|outline|outlines|outlined|outlining|organize|organizes|organized|organizing|structure|structures|structured|structuring|kick\s+off|kicking\s+off|fix|fixes|fixed|fixing|edit|edits|edited|editing|modify|modifies|modified|modifying|patch|patches|patched|patching|adjust|adjusts|adjusted|adjusting|replace|replaces|replaced|replacing|swap|swaps|swapped|swapping|polish|polishes|polished|polishing|clean\s+up|cleaning\s+up|tidy|tidies|tidied|tidying|finalize|finalizes|finalized|finalizing|finish|finishes|finished|finishing|complete|completes|completed|completing|wire|wires|wired|wiring|hook|hooks|hooked|hooking|render|renders|rendered|rendering|style|styles|styled|styling|theme|themes|themed|theming|redo|redoes|redid|redoing|port|ports|ported|porting|migrate|migrates|migrated|migrating|configure|configures|configured|configuring|install|installs|installed|installing|remove|removes|removed|removing|delete|deletes|deleted|deleting|rename|renames|renamed|renaming)\b/i;
             const NARRATE_INTENT_RE = /\b(we (?:will|need to|should)|we'?ll|we'?re going to|i'?ll|i will|let me|let'?s|going to|i'?m going to|i need to)\b/i;
             // Real code fences pass through; narrate only fires when the
-            // model emitted no structured payload at all. Check the STRIPPED
-            // response, not the raw one — `bandit-reasoning` fences are
-            // reasoning, not structured output.
-            const hasCodeFence = /```[a-zA-Z0-9_-]*\s*\n/.test(stripped);
+            // model emitted no structured payload at all. Use the
+            // reasoning-stripped response (NOT `stripped`, which also removes
+            // bare fence-marker lines) so a genuine ```json / ```diff payload
+            // still suppresses the narrate nudge and reaches its own
+            // auto-promote detector. `bandit-reasoning` fences are reasoning,
+            // not structured output, so they're excluded either way.
+            const hasCodeFence = /```[a-zA-Z0-9_-]*\s*\n/.test((0, tool_use_parser_1.stripReasoningChannels)(response));
             const tailMatch = stripped.match(/(?:[.!?]\s+)([^.!?]*)$/);
             const tail = (tailMatch ? tailMatch[1] : stripped).slice(-200);
             const narratedButNoAction = !(0, tool_use_parser_1.hasToolCalls)(response) &&
@@ -1149,6 +1089,22 @@ class ToolUseLoop {
                 stripped.length < 240 &&
                 NARRATE_INTENT_RE.test(tail) &&
                 NARRATE_VERB_RE.test(tail);
+            // Performative narrated call: "I call read_file with path=README.md".
+            // The generic gate above caps stripped.length at 240 to avoid false
+            // positives on real answers that merely contain narrate verbs — but
+            // when the final clause NAMES A REGISTERED TOOL in a performative
+            // phrase, the length cap is wrong: a long planning recap that ends
+            // "I call read_file with path=…" is a stall no matter how long the
+            // recap is, and tool-name anchoring keeps the false-positive rate
+            // near zero. Captured 2026-06-12 (real CLI session,
+            // gemma4:e4b): iteration 1 emitted a reasoning recap ending with
+            // exactly that sentence and no tool_call — the generic gate missed
+            // it (over the length cap; intent list lacks present-tense "I
+            // call") and the turn closed as a final answer.
+            const narratedCallMatch = stripped.slice(-300).match(/\b(?:i\s+(?:will\s+|now\s+|then\s+)?(?:call|invoke|run|use)|calling|invoking|let'?s\s+(?:call|run|use))\s+(?:the\s+)?`?([a-z][a-z0-9_]*)`?/i);
+            const narratedToolCallNoAction = !(0, tool_use_parser_1.hasToolCalls)(response) &&
+                !!narratedCallMatch &&
+                registeredToolNames.has(narratedCallMatch[1].toLowerCase());
             // Empty-response retry: was previously gated to `iterations > 0`
             // under the assumption "empty first response = provider outage."
             // That assumption was wrong — with bandit-logic
@@ -1160,7 +1116,7 @@ class ToolUseLoop {
             // the model gets a second chance (and the thinking-off recovery
             // below can flip it to non-thinking mode if the second pass also
             // empties).
-            const shouldNudge = (!response.trim() || reasoningOnly || narratedButNoAction) &&
+            const shouldNudge = (!response.trim() || reasoningOnly || narratedButNoAction || narratedToolCallNoAction) &&
                 !hitLimit &&
                 consecutiveEmptyRetries < 2 &&
                 !thinkingOffRecoveryAttempted;
@@ -1170,16 +1126,17 @@ class ToolUseLoop {
                     iteration: iterations,
                     attempt: consecutiveEmptyRetries,
                     reasoningOnly,
-                    narratedButNoAction
+                    narratedButNoAction,
+                    narratedToolCallNoAction
                 });
-                const nudgeMessage = narratedButNoAction
+                const nudgeMessage = (narratedButNoAction || narratedToolCallNoAction)
                     ? 'You announced your next step in prose ("we will search…" / "let me check…" / "use X to find Y") but did NOT emit a `<tool_call>` envelope. Announcing intent is not enough — you must actually invoke the tool. Emit the call now in this exact format, OUTSIDE of any reasoning block, with NO commentary and NO markdown fence:\n\n<tool_call>{"name":"<tool>","params":{"<key>":"<value>"}}</tool_call>\n\nReplace name/params with the right values for your task. Or, if the task is already answerable from what you know, give a final answer instead.'
                     : reasoningOnly
                         ? 'You completed reasoning but emitted no tool_call AND no final answer. The reasoning text alone does not run a tool — you must emit a `<tool_call>` envelope OUTSIDE the reasoning block. Format example (replace name/params for your task):\n\n<tool_call>{"name":"<tool>","params":{"<key>":"<value>"}}</tool_call>\n\nNo prose around it, no markdown fence, just the bare tag. If the task is answerable without a tool, write a complete final answer instead. Do not stop after only thinking.'
                         : 'Your previous response was empty. Either emit a `<tool_call>{"name":"<tool>","params":{...}}</tool_call>` to invoke a tool, OR produce a complete final answer using what you have. Do not respond with an empty message.';
                 messages.push({
                     role: 'user',
-                    content: nudgeMessage
+                    content: tool_use_parser_1.AUTOMATED_NUDGE_PREFIX + nudgeMessage
                 });
                 continue;
             }
@@ -1212,7 +1169,7 @@ class ToolUseLoop {
                 });
                 messages.push({
                     role: 'user',
-                    content: 'Switching to non-thinking mode for this attempt because reasoning-only retries exhausted. Emit either a tool_call or a complete final answer. No more reasoning preamble.'
+                    content: tool_use_parser_1.AUTOMATED_NUDGE_PREFIX + 'Switching to non-thinking mode for this attempt because reasoning-only retries exhausted. Emit either a tool_call or a complete final answer. No more reasoning preamble.'
                 });
                 continue;
             }
@@ -1260,8 +1217,8 @@ class ToolUseLoop {
                 messages.push({
                     role: 'user',
                     content: firstRetry
-                        ? 'Your previous tool_call was not valid JSON — I could not parse it. Common cause: unescaped `"` characters inside a string value (for example `["", "", ""]` inside a `content` string). Retry the tool call with properly escaped JSON: every `"` inside a string value must be written as `\\"`, and every newline as `\\n`. If the content is very long, consider `replace_range` for a line-numbered block or breaking the change into smaller edits.'
-                        : 'Your tool_call still did not parse. Do NOT retry with the same shape or the same escaping failure. Switch tactics: (a) call `replace_range` for a large block whose line numbers you just read, (b) call `write_file` for a new file, or (c) split the change into multiple small `apply_edit` calls that each target just one method or block (e.g. 3-5 lines of `find`, 5-10 lines of `replace`) instead of rewriting the whole class. Pick the smallest scope that accomplishes the next step. If you cannot produce a valid tool call, respond with a plain-prose final answer acknowledging you could not complete the edit.'
+                        ? tool_use_parser_1.AUTOMATED_NUDGE_PREFIX + 'Your previous tool_call was not valid JSON — I could not parse it. Common cause: unescaped `"` characters inside a string value (for example `["", "", ""]` inside a `content` string). Retry the tool call with properly escaped JSON: every `"` inside a string value must be written as `\\"`, and every newline as `\\n`. If the content is very long, consider `replace_range` for a line-numbered block or breaking the change into smaller edits.'
+                        : tool_use_parser_1.AUTOMATED_NUDGE_PREFIX + 'Your tool_call still did not parse. Do NOT retry with the same shape or the same escaping failure. Switch tactics: (a) call `replace_range` for a large block whose line numbers you just read, (b) call `write_file` for a new file, or (c) split the change into multiple small `apply_edit` calls that each target just one method or block (e.g. 3-5 lines of `find`, 5-10 lines of `replace`) instead of rewriting the whole class. Pick the smallest scope that accomplishes the next step. If you cannot produce a valid tool call, respond with a plain-prose final answer acknowledging you could not complete the edit.'
                 });
                 continue;
             }
@@ -1275,7 +1232,7 @@ class ToolUseLoop {
             if (!hitLimit && !(0, tool_use_parser_1.hasToolCalls)(response)) {
                 const normalized = response.toLowerCase().replace(/\s+/g, ' ').trim();
                 const prior = recentNonToolResponses[recentNonToolResponses.length - 1];
-                const looksLikeLoop = !!prior && (() => {
+                const looksLikeLoop = Boolean(prior) && (() => {
                     // Cheap similarity: longest common prefix / max length. If two
                     // consecutive no-tool responses share >60% of their text by
                     // prefix the model is repeating itself. More sophisticated
@@ -1284,8 +1241,9 @@ class ToolUseLoop {
                     const short = prior.length < normalized.length ? prior : normalized;
                     const long = prior.length < normalized.length ? normalized : prior;
                     let matched = 0;
-                    while (matched < short.length && short[matched] === long[matched])
+                    while (matched < short.length && short[matched] === long[matched]) {
                         matched++;
+                    }
                     return matched / short.length > 0.6;
                 })();
                 // Also flag the self-contradiction signature from the real
@@ -1306,7 +1264,7 @@ class ToolUseLoop {
                     });
                     messages.push({
                         role: 'user',
-                        content: 'STOP deliberating. Your last response either repeated itself, contradicted itself (e.g. "Wait, I see X / Actually I\'ll try X"), or was aborted mid-stream as a loop. Do NOT continue speculating about what files might exist. Take exactly one of these actions now: (a) invoke a tool (`list_files`, `read_file`, `search_code`, etc.) to answer the question with real data, OR (b) give up and tell the user plainly that you could not complete the task and why. Do not write more than two sentences of prose before either calling a tool or terminating.'
+                        content: tool_use_parser_1.AUTOMATED_NUDGE_PREFIX + 'STOP deliberating. Your last response either repeated itself, contradicted itself (e.g. "Wait, I see X / Actually I\'ll try X"), or was aborted mid-stream as a loop. Do NOT continue speculating about what files might exist. Take exactly one of these actions now: (a) invoke a tool (`list_files`, `read_file`, `search_code`, etc.) to answer the question with real data, OR (b) give up and tell the user plainly that you could not complete the task and why. Do not write more than two sentences of prose before either calling a tool or terminating.'
                     });
                     recentNonToolResponses.length = 0;
                     continue;
@@ -1396,7 +1354,7 @@ class ToolUseLoop {
                                 // without being so loud that it derails prose responses.
                                 messages.push({
                                     role: 'user',
-                                    content: 'Note: I detected a JSON todo list in your response and auto-promoted it to a todo_write call. Next time, emit `<tool_call>{"name":"todo_write","params":{"items":"..."}}</tool_call>` directly instead of pasting JSON as a code block — pasted JSON does not update your plan, only the tool call does.'
+                                    content: tool_use_parser_1.AUTOMATED_NUDGE_PREFIX + 'Note: I detected a JSON todo list in your response and auto-promoted it to a todo_write call. Next time, emit `<tool_call>{"name":"todo_write","params":{"items":"..."}}</tool_call>` directly instead of pasting JSON as a code block — pasted JSON does not update your plan, only the tool call does.'
                                 });
                                 iterations++;
                                 continue;
@@ -1424,7 +1382,26 @@ class ToolUseLoop {
                         responsePreview: response.slice(0, 300)
                     });
                 }
-                const finalResponse = (0, tool_use_parser_1.stripToolCallMarkup)(response).trim();
+                // Reasoning channels are streamed live by the host for display —
+                // leaving them in the terminal answer double-renders them, and on
+                // fabrication-retry exhaustion it prints the model's confusion
+                // narrative as if it were the answer (real CLI run,
+                // 2026-06-12T20-19 turn: three near-identical "the user is
+                // correcting my formatting error" reasoning blocks rendered above
+                // the real answer). The stall fallback below still inspects the
+                // raw `response`, so reasoning-only turns keep their fallback.
+                // ORDER MATTERS: reasoning channels strip FIRST. Reasoning text
+                // routinely MENTIONS envelopes in backticks ("I included a
+                // `<tool_result>` envelope…"); if markup stripping ran first, its
+                // envelope regex would match from that in-fence mention through
+                // to the real closing tag, eat the fence's closing ``` along the
+                // way, and the unclosed-fence cleanup would then wipe the entire
+                // rest of the answer.
+                const finalResponse = (0, tool_use_parser_1.stripToolCallMarkup)(response
+                    .replace(/<think\b[\s\S]*?<\/think\s*>/gi, '')
+                    .replace(/<think\b[\s\S]*$/i, '')
+                    .replace(/```bandit-reasoning\b[\s\S]*?```/gi, '')
+                    .replace(/```bandit-reasoning\b[\s\S]*$/i, '')).trim();
                 // False-completion detector. Small models regularly end a turn
                 // with "I refactored the file" / "here is the updated code" text
                 // without ever emitting a file-edit tool call.
@@ -1434,14 +1411,25 @@ class ToolUseLoop {
                 // this turn, push one corrective user message into the loop
                 // and continue for one more iteration. The nudge is capped at
                 // one per turn so a truly confused model can still terminate.
-                if (!hitLimit && !falseCompletionNudged && editToolsInvoked === 0) {
+                //
+                // ONLY fires when the goal actually implies an edit. Without this
+                // gate the detector demanded an edit on a purely informational
+                // "tell me about this repo" turn: the model correctly said "I have
+                // completed the overview" (a completion phrase), no edit ran
+                // (none was asked for), so the nudge fired and replaced the good
+                // markdown overview with a defensive "no edits are required"
+                // answer — plus a wall of "automated harness check" reasoning.
+                // An analysis goal that does NOT also imply an edit can never
+                // false-complete, so skip it. (real CLI run, 2026-06-12.)
+                const goalCouldExpectEdit = promptImpliesFileEdit || !promptWantsAnalysis;
+                if (!hitLimit && !falseCompletionNudged && editToolsInvoked === 0 && goalCouldExpectEdit) {
                     const claimsCompletion = FALSE_COMPLETION_PATTERNS.some(re => re.test(finalResponse));
                     if (claimsCompletion) {
                         falseCompletionNudged = true;
                         emit('tool_loop:false_completion_nudge', { iteration: iterations, responsePreview: finalResponse.slice(0, 200) });
                         messages.push({
                             role: 'user',
-                            content: 'Your response either claims work is done OR apologizes and asks what to do next — but I see NO successful `write_file`, `apply_edit`, `replace_range`, or `apply_patch` tool call in this turn, so nothing on disk has changed. ' +
+                            content: tool_use_parser_1.AUTOMATED_NUDGE_PREFIX + 'Your response either claims work is done OR apologizes and asks what to do next — but I see NO successful `write_file`, `apply_edit`, `replace_range`, or `apply_patch` tool call in this turn, so nothing on disk has changed. ' +
                                 'Do NOT ask the user which task to resume, do NOT promise to escape JSON "in your next tool call", and do NOT defer. Either (a) emit a real edit tool call NOW with the actual change — use `replace_range` for a large block whose line numbers you just read, `apply_edit` for a small exact replacement, or `write_file` for a new file — or (b) respond honestly that you could not complete the task and briefly explain why. Retry the tool call yourself; the user cannot help you escape JSON.'
                         });
                         continue;
@@ -1480,7 +1468,7 @@ class ToolUseLoop {
                         });
                         messages.push({
                             role: 'user',
-                            content: `Your response describes edits to ${fileSet.size} files (${[...fileSet].slice(0, 8).join(', ')}${fileSet.size > 8 ? ', …' : ''}), but only ${editToolsInvoked} successful edit${editToolsInvoked === 1 ? '' : 's'} actually fired this turn. ` +
+                            content: tool_use_parser_1.AUTOMATED_NUDGE_PREFIX + `Your response describes edits to ${fileSet.size} files (${[...fileSet].slice(0, 8).join(', ')}${fileSet.size > 8 ? ', …' : ''}), but only ${editToolsInvoked} successful edit${editToolsInvoked === 1 ? '' : 's'} actually fired this turn. ` +
                                 `The remaining ${fileSet.size - editToolsInvoked} file(s) were NOT modified — nothing landed on disk for them. ` +
                                 'Either (a) emit the missing `apply_edit` / `replace_range` / `write_file` tool calls now to actually do the work, OR (b) revise your response to honestly describe ONLY the edits that successfully applied. Do not summarize work that did not happen.'
                         });
@@ -1491,7 +1479,7 @@ class ToolUseLoop {
                 // ("break out", "split", "refactor", "extract", "move") imply
                 // mutation of the SOURCE file the user wants restructured, not
                 // just creation of new sibling files. Failure mode observed
-                // 2026-05-25 on a Portfolio React refactor: model read App.jsx,
+                // 2026-05-25 on a local React refactor: model read App.jsx,
                 // wrote 5 new component files, never touched App.jsx, declared
                 // completion. User had to follow up "are we using these?" to
                 // force the integration step — and even that follow-up turn
@@ -1523,7 +1511,7 @@ class ToolUseLoop {
                         const writeCount = filesWrittenThisTurn.size;
                         messages.push({
                             role: 'user',
-                            content: `The user's goal contains a refactor verb (refactor/break out/split/extract/move) which implies the SOURCE file(s) should be modified, not just supplemented with new siblings. You read ${readPreview}${readNotWritten.length > 3 ? ' and others' : ''} for context, then wrote ${writeCount} NEW file(s), but you NEVER modified the file(s) you read. The refactor is incomplete: the source file still contains the old monolithic code. ` +
+                            content: tool_use_parser_1.AUTOMATED_NUDGE_PREFIX + `The user's goal contains a refactor verb (refactor/break out/split/extract/move) which implies the SOURCE file(s) should be modified, not just supplemented with new siblings. You read ${readPreview}${readNotWritten.length > 3 ? ' and others' : ''} for context, then wrote ${writeCount} NEW file(s), but you NEVER modified the file(s) you read. The refactor is incomplete: the source file still contains the old monolithic code. ` +
                                 `Emit the missing apply_edit/replace_range/write_file call on the source file now — it should import from the new files and drop the inlined code that's been extracted. If the refactor is genuinely a "scaffold only, leave source untouched" task, say so explicitly and explain why the source doesn't need to change.`
                         });
                         continue;
@@ -1552,8 +1540,9 @@ class ToolUseLoop {
                     let match;
                     while ((match = fenceRe.exec(finalResponse)) !== null) {
                         const nonEmpty = match[1].split('\n').filter(l => l.trim().length > 0).length;
-                        if (nonEmpty > biggestFenceLines)
+                        if (nonEmpty > biggestFenceLines) {
                             biggestFenceLines = nonEmpty;
+                        }
                     }
                     if (biggestFenceLines >= MIN_LINES) {
                         codeFenceHallucinationNudged = true;
@@ -1564,7 +1553,7 @@ class ToolUseLoop {
                         });
                         messages.push({
                             role: 'user',
-                            content: 'You produced a substantial code block in your reply but never emitted a `write_file`, `apply_edit`, `replace_range`, or `apply_patch` tool call — so the change is NOT on disk. ' +
+                            content: tool_use_parser_1.AUTOMATED_NUDGE_PREFIX + 'You produced a substantial code block in your reply but never emitted a `write_file`, `apply_edit`, `replace_range`, or `apply_patch` tool call — so the change is NOT on disk. ' +
                                 'Do not ask the user to paste your code into a file themselves. Take exactly one of these actions now: (a) call `replace_range`, `apply_edit`, or `write_file` with the real change to the correct file, OR (b) say plainly that you could not locate the target file and explain what you searched for. Do not wrap up with another prose + code-fence response.'
                         });
                         continue;
@@ -1648,7 +1637,7 @@ class ToolUseLoop {
                     });
                     messages.push({
                         role: 'user',
-                        content: 'Your first response had reasoning but emitted NO tool call — that is a hard stall for a subagent (you exist to gather information; reasoning alone produces zero output). ' +
+                        content: tool_use_parser_1.AUTOMATED_NUDGE_PREFIX + 'Your first response had reasoning but emitted NO tool call — that is a hard stall for a subagent (you exist to gather information; reasoning alone produces zero output). ' +
                             'For your next response, emit a tool call. The minimum viable starting move for ANY exploration goal is:\n\n' +
                             '<tool_call>{"name":"list_files","params":{"path":"."}}</tool_call>\n\n' +
                             'Copy that exact envelope as the very first thing you emit (you may keep the reasoning block before it if your model needs to think first, but the tool_call envelope MUST appear in this turn). ' +
@@ -1670,7 +1659,7 @@ class ToolUseLoop {
                 // user saw nothing.
                 //
                 // The gate also covers the "regurgitated reasoning after
-                // native→text channel fallback" case. Mark Portfolio
+                // native→text channel fallback" case. Real CLI
                 // 2026-05-31T17-39-53 cleanup turn: native-tool path 500'd,
                 // text-channel recovery prompted the model to re-emit its
                 // pending action, but the model just echoed its prior
@@ -1682,12 +1671,7 @@ class ToolUseLoop {
                 // before testing emptiness — if the response would render to
                 // the user as nothing-actionable, the fallback fires and the
                 // user sees what the model was thinking instead of silence.
-                const reasoningStripped = response
-                    .replace(/<think\b[\s\S]*?<\/think\s*>/gi, '')
-                    .replace(/<think\b[\s\S]*$/i, '')
-                    .replace(/```bandit-reasoning\b[\s\S]*?```/gi, '')
-                    .replace(/```bandit-reasoning\b[\s\S]*$/i, '')
-                    .trim();
+                const reasoningStripped = (0, tool_use_parser_1.stripToAnswerContent)(response);
                 const visibleAfterStrip = (0, tool_use_parser_1.stripToolCallMarkup)(reasoningStripped).trim();
                 if (!visibleAfterStrip) {
                     // Pull the last 1-2 sentences of reasoning so the user sees
@@ -1713,7 +1697,7 @@ class ToolUseLoop {
                 // and the inline empty-retry / narrate-no-action detector
                 // already used its retry budget (consecutiveEmptyRetries >= 2)
                 // so it couldn't nudge again, the user is left reading a
-                // promise the model never kept. Mark Portfolio
+                // promise the model never kept. Real CLI
                 // 2026-05-31T17-39-53 cleanup turn: after a native→text channel
                 // recovery, the model emitted "Let me revert it:" with a
                 // dangling colon and no tool call; the user saw the prose end
@@ -1730,13 +1714,13 @@ class ToolUseLoop {
                 // The trailing colon + intent phrase combination is the
                 // smoking gun. We DON'T also require NARRATE_VERB_RE here:
                 // the existing inline detector's verb list misses "revert"
-                // (Portfolio 2026-05-31) and would miss any other one-off
+                // (real run 2026-05-31) and would miss any other one-off
                 // action verb a model might use. The colon alone is rare
                 // enough in a legit final answer that pairing it with
                 // "let me" / "I'll" / "we'll" / etc. is specific enough.
                 //
-                // Period-terminated variant (added 2026-06-03 after Mark's
-                // gregoryhite-site run): the model ended with "Let me fix
+                // Period-terminated variant (added 2026-06-03 after a real
+                // run): the model ended with "Let me fix
                 // all three project cards at once." — full sentence, full
                 // stop, no colon. Both prefill and thinking-off recovery
                 // had been spent earlier in the turn so the user saw the
@@ -1856,7 +1840,7 @@ class ToolUseLoop {
                 toolCalls = [];
                 messages.push({
                     role: 'user',
-                    content: `You have revised the plan in ${consecutiveTodoOnlyIterations + 1} consecutive iterations without executing any step. ` +
+                    content: tool_use_parser_1.AUTOMATED_NUDGE_PREFIX + `You have revised the plan in ${consecutiveTodoOnlyIterations + 1} consecutive iterations without executing any step. ` +
                         'Execute the first pending task now using a concrete tool — `search_code`, `read_file`, `apply_edit`, `replace_range`, `write_file`, or `run_command`. ' +
                         'Once a task is actually DONE (tool call succeeded), you may call `todo_write` again to mark it completed — but not to re-plan. ' +
                         'If you cannot identify a next step, respond to the user with a short honest explanation and stop.'
@@ -1881,7 +1865,7 @@ class ToolUseLoop {
                 });
                 messages.push({
                     role: 'user',
-                    content: `You have spent ${consecutiveApplyEditOnlyIterations} consecutive iterations on apply_edit alone. ` +
+                    content: tool_use_parser_1.AUTOMATED_NUDGE_PREFIX + `You have spent ${consecutiveApplyEditOnlyIterations} consecutive iterations on apply_edit alone. ` +
                         'If these are mechanical fixes of the same shape (one type annotation, one rename, one import path, one missing semicolon per call), STOP doing them one at a time — you will exhaust the iteration budget before the file is clean.\n' +
                         '\n' +
                         'Better tactics, in order of preference:\n' +
@@ -1995,7 +1979,7 @@ class ToolUseLoop {
                     });
                     messages.push({
                         role: 'user',
-                        content: `You just spawned ${bgSpawns.length} background subagents:\n${goalLines}\n\n` +
+                        content: tool_use_parser_1.AUTOMATED_NUDGE_PREFIX + `You just spawned ${bgSpawns.length} background subagents:\n${goalLines}\n\n` +
                             'Do NOT do those same explorations yourself in the next iteration — the subagents will deliver their synopses via the auto-inject path on a later turn. ' +
                             'Choose ONE of: ' +
                             '(a) work on a different, independent piece of the task that those subagents are NOT covering, ' +
@@ -2036,7 +2020,7 @@ class ToolUseLoop {
                     });
                     messages.push({
                         role: 'user',
-                        content: 'You set up a plan with `todo_write` earlier but have since completed ' +
+                        content: tool_use_parser_1.AUTOMATED_NUDGE_PREFIX + 'You set up a plan with `todo_write` earlier but have since completed ' +
                             `${editsSinceLastTodo} edit${editsSinceLastTodo === 1 ? '' : 's'} without updating it. ` +
                             'Call `todo_write` now with the current status — mark finished items as `completed` and leave remaining items as `pending`. ' +
                             "The Plan block in the user's UI mirrors your last `todo_write`, so skipping this leaves them looking at a stale checklist while real work has landed."