@burtson-labs/agent-core 1.6.16 → 1.6.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93)
  1. package/README.md +2 -0
  2. package/dist/index.d.ts +3 -1
  3. package/dist/index.d.ts.map +1 -1
  4. package/dist/index.js +8 -1
  5. package/dist/index.js.map +1 -1
  6. package/dist/mcp/activation.js +16 -8
  7. package/dist/mcp/activation.js.map +1 -1
  8. package/dist/mcp/clientPool.js +40 -22
  9. package/dist/mcp/clientPool.js.map +1 -1
  10. package/dist/mcp/server.js +16 -10
  11. package/dist/mcp/server.js.map +1 -1
  12. package/dist/mcp/toolAdapter.js +21 -11
  13. package/dist/mcp/toolAdapter.js.map +1 -1
  14. package/dist/providers/deterministic-provider.d.ts +1 -1
  15. package/dist/providers/deterministic-provider.d.ts.map +1 -1
  16. package/dist/runtime/AgentRuntime.d.ts +2 -2
  17. package/dist/runtime/AgentRuntime.d.ts.map +1 -1
  18. package/dist/security/secretPatterns.js +4 -2
  19. package/dist/security/secretPatterns.js.map +1 -1
  20. package/dist/telemetry/otlpExporter.d.ts +69 -0
  21. package/dist/telemetry/otlpExporter.d.ts.map +1 -0
  22. package/dist/telemetry/otlpExporter.js +321 -0
  23. package/dist/telemetry/otlpExporter.js.map +1 -0
  24. package/dist/tools/ask-user-tool.js +8 -4
  25. package/dist/tools/ask-user-tool.js.map +1 -1
  26. package/dist/tools/compactMessages.js +6 -3
  27. package/dist/tools/compactMessages.js.map +1 -1
  28. package/dist/tools/core-tools.js +151 -81
  29. package/dist/tools/core-tools.js.map +1 -1
  30. package/dist/tools/git-tools.js +22 -11
  31. package/dist/tools/git-tools.js.map +1 -1
  32. package/dist/tools/language-adapters.d.ts +1 -1
  33. package/dist/tools/language-adapters.d.ts.map +1 -1
  34. package/dist/tools/language-adapters.js +36 -18
  35. package/dist/tools/language-adapters.js.map +1 -1
  36. package/dist/tools/loop/finalAnswerNudges.js +12 -6
  37. package/dist/tools/loop/finalAnswerNudges.js.map +1 -1
  38. package/dist/tools/loop/goalAnchor.d.ts.map +1 -1
  39. package/dist/tools/loop/goalAnchor.js +2 -1
  40. package/dist/tools/loop/goalAnchor.js.map +1 -1
  41. package/dist/tools/loop/llmStream.js +11 -8
  42. package/dist/tools/loop/llmStream.js.map +1 -1
  43. package/dist/tools/loop/loopShared.d.ts +20 -0
  44. package/dist/tools/loop/loopShared.d.ts.map +1 -0
  45. package/dist/tools/loop/loopShared.js +105 -0
  46. package/dist/tools/loop/loopShared.js.map +1 -0
  47. package/dist/tools/loop/parallelExecute.d.ts +1 -1
  48. package/dist/tools/loop/parallelExecute.js +2 -1
  49. package/dist/tools/loop/parallelExecute.js.map +1 -1
  50. package/dist/tools/loop/singleToolExecute.js +8 -4
  51. package/dist/tools/loop/singleToolExecute.js.map +1 -1
  52. package/dist/tools/loop/turnSetup.js +9 -6
  53. package/dist/tools/loop/turnSetup.js.map +1 -1
  54. package/dist/tools/ocr.d.ts.map +1 -1
  55. package/dist/tools/ocr.js +7 -5
  56. package/dist/tools/ocr.js.map +1 -1
  57. package/dist/tools/post-edit-checks.js +25 -13
  58. package/dist/tools/post-edit-checks.js.map +1 -1
  59. package/dist/tools/skill-loader.d.ts +1 -1
  60. package/dist/tools/skill-loader.d.ts.map +1 -1
  61. package/dist/tools/skill-loader.js +14 -7
  62. package/dist/tools/skill-loader.js.map +1 -1
  63. package/dist/tools/skill-registry.js +2 -1
  64. package/dist/tools/skill-registry.js.map +1 -1
  65. package/dist/tools/skills/mail-search-skill.js +16 -9
  66. package/dist/tools/skills/mail-search-skill.js.map +1 -1
  67. package/dist/tools/skills/plan-skill.js +4 -2
  68. package/dist/tools/skills/plan-skill.js.map +1 -1
  69. package/dist/tools/skills/semantic-search-skill.js +12 -6
  70. package/dist/tools/skills/semantic-search-skill.js.map +1 -1
  71. package/dist/tools/skills/test-gen-skill.js +8 -4
  72. package/dist/tools/skills/test-gen-skill.js.map +1 -1
  73. package/dist/tools/tool-registry.d.ts +17 -0
  74. package/dist/tools/tool-registry.d.ts.map +1 -1
  75. package/dist/tools/tool-registry.js +110 -30
  76. package/dist/tools/tool-registry.js.map +1 -1
  77. package/dist/tools/tool-use-loop.d.ts +16 -8
  78. package/dist/tools/tool-use-loop.d.ts.map +1 -1
  79. package/dist/tools/tool-use-loop.js +144 -160
  80. package/dist/tools/tool-use-loop.js.map +1 -1
  81. package/dist/tools/tool-use-parser.d.ts +33 -0
  82. package/dist/tools/tool-use-parser.d.ts.map +1 -1
  83. package/dist/tools/tool-use-parser.js +105 -28
  84. package/dist/tools/tool-use-parser.js.map +1 -1
  85. package/dist/tools/toolAvailabilityDetector.d.ts +0 -24
  86. package/dist/tools/toolAvailabilityDetector.d.ts.map +1 -1
  87. package/dist/tools/toolAvailabilityDetector.js +26 -12
  88. package/dist/tools/toolAvailabilityDetector.js.map +1 -1
  89. package/dist/tools/unified-patch.js +16 -8
  90. package/dist/tools/unified-patch.js.map +1 -1
  91. package/dist/utils/event-emitter.d.ts +1 -1
  92. package/dist/utils/event-emitter.d.ts.map +1 -1
  93. package/package.json +20 -1
@@ -18,12 +18,7 @@
18
18
  * the host should use the Ollama `tools: [...]` field instead.
19
19
  */
20
20
  Object.defineProperty(exports, "__esModule", { value: true });
21
- exports.ToolUseLoop = void 0;
22
- exports.sleep = sleep;
23
- exports.isRetryableLlmError = isRetryableLlmError;
24
- exports.tagRetryableLlmError = tagRetryableLlmError;
25
- exports.summarizeLlmError = summarizeLlmError;
26
- exports.isContinuationPrompt = isContinuationPrompt;
21
+ exports.ToolUseLoop = exports.isContinuationPrompt = exports.summarizeLlmError = exports.tagRetryableLlmError = exports.isRetryableLlmError = exports.sleep = void 0;
27
22
  exports.isNoticingPrompt = isNoticingPrompt;
28
23
  exports.createToolUseLoop = createToolUseLoop;
29
24
  const tool_use_parser_1 = require("./tool-use-parser");
@@ -36,87 +31,16 @@ const parallelExecute_1 = require("./loop/parallelExecute");
36
31
  const goalAnchor_1 = require("./loop/goalAnchor");
37
32
  const finalAnswerNudges_1 = require("./loop/finalAnswerNudges");
38
33
  const toolAvailabilityDetector_1 = require("./toolAvailabilityDetector");
34
+ const loopShared_1 = require("./loop/loopShared");
35
+ Object.defineProperty(exports, "sleep", { enumerable: true, get: function () { return loopShared_1.sleep; } });
36
+ Object.defineProperty(exports, "isRetryableLlmError", { enumerable: true, get: function () { return loopShared_1.isRetryableLlmError; } });
37
+ Object.defineProperty(exports, "tagRetryableLlmError", { enumerable: true, get: function () { return loopShared_1.tagRetryableLlmError; } });
38
+ Object.defineProperty(exports, "summarizeLlmError", { enumerable: true, get: function () { return loopShared_1.summarizeLlmError; } });
39
+ Object.defineProperty(exports, "isContinuationPrompt", { enumerable: true, get: function () { return loopShared_1.isContinuationPrompt; } });
39
40
  const FILE_EDIT_TOOL_NAMES = new Set(['write_file', 'apply_edit', 'replace_range', 'apply_patch']);
40
41
  function isFileEditTool(name) {
41
42
  return FILE_EDIT_TOOL_NAMES.has(name);
42
43
  }
43
- function sleep(ms) {
44
- return new Promise((resolve) => setTimeout(resolve, ms));
45
- }
46
- function getErrorCode(error) {
47
- return typeof error === 'object' && error !== null && 'code' in error
48
- ? String(error.code ?? '')
49
- : undefined;
50
- }
51
- function getErrorMessage(error) {
52
- return error instanceof Error ? error.message : String(error);
53
- }
54
- function isRetryableLlmError(error) {
55
- const code = getErrorCode(error);
56
- if (code === 'USER_ABORT')
57
- return false;
58
- const message = getErrorMessage(error);
59
- if (/\b429\b|rate limit/i.test(message))
60
- return false;
61
- return (code === 'WATCHDOG' ||
62
- /\b5\d\d\b/.test(message) ||
63
- /Upstream model request failed/i.test(message) ||
64
- /ECONNREFUSED|ECONNRESET|ETIMEDOUT|EAI_AGAIN|socket hang up|fetch failed|network error|terminated|UND_ERR/i.test(message));
65
- }
66
- function tagRetryableLlmError(error) {
67
- if (error instanceof Error) {
68
- const tagged = error;
69
- if (!tagged.code)
70
- tagged.code = 'UPSTREAM_MODEL';
71
- }
72
- }
73
- function summarizeLlmError(error) {
74
- const message = getErrorMessage(error).replace(/\s+/g, ' ').trim();
75
- return message.length > 180 ? `${message.slice(0, 177)}...` : message;
76
- }
77
- /**
78
- * Detects "keep going" / "continue" / "yes" style prompts that
79
- * carry no real goal content. The goal-anchor block uses the most recent
80
- * user message as the recall text; when that text is "good lets keep
81
- * going" the anchor degenerates into "remind yourself to keep going",
82
- * which gives the model nothing to anchor on after 20 iterations of
83
- * drift. Real on a 60-iteration linter-fix
84
- * turn: every anchor injection cited "good lets keep going" as the
85
- * goal. Detector lets callers walk back to a prior substantive prompt
86
- * instead.
87
- *
88
- * Length cap (60 chars) + normalized-phrase match keeps false positives
89
- * down — a sentence like "keep going on the auth refactor for the
90
- * user-service" is longer than 60 chars and reads as a real goal, so it
91
- * stays a goal.
92
- */
93
- const CONTINUATION_PROMPT_PHRASES = new Set([
94
- 'continue', 'keep going', 'go on', 'proceed', 'next', 'more',
95
- 'please continue', 'carry on', 'finish', 'finish it', 'finish up', 'wrap up', 'wrap it up',
96
- 'good', 'great', 'nice', 'cool', 'sweet', 'perfect', 'ok', 'okay', 'k', 'yes', 'y', 'yep', 'yeah', 'ack', 'done',
97
- "let's continue", 'lets continue', "let's keep going", 'lets keep going',
98
- 'good keep going', 'good lets keep going', "good let's keep going",
99
- 'good continue', 'ok continue', 'okay continue'
100
- ]);
101
- function isContinuationPrompt(text) {
102
- const trimmed = text.trim();
103
- if (trimmed.length === 0 || trimmed.length > 60)
104
- return false;
105
- // Normalize: lowercase, drop non-word/space punctuation, collapse whitespace.
106
- const norm = trimmed
107
- .toLowerCase()
108
- .replace(/[^\w\s']/g, ' ')
109
- .replace(/\s+/g, ' ')
110
- .trim();
111
- if (CONTINUATION_PROMPT_PHRASES.has(norm))
112
- return true;
113
- // Permit "please <phrase>" and "<phrase> please" wrappings.
114
- for (const phrase of CONTINUATION_PROMPT_PHRASES) {
115
- if (norm === `please ${phrase}` || norm === `${phrase} please`)
116
- return true;
117
- }
118
- return false;
119
- }
120
44
  /**
121
45
  * "Noticing prompt" detector. Catches user messages that are asking
122
46
  * about state ("are we using these?", "did you update X?", "where's
@@ -124,7 +48,7 @@ function isContinuationPrompt(text) {
124
48
  * work. These signal that the user spotted a gap in the prior turn
125
49
  * and wants the agent to address it — NOT continue the prior plan.
126
50
  *
127
- * Real failure mode captured 2026-05-25 on a Portfolio React refactor:
51
+ * Real failure mode captured 2026-05-25 on a local React refactor:
128
52
  * user asked "I dont think we actually are using these new files are
129
53
  * we?" after the agent wrote data files but never wired them into
130
54
  * App.jsx. Bandit read the question as a generic "keep going" prompt,
@@ -139,8 +63,9 @@ function isContinuationPrompt(text) {
139
63
  */
140
64
  function isNoticingPrompt(text) {
141
65
  const trimmed = (text || '').trim();
142
- if (trimmed.length === 0 || trimmed.length > 220)
66
+ if (trimmed.length === 0 || trimmed.length > 220) {
143
67
  return false;
68
+ }
144
69
  const norm = trimmed.toLowerCase().replace(/[^\w\s'?-]/g, ' ').replace(/\s+/g, ' ').trim();
145
70
  // Stems that introduce a noticing/clarifying question. Anchored to
146
71
  // the start of the message so a paragraph mentioning "are we"
@@ -163,8 +88,9 @@ function isNoticingPrompt(text) {
163
88
  /^wait\b/, // "wait — what about Y?"
164
89
  /^(?:i'?m|am\s+i)\s+(?:missing|seeing|reading)\b/,
165
90
  ];
166
- if (!STEMS.some((re) => re.test(norm)))
91
+ if (!STEMS.some((re) => re.test(norm))) {
167
92
  return false;
93
+ }
168
94
  // Has to contain a question mark OR a concern modal. Lots of false
169
95
  // matches without — e.g. "are we" mid-sentence in a feature request.
170
96
  const hasQuestion = trimmed.includes('?');
@@ -257,10 +183,17 @@ class ToolUseLoop {
257
183
  // explicit "this is a recovery attempt — answer the original goal"
258
184
  // framing succeeds. Last resort before terminal throw.
259
185
  let finalAnchorRetryUsed = false;
260
- const textToolBlock = this.registry.buildSystemPromptBlock();
186
+ const textToolBlock = effectiveOptions.compactToolBlock
187
+ ? this.registry.buildCompactSystemPromptBlock()
188
+ : this.registry.buildSystemPromptBlock();
189
+ // Lowercased registered tool names — used by the narrated-call
190
+ // detector to anchor on "I call <real tool>" with near-zero false
191
+ // positives.
192
+ const registeredToolNames = new Set(this.registry.getAll().map(t => t.name.toLowerCase()));
261
193
  const buildFullSystemPrompt = (useNativeTools) => {
262
- if (useNativeTools)
194
+ if (useNativeTools) {
263
195
  return systemPrompt ?? '';
196
+ }
264
197
  return systemPrompt
265
198
  ? `${systemPrompt}\n\n${textToolBlock}`
266
199
  : textToolBlock;
@@ -278,7 +211,7 @@ class ToolUseLoop {
278
211
  // window and the model can drift to a related-but-different topic.
279
212
  // Walks back through continuation tokens ("keep going", "yes") to
280
213
  // the most recent SUBSTANTIVE prompt. See loop/turnSetup.ts.
281
- let { originalGoal, priorUserPromptCount } = (0, turnSetup_1.resolveTurnGoal)({ seedMessages });
214
+ const { originalGoal, priorUserPromptCount } = (0, turnSetup_1.resolveTurnGoal)({ seedMessages });
282
215
  // Track the iteration we last anchored on rather than a boolean
283
216
  // so we can re-fire when the model pivots AGAIN later in a long
284
217
  // turn. -1 means "never anchored." Re-fire is gated by the
@@ -287,8 +220,9 @@ class ToolUseLoop {
287
220
  // continued without resolution for several more iterations.
288
221
  let lastGoalAnchorIteration = -1;
289
222
  for (const msg of seedMessages) {
290
- if (msg.role === 'system')
223
+ if (msg.role === 'system') {
291
224
  continue;
225
+ }
292
226
  messages.push(msg);
293
227
  }
294
228
  // Noticing-prompt pivot hint. When the most-recent user message
@@ -305,7 +239,7 @@ class ToolUseLoop {
305
239
  });
306
240
  messages.push({
307
241
  role: 'user',
308
- content: '[Reading-comprehension note for the assistant: the user\'s last message above is a noticing / clarifying question — they spotted a possible gap from prior turns and are asking you to confirm or correct, NOT to continue any prior plan. Before you take any new action, identify what gap the question points at and address it directly. If the question is "are we using X?" the correct first move is to verify whether X is actually being used (read the consumer file, grep for the import, check the call site) and answer honestly — yes/no with evidence. Do NOT create more new artifacts unless the user explicitly says to.]'
242
+ content: tool_use_parser_1.AUTOMATED_NUDGE_PREFIX + '[Reading-comprehension note for the assistant: the user\'s last message above is a noticing / clarifying question — they spotted a possible gap from prior turns and are asking you to confirm or correct, NOT to continue any prior plan. Before you take any new action, identify what gap the question points at and address it directly. If the question is "are we using X?" the correct first move is to verify whether X is actually being used (read the consumer file, grep for the import, check the call site) and answer honestly — yes/no with evidence. Do NOT create more new artifacts unless the user explicitly says to.]'
309
243
  });
310
244
  }
311
245
  let iterations = 0;
@@ -334,7 +268,7 @@ class ToolUseLoop {
334
268
  // recovery, etc.) each have their own caps, but they can chain — a
335
269
  // model can spin through 6+ no-tool-call responses because
336
270
  // thinking-off recovery resets consecutiveEmptyRetries=0. Captured
337
- // 2026-05-26 in Mark's Portfolio session (turn-2026-05-26T02-30-37):
271
+ // 2026-05-26 in a real CLI session (turn-2026-05-26T02-30-37):
338
272
  // model emitted 6 sequential reasoning-only responses inside
339
273
  // iteration 4 before the loop finally terminated with a useless
340
274
  // final answer ("I need to stop wrapping tool calls in reasoning
@@ -687,7 +621,7 @@ class ToolUseLoop {
687
621
  // current pace and burn the extension too.
688
622
  messages.push({
689
623
  role: 'user',
690
- content: `You've been making good progress and the iteration budget has been extended by ${CAP_EXTENSION_SIZE} (new limit: ${max}). Keep going, but tighten up: prefer batched edits over single-line ones, and start wrapping up when you have a complete answer rather than running to the new cap. This is the ${iterationCapExtensions === 1 ? 'first' : 'second'} of at most ${MAX_CAP_EXTENSIONS} extensions for this turn.`
624
+ content: tool_use_parser_1.AUTOMATED_NUDGE_PREFIX + `You've been making good progress and the iteration budget has been extended by ${CAP_EXTENSION_SIZE} (new limit: ${max}). Keep going, but tighten up: prefer batched edits over single-line ones, and start wrapping up when you have a complete answer rather than running to the new cap. This is the ${iterationCapExtensions === 1 ? 'first' : 'second'} of at most ${MAX_CAP_EXTENSIONS} extensions for this turn.`
691
625
  });
692
626
  }
693
627
  else {
@@ -698,7 +632,7 @@ class ToolUseLoop {
698
632
  // vs edit) reflects what the user actually asked for.
699
633
  messages.push({
700
634
  role: 'user',
701
- content: `${goalRecallBlock}` +
635
+ content: tool_use_parser_1.AUTOMATED_NUDGE_PREFIX + `${goalRecallBlock}` +
702
636
  `You have reached the tool-use iteration limit (${max}). Stop calling tools. Produce a final answer with three short sections, in this exact shape:\n` +
703
637
  '\n' +
704
638
  wrapUpBody +
@@ -712,7 +646,7 @@ class ToolUseLoop {
712
646
  emit('tool_loop:total_tool_cap', { iteration: iterations, totalToolsExecuted });
713
647
  messages.push({
714
648
  role: 'user',
715
- content: `${goalRecallBlock}` +
649
+ content: tool_use_parser_1.AUTOMATED_NUDGE_PREFIX + `${goalRecallBlock}` +
716
650
  `You have executed ${totalToolsExecuted} tool calls this turn — the per-turn cap (${maxTotalTools}) has been reached. Stop calling tools. Produce a final answer with three short sections:\n` +
717
651
  '\n' +
718
652
  wrapUpBody +
@@ -813,7 +747,7 @@ class ToolUseLoop {
813
747
  break;
814
748
  }
815
749
  catch (error) {
816
- if (nativeTools && nativeToolFailureFallback && !nativeFallbackUsed && isRetryableLlmError(error) && !signal?.aborted) {
750
+ if (nativeTools && nativeToolFailureFallback && !nativeFallbackUsed && (0, loopShared_1.isRetryableLlmError)(error) && !signal?.aborted) {
817
751
  nativeFallbackUsed = true;
818
752
  nativeTools = false;
819
753
  nativeSchemas = undefined;
@@ -839,7 +773,7 @@ class ToolUseLoop {
839
773
  // visible markup.
840
774
  messages.push({
841
775
  role: 'user',
842
- content: `[Provider error mid-turn — tool channel switched.] The previous attempt failed with: ${summarizeLlmError(error)}. ` +
776
+ content: tool_use_parser_1.AUTOMATED_NUDGE_PREFIX + `[Provider error mid-turn — tool channel switched.] The previous attempt failed with: ${(0, loopShared_1.summarizeLlmError)(error)}. ` +
843
777
  `I retried with the text-based tool-call channel. ` +
844
778
  `Re-emit your pending action using the text envelope: ` +
845
779
  `<tool_call>{"name":"...","params":{...}}</tool_call> outside of any reasoning block. ` +
@@ -848,7 +782,7 @@ class ToolUseLoop {
848
782
  });
849
783
  emit('tool_loop:native_tool_fallback', {
850
784
  iteration: iterations,
851
- reason: summarizeLlmError(error)
785
+ reason: (0, loopShared_1.summarizeLlmError)(error)
852
786
  });
853
787
  continue;
854
788
  }
@@ -861,13 +795,13 @@ class ToolUseLoop {
861
795
  // this attempt, any further failure on text is genuinely
862
796
  // terminal — the user has been waiting > 30 s and a clean
863
797
  // error is more helpful than another silent retry.
864
- if (nativeFallbackUsed && !textFallbackRetryUsed && isRetryableLlmError(error) && !signal?.aborted) {
798
+ if (nativeFallbackUsed && !textFallbackRetryUsed && (0, loopShared_1.isRetryableLlmError)(error) && !signal?.aborted) {
865
799
  textFallbackRetryUsed = true;
866
800
  emit('tool_loop:text_fallback_retry', {
867
801
  iteration: iterations,
868
- reason: summarizeLlmError(error)
802
+ reason: (0, loopShared_1.summarizeLlmError)(error)
869
803
  });
870
- await sleep(2400);
804
+ await (0, loopShared_1.sleep)(2400);
871
805
  continue;
872
806
  }
873
807
  // Last-resort final-anchor retry. By this point we've spent
@@ -884,21 +818,21 @@ class ToolUseLoop {
884
818
  if (!finalAnchorRetryUsed
885
819
  && textFallbackRetryUsed
886
820
  && originalGoal.trim().length > 0
887
- && isRetryableLlmError(error)
821
+ && (0, loopShared_1.isRetryableLlmError)(error)
888
822
  && !signal?.aborted) {
889
823
  finalAnchorRetryUsed = true;
890
824
  messages.push({
891
825
  role: 'user',
892
- content: `[Recovery attempt — previous channel attempts hit ${summarizeLlmError(error)}. ` +
826
+ content: tool_use_parser_1.AUTOMATED_NUDGE_PREFIX + `[Recovery attempt — previous channel attempts hit ${(0, loopShared_1.summarizeLlmError)(error)}. ` +
893
827
  `Discarding any partial tool_call or reasoning state from those attempts. ` +
894
828
  `Original user goal restated as a fresh anchor:]\n\n${originalGoal.trim()}`
895
829
  });
896
830
  emit('tool_loop:final_anchor_retry', {
897
831
  iteration: iterations,
898
- reason: summarizeLlmError(error),
832
+ reason: (0, loopShared_1.summarizeLlmError)(error),
899
833
  goalPreview: originalGoal.slice(0, 120)
900
834
  });
901
- await sleep(3600);
835
+ await (0, loopShared_1.sleep)(3600);
902
836
  continue;
903
837
  }
904
838
  throw error;
@@ -928,7 +862,7 @@ class ToolUseLoop {
928
862
  // have their own caps, but they chain — thinking-off recovery
929
863
  // resets consecutiveEmptyRetries=0, parse-retry has its own
930
864
  // counter, and the model can move between failure modes faster
931
- // than any one detector can give up. Mark Portfolio session
865
+ // than any one detector can give up. Real CLI session
932
866
  // 2026-05-26 turn-02-30-37: 6 sequential reasoning-only
933
867
  // responses inside one iteration before the loop terminated
934
868
  // silently. This counter increments on EVERY response without
@@ -969,9 +903,9 @@ class ToolUseLoop {
969
903
  // Also reset the prefill-recovery one-shot. The recovery budget
970
904
  // is "per stretch of failures," not "once per turn" — without
971
905
  // this reset, a long refactor that recovers from one prefill
972
- // stall and then hits another (Mark, gregoryhite-site
973
- // 2026-06-02T23-56-38: 26 iterations, prefill burned at iter 25,
974
- // iter 26 stalled again with no recovery left) falls straight
906
+ // stall and then hits another (observed in a real run: 26
907
+ // iterations, prefill burned at iter 25, iter 26 stalled again
908
+ // with no recovery left) falls straight
975
909
  // through to the terminal "Bandit stalled" fallback even though
976
910
  // every other detector still has budget. The hard cap on
977
911
  // noToolCallAttemptsThisTurn (5) bounds the total stuck
@@ -1003,7 +937,7 @@ class ToolUseLoop {
1003
937
  messages.push({ role: 'assistant', content: scrubbed });
1004
938
  messages.push({
1005
939
  role: 'user',
1006
- content: 'You emitted a `<tool_result>` envelope in your response. Those envelopes are SYSTEM output — they appear BETWEEN your turns, never inside your own message. If you meant to invoke a tool, emit a single `<tool_call>{"name":"...","params":{...}}</tool_call>` and wait for the real result. If the task is complete, give a plain-prose final answer with no XML envelopes. Retry now.'
940
+ content: tool_use_parser_1.AUTOMATED_NUDGE_PREFIX + 'You emitted a `<tool_result>` envelope in your response. Those envelopes are SYSTEM output — they appear BETWEEN your turns, never inside your own message. If you meant to invoke a tool, emit a single `<tool_call>{"name":"...","params":{...}}</tool_call>` and wait for the real result. If the task is complete, give a plain-prose final answer with no XML envelopes. Retry now.'
1007
941
  });
1008
942
  continue;
1009
943
  }
@@ -1034,7 +968,7 @@ class ToolUseLoop {
1034
968
  messages.push({ role: 'assistant', content: scrubbed });
1035
969
  messages.push({
1036
970
  role: 'user',
1037
- content: 'You emitted ` ```bandit-tl` (or `bandit-run` / `bandit-subagent`) fenced JSON in your response. Those fences are emitted by the EXTENSION HOST to log real tool execution — you CANNOT produce them. They show up in your context because the host logged actual tool calls, not because you can fabricate them. To actually run a tool, emit `<tool_call>{"name":"...","params":{...}}</tool_call>` and wait for the real result. Your fake fences mean NO work has happened this turn. You have TWO options for your retry, and ONLY two: (a) Emit a real `<tool_call>{"name":"...","params":{...}}</tool_call>` envelope NOW to actually do the work, then wait for the real result. (b) Honestly state "I have not [action] yet" and STOP. Do NOT claim completion. You MUST NOT claim you have fixed / eliminated / resolved / removed / cleaned / verified anything. No "successfully [verb]" phrasing. No numbered lists of "Step 1: I did X" actions. No "the project is now in a healthy state." Until a real `<tool_call>` lands on disk and returns a real tool-result, nothing has changed. Lying about completion is the worst failure mode. Retry now.'
971
+ content: tool_use_parser_1.AUTOMATED_NUDGE_PREFIX + 'You emitted ` ```bandit-tl` (or `bandit-run` / `bandit-subagent`) fenced JSON in your response. Those fences are emitted by the EXTENSION HOST to log real tool execution — you CANNOT produce them. They show up in your context because the host logged actual tool calls, not because you can fabricate them. To actually run a tool, emit `<tool_call>{"name":"...","params":{...}}</tool_call>` and wait for the real result. Your fake fences mean NO work has happened this turn. You have TWO options for your retry, and ONLY two: (a) Emit a real `<tool_call>{"name":"...","params":{...}}</tool_call>` envelope NOW to actually do the work, then wait for the real result. (b) Honestly state "I have not [action] yet" and STOP. Do NOT claim completion. You MUST NOT claim you have fixed / eliminated / resolved / removed / cleaned / verified anything. No "successfully [verb]" phrasing. No numbered lists of "Step 1: I did X" actions. No "the project is now in a healthy state." Until a real `<tool_call>` lands on disk and returns a real tool-result, nothing has changed. Lying about completion is the worst failure mode. Retry now.'
1038
972
  });
1039
973
  continue;
1040
974
  }
@@ -1056,7 +990,10 @@ class ToolUseLoop {
1056
990
  && !(0, tool_use_parser_1.hasToolCalls)(response)
1057
991
  && toolAbsenceCorrectionsFired < TOOL_ABSENCE_CORRECTION_CAP) {
1058
992
  const registeredNames = this.registry.getAll().map((t) => t.name);
1059
- const absence = (0, toolAvailabilityDetector_1.detectFalseToolAbsence)(response, registeredNames);
993
+ // Reasoning channels MUST be stripped before prose-matching:
994
+ // reasoning narrates tool usage by name and false-positives the
995
+ // absence phrases (see toolAvailabilityDetector.ts header).
996
+ const absence = (0, toolAvailabilityDetector_1.detectFalseToolAbsence)((0, tool_use_parser_1.stripReasoningChannels)(response), registeredNames);
1060
997
  if (absence.detected) {
1061
998
  toolAbsenceCorrectionsFired++;
1062
999
  emit('tool_loop:false_tool_absence', {
@@ -1088,7 +1025,7 @@ class ToolUseLoop {
1088
1025
  messages.push({ role: 'assistant', content: response });
1089
1026
  messages.push({
1090
1027
  role: 'user',
1091
- content: 'The previous tool call returned an error and you produced no follow-up tool_call. ' +
1028
+ content: tool_use_parser_1.AUTOMATED_NUDGE_PREFIX + 'The previous tool call returned an error and you produced no follow-up tool_call. ' +
1092
1029
  'Do NOT silently abandon the request — the user expects you to either retry with corrected parameters OR state explicitly which precondition failed and why you cannot proceed. ' +
1093
1030
  'Choose one: (a) emit a corrected `<tool_call>{"name":"...","params":{...}}</tool_call>` now, fixing the param shape or value the error pointed at; ' +
1094
1031
  '(b) give a one-line final answer naming the exact precondition you lack (e.g. "I cannot trash message X because the message id is unknown — please provide it"). ' +
@@ -1107,12 +1044,12 @@ class ToolUseLoop {
1107
1044
  // without emitting an actual tool_call. Visually the user sees a
1108
1045
  // wall of reasoning text and nothing happens. Strip the reasoning
1109
1046
  // fences before checking emptiness so the same nudge fires.
1110
- const stripped = response
1111
- .replace(/<think\b[\s\S]*?<\/think\s*>/gi, '')
1112
- .replace(/<think\b[\s\S]*$/i, '')
1113
- .replace(/```bandit-reasoning\b[\s\S]*?```/gi, '')
1114
- .replace(/```bandit-reasoning\b[\s\S]*$/i, '')
1115
- .trim();
1047
+ // Strip reasoning channels AND stray fence scaffolding (a bare
1048
+ // leading ``` opener that wraps the reasoning) so the
1049
+ // reasoning-only check isn't fooled into seeing the orphan ``` as
1050
+ // a real answer — which let a "reasoning + no tool call" turn end
1051
+ // with no answer (real CLI run, 2026-06-15).
1052
+ const stripped = (0, tool_use_parser_1.stripToAnswerContent)(response);
1116
1053
  const reasoningOnly = !stripped && response.trim().length > 0;
1117
1054
  // "Narrated but didn't act" detector. Some models (notably ones
1118
1055
  // post-trained for a different tool-call envelope, e.g. OpenAI
@@ -1128,8 +1065,8 @@ class ToolUseLoop {
1128
1065
  // in the model's final clause, not an earlier "I have already
1129
1066
  // searched the file" preamble before a real answer.
1130
1067
  //
1131
- // Captured 2026-05-25 (Mark, Portfolio IDE session): model emitted
1132
- // "I'll redesign the portfolio... Let me rewrite both files." with
1068
+ // Captured 2026-05-25 (real IDE session): model emitted
1069
+ // "I'll redesign the page... Let me rewrite both files." with
1133
1070
  // NO tool_call and the turn closed as a final answer because
1134
1071
  // neither `redesign` nor `rewrite` was on the list. A long
1135
1072
  // session ended with zero work shipped. Missing a verb here =
@@ -1137,10 +1074,13 @@ class ToolUseLoop {
1137
1074
  const NARRATE_VERB_RE = /\b(use|uses|used|using|call|calls|called|calling|invoke|invokes|invoked|invoking|execute|executes|executed|executing|run|runs|running|ran|search|searches|searched|searching|look|looks|looked|looking|read|reads|reading|check|checks|checked|checking|find|finds|finding|found|list|lists|listed|listing|fetch|fetches|fetched|fetching|grep|greps|grepped|grepping|explore|explores|explored|exploring|locate|locates|located|locating|plan|plans|planned|planning|start|starts|started|starting|begin|begins|began|beginning|create|creates|created|creating|write|writes|wrote|writing|rewrite|rewrites|rewrote|rewriting|rewritten|build|builds|built|building|rebuild|rebuilds|rebuilt|rebuilding|update|updates|updated|updating|implement|implements|implemented|implementing|refactor|refactors|refactored|refactoring|redesign|redesigns|redesigned|redesigning|design|designs|designed|designing|generate|generates|generated|generating|scaffold|scaffolds|scaffolded|scaffolding|set\s+up|setting\s+up|tackle|tackles|tackled|tackling|do|does|did|doing|make|makes|made|making|batch|batches|batched|batching|execute|prepare|prepares|prepared|preparing|draft|drafts|drafted|drafting|outline|outlines|outlined|outlining|organize|organizes|organized|organizing|structure|structures|structured|structuring|kick\s+off|kicking\s+off|fix|fixes|fixed|fixing|edit|edits|edited|editing|modify|modifies|modified|modifying|patch|patches|patched|patching|adjust|adjusts|adjusted|adjusting|replace|replaces|replaced|replacing|swap|swaps|swapped|swapping|polish|polishes|polished|polishing|clean\s+up|cleaning\s+up|tidy|tidies|tidied|tidying|finalize|finalizes|finalized|finalizing|finish|finishes|finished|finishing|complete|completes|completed|completing|wire|wires|wired|wiring|hook|hooks|hooked|hooking|render|renders|rendered|rendering|style|styles|styled|styling|theme|themes|themed|theming|redo|redoes|redid|redoing|port|ports|ported|porting|migrate|migrates|migrated|migrating|configure|configures|configured|configuring|install|installs|installed|installing|remove|removes|removed|removing|delete|deletes|deleted|deleting|rename|renames|renamed|renaming)\b/i;
1138
1075
  const NARRATE_INTENT_RE = /\b(we (?:will|need to|should)|we'?ll|we'?re going to|i'?ll|i will|let me|let'?s|going to|i'?m going to|i need to)\b/i;
1139
1076
  // Real code fences pass through; narrate only fires when the
1140
- // model emitted no structured payload at all. Check the STRIPPED
1141
- // response, not the raw one — `bandit-reasoning` fences are
1142
- // reasoning, not structured output.
1143
- const hasCodeFence = /```[a-zA-Z0-9_-]*\s*\n/.test(stripped);
1077
+ // model emitted no structured payload at all. Use the
1078
+ // reasoning-stripped response (NOT `stripped`, which also removes
1079
+ // bare fence-marker lines) so a genuine ```json / ```diff payload
1080
+ // still suppresses the narrate nudge and reaches its own
1081
+ // auto-promote detector. `bandit-reasoning` fences are reasoning,
1082
+ // not structured output, so they're excluded either way.
1083
+ const hasCodeFence = /```[a-zA-Z0-9_-]*\s*\n/.test((0, tool_use_parser_1.stripReasoningChannels)(response));
1144
1084
  const tailMatch = stripped.match(/(?:[.!?]\s+)([^.!?]*)$/);
1145
1085
  const tail = (tailMatch ? tailMatch[1] : stripped).slice(-200);
1146
1086
  const narratedButNoAction = !(0, tool_use_parser_1.hasToolCalls)(response) &&
@@ -1149,6 +1089,22 @@ class ToolUseLoop {
1149
1089
  stripped.length < 240 &&
1150
1090
  NARRATE_INTENT_RE.test(tail) &&
1151
1091
  NARRATE_VERB_RE.test(tail);
1092
+ // Performative narrated call: "I call read_file with path=README.md".
1093
+ // The generic gate above caps stripped.length at 240 to avoid false
1094
+ // positives on real answers that merely contain narrate verbs — but
1095
+ // when the final clause NAMES A REGISTERED TOOL in a performative
1096
+ // phrase, the length cap is wrong: a long planning recap that ends
1097
+ // "I call read_file with path=…" is a stall no matter how long the
1098
+ // recap is, and tool-name anchoring keeps the false-positive rate
1099
+ // near zero. Captured 2026-06-12 (real CLI session,
1100
+ // gemma4:e4b): iteration 1 emitted a reasoning recap ending with
1101
+ // exactly that sentence and no tool_call — the generic gate missed
1102
+ // it (over the length cap; intent list lacks present-tense "I
1103
+ // call") and the turn closed as a final answer.
1104
+ const narratedCallMatch = stripped.slice(-300).match(/\b(?:i\s+(?:will\s+|now\s+|then\s+)?(?:call|invoke|run|use)|calling|invoking|let'?s\s+(?:call|run|use))\s+(?:the\s+)?`?([a-z][a-z0-9_]*)`?/i);
1105
+ const narratedToolCallNoAction = !(0, tool_use_parser_1.hasToolCalls)(response) &&
1106
+ !!narratedCallMatch &&
1107
+ registeredToolNames.has(narratedCallMatch[1].toLowerCase());
1152
1108
  // Empty-response retry: was previously gated to `iterations > 0`
1153
1109
  // under the assumption "empty first response = provider outage."
1154
1110
  // That assumption was wrong — with bandit-logic
@@ -1160,7 +1116,7 @@ class ToolUseLoop {
1160
1116
  // the model gets a second chance (and the thinking-off recovery
1161
1117
  // below can flip it to non-thinking mode if the second pass also
1162
1118
  // empties).
1163
- const shouldNudge = (!response.trim() || reasoningOnly || narratedButNoAction) &&
1119
+ const shouldNudge = (!response.trim() || reasoningOnly || narratedButNoAction || narratedToolCallNoAction) &&
1164
1120
  !hitLimit &&
1165
1121
  consecutiveEmptyRetries < 2 &&
1166
1122
  !thinkingOffRecoveryAttempted;
@@ -1170,16 +1126,17 @@ class ToolUseLoop {
1170
1126
  iteration: iterations,
1171
1127
  attempt: consecutiveEmptyRetries,
1172
1128
  reasoningOnly,
1173
- narratedButNoAction
1129
+ narratedButNoAction,
1130
+ narratedToolCallNoAction
1174
1131
  });
1175
- const nudgeMessage = narratedButNoAction
1132
+ const nudgeMessage = (narratedButNoAction || narratedToolCallNoAction)
1176
1133
  ? 'You announced your next step in prose ("we will search…" / "let me check…" / "use X to find Y") but did NOT emit a `<tool_call>` envelope. Announcing intent is not enough — you must actually invoke the tool. Emit the call now in this exact format, OUTSIDE of any reasoning block, with NO commentary and NO markdown fence:\n\n<tool_call>{"name":"<tool>","params":{"<key>":"<value>"}}</tool_call>\n\nReplace name/params with the right values for your task. Or, if the task is already answerable from what you know, give a final answer instead.'
1177
1134
  : reasoningOnly
1178
1135
  ? 'You completed reasoning but emitted no tool_call AND no final answer. The reasoning text alone does not run a tool — you must emit a `<tool_call>` envelope OUTSIDE the reasoning block. Format example (replace name/params for your task):\n\n<tool_call>{"name":"<tool>","params":{"<key>":"<value>"}}</tool_call>\n\nNo prose around it, no markdown fence, just the bare tag. If the task is answerable without a tool, write a complete final answer instead. Do not stop after only thinking.'
1179
1136
  : 'Your previous response was empty. Either emit a `<tool_call>{"name":"<tool>","params":{...}}</tool_call>` to invoke a tool, OR produce a complete final answer using what you have. Do not respond with an empty message.';
1180
1137
  messages.push({
1181
1138
  role: 'user',
1182
- content: nudgeMessage
1139
+ content: tool_use_parser_1.AUTOMATED_NUDGE_PREFIX + nudgeMessage
1183
1140
  });
1184
1141
  continue;
1185
1142
  }
@@ -1212,7 +1169,7 @@ class ToolUseLoop {
1212
1169
  });
1213
1170
  messages.push({
1214
1171
  role: 'user',
1215
- content: 'Switching to non-thinking mode for this attempt because reasoning-only retries exhausted. Emit either a tool_call or a complete final answer. No more reasoning preamble.'
1172
+ content: tool_use_parser_1.AUTOMATED_NUDGE_PREFIX + 'Switching to non-thinking mode for this attempt because reasoning-only retries exhausted. Emit either a tool_call or a complete final answer. No more reasoning preamble.'
1216
1173
  });
1217
1174
  continue;
1218
1175
  }
@@ -1260,8 +1217,8 @@ class ToolUseLoop {
1260
1217
  messages.push({
1261
1218
  role: 'user',
1262
1219
  content: firstRetry
1263
- ? 'Your previous tool_call was not valid JSON — I could not parse it. Common cause: unescaped `"` characters inside a string value (for example `["", "", ""]` inside a `content` string). Retry the tool call with properly escaped JSON: every `"` inside a string value must be written as `\\"`, and every newline as `\\n`. If the content is very long, consider `replace_range` for a line-numbered block or breaking the change into smaller edits.'
1264
- : 'Your tool_call still did not parse. Do NOT retry with the same shape or the same escaping failure. Switch tactics: (a) call `replace_range` for a large block whose line numbers you just read, (b) call `write_file` for a new file, or (c) split the change into multiple small `apply_edit` calls that each target just one method or block (e.g. 3-5 lines of `find`, 5-10 lines of `replace`) instead of rewriting the whole class. Pick the smallest scope that accomplishes the next step. If you cannot produce a valid tool call, respond with a plain-prose final answer acknowledging you could not complete the edit.'
1220
+ ? tool_use_parser_1.AUTOMATED_NUDGE_PREFIX + 'Your previous tool_call was not valid JSON — I could not parse it. Common cause: unescaped `"` characters inside a string value (for example `["", "", ""]` inside a `content` string). Retry the tool call with properly escaped JSON: every `"` inside a string value must be written as `\\"`, and every newline as `\\n`. If the content is very long, consider `replace_range` for a line-numbered block or breaking the change into smaller edits.'
1221
+ : tool_use_parser_1.AUTOMATED_NUDGE_PREFIX + 'Your tool_call still did not parse. Do NOT retry with the same shape or the same escaping failure. Switch tactics: (a) call `replace_range` for a large block whose line numbers you just read, (b) call `write_file` for a new file, or (c) split the change into multiple small `apply_edit` calls that each target just one method or block (e.g. 3-5 lines of `find`, 5-10 lines of `replace`) instead of rewriting the whole class. Pick the smallest scope that accomplishes the next step. If you cannot produce a valid tool call, respond with a plain-prose final answer acknowledging you could not complete the edit.'
1265
1222
  });
1266
1223
  continue;
1267
1224
  }
@@ -1275,7 +1232,7 @@ class ToolUseLoop {
1275
1232
  if (!hitLimit && !(0, tool_use_parser_1.hasToolCalls)(response)) {
1276
1233
  const normalized = response.toLowerCase().replace(/\s+/g, ' ').trim();
1277
1234
  const prior = recentNonToolResponses[recentNonToolResponses.length - 1];
1278
- const looksLikeLoop = !!prior && (() => {
1235
+ const looksLikeLoop = Boolean(prior) && (() => {
1279
1236
  // Cheap similarity: longest common prefix / max length. If two
1280
1237
  // consecutive no-tool responses share >60% of their text by
1281
1238
  // prefix the model is repeating itself. More sophisticated
@@ -1284,8 +1241,9 @@ class ToolUseLoop {
1284
1241
  const short = prior.length < normalized.length ? prior : normalized;
1285
1242
  const long = prior.length < normalized.length ? normalized : prior;
1286
1243
  let matched = 0;
1287
- while (matched < short.length && short[matched] === long[matched])
1244
+ while (matched < short.length && short[matched] === long[matched]) {
1288
1245
  matched++;
1246
+ }
1289
1247
  return matched / short.length > 0.6;
1290
1248
  })();
1291
1249
  // Also flag the self-contradiction signature from the real
@@ -1306,7 +1264,7 @@ class ToolUseLoop {
1306
1264
  });
1307
1265
  messages.push({
1308
1266
  role: 'user',
1309
- content: 'STOP deliberating. Your last response either repeated itself, contradicted itself (e.g. "Wait, I see X / Actually I\'ll try X"), or was aborted mid-stream as a loop. Do NOT continue speculating about what files might exist. Take exactly one of these actions now: (a) invoke a tool (`list_files`, `read_file`, `search_code`, etc.) to answer the question with real data, OR (b) give up and tell the user plainly that you could not complete the task and why. Do not write more than two sentences of prose before either calling a tool or terminating.'
1267
+ content: tool_use_parser_1.AUTOMATED_NUDGE_PREFIX + 'STOP deliberating. Your last response either repeated itself, contradicted itself (e.g. "Wait, I see X / Actually I\'ll try X"), or was aborted mid-stream as a loop. Do NOT continue speculating about what files might exist. Take exactly one of these actions now: (a) invoke a tool (`list_files`, `read_file`, `search_code`, etc.) to answer the question with real data, OR (b) give up and tell the user plainly that you could not complete the task and why. Do not write more than two sentences of prose before either calling a tool or terminating.'
1310
1268
  });
1311
1269
  recentNonToolResponses.length = 0;
1312
1270
  continue;
@@ -1396,7 +1354,7 @@ class ToolUseLoop {
1396
1354
  // without being so loud that it derails prose responses.
1397
1355
  messages.push({
1398
1356
  role: 'user',
1399
- content: 'Note: I detected a JSON todo list in your response and auto-promoted it to a todo_write call. Next time, emit `<tool_call>{"name":"todo_write","params":{"items":"..."}}</tool_call>` directly instead of pasting JSON as a code block — pasted JSON does not update your plan, only the tool call does.'
1357
+ content: tool_use_parser_1.AUTOMATED_NUDGE_PREFIX + 'Note: I detected a JSON todo list in your response and auto-promoted it to a todo_write call. Next time, emit `<tool_call>{"name":"todo_write","params":{"items":"..."}}</tool_call>` directly instead of pasting JSON as a code block — pasted JSON does not update your plan, only the tool call does.'
1400
1358
  });
1401
1359
  iterations++;
1402
1360
  continue;
@@ -1424,7 +1382,26 @@ class ToolUseLoop {
1424
1382
  responsePreview: response.slice(0, 300)
1425
1383
  });
1426
1384
  }
1427
- const finalResponse = (0, tool_use_parser_1.stripToolCallMarkup)(response).trim();
1385
+ // Reasoning channels are streamed live by the host for display —
1386
+ // leaving them in the terminal answer double-renders them, and on
1387
+ // fabrication-retry exhaustion it prints the model's confusion
1388
+ // narrative as if it were the answer (real CLI run,
1389
+ // 2026-06-12T20-19 turn: three near-identical "the user is
1390
+ // correcting my formatting error" reasoning blocks rendered above
1391
+ // the real answer). The stall fallback below still inspects the
1392
+ // raw `response`, so reasoning-only turns keep their fallback.
1393
+ // ORDER MATTERS: reasoning channels strip FIRST. Reasoning text
1394
+ // routinely MENTIONS envelopes in backticks ("I included a
1395
+ // `<tool_result>` envelope…"); if markup stripping ran first, its
1396
+ // envelope regex would match from that in-fence mention through
1397
+ // to the real closing tag, eat the fence's closing ``` along the
1398
+ // way, and the unclosed-fence cleanup would then wipe the entire
1399
+ // rest of the answer.
1400
+ const finalResponse = (0, tool_use_parser_1.stripToolCallMarkup)(response
1401
+ .replace(/<think\b[\s\S]*?<\/think\s*>/gi, '')
1402
+ .replace(/<think\b[\s\S]*$/i, '')
1403
+ .replace(/```bandit-reasoning\b[\s\S]*?```/gi, '')
1404
+ .replace(/```bandit-reasoning\b[\s\S]*$/i, '')).trim();
1428
1405
  // False-completion detector. Small models regularly end a turn
1429
1406
  // with "I refactored the file" / "here is the updated code" text
1430
1407
  // without ever emitting a file-edit tool call.
@@ -1434,14 +1411,25 @@ class ToolUseLoop {
1434
1411
  // this turn, push one corrective user message into the loop
1435
1412
  // and continue for one more iteration. The nudge is capped at
1436
1413
  // one per turn so a truly confused model can still terminate.
1437
- if (!hitLimit && !falseCompletionNudged && editToolsInvoked === 0) {
1414
+ //
1415
+ // ONLY fires when the goal actually implies an edit. Without this
1416
+ // gate the detector demanded an edit on a purely informational
1417
+ // "tell me about this repo" turn: the model correctly said "I have
1418
+ // completed the overview" (a completion phrase), no edit ran
1419
+ // (none was asked for), so the nudge fired and replaced the good
1420
+ // markdown overview with a defensive "no edits are required"
1421
+ // answer — plus a wall of "automated harness check" reasoning.
1422
+ // An analysis goal that does NOT also imply an edit can never
1423
+ // false-complete, so skip it. (real CLI run, 2026-06-12.)
1424
+ const goalCouldExpectEdit = promptImpliesFileEdit || !promptWantsAnalysis;
1425
+ if (!hitLimit && !falseCompletionNudged && editToolsInvoked === 0 && goalCouldExpectEdit) {
1438
1426
  const claimsCompletion = FALSE_COMPLETION_PATTERNS.some(re => re.test(finalResponse));
1439
1427
  if (claimsCompletion) {
1440
1428
  falseCompletionNudged = true;
1441
1429
  emit('tool_loop:false_completion_nudge', { iteration: iterations, responsePreview: finalResponse.slice(0, 200) });
1442
1430
  messages.push({
1443
1431
  role: 'user',
1444
- content: 'Your response either claims work is done OR apologizes and asks what to do next — but I see NO successful `write_file`, `apply_edit`, `replace_range`, or `apply_patch` tool call in this turn, so nothing on disk has changed. ' +
1432
+ content: tool_use_parser_1.AUTOMATED_NUDGE_PREFIX + 'Your response either claims work is done OR apologizes and asks what to do next — but I see NO successful `write_file`, `apply_edit`, `replace_range`, or `apply_patch` tool call in this turn, so nothing on disk has changed. ' +
1445
1433
  'Do NOT ask the user which task to resume, do NOT promise to escape JSON "in your next tool call", and do NOT defer. Either (a) emit a real edit tool call NOW with the actual change — use `replace_range` for a large block whose line numbers you just read, `apply_edit` for a small exact replacement, or `write_file` for a new file — or (b) respond honestly that you could not complete the task and briefly explain why. Retry the tool call yourself; the user cannot help you escape JSON.'
1446
1434
  });
1447
1435
  continue;
@@ -1480,7 +1468,7 @@ class ToolUseLoop {
1480
1468
  });
1481
1469
  messages.push({
1482
1470
  role: 'user',
1483
- content: `Your response describes edits to ${fileSet.size} files (${[...fileSet].slice(0, 8).join(', ')}${fileSet.size > 8 ? ', …' : ''}), but only ${editToolsInvoked} successful edit${editToolsInvoked === 1 ? '' : 's'} actually fired this turn. ` +
1471
+ content: tool_use_parser_1.AUTOMATED_NUDGE_PREFIX + `Your response describes edits to ${fileSet.size} files (${[...fileSet].slice(0, 8).join(', ')}${fileSet.size > 8 ? ', …' : ''}), but only ${editToolsInvoked} successful edit${editToolsInvoked === 1 ? '' : 's'} actually fired this turn. ` +
1484
1472
  `The remaining ${fileSet.size - editToolsInvoked} file(s) were NOT modified — nothing landed on disk for them. ` +
1485
1473
  'Either (a) emit the missing `apply_edit` / `replace_range` / `write_file` tool calls now to actually do the work, OR (b) revise your response to honestly describe ONLY the edits that successfully applied. Do not summarize work that did not happen.'
1486
1474
  });
@@ -1491,7 +1479,7 @@ class ToolUseLoop {
1491
1479
  // ("break out", "split", "refactor", "extract", "move") imply
1492
1480
  // mutation of the SOURCE file the user wants restructured, not
1493
1481
  // just creation of new sibling files. Failure mode observed
1494
- // 2026-05-25 on a Portfolio React refactor: model read App.jsx,
1482
+ // 2026-05-25 on a local React refactor: model read App.jsx,
1495
1483
  // wrote 5 new component files, never touched App.jsx, declared
1496
1484
  // completion. User had to follow up "are we using these?" to
1497
1485
  // force the integration step — and even that follow-up turn
@@ -1523,7 +1511,7 @@ class ToolUseLoop {
1523
1511
  const writeCount = filesWrittenThisTurn.size;
1524
1512
  messages.push({
1525
1513
  role: 'user',
1526
- content: `The user's goal contains a refactor verb (refactor/break out/split/extract/move) which implies the SOURCE file(s) should be modified, not just supplemented with new siblings. You read ${readPreview}${readNotWritten.length > 3 ? ' and others' : ''} for context, then wrote ${writeCount} NEW file(s), but you NEVER modified the file(s) you read. The refactor is incomplete: the source file still contains the old monolithic code. ` +
1514
+ content: tool_use_parser_1.AUTOMATED_NUDGE_PREFIX + `The user's goal contains a refactor verb (refactor/break out/split/extract/move) which implies the SOURCE file(s) should be modified, not just supplemented with new siblings. You read ${readPreview}${readNotWritten.length > 3 ? ' and others' : ''} for context, then wrote ${writeCount} NEW file(s), but you NEVER modified the file(s) you read. The refactor is incomplete: the source file still contains the old monolithic code. ` +
1527
1515
  `Emit the missing apply_edit/replace_range/write_file call on the source file now — it should import from the new files and drop the inlined code that's been extracted. If the refactor is genuinely a "scaffold only, leave source untouched" task, say so explicitly and explain why the source doesn't need to change.`
1528
1516
  });
1529
1517
  continue;
@@ -1552,8 +1540,9 @@ class ToolUseLoop {
1552
1540
  let match;
1553
1541
  while ((match = fenceRe.exec(finalResponse)) !== null) {
1554
1542
  const nonEmpty = match[1].split('\n').filter(l => l.trim().length > 0).length;
1555
- if (nonEmpty > biggestFenceLines)
1543
+ if (nonEmpty > biggestFenceLines) {
1556
1544
  biggestFenceLines = nonEmpty;
1545
+ }
1557
1546
  }
1558
1547
  if (biggestFenceLines >= MIN_LINES) {
1559
1548
  codeFenceHallucinationNudged = true;
@@ -1564,7 +1553,7 @@ class ToolUseLoop {
1564
1553
  });
1565
1554
  messages.push({
1566
1555
  role: 'user',
1567
- content: 'You produced a substantial code block in your reply but never emitted a `write_file`, `apply_edit`, `replace_range`, or `apply_patch` tool call — so the change is NOT on disk. ' +
1556
+ content: tool_use_parser_1.AUTOMATED_NUDGE_PREFIX + 'You produced a substantial code block in your reply but never emitted a `write_file`, `apply_edit`, `replace_range`, or `apply_patch` tool call — so the change is NOT on disk. ' +
1568
1557
  'Do not ask the user to paste your code into a file themselves. Take exactly one of these actions now: (a) call `replace_range`, `apply_edit`, or `write_file` with the real change to the correct file, OR (b) say plainly that you could not locate the target file and explain what you searched for. Do not wrap up with another prose + code-fence response.'
1569
1558
  });
1570
1559
  continue;
@@ -1648,7 +1637,7 @@ class ToolUseLoop {
1648
1637
  });
1649
1638
  messages.push({
1650
1639
  role: 'user',
1651
- content: 'Your first response had reasoning but emitted NO tool call — that is a hard stall for a subagent (you exist to gather information; reasoning alone produces zero output). ' +
1640
+ content: tool_use_parser_1.AUTOMATED_NUDGE_PREFIX + 'Your first response had reasoning but emitted NO tool call — that is a hard stall for a subagent (you exist to gather information; reasoning alone produces zero output). ' +
1652
1641
  'For your next response, emit a tool call. The minimum viable starting move for ANY exploration goal is:\n\n' +
1653
1642
  '<tool_call>{"name":"list_files","params":{"path":"."}}</tool_call>\n\n' +
1654
1643
  'Copy that exact envelope as the very first thing you emit (you may keep the reasoning block before it if your model needs to think first, but the tool_call envelope MUST appear in this turn). ' +
@@ -1670,7 +1659,7 @@ class ToolUseLoop {
1670
1659
  // user saw nothing.
1671
1660
  //
1672
1661
  // The gate also covers the "regurgitated reasoning after
1673
- // native→text channel fallback" case. Mark Portfolio
1662
+ // native→text channel fallback" case. Real CLI
1674
1663
  // 2026-05-31T17-39-53 cleanup turn: native-tool path 500'd,
1675
1664
  // text-channel recovery prompted the model to re-emit its
1676
1665
  // pending action, but the model just echoed its prior
@@ -1682,12 +1671,7 @@ class ToolUseLoop {
1682
1671
  // before testing emptiness — if the response would render to
1683
1672
  // the user as nothing-actionable, the fallback fires and the
1684
1673
  // user sees what the model was thinking instead of silence.
1685
- const reasoningStripped = response
1686
- .replace(/<think\b[\s\S]*?<\/think\s*>/gi, '')
1687
- .replace(/<think\b[\s\S]*$/i, '')
1688
- .replace(/```bandit-reasoning\b[\s\S]*?```/gi, '')
1689
- .replace(/```bandit-reasoning\b[\s\S]*$/i, '')
1690
- .trim();
1674
+ const reasoningStripped = (0, tool_use_parser_1.stripToAnswerContent)(response);
1691
1675
  const visibleAfterStrip = (0, tool_use_parser_1.stripToolCallMarkup)(reasoningStripped).trim();
1692
1676
  if (!visibleAfterStrip) {
1693
1677
  // Pull the last 1-2 sentences of reasoning so the user sees
@@ -1713,7 +1697,7 @@ class ToolUseLoop {
1713
1697
  // and the inline empty-retry / narrate-no-action detector
1714
1698
  // already used its retry budget (consecutiveEmptyRetries >= 2)
1715
1699
  // so it couldn't nudge again, the user is left reading a
1716
- // promise the model never kept. Mark Portfolio
1700
+ // promise the model never kept. Real CLI
1717
1701
  // 2026-05-31T17-39-53 cleanup turn: after a native→text channel
1718
1702
  // recovery, the model emitted "Let me revert it:" with a
1719
1703
  // dangling colon and no tool call; the user saw the prose end
@@ -1730,13 +1714,13 @@ class ToolUseLoop {
1730
1714
  // The trailing colon + intent phrase combination is the
1731
1715
  // smoking gun. We DON'T also require NARRATE_VERB_RE here:
1732
1716
  // the existing inline detector's verb list misses "revert"
1733
- // (Portfolio 2026-05-31) and would miss any other one-off
1717
+ // (real run 2026-05-31) and would miss any other one-off
1734
1718
  // action verb a model might use. The colon alone is rare
1735
1719
  // enough in a legit final answer that pairing it with
1736
1720
  // "let me" / "I'll" / "we'll" / etc. is specific enough.
1737
1721
  //
1738
- // Period-terminated variant (added 2026-06-03 after Mark's
1739
- // gregoryhite-site run): the model ended with "Let me fix
1722
+ // Period-terminated variant (added 2026-06-03 after a real
1723
+ // run): the model ended with "Let me fix
1740
1724
  // all three project cards at once." — full sentence, full
1741
1725
  // stop, no colon. Both prefill and thinking-off recovery
1742
1726
  // had been spent earlier in the turn so the user saw the
@@ -1856,7 +1840,7 @@ class ToolUseLoop {
1856
1840
  toolCalls = [];
1857
1841
  messages.push({
1858
1842
  role: 'user',
1859
- content: `You have revised the plan in ${consecutiveTodoOnlyIterations + 1} consecutive iterations without executing any step. ` +
1843
+ content: tool_use_parser_1.AUTOMATED_NUDGE_PREFIX + `You have revised the plan in ${consecutiveTodoOnlyIterations + 1} consecutive iterations without executing any step. ` +
1860
1844
  'Execute the first pending task now using a concrete tool — `search_code`, `read_file`, `apply_edit`, `replace_range`, `write_file`, or `run_command`. ' +
1861
1845
  'Once a task is actually DONE (tool call succeeded), you may call `todo_write` again to mark it completed — but not to re-plan. ' +
1862
1846
  'If you cannot identify a next step, respond to the user with a short honest explanation and stop.'
@@ -1881,7 +1865,7 @@ class ToolUseLoop {
1881
1865
  });
1882
1866
  messages.push({
1883
1867
  role: 'user',
1884
- content: `You have spent ${consecutiveApplyEditOnlyIterations} consecutive iterations on apply_edit alone. ` +
1868
+ content: tool_use_parser_1.AUTOMATED_NUDGE_PREFIX + `You have spent ${consecutiveApplyEditOnlyIterations} consecutive iterations on apply_edit alone. ` +
1885
1869
  'If these are mechanical fixes of the same shape (one type annotation, one rename, one import path, one missing semicolon per call), STOP doing them one at a time — you will exhaust the iteration budget before the file is clean.\n' +
1886
1870
  '\n' +
1887
1871
  'Better tactics, in order of preference:\n' +
@@ -1995,7 +1979,7 @@ class ToolUseLoop {
1995
1979
  });
1996
1980
  messages.push({
1997
1981
  role: 'user',
1998
- content: `You just spawned ${bgSpawns.length} background subagents:\n${goalLines}\n\n` +
1982
+ content: tool_use_parser_1.AUTOMATED_NUDGE_PREFIX + `You just spawned ${bgSpawns.length} background subagents:\n${goalLines}\n\n` +
1999
1983
  'Do NOT do those same explorations yourself in the next iteration — the subagents will deliver their synopses via the auto-inject path on a later turn. ' +
2000
1984
  'Choose ONE of: ' +
2001
1985
  '(a) work on a different, independent piece of the task that those subagents are NOT covering, ' +
@@ -2036,7 +2020,7 @@ class ToolUseLoop {
2036
2020
  });
2037
2021
  messages.push({
2038
2022
  role: 'user',
2039
- content: 'You set up a plan with `todo_write` earlier but have since completed ' +
2023
+ content: tool_use_parser_1.AUTOMATED_NUDGE_PREFIX + 'You set up a plan with `todo_write` earlier but have since completed ' +
2040
2024
  `${editsSinceLastTodo} edit${editsSinceLastTodo === 1 ? '' : 's'} without updating it. ` +
2041
2025
  'Call `todo_write` now with the current status — mark finished items as `completed` and leave remaining items as `pending`. ' +
2042
2026
  "The Plan block in the user's UI mirrors your last `todo_write`, so skipping this leaves them looking at a stale checklist while real work has landed."