@burtson-labs/agent-core 1.6.17 → 1.6.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. package/dist/tools/core-tools.js +6 -6
  2. package/dist/tools/core-tools.js.map +1 -1
  3. package/dist/tools/language-adapters.d.ts +1 -1
  4. package/dist/tools/language-adapters.d.ts.map +1 -1
  5. package/dist/tools/language-adapters.js +12 -6
  6. package/dist/tools/language-adapters.js.map +1 -1
  7. package/dist/tools/loop/goalAnchor.d.ts.map +1 -1
  8. package/dist/tools/loop/goalAnchor.js +2 -1
  9. package/dist/tools/loop/goalAnchor.js.map +1 -1
  10. package/dist/tools/loop/llmStream.js +5 -5
  11. package/dist/tools/loop/llmStream.js.map +1 -1
  12. package/dist/tools/loop/loopShared.d.ts +20 -0
  13. package/dist/tools/loop/loopShared.d.ts.map +1 -0
  14. package/dist/tools/loop/loopShared.js +105 -0
  15. package/dist/tools/loop/loopShared.js.map +1 -0
  16. package/dist/tools/loop/parallelExecute.d.ts +1 -1
  17. package/dist/tools/loop/turnSetup.js +3 -3
  18. package/dist/tools/loop/turnSetup.js.map +1 -1
  19. package/dist/tools/skills/mail-search-skill.js +2 -2
  20. package/dist/tools/skills/mail-search-skill.js.map +1 -1
  21. package/dist/tools/tool-registry.d.ts +17 -0
  22. package/dist/tools/tool-registry.d.ts.map +1 -1
  23. package/dist/tools/tool-registry.js +100 -25
  24. package/dist/tools/tool-registry.js.map +1 -1
  25. package/dist/tools/tool-use-loop.d.ts +15 -7
  26. package/dist/tools/tool-use-loop.d.ts.map +1 -1
  27. package/dist/tools/tool-use-loop.js +130 -158
  28. package/dist/tools/tool-use-loop.js.map +1 -1
  29. package/dist/tools/tool-use-parser.d.ts +33 -0
  30. package/dist/tools/tool-use-parser.d.ts.map +1 -1
  31. package/dist/tools/tool-use-parser.js +49 -0
  32. package/dist/tools/tool-use-parser.js.map +1 -1
  33. package/dist/tools/toolAvailabilityDetector.d.ts +0 -24
  34. package/dist/tools/toolAvailabilityDetector.d.ts.map +1 -1
  35. package/dist/tools/toolAvailabilityDetector.js +24 -11
  36. package/dist/tools/toolAvailabilityDetector.js.map +1 -1
  37. package/package.json +20 -1
@@ -18,12 +18,7 @@
18
18
  * the host should use the Ollama `tools: [...]` field instead.
19
19
  */
20
20
  Object.defineProperty(exports, "__esModule", { value: true });
21
- exports.ToolUseLoop = void 0;
22
- exports.sleep = sleep;
23
- exports.isRetryableLlmError = isRetryableLlmError;
24
- exports.tagRetryableLlmError = tagRetryableLlmError;
25
- exports.summarizeLlmError = summarizeLlmError;
26
- exports.isContinuationPrompt = isContinuationPrompt;
21
+ exports.ToolUseLoop = exports.isContinuationPrompt = exports.summarizeLlmError = exports.tagRetryableLlmError = exports.isRetryableLlmError = exports.sleep = void 0;
27
22
  exports.isNoticingPrompt = isNoticingPrompt;
28
23
  exports.createToolUseLoop = createToolUseLoop;
29
24
  const tool_use_parser_1 = require("./tool-use-parser");
@@ -36,93 +31,16 @@ const parallelExecute_1 = require("./loop/parallelExecute");
36
31
  const goalAnchor_1 = require("./loop/goalAnchor");
37
32
  const finalAnswerNudges_1 = require("./loop/finalAnswerNudges");
38
33
  const toolAvailabilityDetector_1 = require("./toolAvailabilityDetector");
34
+ const loopShared_1 = require("./loop/loopShared");
35
+ Object.defineProperty(exports, "sleep", { enumerable: true, get: function () { return loopShared_1.sleep; } });
36
+ Object.defineProperty(exports, "isRetryableLlmError", { enumerable: true, get: function () { return loopShared_1.isRetryableLlmError; } });
37
+ Object.defineProperty(exports, "tagRetryableLlmError", { enumerable: true, get: function () { return loopShared_1.tagRetryableLlmError; } });
38
+ Object.defineProperty(exports, "summarizeLlmError", { enumerable: true, get: function () { return loopShared_1.summarizeLlmError; } });
39
+ Object.defineProperty(exports, "isContinuationPrompt", { enumerable: true, get: function () { return loopShared_1.isContinuationPrompt; } });
39
40
  const FILE_EDIT_TOOL_NAMES = new Set(['write_file', 'apply_edit', 'replace_range', 'apply_patch']);
40
41
  function isFileEditTool(name) {
41
42
  return FILE_EDIT_TOOL_NAMES.has(name);
42
43
  }
43
- function sleep(ms) {
44
- return new Promise((resolve) => setTimeout(resolve, ms));
45
- }
46
- function getErrorCode(error) {
47
- return typeof error === 'object' && error !== null && 'code' in error
48
- ? String(error.code ?? '')
49
- : undefined;
50
- }
51
- function getErrorMessage(error) {
52
- return error instanceof Error ? error.message : String(error);
53
- }
54
- function isRetryableLlmError(error) {
55
- const code = getErrorCode(error);
56
- if (code === 'USER_ABORT') {
57
- return false;
58
- }
59
- const message = getErrorMessage(error);
60
- if (/\b429\b|rate limit/i.test(message)) {
61
- return false;
62
- }
63
- return (code === 'WATCHDOG' ||
64
- /\b5\d\d\b/.test(message) ||
65
- /Upstream model request failed/i.test(message) ||
66
- /ECONNREFUSED|ECONNRESET|ETIMEDOUT|EAI_AGAIN|socket hang up|fetch failed|network error|terminated|UND_ERR/i.test(message));
67
- }
68
- function tagRetryableLlmError(error) {
69
- if (error instanceof Error) {
70
- const tagged = error;
71
- if (!tagged.code) {
72
- tagged.code = 'UPSTREAM_MODEL';
73
- }
74
- }
75
- }
76
- function summarizeLlmError(error) {
77
- const message = getErrorMessage(error).replace(/\s+/g, ' ').trim();
78
- return message.length > 180 ? `${message.slice(0, 177)}...` : message;
79
- }
80
- /**
81
- * Detects "keep going" / "continue" / "yes" style prompts that
82
- * carry no real goal content. The goal-anchor block uses the most recent
83
- * user message as the recall text; when that text is "good lets keep
84
- * going" the anchor degenerates into "remind yourself to keep going",
85
- * which gives the model nothing to anchor on after 20 iterations of
86
- * drift. Real on a 60-iteration linter-fix
87
- * turn: every anchor injection cited "good lets keep going" as the
88
- * goal. Detector lets callers walk back to a prior substantive prompt
89
- * instead.
90
- *
91
- * Length cap (60 chars) + normalized-phrase match keeps false positives
92
- * down — a sentence like "keep going on the auth refactor for the
93
- * user-service" is longer than 60 chars and reads as a real goal, so it
94
- * stays a goal.
95
- */
96
- const CONTINUATION_PROMPT_PHRASES = new Set([
97
- 'continue', 'keep going', 'go on', 'proceed', 'next', 'more',
98
- 'please continue', 'carry on', 'finish', 'finish it', 'finish up', 'wrap up', 'wrap it up',
99
- 'good', 'great', 'nice', 'cool', 'sweet', 'perfect', 'ok', 'okay', 'k', 'yes', 'y', 'yep', 'yeah', 'ack', 'done',
100
- "let's continue", 'lets continue', "let's keep going", 'lets keep going',
101
- 'good keep going', 'good lets keep going', "good let's keep going",
102
- 'good continue', 'ok continue', 'okay continue'
103
- ]);
104
- function isContinuationPrompt(text) {
105
- const trimmed = text.trim();
106
- if (trimmed.length === 0 || trimmed.length > 60) {
107
- return false;
108
- }
109
- // Normalize: lowercase, drop non-word/space punctuation, collapse whitespace.
110
- const norm = trimmed
111
- .toLowerCase()
112
- .replace(/[^\w\s']/g, ' ')
113
- .replace(/\s+/g, ' ')
114
- .trim();
115
- if (CONTINUATION_PROMPT_PHRASES.has(norm)) {
116
- return true;
117
- }
118
- // Permit "please <phrase>" and "<phrase> please" wrappings.
119
- for (const phrase of CONTINUATION_PROMPT_PHRASES) {
120
- if (norm === `please ${phrase}` || norm === `${phrase} please`) {
121
- return true;
122
- }
123
- }
124
- return false;
125
- }
126
44
  /**
127
45
  * "Noticing prompt" detector. Catches user messages that are asking
128
46
  * about state ("are we using these?", "did you update X?", "where's
@@ -130,7 +48,7 @@ function isContinuationPrompt(text) {
130
48
  * work. These signal that the user spotted a gap in the prior turn
131
49
  * and wants the agent to address it — NOT continue the prior plan.
132
50
  *
133
- * Real failure mode captured 2026-05-25 on a Portfolio React refactor:
51
+ * Real failure mode captured 2026-05-25 on a local React refactor:
134
52
  * user asked "I dont think we actually are using these new files are
135
53
  * we?" after the agent wrote data files but never wired them into
136
54
  * App.jsx. Bandit read the question as a generic "keep going" prompt,
@@ -265,7 +183,13 @@ class ToolUseLoop {
265
183
  // explicit "this is a recovery attempt — answer the original goal"
266
184
  // framing succeeds. Last resort before terminal throw.
267
185
  let finalAnchorRetryUsed = false;
268
- const textToolBlock = this.registry.buildSystemPromptBlock();
186
+ const textToolBlock = effectiveOptions.compactToolBlock
187
+ ? this.registry.buildCompactSystemPromptBlock()
188
+ : this.registry.buildSystemPromptBlock();
189
+ // Lowercased registered tool names — used by the narrated-call
190
+ // detector to anchor on "I call <real tool>" with near-zero false
191
+ // positives.
192
+ const registeredToolNames = new Set(this.registry.getAll().map(t => t.name.toLowerCase()));
269
193
  const buildFullSystemPrompt = (useNativeTools) => {
270
194
  if (useNativeTools) {
271
195
  return systemPrompt ?? '';
@@ -315,7 +239,7 @@ class ToolUseLoop {
315
239
  });
316
240
  messages.push({
317
241
  role: 'user',
318
- content: '[Reading-comprehension note for the assistant: the user\'s last message above is a noticing / clarifying question — they spotted a possible gap from prior turns and are asking you to confirm or correct, NOT to continue any prior plan. Before you take any new action, identify what gap the question points at and address it directly. If the question is "are we using X?" the correct first move is to verify whether X is actually being used (read the consumer file, grep for the import, check the call site) and answer honestly — yes/no with evidence. Do NOT create more new artifacts unless the user explicitly says to.]'
242
+ content: tool_use_parser_1.AUTOMATED_NUDGE_PREFIX + '[Reading-comprehension note for the assistant: the user\'s last message above is a noticing / clarifying question — they spotted a possible gap from prior turns and are asking you to confirm or correct, NOT to continue any prior plan. Before you take any new action, identify what gap the question points at and address it directly. If the question is "are we using X?" the correct first move is to verify whether X is actually being used (read the consumer file, grep for the import, check the call site) and answer honestly — yes/no with evidence. Do NOT create more new artifacts unless the user explicitly says to.]'
319
243
  });
320
244
  }
321
245
  let iterations = 0;
@@ -344,7 +268,7 @@ class ToolUseLoop {
344
268
  // recovery, etc.) each have their own caps, but they can chain — a
345
269
  // model can spin through 6+ no-tool-call responses because
346
270
  // thinking-off recovery resets consecutiveEmptyRetries=0. Captured
347
- // 2026-05-26 in Mark's Portfolio session (turn-2026-05-26T02-30-37):
271
+ // 2026-05-26 in a real CLI session (turn-2026-05-26T02-30-37):
348
272
  // model emitted 6 sequential reasoning-only responses inside
349
273
  // iteration 4 before the loop finally terminated with a useless
350
274
  // final answer ("I need to stop wrapping tool calls in reasoning
@@ -697,7 +621,7 @@ class ToolUseLoop {
697
621
  // current pace and burn the extension too.
698
622
  messages.push({
699
623
  role: 'user',
700
- content: `You've been making good progress and the iteration budget has been extended by ${CAP_EXTENSION_SIZE} (new limit: ${max}). Keep going, but tighten up: prefer batched edits over single-line ones, and start wrapping up when you have a complete answer rather than running to the new cap. This is the ${iterationCapExtensions === 1 ? 'first' : 'second'} of at most ${MAX_CAP_EXTENSIONS} extensions for this turn.`
624
+ content: tool_use_parser_1.AUTOMATED_NUDGE_PREFIX + `You've been making good progress and the iteration budget has been extended by ${CAP_EXTENSION_SIZE} (new limit: ${max}). Keep going, but tighten up: prefer batched edits over single-line ones, and start wrapping up when you have a complete answer rather than running to the new cap. This is the ${iterationCapExtensions === 1 ? 'first' : 'second'} of at most ${MAX_CAP_EXTENSIONS} extensions for this turn.`
701
625
  });
702
626
  }
703
627
  else {
@@ -708,7 +632,7 @@ class ToolUseLoop {
708
632
  // vs edit) reflects what the user actually asked for.
709
633
  messages.push({
710
634
  role: 'user',
711
- content: `${goalRecallBlock}` +
635
+ content: tool_use_parser_1.AUTOMATED_NUDGE_PREFIX + `${goalRecallBlock}` +
712
636
  `You have reached the tool-use iteration limit (${max}). Stop calling tools. Produce a final answer with three short sections, in this exact shape:\n` +
713
637
  '\n' +
714
638
  wrapUpBody +
@@ -722,7 +646,7 @@ class ToolUseLoop {
722
646
  emit('tool_loop:total_tool_cap', { iteration: iterations, totalToolsExecuted });
723
647
  messages.push({
724
648
  role: 'user',
725
- content: `${goalRecallBlock}` +
649
+ content: tool_use_parser_1.AUTOMATED_NUDGE_PREFIX + `${goalRecallBlock}` +
726
650
  `You have executed ${totalToolsExecuted} tool calls this turn — the per-turn cap (${maxTotalTools}) has been reached. Stop calling tools. Produce a final answer with three short sections:\n` +
727
651
  '\n' +
728
652
  wrapUpBody +
@@ -823,7 +747,7 @@ class ToolUseLoop {
823
747
  break;
824
748
  }
825
749
  catch (error) {
826
- if (nativeTools && nativeToolFailureFallback && !nativeFallbackUsed && isRetryableLlmError(error) && !signal?.aborted) {
750
+ if (nativeTools && nativeToolFailureFallback && !nativeFallbackUsed && (0, loopShared_1.isRetryableLlmError)(error) && !signal?.aborted) {
827
751
  nativeFallbackUsed = true;
828
752
  nativeTools = false;
829
753
  nativeSchemas = undefined;
@@ -849,7 +773,7 @@ class ToolUseLoop {
849
773
  // visible markup.
850
774
  messages.push({
851
775
  role: 'user',
852
- content: `[Provider error mid-turn — tool channel switched.] The previous attempt failed with: ${summarizeLlmError(error)}. ` +
776
+ content: tool_use_parser_1.AUTOMATED_NUDGE_PREFIX + `[Provider error mid-turn — tool channel switched.] The previous attempt failed with: ${(0, loopShared_1.summarizeLlmError)(error)}. ` +
853
777
  `I retried with the text-based tool-call channel. ` +
854
778
  `Re-emit your pending action using the text envelope: ` +
855
779
  `<tool_call>{"name":"...","params":{...}}</tool_call> outside of any reasoning block. ` +
@@ -858,7 +782,7 @@ class ToolUseLoop {
858
782
  });
859
783
  emit('tool_loop:native_tool_fallback', {
860
784
  iteration: iterations,
861
- reason: summarizeLlmError(error)
785
+ reason: (0, loopShared_1.summarizeLlmError)(error)
862
786
  });
863
787
  continue;
864
788
  }
@@ -871,13 +795,13 @@ class ToolUseLoop {
871
795
  // this attempt, any further failure on text is genuinely
872
796
  // terminal — the user has been waiting > 30 s and a clean
873
797
  // error is more helpful than another silent retry.
874
- if (nativeFallbackUsed && !textFallbackRetryUsed && isRetryableLlmError(error) && !signal?.aborted) {
798
+ if (nativeFallbackUsed && !textFallbackRetryUsed && (0, loopShared_1.isRetryableLlmError)(error) && !signal?.aborted) {
875
799
  textFallbackRetryUsed = true;
876
800
  emit('tool_loop:text_fallback_retry', {
877
801
  iteration: iterations,
878
- reason: summarizeLlmError(error)
802
+ reason: (0, loopShared_1.summarizeLlmError)(error)
879
803
  });
880
- await sleep(2400);
804
+ await (0, loopShared_1.sleep)(2400);
881
805
  continue;
882
806
  }
883
807
  // Last-resort final-anchor retry. By this point we've spent
@@ -894,21 +818,21 @@ class ToolUseLoop {
894
818
  if (!finalAnchorRetryUsed
895
819
  && textFallbackRetryUsed
896
820
  && originalGoal.trim().length > 0
897
- && isRetryableLlmError(error)
821
+ && (0, loopShared_1.isRetryableLlmError)(error)
898
822
  && !signal?.aborted) {
899
823
  finalAnchorRetryUsed = true;
900
824
  messages.push({
901
825
  role: 'user',
902
- content: `[Recovery attempt — previous channel attempts hit ${summarizeLlmError(error)}. ` +
826
+ content: tool_use_parser_1.AUTOMATED_NUDGE_PREFIX + `[Recovery attempt — previous channel attempts hit ${(0, loopShared_1.summarizeLlmError)(error)}. ` +
903
827
  `Discarding any partial tool_call or reasoning state from those attempts. ` +
904
828
  `Original user goal restated as a fresh anchor:]\n\n${originalGoal.trim()}`
905
829
  });
906
830
  emit('tool_loop:final_anchor_retry', {
907
831
  iteration: iterations,
908
- reason: summarizeLlmError(error),
832
+ reason: (0, loopShared_1.summarizeLlmError)(error),
909
833
  goalPreview: originalGoal.slice(0, 120)
910
834
  });
911
- await sleep(3600);
835
+ await (0, loopShared_1.sleep)(3600);
912
836
  continue;
913
837
  }
914
838
  throw error;
@@ -938,7 +862,7 @@ class ToolUseLoop {
938
862
  // have their own caps, but they chain — thinking-off recovery
939
863
  // resets consecutiveEmptyRetries=0, parse-retry has its own
940
864
  // counter, and the model can move between failure modes faster
941
- // than any one detector can give up. Mark Portfolio session
865
+ // than any one detector can give up. Real CLI session
942
866
  // 2026-05-26 turn-02-30-37: 6 sequential reasoning-only
943
867
  // responses inside one iteration before the loop terminated
944
868
  // silently. This counter increments on EVERY response without
@@ -979,9 +903,9 @@ class ToolUseLoop {
979
903
  // Also reset the prefill-recovery one-shot. The recovery budget
980
904
  // is "per stretch of failures," not "once per turn" — without
981
905
  // this reset, a long refactor that recovers from one prefill
982
- // stall and then hits another (Mark, gregoryhite-site
983
- // 2026-06-02T23-56-38: 26 iterations, prefill burned at iter 25,
984
- // iter 26 stalled again with no recovery left) falls straight
906
+ // stall and then hits another (observed in a real run: 26
907
+ // iterations, prefill burned at iter 25, iter 26 stalled again
908
+ // with no recovery left) falls straight
985
909
  // through to the terminal "Bandit stalled" fallback even though
986
910
  // every other detector still has budget. The hard cap on
987
911
  // noToolCallAttemptsThisTurn (5) bounds the total stuck
@@ -1013,7 +937,7 @@ class ToolUseLoop {
1013
937
  messages.push({ role: 'assistant', content: scrubbed });
1014
938
  messages.push({
1015
939
  role: 'user',
1016
- content: 'You emitted a `<tool_result>` envelope in your response. Those envelopes are SYSTEM output — they appear BETWEEN your turns, never inside your own message. If you meant to invoke a tool, emit a single `<tool_call>{"name":"...","params":{...}}</tool_call>` and wait for the real result. If the task is complete, give a plain-prose final answer with no XML envelopes. Retry now.'
940
+ content: tool_use_parser_1.AUTOMATED_NUDGE_PREFIX + 'You emitted a `<tool_result>` envelope in your response. Those envelopes are SYSTEM output — they appear BETWEEN your turns, never inside your own message. If you meant to invoke a tool, emit a single `<tool_call>{"name":"...","params":{...}}</tool_call>` and wait for the real result. If the task is complete, give a plain-prose final answer with no XML envelopes. Retry now.'
1017
941
  });
1018
942
  continue;
1019
943
  }
@@ -1044,7 +968,7 @@ class ToolUseLoop {
1044
968
  messages.push({ role: 'assistant', content: scrubbed });
1045
969
  messages.push({
1046
970
  role: 'user',
1047
- content: 'You emitted ` ```bandit-tl` (or `bandit-run` / `bandit-subagent`) fenced JSON in your response. Those fences are emitted by the EXTENSION HOST to log real tool execution — you CANNOT produce them. They show up in your context because the host logged actual tool calls, not because you can fabricate them. To actually run a tool, emit `<tool_call>{"name":"...","params":{...}}</tool_call>` and wait for the real result. Your fake fences mean NO work has happened this turn. You have TWO options for your retry, and ONLY two: (a) Emit a real `<tool_call>{"name":"...","params":{...}}</tool_call>` envelope NOW to actually do the work, then wait for the real result. (b) Honestly state "I have not [action] yet" and STOP. Do NOT claim completion. You MUST NOT claim you have fixed / eliminated / resolved / removed / cleaned / verified anything. No "successfully [verb]" phrasing. No numbered lists of "Step 1: I did X" actions. No "the project is now in a healthy state." Until a real `<tool_call>` lands on disk and returns a real tool-result, nothing has changed. Lying about completion is the worst failure mode. Retry now.'
971
+ content: tool_use_parser_1.AUTOMATED_NUDGE_PREFIX + 'You emitted ` ```bandit-tl` (or `bandit-run` / `bandit-subagent`) fenced JSON in your response. Those fences are emitted by the EXTENSION HOST to log real tool execution — you CANNOT produce them. They show up in your context because the host logged actual tool calls, not because you can fabricate them. To actually run a tool, emit `<tool_call>{"name":"...","params":{...}}</tool_call>` and wait for the real result. Your fake fences mean NO work has happened this turn. You have TWO options for your retry, and ONLY two: (a) Emit a real `<tool_call>{"name":"...","params":{...}}</tool_call>` envelope NOW to actually do the work, then wait for the real result. (b) Honestly state "I have not [action] yet" and STOP. Do NOT claim completion. You MUST NOT claim you have fixed / eliminated / resolved / removed / cleaned / verified anything. No "successfully [verb]" phrasing. No numbered lists of "Step 1: I did X" actions. No "the project is now in a healthy state." Until a real `<tool_call>` lands on disk and returns a real tool-result, nothing has changed. Lying about completion is the worst failure mode. Retry now.'
1048
972
  });
1049
973
  continue;
1050
974
  }
@@ -1066,7 +990,10 @@ class ToolUseLoop {
1066
990
  && !(0, tool_use_parser_1.hasToolCalls)(response)
1067
991
  && toolAbsenceCorrectionsFired < TOOL_ABSENCE_CORRECTION_CAP) {
1068
992
  const registeredNames = this.registry.getAll().map((t) => t.name);
1069
- const absence = (0, toolAvailabilityDetector_1.detectFalseToolAbsence)(response, registeredNames);
993
+ // Reasoning channels MUST be stripped before prose-matching:
994
+ // reasoning narrates tool usage by name and false-positives the
995
+ // absence phrases (see toolAvailabilityDetector.ts header).
996
+ const absence = (0, toolAvailabilityDetector_1.detectFalseToolAbsence)((0, tool_use_parser_1.stripReasoningChannels)(response), registeredNames);
1070
997
  if (absence.detected) {
1071
998
  toolAbsenceCorrectionsFired++;
1072
999
  emit('tool_loop:false_tool_absence', {
@@ -1098,7 +1025,7 @@ class ToolUseLoop {
1098
1025
  messages.push({ role: 'assistant', content: response });
1099
1026
  messages.push({
1100
1027
  role: 'user',
1101
- content: 'The previous tool call returned an error and you produced no follow-up tool_call. ' +
1028
+ content: tool_use_parser_1.AUTOMATED_NUDGE_PREFIX + 'The previous tool call returned an error and you produced no follow-up tool_call. ' +
1102
1029
  'Do NOT silently abandon the request — the user expects you to either retry with corrected parameters OR state explicitly which precondition failed and why you cannot proceed. ' +
1103
1030
  'Choose one: (a) emit a corrected `<tool_call>{"name":"...","params":{...}}</tool_call>` now, fixing the param shape or value the error pointed at; ' +
1104
1031
  '(b) give a one-line final answer naming the exact precondition you lack (e.g. "I cannot trash message X because the message id is unknown — please provide it"). ' +
@@ -1117,12 +1044,12 @@ class ToolUseLoop {
1117
1044
  // without emitting an actual tool_call. Visually the user sees a
1118
1045
  // wall of reasoning text and nothing happens. Strip the reasoning
1119
1046
  // fences before checking emptiness so the same nudge fires.
1120
- const stripped = response
1121
- .replace(/<think\b[\s\S]*?<\/think\s*>/gi, '')
1122
- .replace(/<think\b[\s\S]*$/i, '')
1123
- .replace(/```bandit-reasoning\b[\s\S]*?```/gi, '')
1124
- .replace(/```bandit-reasoning\b[\s\S]*$/i, '')
1125
- .trim();
1047
+ // Strip reasoning channels AND stray fence scaffolding (a bare
1048
+ // leading ``` opener that wraps the reasoning) so the
1049
+ // reasoning-only check isn't fooled into seeing the orphan ``` as
1050
+ // a real answer — which let a "reasoning + no tool call" turn end
1051
+ // with no answer (real CLI run, 2026-06-15).
1052
+ const stripped = (0, tool_use_parser_1.stripToAnswerContent)(response);
1126
1053
  const reasoningOnly = !stripped && response.trim().length > 0;
1127
1054
  // "Narrated but didn't act" detector. Some models (notably ones
1128
1055
  // post-trained for a different tool-call envelope, e.g. OpenAI
@@ -1138,8 +1065,8 @@ class ToolUseLoop {
1138
1065
  // in the model's final clause, not an earlier "I have already
1139
1066
  // searched the file" preamble before a real answer.
1140
1067
  //
1141
- // Captured 2026-05-25 (Mark, Portfolio IDE session): model emitted
1142
- // "I'll redesign the portfolio... Let me rewrite both files." with
1068
+ // Captured 2026-05-25 (real IDE session): model emitted
1069
+ // "I'll redesign the page... Let me rewrite both files." with
1143
1070
  // NO tool_call and the turn closed as a final answer because
1144
1071
  // neither `redesign` nor `rewrite` was on the list. A long
1145
1072
  // session ended with zero work shipped. Missing a verb here =
@@ -1147,10 +1074,13 @@ class ToolUseLoop {
1147
1074
  const NARRATE_VERB_RE = /\b(use|uses|used|using|call|calls|called|calling|invoke|invokes|invoked|invoking|execute|executes|executed|executing|run|runs|running|ran|search|searches|searched|searching|look|looks|looked|looking|read|reads|reading|check|checks|checked|checking|find|finds|finding|found|list|lists|listed|listing|fetch|fetches|fetched|fetching|grep|greps|grepped|grepping|explore|explores|explored|exploring|locate|locates|located|locating|plan|plans|planned|planning|start|starts|started|starting|begin|begins|began|beginning|create|creates|created|creating|write|writes|wrote|writing|rewrite|rewrites|rewrote|rewriting|rewritten|build|builds|built|building|rebuild|rebuilds|rebuilt|rebuilding|update|updates|updated|updating|implement|implements|implemented|implementing|refactor|refactors|refactored|refactoring|redesign|redesigns|redesigned|redesigning|design|designs|designed|designing|generate|generates|generated|generating|scaffold|scaffolds|scaffolded|scaffolding|set\s+up|setting\s+up|tackle|tackles|tackled|tackling|do|does|did|doing|make|makes|made|making|batch|batches|batched|batching|execute|prepare|prepares|prepared|preparing|draft|drafts|drafted|drafting|outline|outlines|outlined|outlining|organize|organizes|organized|organizing|structure|structures|structured|structuring|kick\s+off|kicking\s+off|fix|fixes|fixed|fixing|edit|edits|edited|editing|modify|modifies|modified|modifying|patch|patches|patched|patching|adjust|adjusts|adjusted|adjusting|replace|replaces|replaced|replacing|swap|swaps|swapped|swapping|polish|polishes|polished|polishing|clean\s+up|cleaning\s+up|tidy|tidies|tidied|tidying|finalize|finalizes|finalized|finalizing|finish|finishes|finished|finishing|complete|completes|completed|completing|wire|wires|wired|wiring|hook|hooks|hooked|hooking|render|renders|rendered|rendering|style|styles|styled|styling|theme|themes|themed|theming|redo|redoes|redid|redoing|port|ports|ported|porting|migrate|migrates|migrated|migrating|configure|configures|configured|configuring|install|installs|installed|installing|remove|removes|removed|removing|delete|deletes|deleted|deleting|rename|renames|renamed|renaming)\b/i;
1148
1075
  const NARRATE_INTENT_RE = /\b(we (?:will|need to|should)|we'?ll|we'?re going to|i'?ll|i will|let me|let'?s|going to|i'?m going to|i need to)\b/i;
1149
1076
  // Real code fences pass through; narrate only fires when the
1150
- // model emitted no structured payload at all. Check the STRIPPED
1151
- // response, not the raw one — `bandit-reasoning` fences are
1152
- // reasoning, not structured output.
1153
- const hasCodeFence = /```[a-zA-Z0-9_-]*\s*\n/.test(stripped);
1077
+ // model emitted no structured payload at all. Use the
1078
+ // reasoning-stripped response (NOT `stripped`, which also removes
1079
+ // bare fence-marker lines) so a genuine ```json / ```diff payload
1080
+ // still suppresses the narrate nudge and reaches its own
1081
+ // auto-promote detector. `bandit-reasoning` fences are reasoning,
1082
+ // not structured output, so they're excluded either way.
1083
+ const hasCodeFence = /```[a-zA-Z0-9_-]*\s*\n/.test((0, tool_use_parser_1.stripReasoningChannels)(response));
1154
1084
  const tailMatch = stripped.match(/(?:[.!?]\s+)([^.!?]*)$/);
1155
1085
  const tail = (tailMatch ? tailMatch[1] : stripped).slice(-200);
1156
1086
  const narratedButNoAction = !(0, tool_use_parser_1.hasToolCalls)(response) &&
@@ -1159,6 +1089,22 @@ class ToolUseLoop {
1159
1089
  stripped.length < 240 &&
1160
1090
  NARRATE_INTENT_RE.test(tail) &&
1161
1091
  NARRATE_VERB_RE.test(tail);
1092
+ // Performative narrated call: "I call read_file with path=README.md".
1093
+ // The generic gate above caps stripped.length at 240 to avoid false
1094
+ // positives on real answers that merely contain narrate verbs — but
1095
+ // when the final clause NAMES A REGISTERED TOOL in a performative
1096
+ // phrase, the length cap is wrong: a long planning recap that ends
1097
+ // "I call read_file with path=…" is a stall no matter how long the
1098
+ // recap is, and tool-name anchoring keeps the false-positive rate
1099
+ // near zero. Captured 2026-06-12 (real CLI session,
1100
+ // gemma4:e4b): iteration 1 emitted a reasoning recap ending with
1101
+ // exactly that sentence and no tool_call — the generic gate missed
1102
+ // it (over the length cap; intent list lacks present-tense "I
1103
+ // call") and the turn closed as a final answer.
1104
+ const narratedCallMatch = stripped.slice(-300).match(/\b(?:i\s+(?:will\s+|now\s+|then\s+)?(?:call|invoke|run|use)|calling|invoking|let'?s\s+(?:call|run|use))\s+(?:the\s+)?`?([a-z][a-z0-9_]*)`?/i);
1105
+ const narratedToolCallNoAction = !(0, tool_use_parser_1.hasToolCalls)(response) &&
1106
+ !!narratedCallMatch &&
1107
+ registeredToolNames.has(narratedCallMatch[1].toLowerCase());
1162
1108
  // Empty-response retry: was previously gated to `iterations > 0`
1163
1109
  // under the assumption "empty first response = provider outage."
1164
1110
  // That assumption was wrong — with bandit-logic
@@ -1170,7 +1116,7 @@ class ToolUseLoop {
1170
1116
  // the model gets a second chance (and the thinking-off recovery
1171
1117
  // below can flip it to non-thinking mode if the second pass also
1172
1118
  // empties).
1173
- const shouldNudge = (!response.trim() || reasoningOnly || narratedButNoAction) &&
1119
+ const shouldNudge = (!response.trim() || reasoningOnly || narratedButNoAction || narratedToolCallNoAction) &&
1174
1120
  !hitLimit &&
1175
1121
  consecutiveEmptyRetries < 2 &&
1176
1122
  !thinkingOffRecoveryAttempted;
@@ -1180,16 +1126,17 @@ class ToolUseLoop {
1180
1126
  iteration: iterations,
1181
1127
  attempt: consecutiveEmptyRetries,
1182
1128
  reasoningOnly,
1183
- narratedButNoAction
1129
+ narratedButNoAction,
1130
+ narratedToolCallNoAction
1184
1131
  });
1185
- const nudgeMessage = narratedButNoAction
1132
+ const nudgeMessage = (narratedButNoAction || narratedToolCallNoAction)
1186
1133
  ? 'You announced your next step in prose ("we will search…" / "let me check…" / "use X to find Y") but did NOT emit a `<tool_call>` envelope. Announcing intent is not enough — you must actually invoke the tool. Emit the call now in this exact format, OUTSIDE of any reasoning block, with NO commentary and NO markdown fence:\n\n<tool_call>{"name":"<tool>","params":{"<key>":"<value>"}}</tool_call>\n\nReplace name/params with the right values for your task. Or, if the task is already answerable from what you know, give a final answer instead.'
1187
1134
  : reasoningOnly
1188
1135
  ? 'You completed reasoning but emitted no tool_call AND no final answer. The reasoning text alone does not run a tool — you must emit a `<tool_call>` envelope OUTSIDE the reasoning block. Format example (replace name/params for your task):\n\n<tool_call>{"name":"<tool>","params":{"<key>":"<value>"}}</tool_call>\n\nNo prose around it, no markdown fence, just the bare tag. If the task is answerable without a tool, write a complete final answer instead. Do not stop after only thinking.'
1189
1136
  : 'Your previous response was empty. Either emit a `<tool_call>{"name":"<tool>","params":{...}}</tool_call>` to invoke a tool, OR produce a complete final answer using what you have. Do not respond with an empty message.';
1190
1137
  messages.push({
1191
1138
  role: 'user',
1192
- content: nudgeMessage
1139
+ content: tool_use_parser_1.AUTOMATED_NUDGE_PREFIX + nudgeMessage
1193
1140
  });
1194
1141
  continue;
1195
1142
  }
@@ -1222,7 +1169,7 @@ class ToolUseLoop {
1222
1169
  });
1223
1170
  messages.push({
1224
1171
  role: 'user',
1225
- content: 'Switching to non-thinking mode for this attempt because reasoning-only retries exhausted. Emit either a tool_call or a complete final answer. No more reasoning preamble.'
1172
+ content: tool_use_parser_1.AUTOMATED_NUDGE_PREFIX + 'Switching to non-thinking mode for this attempt because reasoning-only retries exhausted. Emit either a tool_call or a complete final answer. No more reasoning preamble.'
1226
1173
  });
1227
1174
  continue;
1228
1175
  }
@@ -1270,8 +1217,8 @@ class ToolUseLoop {
1270
1217
  messages.push({
1271
1218
  role: 'user',
1272
1219
  content: firstRetry
1273
- ? 'Your previous tool_call was not valid JSON — I could not parse it. Common cause: unescaped `"` characters inside a string value (for example `["", "", ""]` inside a `content` string). Retry the tool call with properly escaped JSON: every `"` inside a string value must be written as `\\"`, and every newline as `\\n`. If the content is very long, consider `replace_range` for a line-numbered block or breaking the change into smaller edits.'
1274
- : 'Your tool_call still did not parse. Do NOT retry with the same shape or the same escaping failure. Switch tactics: (a) call `replace_range` for a large block whose line numbers you just read, (b) call `write_file` for a new file, or (c) split the change into multiple small `apply_edit` calls that each target just one method or block (e.g. 3-5 lines of `find`, 5-10 lines of `replace`) instead of rewriting the whole class. Pick the smallest scope that accomplishes the next step. If you cannot produce a valid tool call, respond with a plain-prose final answer acknowledging you could not complete the edit.'
1220
+ ? tool_use_parser_1.AUTOMATED_NUDGE_PREFIX + 'Your previous tool_call was not valid JSON — I could not parse it. Common cause: unescaped `"` characters inside a string value (for example `["", "", ""]` inside a `content` string). Retry the tool call with properly escaped JSON: every `"` inside a string value must be written as `\\"`, and every newline as `\\n`. If the content is very long, consider `replace_range` for a line-numbered block or breaking the change into smaller edits.'
1221
+ : tool_use_parser_1.AUTOMATED_NUDGE_PREFIX + 'Your tool_call still did not parse. Do NOT retry with the same shape or the same escaping failure. Switch tactics: (a) call `replace_range` for a large block whose line numbers you just read, (b) call `write_file` for a new file, or (c) split the change into multiple small `apply_edit` calls that each target just one method or block (e.g. 3-5 lines of `find`, 5-10 lines of `replace`) instead of rewriting the whole class. Pick the smallest scope that accomplishes the next step. If you cannot produce a valid tool call, respond with a plain-prose final answer acknowledging you could not complete the edit.'
1275
1222
  });
1276
1223
  continue;
1277
1224
  }
@@ -1317,7 +1264,7 @@ class ToolUseLoop {
1317
1264
  });
1318
1265
  messages.push({
1319
1266
  role: 'user',
1320
- content: 'STOP deliberating. Your last response either repeated itself, contradicted itself (e.g. "Wait, I see X / Actually I\'ll try X"), or was aborted mid-stream as a loop. Do NOT continue speculating about what files might exist. Take exactly one of these actions now: (a) invoke a tool (`list_files`, `read_file`, `search_code`, etc.) to answer the question with real data, OR (b) give up and tell the user plainly that you could not complete the task and why. Do not write more than two sentences of prose before either calling a tool or terminating.'
1267
+ content: tool_use_parser_1.AUTOMATED_NUDGE_PREFIX + 'STOP deliberating. Your last response either repeated itself, contradicted itself (e.g. "Wait, I see X / Actually I\'ll try X"), or was aborted mid-stream as a loop. Do NOT continue speculating about what files might exist. Take exactly one of these actions now: (a) invoke a tool (`list_files`, `read_file`, `search_code`, etc.) to answer the question with real data, OR (b) give up and tell the user plainly that you could not complete the task and why. Do not write more than two sentences of prose before either calling a tool or terminating.'
1321
1268
  });
1322
1269
  recentNonToolResponses.length = 0;
1323
1270
  continue;
@@ -1407,7 +1354,7 @@ class ToolUseLoop {
1407
1354
  // without being so loud that it derails prose responses.
1408
1355
  messages.push({
1409
1356
  role: 'user',
1410
- content: 'Note: I detected a JSON todo list in your response and auto-promoted it to a todo_write call. Next time, emit `<tool_call>{"name":"todo_write","params":{"items":"..."}}</tool_call>` directly instead of pasting JSON as a code block — pasted JSON does not update your plan, only the tool call does.'
1357
+ content: tool_use_parser_1.AUTOMATED_NUDGE_PREFIX + 'Note: I detected a JSON todo list in your response and auto-promoted it to a todo_write call. Next time, emit `<tool_call>{"name":"todo_write","params":{"items":"..."}}</tool_call>` directly instead of pasting JSON as a code block — pasted JSON does not update your plan, only the tool call does.'
1411
1358
  });
1412
1359
  iterations++;
1413
1360
  continue;
@@ -1435,7 +1382,26 @@ class ToolUseLoop {
1435
1382
  responsePreview: response.slice(0, 300)
1436
1383
  });
1437
1384
  }
1438
- const finalResponse = (0, tool_use_parser_1.stripToolCallMarkup)(response).trim();
1385
+ // Reasoning channels are streamed live by the host for display —
1386
+ // leaving them in the terminal answer double-renders them, and on
1387
+ // fabrication-retry exhaustion it prints the model's confusion
1388
+ // narrative as if it were the answer (real CLI run,
1389
+ // 2026-06-12T20-19 turn: three near-identical "the user is
1390
+ // correcting my formatting error" reasoning blocks rendered above
1391
+ // the real answer). The stall fallback below still inspects the
1392
+ // raw `response`, so reasoning-only turns keep their fallback.
1393
+ // ORDER MATTERS: reasoning channels strip FIRST. Reasoning text
1394
+ // routinely MENTIONS envelopes in backticks ("I included a
1395
+ // `<tool_result>` envelope…"); if markup stripping ran first, its
1396
+ // envelope regex would match from that in-fence mention through
1397
+ // to the real closing tag, eat the fence's closing ``` along the
1398
+ // way, and the unclosed-fence cleanup would then wipe the entire
1399
+ // rest of the answer.
1400
+ const finalResponse = (0, tool_use_parser_1.stripToolCallMarkup)(response
1401
+ .replace(/<think\b[\s\S]*?<\/think\s*>/gi, '')
1402
+ .replace(/<think\b[\s\S]*$/i, '')
1403
+ .replace(/```bandit-reasoning\b[\s\S]*?```/gi, '')
1404
+ .replace(/```bandit-reasoning\b[\s\S]*$/i, '')).trim();
1439
1405
  // False-completion detector. Small models regularly end a turn
1440
1406
  // with "I refactored the file" / "here is the updated code" text
1441
1407
  // without ever emitting a file-edit tool call.
@@ -1445,14 +1411,25 @@ class ToolUseLoop {
1445
1411
  // this turn, push one corrective user message into the loop
1446
1412
  // and continue for one more iteration. The nudge is capped at
1447
1413
  // one per turn so a truly confused model can still terminate.
1448
- if (!hitLimit && !falseCompletionNudged && editToolsInvoked === 0) {
1414
+ //
1415
+ // ONLY fires when the goal actually implies an edit. Without this
1416
+ // gate the detector demanded an edit on a purely informational
1417
+ // "tell me about this repo" turn: the model correctly said "I have
1418
+ // completed the overview" (a completion phrase), no edit ran
1419
+ // (none was asked for), so the nudge fired and replaced the good
1420
+ // markdown overview with a defensive "no edits are required"
1421
+ // answer — plus a wall of "automated harness check" reasoning.
1422
+ // An analysis goal that does NOT also imply an edit can never
1423
+ // false-complete, so skip it. (real CLI run, 2026-06-12.)
1424
+ const goalCouldExpectEdit = promptImpliesFileEdit || !promptWantsAnalysis;
1425
+ if (!hitLimit && !falseCompletionNudged && editToolsInvoked === 0 && goalCouldExpectEdit) {
1449
1426
  const claimsCompletion = FALSE_COMPLETION_PATTERNS.some(re => re.test(finalResponse));
1450
1427
  if (claimsCompletion) {
1451
1428
  falseCompletionNudged = true;
1452
1429
  emit('tool_loop:false_completion_nudge', { iteration: iterations, responsePreview: finalResponse.slice(0, 200) });
1453
1430
  messages.push({
1454
1431
  role: 'user',
1455
- content: 'Your response either claims work is done OR apologizes and asks what to do next — but I see NO successful `write_file`, `apply_edit`, `replace_range`, or `apply_patch` tool call in this turn, so nothing on disk has changed. ' +
1432
+ content: tool_use_parser_1.AUTOMATED_NUDGE_PREFIX + 'Your response either claims work is done OR apologizes and asks what to do next — but I see NO successful `write_file`, `apply_edit`, `replace_range`, or `apply_patch` tool call in this turn, so nothing on disk has changed. ' +
1456
1433
  'Do NOT ask the user which task to resume, do NOT promise to escape JSON "in your next tool call", and do NOT defer. Either (a) emit a real edit tool call NOW with the actual change — use `replace_range` for a large block whose line numbers you just read, `apply_edit` for a small exact replacement, or `write_file` for a new file — or (b) respond honestly that you could not complete the task and briefly explain why. Retry the tool call yourself; the user cannot help you escape JSON.'
1457
1434
  });
1458
1435
  continue;
@@ -1491,7 +1468,7 @@ class ToolUseLoop {
1491
1468
  });
1492
1469
  messages.push({
1493
1470
  role: 'user',
1494
- content: `Your response describes edits to ${fileSet.size} files (${[...fileSet].slice(0, 8).join(', ')}${fileSet.size > 8 ? ', …' : ''}), but only ${editToolsInvoked} successful edit${editToolsInvoked === 1 ? '' : 's'} actually fired this turn. ` +
1471
+ content: tool_use_parser_1.AUTOMATED_NUDGE_PREFIX + `Your response describes edits to ${fileSet.size} files (${[...fileSet].slice(0, 8).join(', ')}${fileSet.size > 8 ? ', …' : ''}), but only ${editToolsInvoked} successful edit${editToolsInvoked === 1 ? '' : 's'} actually fired this turn. ` +
1495
1472
  `The remaining ${fileSet.size - editToolsInvoked} file(s) were NOT modified — nothing landed on disk for them. ` +
1496
1473
  'Either (a) emit the missing `apply_edit` / `replace_range` / `write_file` tool calls now to actually do the work, OR (b) revise your response to honestly describe ONLY the edits that successfully applied. Do not summarize work that did not happen.'
1497
1474
  });
@@ -1502,7 +1479,7 @@ class ToolUseLoop {
1502
1479
  // ("break out", "split", "refactor", "extract", "move") imply
1503
1480
  // mutation of the SOURCE file the user wants restructured, not
1504
1481
  // just creation of new sibling files. Failure mode observed
1505
- // 2026-05-25 on a Portfolio React refactor: model read App.jsx,
1482
+ // 2026-05-25 on a local React refactor: model read App.jsx,
1506
1483
  // wrote 5 new component files, never touched App.jsx, declared
1507
1484
  // completion. User had to follow up "are we using these?" to
1508
1485
  // force the integration step — and even that follow-up turn
@@ -1534,7 +1511,7 @@ class ToolUseLoop {
1534
1511
  const writeCount = filesWrittenThisTurn.size;
1535
1512
  messages.push({
1536
1513
  role: 'user',
1537
- content: `The user's goal contains a refactor verb (refactor/break out/split/extract/move) which implies the SOURCE file(s) should be modified, not just supplemented with new siblings. You read ${readPreview}${readNotWritten.length > 3 ? ' and others' : ''} for context, then wrote ${writeCount} NEW file(s), but you NEVER modified the file(s) you read. The refactor is incomplete: the source file still contains the old monolithic code. ` +
1514
+ content: tool_use_parser_1.AUTOMATED_NUDGE_PREFIX + `The user's goal contains a refactor verb (refactor/break out/split/extract/move) which implies the SOURCE file(s) should be modified, not just supplemented with new siblings. You read ${readPreview}${readNotWritten.length > 3 ? ' and others' : ''} for context, then wrote ${writeCount} NEW file(s), but you NEVER modified the file(s) you read. The refactor is incomplete: the source file still contains the old monolithic code. ` +
1538
1515
  `Emit the missing apply_edit/replace_range/write_file call on the source file now — it should import from the new files and drop the inlined code that's been extracted. If the refactor is genuinely a "scaffold only, leave source untouched" task, say so explicitly and explain why the source doesn't need to change.`
1539
1516
  });
1540
1517
  continue;
@@ -1576,7 +1553,7 @@ class ToolUseLoop {
1576
1553
  });
1577
1554
  messages.push({
1578
1555
  role: 'user',
1579
- content: 'You produced a substantial code block in your reply but never emitted a `write_file`, `apply_edit`, `replace_range`, or `apply_patch` tool call — so the change is NOT on disk. ' +
1556
+ content: tool_use_parser_1.AUTOMATED_NUDGE_PREFIX + 'You produced a substantial code block in your reply but never emitted a `write_file`, `apply_edit`, `replace_range`, or `apply_patch` tool call — so the change is NOT on disk. ' +
1580
1557
  'Do not ask the user to paste your code into a file themselves. Take exactly one of these actions now: (a) call `replace_range`, `apply_edit`, or `write_file` with the real change to the correct file, OR (b) say plainly that you could not locate the target file and explain what you searched for. Do not wrap up with another prose + code-fence response.'
1581
1558
  });
1582
1559
  continue;
@@ -1660,7 +1637,7 @@ class ToolUseLoop {
1660
1637
  });
1661
1638
  messages.push({
1662
1639
  role: 'user',
1663
- content: 'Your first response had reasoning but emitted NO tool call — that is a hard stall for a subagent (you exist to gather information; reasoning alone produces zero output). ' +
1640
+ content: tool_use_parser_1.AUTOMATED_NUDGE_PREFIX + 'Your first response had reasoning but emitted NO tool call — that is a hard stall for a subagent (you exist to gather information; reasoning alone produces zero output). ' +
1664
1641
  'For your next response, emit a tool call. The minimum viable starting move for ANY exploration goal is:\n\n' +
1665
1642
  '<tool_call>{"name":"list_files","params":{"path":"."}}</tool_call>\n\n' +
1666
1643
  'Copy that exact envelope as the very first thing you emit (you may keep the reasoning block before it if your model needs to think first, but the tool_call envelope MUST appear in this turn). ' +
@@ -1682,7 +1659,7 @@ class ToolUseLoop {
1682
1659
  // user saw nothing.
1683
1660
  //
1684
1661
  // The gate also covers the "regurgitated reasoning after
1685
- // native→text channel fallback" case. Mark Portfolio
1662
+ // native→text channel fallback" case. Real CLI run,
1686
1663
  // 2026-05-31T17-39-53 cleanup turn: native-tool path 500'd,
1687
1664
  // text-channel recovery prompted the model to re-emit its
1688
1665
  // pending action, but the model just echoed its prior
@@ -1694,12 +1671,7 @@ class ToolUseLoop {
1694
1671
  // before testing emptiness — if the response would render to
1695
1672
  // the user as nothing-actionable, the fallback fires and the
1696
1673
  // user sees what the model was thinking instead of silence.
1697
- const reasoningStripped = response
1698
- .replace(/<think\b[\s\S]*?<\/think\s*>/gi, '')
1699
- .replace(/<think\b[\s\S]*$/i, '')
1700
- .replace(/```bandit-reasoning\b[\s\S]*?```/gi, '')
1701
- .replace(/```bandit-reasoning\b[\s\S]*$/i, '')
1702
- .trim();
1674
+ const reasoningStripped = (0, tool_use_parser_1.stripToAnswerContent)(response);
1703
1675
  const visibleAfterStrip = (0, tool_use_parser_1.stripToolCallMarkup)(reasoningStripped).trim();
1704
1676
  if (!visibleAfterStrip) {
1705
1677
  // Pull the last 1-2 sentences of reasoning so the user sees
@@ -1725,7 +1697,7 @@ class ToolUseLoop {
1725
1697
  // and the inline empty-retry / narrate-no-action detector
1726
1698
  // already used its retry budget (consecutiveEmptyRetries >= 2)
1727
1699
  // so it couldn't nudge again, the user is left reading a
1728
- // promise the model never kept. Mark Portfolio
1700
+ // promise the model never kept. Real CLI run,
1729
1701
  // 2026-05-31T17-39-53 cleanup turn: after a native→text channel
1730
1702
  // recovery, the model emitted "Let me revert it:" with a
1731
1703
  // dangling colon and no tool call; the user saw the prose end
@@ -1742,13 +1714,13 @@ class ToolUseLoop {
1742
1714
  // The trailing colon + intent phrase combination is the
1743
1715
  // smoking gun. We DON'T also require NARRATE_VERB_RE here:
1744
1716
  // the existing inline detector's verb list misses "revert"
1745
- // (Portfolio 2026-05-31) and would miss any other one-off
1717
+ // (real run 2026-05-31) and would miss any other one-off
1746
1718
  // action verb a model might use. The colon alone is rare
1747
1719
  // enough in a legit final answer that pairing it with
1748
1720
  // "let me" / "I'll" / "we'll" / etc. is specific enough.
1749
1721
  //
1750
- // Period-terminated variant (added 2026-06-03 after Mark's
1751
- // gregoryhite-site run): the model ended with "Let me fix
1722
+ // Period-terminated variant (added 2026-06-03 after a real
1723
+ // run): the model ended with "Let me fix
1752
1724
  // all three project cards at once." — full sentence, full
1753
1725
  // stop, no colon. Both prefill and thinking-off recovery
1754
1726
  // had been spent earlier in the turn so the user saw the
@@ -1868,7 +1840,7 @@ class ToolUseLoop {
1868
1840
  toolCalls = [];
1869
1841
  messages.push({
1870
1842
  role: 'user',
1871
- content: `You have revised the plan in ${consecutiveTodoOnlyIterations + 1} consecutive iterations without executing any step. ` +
1843
+ content: tool_use_parser_1.AUTOMATED_NUDGE_PREFIX + `You have revised the plan in ${consecutiveTodoOnlyIterations + 1} consecutive iterations without executing any step. ` +
1872
1844
  'Execute the first pending task now using a concrete tool — `search_code`, `read_file`, `apply_edit`, `replace_range`, `write_file`, or `run_command`. ' +
1873
1845
  'Once a task is actually DONE (tool call succeeded), you may call `todo_write` again to mark it completed — but not to re-plan. ' +
1874
1846
  'If you cannot identify a next step, respond to the user with a short honest explanation and stop.'
@@ -1893,7 +1865,7 @@ class ToolUseLoop {
1893
1865
  });
1894
1866
  messages.push({
1895
1867
  role: 'user',
1896
- content: `You have spent ${consecutiveApplyEditOnlyIterations} consecutive iterations on apply_edit alone. ` +
1868
+ content: tool_use_parser_1.AUTOMATED_NUDGE_PREFIX + `You have spent ${consecutiveApplyEditOnlyIterations} consecutive iterations on apply_edit alone. ` +
1897
1869
  'If these are mechanical fixes of the same shape (one type annotation, one rename, one import path, one missing semicolon per call), STOP doing them one at a time — you will exhaust the iteration budget before the file is clean.\n' +
1898
1870
  '\n' +
1899
1871
  'Better tactics, in order of preference:\n' +
@@ -2007,7 +1979,7 @@ class ToolUseLoop {
2007
1979
  });
2008
1980
  messages.push({
2009
1981
  role: 'user',
2010
- content: `You just spawned ${bgSpawns.length} background subagents:\n${goalLines}\n\n` +
1982
+ content: tool_use_parser_1.AUTOMATED_NUDGE_PREFIX + `You just spawned ${bgSpawns.length} background subagents:\n${goalLines}\n\n` +
2011
1983
  'Do NOT do those same explorations yourself in the next iteration — the subagents will deliver their synopses via the auto-inject path on a later turn. ' +
2012
1984
  'Choose ONE of: ' +
2013
1985
  '(a) work on a different, independent piece of the task that those subagents are NOT covering, ' +
@@ -2048,7 +2020,7 @@ class ToolUseLoop {
2048
2020
  });
2049
2021
  messages.push({
2050
2022
  role: 'user',
2051
- content: 'You set up a plan with `todo_write` earlier but have since completed ' +
2023
+ content: tool_use_parser_1.AUTOMATED_NUDGE_PREFIX + 'You set up a plan with `todo_write` earlier but have since completed ' +
2052
2024
  `${editsSinceLastTodo} edit${editsSinceLastTodo === 1 ? '' : 's'} without updating it. ` +
2053
2025
  'Call `todo_write` now with the current status — mark finished items as `completed` and leave remaining items as `pending`. ' +
2054
2026
  "The Plan block in the user's UI mirrors your last `todo_write`, so skipping this leaves them looking at a stale checklist while real work has landed."