@miller-tech/uap 1.20.23 → 1.20.24
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -4581,6 +4581,34 @@ async def _apply_malformed_tool_guardrail(
|
|
|
4581
4581
|
)
|
|
4582
4582
|
current_issue = retry_issue
|
|
4583
4583
|
|
|
4584
|
+
# Option 2 (PR #154): When retries exhaust during review phase, reset to
|
|
4585
|
+
# bootstrap instead of returning guardrail fallback. This re-enables all
|
|
4586
|
+
# tools (including previously excluded cycling ones) and gives the model
|
|
4587
|
+
# a clean shot. The cycle detector will catch re-cycling if it recurs.
|
|
4588
|
+
if monitor.tool_turn_phase == "review":
|
|
4589
|
+
logger.warning(
|
|
4590
|
+
"TOOL RESPONSE review-phase reset: session=%s retries exhausted in review "
|
|
4591
|
+
"(kind=%s malformed=%d), resetting to bootstrap for fresh attempt",
|
|
4592
|
+
session_id,
|
|
4593
|
+
current_issue.kind or issue.kind,
|
|
4594
|
+
monitor.malformed_tool_streak,
|
|
4595
|
+
)
|
|
4596
|
+
monitor.reset_tool_turn_state(reason="review_retry_exhausted")
|
|
4597
|
+
monitor.malformed_tool_streak = 0
|
|
4598
|
+
monitor.invalid_tool_call_streak = 0
|
|
4599
|
+
# Return the best response we have — even if degraded — to keep
|
|
4600
|
+
# the conversation moving rather than returning a guardrail stub.
|
|
4601
|
+
degraded_text = _sanitize_tool_call_apology_text(
|
|
4602
|
+
_openai_message_text(working_resp)
|
|
4603
|
+
).strip()
|
|
4604
|
+
if degraded_text and not _looks_malformed_tool_payload(degraded_text):
|
|
4605
|
+
return _build_safe_text_openai_response(
|
|
4606
|
+
working_resp, degraded_text, finish_reason="tool_calls",
|
|
4607
|
+
)
|
|
4608
|
+
return _build_clean_guardrail_openai_response(
|
|
4609
|
+
working_resp, finish_reason="tool_calls",
|
|
4610
|
+
)
|
|
4611
|
+
|
|
4584
4612
|
logger.error(
|
|
4585
4613
|
"TOOL RESPONSE issue persisted after retries (session=%s kind=%s malformed=%d invalid=%d required_miss=%d); returning clean guardrail response",
|
|
4586
4614
|
session_id,
|
|
@@ -4178,3 +4178,45 @@ class TestFinalizePingPongFix(unittest.TestCase):
|
|
|
4178
4178
|
proxy.TOOL_CALL_GRAMMAR_TOOLS_COMPATIBLE = old_compat
|
|
4179
4179
|
proxy.PROXY_TOOL_CALL_GRAMMAR = old_flag
|
|
4180
4180
|
proxy.TOOL_CALL_GBNF = old_gbnf
|
|
4181
|
+
|
|
4182
|
+
|
|
4183
|
+
class TestReviewPhaseBootstrapReset(unittest.TestCase):
    """Tests for bootstrap reset after exhausted retries in review phase (PR #154).

    These tests drive a ``proxy.SessionMonitor`` directly. The reset itself is
    reproduced by ``_simulate_retry_exhaustion``, which mirrors the
    review-phase branch taken once retries are exhausted.
    NOTE(review): the helper is a copy of the production branch, not a call
    into it; if the production branch changes, update the helper (or replace
    it with a call into the guardrail entry point).
    """

    def _make_monitor_in_review(self):
        """Return a monitor in the "review" phase with a malformed streak of 3."""
        m = proxy.SessionMonitor()
        m.set_tool_turn_phase("review", reason="test")
        m.malformed_tool_streak = 3
        m.invalid_tool_call_streak = 0
        return m

    def _make_monitor_in_act(self):
        """Return a monitor put into the "act" phase with a malformed streak of 3."""
        m = proxy.SessionMonitor()
        m.set_tool_turn_phase("act", reason="test")
        m.malformed_tool_streak = 3
        return m

    @staticmethod
    def _simulate_retry_exhaustion(m):
        """Apply the retry-exhaustion reset to *m*, guarded on the review phase.

        Only a monitor whose ``tool_turn_phase`` is "review" is reset; any
        other phase is left untouched so callers can assert on both outcomes.
        """
        if m.tool_turn_phase == "review":
            m.reset_tool_turn_state(reason="review_retry_exhausted")
            m.malformed_tool_streak = 0
            m.invalid_tool_call_streak = 0

    def test_review_phase_resets_to_bootstrap(self):
        """After retries exhaust in review, monitor resets to bootstrap."""
        m = self._make_monitor_in_review()
        self.assertEqual(m.tool_turn_phase, "review")
        self.assertEqual(m.malformed_tool_streak, 3)

        self._simulate_retry_exhaustion(m)

        self.assertEqual(m.tool_turn_phase, "bootstrap")
        self.assertEqual(m.malformed_tool_streak, 0)
        # Both streak counters are cleared by the reset, so check both.
        self.assertEqual(m.invalid_tool_call_streak, 0)
        self.assertEqual(m.tool_state_stagnation_streak, 0)
        self.assertEqual(m.cycling_tool_names, [])

    def test_act_phase_does_not_reset(self):
        """In act phase, retries exhaustion should NOT trigger bootstrap reset."""
        m = self._make_monitor_in_act()
        phase_before = m.tool_turn_phase
        # The bootstrap reset only triggers for review phase
        self.assertNotEqual(phase_before, "review")

        # Run the same guarded reset; outside review it must be a no-op.
        self._simulate_retry_exhaustion(m)

        # In act phase, the normal guardrail fallback path runs instead,
        # so the phase and the malformed streak are left as they were.
        self.assertEqual(m.tool_turn_phase, phase_before)
        self.assertEqual(m.malformed_tool_streak, 3)
|