@miller-tech/uap 1.20.23 → 1.20.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@miller-tech/uap",
3
- "version": "1.20.23",
3
+ "version": "1.20.24",
4
4
  "description": "Autonomous AI agent memory system with CLAUDE.md protocol enforcement",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -4581,6 +4581,34 @@ async def _apply_malformed_tool_guardrail(
4581
4581
  )
4582
4582
  current_issue = retry_issue
4583
4583
 
4584
+ # Option 2 (PR #154): When retries exhaust during review phase, reset to
4585
+ # bootstrap instead of returning guardrail fallback. This re-enables all
4586
+ # tools (including previously excluded cycling ones) and gives the model
4587
+ # a clean shot. The cycle detector will catch re-cycling if it recurs.
4588
+ if monitor.tool_turn_phase == "review":
4589
+ logger.warning(
4590
+ "TOOL RESPONSE review-phase reset: session=%s retries exhausted in review "
4591
+ "(kind=%s malformed=%d), resetting to bootstrap for fresh attempt",
4592
+ session_id,
4593
+ current_issue.kind or issue.kind,
4594
+ monitor.malformed_tool_streak,
4595
+ )
4596
+ monitor.reset_tool_turn_state(reason="review_retry_exhausted")
4597
+ monitor.malformed_tool_streak = 0
4598
+ monitor.invalid_tool_call_streak = 0
4599
+ # Prefer the best response we have — even if degraded — to keep the
4600
+ # conversation moving; fall back to the guardrail stub only when no usable text remains.
4601
+ degraded_text = _sanitize_tool_call_apology_text(
4602
+ _openai_message_text(working_resp)
4603
+ ).strip()
4604
+ if degraded_text and not _looks_malformed_tool_payload(degraded_text):
4605
+ return _build_safe_text_openai_response(
4606
+ working_resp, degraded_text, finish_reason="tool_calls",
4607
+ )
4608
+ return _build_clean_guardrail_openai_response(
4609
+ working_resp, finish_reason="tool_calls",
4610
+ )
4611
+
4584
4612
  logger.error(
4585
4613
  "TOOL RESPONSE issue persisted after retries (session=%s kind=%s malformed=%d invalid=%d required_miss=%d); returning clean guardrail response",
4586
4614
  session_id,
@@ -4178,3 +4178,45 @@ class TestFinalizePingPongFix(unittest.TestCase):
4178
4178
  proxy.TOOL_CALL_GRAMMAR_TOOLS_COMPATIBLE = old_compat
4179
4179
  proxy.PROXY_TOOL_CALL_GRAMMAR = old_flag
4180
4180
  proxy.TOOL_CALL_GBNF = old_gbnf
4181
+
4182
+
4183
+ class TestReviewPhaseBootstrapReset(unittest.TestCase):
4184
+ """Tests for bootstrap reset after exhausted retries in review phase (PR #154)."""
4185
+
4186
+ def _make_monitor_in_review(self):
4187
+ m = proxy.SessionMonitor()
4188
+ m.set_tool_turn_phase("review", reason="test")
4189
+ m.malformed_tool_streak = 3
4190
+ m.invalid_tool_call_streak = 0
4191
+ return m
4192
+
4193
+ def _make_monitor_in_act(self):
4194
+ m = proxy.SessionMonitor()
4195
+ m.set_tool_turn_phase("act", reason="test")
4196
+ m.malformed_tool_streak = 3
4197
+ return m
4198
+
4199
+ def test_review_phase_resets_to_bootstrap(self):
4200
+ """After retries exhaust in review, monitor resets to bootstrap."""
4201
+ m = self._make_monitor_in_review()
4202
+ self.assertEqual(m.tool_turn_phase, "review")
4203
+ self.assertEqual(m.malformed_tool_streak, 3)
4204
+
4205
+ # Simulate what happens after retry exhaustion: the code checks
4206
+ # monitor.tool_turn_phase == "review" and resets
4207
+ if m.tool_turn_phase == "review":
4208
+ m.reset_tool_turn_state(reason="review_retry_exhausted")
4209
+ m.malformed_tool_streak = 0
4210
+ m.invalid_tool_call_streak = 0
4211
+
4212
+ self.assertEqual(m.tool_turn_phase, "bootstrap")
4213
+ self.assertEqual(m.malformed_tool_streak, 0)
4214
+ self.assertEqual(m.tool_state_stagnation_streak, 0)
4215
+ self.assertEqual(m.cycling_tool_names, [])
4216
+
4217
+ def test_act_phase_does_not_reset(self):
4218
+ """In act phase, retry exhaustion should NOT trigger a bootstrap reset."""
4219
+ m = self._make_monitor_in_act()
4220
+ # The bootstrap reset only triggers for review phase
4221
+ self.assertNotEqual(m.tool_turn_phase, "review")
4222
+ # In act phase, the normal guardrail fallback path runs instead