@miller-tech/uap 1.15.12 → 1.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@miller-tech/uap",
3
- "version": "1.15.12",
3
+ "version": "1.16.0",
4
4
  "description": "Autonomous AI agent memory system with CLAUDE.md protocol enforcement",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -122,6 +122,41 @@ PROXY_NO_PROGRESS_THRESHOLD = int(os.environ.get("PROXY_NO_PROGRESS_THRESHOLD",
122
122
  PROXY_CONTEXT_RELEASE_THRESHOLD = float(
123
123
  os.environ.get("PROXY_CONTEXT_RELEASE_THRESHOLD", "0.90")
124
124
  )
125
+ PROXY_TOOL_STATE_MACHINE = os.environ.get(
126
+ "PROXY_TOOL_STATE_MACHINE", "on"
127
+ ).lower() not in {
128
+ "0",
129
+ "false",
130
+ "off",
131
+ "no",
132
+ }
133
+ PROXY_TOOL_STATE_MIN_MESSAGES = int(
134
+ os.environ.get("PROXY_TOOL_STATE_MIN_MESSAGES", "6")
135
+ )
136
+ PROXY_TOOL_STATE_FORCED_BUDGET = int(
137
+ os.environ.get("PROXY_TOOL_STATE_FORCED_BUDGET", "24")
138
+ )
139
+ PROXY_TOOL_STATE_AUTO_BUDGET = int(os.environ.get("PROXY_TOOL_STATE_AUTO_BUDGET", "2"))
140
+ PROXY_TOOL_STATE_STAGNATION_THRESHOLD = int(
141
+ os.environ.get("PROXY_TOOL_STATE_STAGNATION_THRESHOLD", "12")
142
+ )
143
+ PROXY_TOOL_STATE_CYCLE_WINDOW = int(
144
+ os.environ.get("PROXY_TOOL_STATE_CYCLE_WINDOW", "8")
145
+ )
146
+ PROXY_TOOL_STATE_FINALIZE_THRESHOLD = int(
147
+ os.environ.get("PROXY_TOOL_STATE_FINALIZE_THRESHOLD", "24")
148
+ )
149
+ PROXY_TOOL_STATE_REVIEW_CYCLE_LIMIT = int(
150
+ os.environ.get("PROXY_TOOL_STATE_REVIEW_CYCLE_LIMIT", "3")
151
+ )
152
+ PROXY_TOOL_NARROWING_EXPAND_ON_LOOP = os.environ.get(
153
+ "PROXY_TOOL_NARROWING_EXPAND_ON_LOOP", "on"
154
+ ).lower() not in {
155
+ "0",
156
+ "false",
157
+ "off",
158
+ "no",
159
+ }
125
160
  PROXY_GUARDRAIL_RETRY = os.environ.get("PROXY_GUARDRAIL_RETRY", "on").lower() not in {
126
161
  "0",
127
162
  "false",
@@ -404,6 +439,14 @@ class SessionMonitor:
404
439
  forced_dampener_triggers: int = 0 # number of dampener activations
405
440
  arg_preflight_rejections: int = 0 # rejected tool calls from arg preflight
406
441
  arg_preflight_repairs: int = 0 # sanitized tool call args accepted
442
+ tool_turn_phase: str = "bootstrap" # bootstrap -> act -> review
443
+ tool_state_forced_budget_remaining: int = 0
444
+ tool_state_auto_budget_remaining: int = 0
445
+ tool_state_stagnation_streak: int = 0
446
+ tool_state_transitions: int = 0
447
+ tool_state_review_cycles: int = 0
448
+ last_tool_fingerprint: str = ""
449
+ finalize_turn_active: bool = False
407
450
  last_seen_ts: float = 0.0
408
451
 
409
452
  def record_request(self, estimated_tokens: int):
@@ -540,6 +583,69 @@ class SessionMonitor:
540
583
 
541
584
  return False, 0
542
585
 
586
+ def detect_tool_cycle(self, window: int = 8) -> tuple[bool, int]:
587
+ """Detect low-entropy tool cycles (A/B oscillation style loops)."""
588
+ if len(self.tool_call_history) < window:
589
+ return False, 0
590
+
591
+ recent = [fp for fp in self.tool_call_history[-window:] if fp]
592
+ if len(recent) < window:
593
+ return False, 0
594
+
595
+ unique = list(dict.fromkeys(recent))
596
+ if len(unique) == 1:
597
+ target = unique[0]
598
+ count = 0
599
+ for fp in reversed(self.tool_call_history):
600
+ if fp == target:
601
+ count += 1
602
+ else:
603
+ break
604
+ return True, count
605
+
606
+ if len(unique) > 2:
607
+ return False, 0
608
+
609
+ counts: dict[str, int] = {}
610
+ for fp in recent:
611
+ counts[fp] = counts.get(fp, 0) + 1
612
+ if counts and min(counts.values()) < 2:
613
+ return False, 0
614
+
615
+ transitions = sum(1 for a, b in zip(recent, recent[1:]) if a != b)
616
+ if transitions < window // 2:
617
+ return False, 0
618
+
619
+ allowed = set(counts.keys())
620
+ count = 0
621
+ for fp in reversed(self.tool_call_history):
622
+ if fp in allowed:
623
+ count += 1
624
+ else:
625
+ break
626
+ return True, count
627
+
628
+ def set_tool_turn_phase(self, phase: str, reason: str = ""):
629
+ if phase == self.tool_turn_phase:
630
+ return
631
+ old_phase = self.tool_turn_phase
632
+ self.tool_turn_phase = phase
633
+ self.tool_state_transitions += 1
634
+ logger.info(
635
+ "TOOL STATE MACHINE: phase %s -> %s%s",
636
+ old_phase,
637
+ phase,
638
+ f" reason={reason}" if reason else "",
639
+ )
640
+
641
+ def reset_tool_turn_state(self, reason: str = ""):
642
+ self.set_tool_turn_phase("bootstrap", reason=reason)
643
+ self.tool_state_forced_budget_remaining = 0
644
+ self.tool_state_auto_budget_remaining = 0
645
+ self.tool_state_stagnation_streak = 0
646
+ self.tool_state_review_cycles = 0
647
+ self.last_tool_fingerprint = ""
648
+
543
649
  def guardrail_streak(self) -> int:
544
650
  """Highest current streak among malformed/invalid tool outputs."""
545
651
  return max(self.malformed_tool_streak, self.invalid_tool_call_streak)
@@ -602,6 +708,9 @@ class SessionMonitor:
602
708
  return False
603
709
 
604
710
  is_looping, repeat_count = self.detect_tool_loop(window=PROXY_LOOP_WINDOW)
711
+ cycle_looping, cycle_repeat = self.detect_tool_cycle(
712
+ window=max(PROXY_LOOP_WINDOW, PROXY_TOOL_STATE_CYCLE_WINDOW)
713
+ )
605
714
 
606
715
  # Pattern 1: Detected tool call loop
607
716
  if (
@@ -618,6 +727,20 @@ class SessionMonitor:
618
727
  self.loop_warnings_emitted += 1
619
728
  return True
620
729
 
730
+ if (
731
+ cycle_looping
732
+ and cycle_repeat >= PROXY_LOOP_REPEAT_THRESHOLD
733
+ and self.tool_state_stagnation_streak >= max(1, PROXY_NO_PROGRESS_THRESHOLD)
734
+ ):
735
+ logger.warning(
736
+ "LOOP BREAKER: low-entropy tool cycle repeated %d turns with stagnation=%d. "
737
+ "Releasing tool_choice to 'auto'.",
738
+ cycle_repeat,
739
+ self.tool_state_stagnation_streak,
740
+ )
741
+ self.loop_warnings_emitted += 1
742
+ return True
743
+
621
744
  # Pattern 2: Too many consecutive forced requests
622
745
  if (
623
746
  self.consecutive_forced_count >= PROXY_FORCED_THRESHOLD
@@ -979,13 +1102,22 @@ async def lifespan(app: FastAPI):
979
1102
  _resolve_prune_target_fraction() * 100,
980
1103
  )
981
1104
  logger.info(
982
- "Guardrails: malformed=%s stream_strict=%s force_non_stream=%s args_preflight=%s tool_narrowing=%s thinking_off_on_tools=%s dampener=%s(%d/%d/%d/%d->%d) contamination_breaker=%s(%d forced=%d required_miss=%d) analysis_only_route=%s(min_tools=%d,max_msgs=%d) grammar=%s(required_only=%s loaded=%s tools_compatible=%s path=%s)",
1105
+ "Guardrails: malformed=%s stream_strict=%s force_non_stream=%s args_preflight=%s tool_narrowing=%s expand_on_loop=%s thinking_off_on_tools=%s state_machine=%s(min_msgs=%d forced=%d auto=%d stagnation=%d cycle=%d finalize=%d review_cycles=%d) dampener=%s(%d/%d/%d/%d->%d) contamination_breaker=%s(%d forced=%d required_miss=%d) analysis_only_route=%s(min_tools=%d,max_msgs=%d) grammar=%s(required_only=%s loaded=%s tools_compatible=%s path=%s)",
983
1106
  PROXY_MALFORMED_TOOL_GUARDRAIL,
984
1107
  PROXY_MALFORMED_TOOL_STREAM_STRICT,
985
1108
  PROXY_FORCE_NON_STREAM,
986
1109
  PROXY_TOOL_ARGS_PREFLIGHT,
987
1110
  PROXY_TOOL_NARROWING,
1111
+ PROXY_TOOL_NARROWING_EXPAND_ON_LOOP,
988
1112
  PROXY_DISABLE_THINKING_ON_TOOL_TURNS,
1113
+ PROXY_TOOL_STATE_MACHINE,
1114
+ PROXY_TOOL_STATE_MIN_MESSAGES,
1115
+ PROXY_TOOL_STATE_FORCED_BUDGET,
1116
+ PROXY_TOOL_STATE_AUTO_BUDGET,
1117
+ PROXY_TOOL_STATE_STAGNATION_THRESHOLD,
1118
+ PROXY_TOOL_STATE_CYCLE_WINDOW,
1119
+ PROXY_TOOL_STATE_FINALIZE_THRESHOLD,
1120
+ PROXY_TOOL_STATE_REVIEW_CYCLE_LIMIT,
989
1121
  PROXY_FORCED_TOOL_DAMPENER,
990
1122
  PROXY_FORCED_TOOL_DAMPENER_MIN_FORCED,
991
1123
  PROXY_FORCED_TOOL_DAMPENER_BAD_STREAK,
@@ -1343,6 +1475,14 @@ def _last_user_has_tool_result(anthropic_body: dict) -> bool:
1343
1475
  return False
1344
1476
 
1345
1477
 
1478
+ def _conversation_has_tool_results(anthropic_body: dict) -> bool:
1479
+ return any(
1480
+ _message_has_tool_result(msg.get("content"))
1481
+ for msg in anthropic_body.get("messages", [])
1482
+ if isinstance(msg, dict)
1483
+ )
1484
+
1485
+
1346
1486
  def _sanitize_tool_schema_for_llama(schema):
1347
1487
  """Remove JSON Schema keywords that generate unsupported regex grammar.
1348
1488
 
@@ -1351,19 +1491,28 @@ def _sanitize_tool_schema_for_llama(schema):
1351
1491
  """
1352
1492
 
1353
1493
  removed = 0
1494
+ property_map_keys = {"properties", "definitions", "$defs", "dependentSchemas"}
1354
1495
 
1355
- def _walk(node):
1496
+ def _walk(node, parent_key=None):
1356
1497
  nonlocal removed
1357
1498
  if isinstance(node, dict):
1358
1499
  cleaned = {}
1359
1500
  for key, value in node.items():
1360
- if key in {"pattern", "patternProperties"}:
1501
+ key_is_property_name = parent_key in property_map_keys
1502
+ if (
1503
+ key == "pattern"
1504
+ and isinstance(value, str)
1505
+ and not key_is_property_name
1506
+ ):
1507
+ removed += 1
1508
+ continue
1509
+ if key == "patternProperties" and not key_is_property_name:
1361
1510
  removed += 1
1362
1511
  continue
1363
- cleaned[key] = _walk(value)
1512
+ cleaned[key] = _walk(value, key)
1364
1513
  return cleaned
1365
1514
  if isinstance(node, list):
1366
- return [_walk(item) for item in node]
1515
+ return [_walk(item, parent_key) for item in node]
1367
1516
  return node
1368
1517
 
1369
1518
  return _walk(schema), removed
@@ -1424,6 +1573,18 @@ def _narrow_tools_for_request(
1424
1573
  query_text = _latest_user_text(anthropic_body).lower()
1425
1574
  query_tokens = _tokenize_for_tool_ranking(query_text)
1426
1575
  if not query_tokens:
1576
+ n_msgs = len(anthropic_body.get("messages", []))
1577
+ if (
1578
+ PROXY_TOOL_NARROWING_EXPAND_ON_LOOP
1579
+ and _conversation_has_tool_results(anthropic_body)
1580
+ and n_msgs >= 3
1581
+ ):
1582
+ logger.info(
1583
+ "TOOL NARROWING: %d tools retained (no query tokens during active loop)",
1584
+ len(openai_tools),
1585
+ )
1586
+ return openai_tools
1587
+
1427
1588
  narrowed = openai_tools[:keep]
1428
1589
  logger.info(
1429
1590
  "TOOL NARROWING: %d -> %d tools (no query tokens)",
@@ -1461,6 +1622,182 @@ def _narrow_tools_for_request(
1461
1622
  return narrowed
1462
1623
 
1463
1624
 
1625
+ def _update_tool_state_stagnation(
1626
+ monitor: SessionMonitor,
1627
+ latest_tool_fingerprint: str,
1628
+ last_user_has_tool_result: bool,
1629
+ ) -> None:
1630
+ if not PROXY_TOOL_STATE_MACHINE:
1631
+ return
1632
+
1633
+ if not latest_tool_fingerprint or not last_user_has_tool_result:
1634
+ monitor.tool_state_stagnation_streak = 0
1635
+ monitor.last_tool_fingerprint = latest_tool_fingerprint
1636
+ return
1637
+
1638
+ repeated = latest_tool_fingerprint == monitor.last_tool_fingerprint
1639
+ recently_seen = latest_tool_fingerprint in monitor.tool_call_history[-4:-1]
1640
+
1641
+ if repeated or recently_seen:
1642
+ monitor.tool_state_stagnation_streak += 1
1643
+ else:
1644
+ monitor.tool_state_stagnation_streak = 0
1645
+
1646
+ monitor.last_tool_fingerprint = latest_tool_fingerprint
1647
+
1648
+
1649
+ def _resolve_state_machine_tool_choice(
1650
+ anthropic_body: dict,
1651
+ monitor: SessionMonitor,
1652
+ has_tool_results: bool,
1653
+ last_user_has_tool_result: bool,
1654
+ ) -> tuple[str | None, str]:
1655
+ if not PROXY_TOOL_STATE_MACHINE:
1656
+ return None, "disabled"
1657
+
1658
+ n_msgs = len(anthropic_body.get("messages", []))
1659
+ latest_user_text = _latest_user_text(anthropic_body).strip()
1660
+ if latest_user_text and not last_user_has_tool_result:
1661
+ monitor.tool_call_history = []
1662
+ if n_msgs <= 1:
1663
+ monitor.forced_auto_cooldown_turns = 0
1664
+ monitor.consecutive_forced_count = 0
1665
+ monitor.no_progress_streak = 0
1666
+ monitor.malformed_tool_streak = 0
1667
+ monitor.invalid_tool_call_streak = 0
1668
+ monitor.required_tool_miss_streak = 0
1669
+ monitor.reset_tool_turn_state(reason="fresh_user_text")
1670
+ return None, "fresh_user_text"
1671
+
1672
+ active_loop = (
1673
+ has_tool_results
1674
+ and last_user_has_tool_result
1675
+ and n_msgs >= max(3, PROXY_TOOL_STATE_MIN_MESSAGES)
1676
+ )
1677
+ if not active_loop:
1678
+ if not has_tool_results:
1679
+ monitor.tool_call_history = []
1680
+ if n_msgs <= 1:
1681
+ monitor.forced_auto_cooldown_turns = 0
1682
+ monitor.consecutive_forced_count = 0
1683
+ monitor.no_progress_streak = 0
1684
+ monitor.malformed_tool_streak = 0
1685
+ monitor.invalid_tool_call_streak = 0
1686
+ monitor.required_tool_miss_streak = 0
1687
+ monitor.reset_tool_turn_state(reason="inactive_loop")
1688
+ return None, "inactive_loop"
1689
+
1690
+ if monitor.tool_turn_phase == "bootstrap":
1691
+ monitor.set_tool_turn_phase("act", reason="loop_detected")
1692
+ monitor.tool_state_forced_budget_remaining = max(
1693
+ 1, PROXY_TOOL_STATE_FORCED_BUDGET
1694
+ )
1695
+ monitor.tool_state_auto_budget_remaining = 0
1696
+
1697
+ cycle_looping, cycle_repeat = monitor.detect_tool_cycle(
1698
+ window=max(2, PROXY_TOOL_STATE_CYCLE_WINDOW)
1699
+ )
1700
+ stagnating = monitor.tool_state_stagnation_streak >= max(
1701
+ 1, PROXY_TOOL_STATE_STAGNATION_THRESHOLD
1702
+ )
1703
+ finalize_threshold = max(
1704
+ max(1, PROXY_TOOL_STATE_FINALIZE_THRESHOLD),
1705
+ max(1, PROXY_TOOL_STATE_STAGNATION_THRESHOLD) * 2,
1706
+ )
1707
+ review_cycle_limit = max(1, PROXY_TOOL_STATE_REVIEW_CYCLE_LIMIT)
1708
+
1709
+ if cycle_looping and monitor.tool_state_stagnation_streak >= finalize_threshold:
1710
+ monitor.set_tool_turn_phase("finalize", reason="stagnation_limit")
1711
+ monitor.tool_state_auto_budget_remaining = 1
1712
+ logger.warning(
1713
+ "TOOL STATE MACHINE: forcing finalize turn after prolonged cycle (repeat=%d stagnation=%d)",
1714
+ cycle_repeat,
1715
+ monitor.tool_state_stagnation_streak,
1716
+ )
1717
+ return "finalize", "stagnation_limit"
1718
+
1719
+ if (
1720
+ monitor.tool_turn_phase in {"act", "review"}
1721
+ and monitor.tool_state_review_cycles >= review_cycle_limit
1722
+ ):
1723
+ monitor.set_tool_turn_phase("finalize", reason="review_cycle_limit")
1724
+ monitor.tool_state_auto_budget_remaining = 1
1725
+ logger.warning(
1726
+ "TOOL STATE MACHINE: forcing finalize turn after repeated review cycles (cycles=%d stagnation=%d)",
1727
+ monitor.tool_state_review_cycles,
1728
+ monitor.tool_state_stagnation_streak,
1729
+ )
1730
+ return "finalize", "review_cycle_limit"
1731
+
1732
+ if monitor.tool_turn_phase == "act":
1733
+ if cycle_looping or stagnating:
1734
+ reason = "cycle_detected" if cycle_looping else "stagnation"
1735
+ monitor.set_tool_turn_phase("review", reason=reason)
1736
+ monitor.tool_state_review_cycles += 1
1737
+ monitor.tool_state_auto_budget_remaining = max(
1738
+ 1, PROXY_TOOL_STATE_AUTO_BUDGET
1739
+ )
1740
+ monitor.tool_state_forced_budget_remaining = max(
1741
+ 1, PROXY_TOOL_STATE_FORCED_BUDGET // 2
1742
+ )
1743
+ logger.warning(
1744
+ "TOOL STATE MACHINE: entering review (cycle=%s repeat=%d stagnation=%d cycles=%d)",
1745
+ cycle_looping,
1746
+ cycle_repeat,
1747
+ monitor.tool_state_stagnation_streak,
1748
+ monitor.tool_state_review_cycles,
1749
+ )
1750
+ return "auto", reason
1751
+
1752
+ if monitor.tool_state_forced_budget_remaining <= 0:
1753
+ monitor.set_tool_turn_phase("review", reason="forced_budget_exhausted")
1754
+ monitor.tool_state_review_cycles += 1
1755
+ monitor.tool_state_auto_budget_remaining = max(
1756
+ 1, PROXY_TOOL_STATE_AUTO_BUDGET
1757
+ )
1758
+ monitor.tool_state_forced_budget_remaining = max(
1759
+ 1, PROXY_TOOL_STATE_FORCED_BUDGET // 2
1760
+ )
1761
+ logger.warning(
1762
+ "TOOL STATE MACHINE: forced budget exhausted, entering review (cycles=%d)",
1763
+ monitor.tool_state_review_cycles,
1764
+ )
1765
+ return "auto", "forced_budget_exhausted"
1766
+
1767
+ monitor.tool_state_forced_budget_remaining -= 1
1768
+ return "required", "act"
1769
+
1770
+ if monitor.tool_turn_phase == "review":
1771
+ if monitor.tool_state_auto_budget_remaining <= 0:
1772
+ monitor.set_tool_turn_phase("act", reason="review_budget_spent")
1773
+ monitor.tool_state_forced_budget_remaining = max(
1774
+ 1, PROXY_TOOL_STATE_FORCED_BUDGET // 2
1775
+ )
1776
+ return "required", "review_complete"
1777
+
1778
+ monitor.tool_state_auto_budget_remaining -= 1
1779
+ if monitor.tool_state_auto_budget_remaining == 0:
1780
+ monitor.set_tool_turn_phase("act", reason="review_budget_spent")
1781
+ monitor.tool_state_forced_budget_remaining = max(
1782
+ 1, PROXY_TOOL_STATE_FORCED_BUDGET // 2
1783
+ )
1784
+ return "required", "review_complete"
1785
+ return "auto", "review"
1786
+
1787
+ if monitor.tool_turn_phase == "finalize":
1788
+ if monitor.tool_state_auto_budget_remaining <= 0:
1789
+ monitor.reset_tool_turn_state(reason="finalize_complete")
1790
+ return None, "finalize_complete"
1791
+
1792
+ monitor.tool_state_auto_budget_remaining -= 1
1793
+ if monitor.tool_state_auto_budget_remaining == 0:
1794
+ monitor.reset_tool_turn_state(reason="finalize_complete")
1795
+ return "finalize", "finalize"
1796
+
1797
+ monitor.reset_tool_turn_state(reason="unknown_phase")
1798
+ return None, "unknown_phase"
1799
+
1800
+
1464
1801
  def build_openai_request(anthropic_body: dict, monitor: SessionMonitor) -> dict:
1465
1802
  """Build an OpenAI Chat Completions request from an Anthropic Messages request."""
1466
1803
  openai_body = {
@@ -1489,10 +1826,26 @@ def build_openai_request(anthropic_body: dict, monitor: SessionMonitor) -> dict:
1489
1826
  )
1490
1827
 
1491
1828
  if "max_tokens" in anthropic_body:
1829
+ requested_raw = max(1, int(anthropic_body["max_tokens"]))
1830
+
1492
1831
  # Enforce configurable minimum floor for thinking mode: model needs
1493
1832
  # tokens for reasoning (<think>...</think>) plus actual response/tool
1494
1833
  # calls. Set PROXY_MAX_TOKENS_FLOOR=0 to disable this floor.
1495
- requested_max = _resolve_max_tokens_request(anthropic_body["max_tokens"])
1834
+ floor_bypassed_for_tool_turn = (
1835
+ has_tools
1836
+ and PROXY_DISABLE_THINKING_ON_TOOL_TURNS
1837
+ and PROXY_MAX_TOKENS_FLOOR > 0
1838
+ )
1839
+ if floor_bypassed_for_tool_turn:
1840
+ requested_max = requested_raw
1841
+ if requested_raw < PROXY_MAX_TOKENS_FLOOR:
1842
+ logger.info(
1843
+ "MAX_TOKENS floor bypassed for tool turn with thinking disabled: requested=%d floor=%d",
1844
+ requested_raw,
1845
+ PROXY_MAX_TOKENS_FLOOR,
1846
+ )
1847
+ else:
1848
+ requested_max = _resolve_max_tokens_request(requested_raw)
1496
1849
 
1497
1850
  # Option E: Smart max_tokens capping — prevent the response from
1498
1851
  # consuming so many tokens that the NEXT turn's input won't fit.
@@ -1557,18 +1910,25 @@ def build_openai_request(anthropic_body: dict, monitor: SessionMonitor) -> dict:
1557
1910
  # a tool call loop (same tools called repeatedly), to prevent
1558
1911
  # runaway token consumption.
1559
1912
  n_msgs = len(anthropic_body.get("messages", []))
1560
- has_tool_results = any(
1561
- isinstance(m.get("content"), list)
1562
- and any(
1563
- isinstance(b, dict) and b.get("type") == "tool_result"
1564
- for b in m.get("content", [])
1565
- )
1566
- for m in anthropic_body.get("messages", [])
1567
- )
1913
+ has_tool_results = _conversation_has_tool_results(anthropic_body)
1568
1914
 
1569
1915
  # Record tool calls from the last assistant message for loop detection
1570
- _record_last_assistant_tool_calls(anthropic_body, monitor)
1916
+ latest_tool_fingerprint = _record_last_assistant_tool_calls(
1917
+ anthropic_body, monitor
1918
+ )
1571
1919
  last_user_has_tool_result = _last_user_has_tool_result(anthropic_body)
1920
+ _update_tool_state_stagnation(
1921
+ monitor,
1922
+ latest_tool_fingerprint,
1923
+ last_user_has_tool_result,
1924
+ )
1925
+ monitor.finalize_turn_active = False
1926
+ state_choice, state_reason = _resolve_state_machine_tool_choice(
1927
+ anthropic_body,
1928
+ monitor,
1929
+ has_tool_results,
1930
+ last_user_has_tool_result,
1931
+ )
1572
1932
 
1573
1933
  # Check if forced-tool dampener or loop breaker should override tool_choice
1574
1934
  if monitor.consume_forced_auto_turn():
@@ -1579,6 +1939,47 @@ def build_openai_request(anthropic_body: dict, monitor: SessionMonitor) -> dict:
1579
1939
  "tool_choice set to 'auto' by FORCED-TOOL DAMPENER (remaining=%d)",
1580
1940
  monitor.forced_auto_cooldown_turns,
1581
1941
  )
1942
+ elif state_choice == "auto":
1943
+ openai_body["tool_choice"] = "auto"
1944
+ monitor.consecutive_forced_count = 0
1945
+ monitor.no_progress_streak = 0
1946
+ logger.info(
1947
+ "tool_choice set to 'auto' by TOOL STATE MACHINE (phase=%s reason=%s auto_budget=%d stagnation=%d)",
1948
+ monitor.tool_turn_phase,
1949
+ state_reason,
1950
+ monitor.tool_state_auto_budget_remaining,
1951
+ monitor.tool_state_stagnation_streak,
1952
+ )
1953
+ elif state_choice == "finalize":
1954
+ openai_body.pop("tool_choice", None)
1955
+ openai_body.pop("tools", None)
1956
+ monitor.finalize_turn_active = True
1957
+ monitor.consecutive_forced_count = 0
1958
+ monitor.no_progress_streak = 0
1959
+ logger.warning(
1960
+ "TOOL STATE MACHINE: tools temporarily disabled for finalize turn (reason=%s)",
1961
+ state_reason,
1962
+ )
1963
+ elif state_choice == "required":
1964
+ openai_body["tool_choice"] = "required"
1965
+ monitor.consecutive_forced_count += 1
1966
+ monitor.no_progress_streak = (
1967
+ 0 if last_user_has_tool_result else monitor.no_progress_streak + 1
1968
+ )
1969
+ logger.info(
1970
+ "tool_choice forced to 'required' by TOOL STATE MACHINE (phase=%s reason=%s forced_budget=%d)",
1971
+ monitor.tool_turn_phase,
1972
+ state_reason,
1973
+ monitor.tool_state_forced_budget_remaining,
1974
+ )
1975
+ elif state_reason in {"fresh_user_text", "inactive_loop"} and n_msgs <= 1:
1976
+ monitor.consecutive_forced_count = 0
1977
+ monitor.no_progress_streak = 0
1978
+ logger.info(
1979
+ "tool_choice left unchanged after state reset (reason=%s n_msgs=%d)",
1980
+ state_reason,
1981
+ n_msgs,
1982
+ )
1582
1983
  elif monitor.should_release_tool_choice():
1583
1984
  openai_body["tool_choice"] = "auto"
1584
1985
  monitor.consecutive_forced_count = 0
@@ -1605,6 +2006,8 @@ def build_openai_request(anthropic_body: dict, monitor: SessionMonitor) -> dict:
1605
2006
  else:
1606
2007
  monitor.consecutive_forced_count = 0
1607
2008
  monitor.no_progress_streak = 0
2009
+ if not has_tool_results:
2010
+ monitor.reset_tool_turn_state(reason="no_tool_results")
1608
2011
 
1609
2012
  if PROXY_DISABLE_THINKING_ON_TOOL_TURNS:
1610
2013
  openai_body["enable_thinking"] = False
@@ -1617,7 +2020,9 @@ def build_openai_request(anthropic_body: dict, monitor: SessionMonitor) -> dict:
1617
2020
  return openai_body
1618
2021
 
1619
2022
 
1620
- def _record_last_assistant_tool_calls(anthropic_body: dict, monitor: SessionMonitor):
2023
+ def _record_last_assistant_tool_calls(
2024
+ anthropic_body: dict, monitor: SessionMonitor
2025
+ ) -> str:
1621
2026
  """Extract tool call names from the last assistant message and record
1622
2027
  them in the session monitor for loop detection."""
1623
2028
  messages = anthropic_body.get("messages", [])
@@ -1633,6 +2038,8 @@ def _record_last_assistant_tool_calls(anthropic_body: dict, monitor: SessionMoni
1633
2038
  break
1634
2039
  if tool_names:
1635
2040
  monitor.record_tool_calls(tool_names)
2041
+ return "|".join(sorted(tool_names))
2042
+ return ""
1636
2043
 
1637
2044
 
1638
2045
  def _is_unexpected_end_turn(openai_resp: dict, anthropic_body: dict) -> bool:
@@ -1652,14 +2059,7 @@ def _is_unexpected_end_turn(openai_resp: dict, anthropic_body: dict) -> bool:
1652
2059
  if "tools" not in anthropic_body:
1653
2060
  return False
1654
2061
 
1655
- has_tool_results = any(
1656
- isinstance(m.get("content"), list)
1657
- and any(
1658
- isinstance(b, dict) and b.get("type") == "tool_result"
1659
- for b in m.get("content", [])
1660
- )
1661
- for m in anthropic_body.get("messages", [])
1662
- )
2062
+ has_tool_results = _conversation_has_tool_results(anthropic_body)
1663
2063
 
1664
2064
  return has_tool_results or _last_assistant_was_text_only(anthropic_body)
1665
2065
 
@@ -2809,6 +3209,22 @@ async def _apply_unexpected_end_turn_guardrail(
2809
3209
  if not PROXY_GUARDRAIL_RETRY:
2810
3210
  return openai_resp
2811
3211
 
3212
+ if monitor.finalize_turn_active:
3213
+ logger.info("GUARDRAIL: skipped unexpected_end_turn retry on finalize turn")
3214
+ return openai_resp
3215
+
3216
+ if monitor.tool_turn_phase == "act" and openai_body.get("tool_choice") == "auto":
3217
+ logger.info(
3218
+ "GUARDRAIL: skipped unexpected_end_turn retry during act auto release"
3219
+ )
3220
+ return openai_resp
3221
+
3222
+ if monitor.tool_turn_phase == "review" and openai_body.get("tool_choice") == "auto":
3223
+ logger.info(
3224
+ "GUARDRAIL: skipped unexpected_end_turn retry during review auto turn"
3225
+ )
3226
+ return openai_resp
3227
+
2812
3228
  if not _is_unexpected_end_turn(openai_resp, anthropic_body):
2813
3229
  return openai_resp
2814
3230
 
@@ -2864,6 +3280,10 @@ async def _apply_malformed_tool_guardrail(
2864
3280
  if not PROXY_MALFORMED_TOOL_GUARDRAIL:
2865
3281
  return openai_resp
2866
3282
 
3283
+ if monitor.finalize_turn_active:
3284
+ logger.info("GUARDRAIL: skipped malformed-tool retries on finalize turn")
3285
+ return openai_resp
3286
+
2867
3287
  working_resp = openai_resp
2868
3288
  repair_count = 0
2869
3289
  if PROXY_TOOL_ARGS_PREFLIGHT and _openai_has_tool_calls(openai_resp):
@@ -3064,6 +3484,7 @@ def _maybe_apply_session_contamination_breaker(
3064
3484
  monitor.malformed_tool_streak = 0
3065
3485
  monitor.invalid_tool_call_streak = 0
3066
3486
  monitor.required_tool_miss_streak = 0
3487
+ monitor.reset_tool_turn_state(reason="contamination_guardrail_soft_reset")
3067
3488
  return anthropic_body
3068
3489
 
3069
3490
  head = messages[:1]
@@ -3088,6 +3509,7 @@ def _maybe_apply_session_contamination_breaker(
3088
3509
  monitor.no_progress_streak = 0
3089
3510
  monitor.consecutive_forced_count = 0
3090
3511
  monitor.forced_auto_cooldown_turns = 0
3512
+ monitor.reset_tool_turn_state(reason="contamination_guardrail_reset")
3091
3513
  logger.warning(
3092
3514
  "SESSION CONTAMINATION BREAKER: session=%s reset applied, kept=%d messages (bad_streak=%d forced=%d required_miss=%d)",
3093
3515
  session_id,
@@ -4073,9 +4495,22 @@ async def context_status(request: Request):
4073
4495
  "forced_auto_cooldown_turns": monitor.forced_auto_cooldown_turns,
4074
4496
  "forced_dampener_triggers": monitor.forced_dampener_triggers,
4075
4497
  "contamination_resets": monitor.contamination_resets,
4498
+ "tool_turn_phase": monitor.tool_turn_phase,
4499
+ "tool_state_forced_budget_remaining": monitor.tool_state_forced_budget_remaining,
4500
+ "tool_state_auto_budget_remaining": monitor.tool_state_auto_budget_remaining,
4501
+ "tool_state_stagnation_streak": monitor.tool_state_stagnation_streak,
4502
+ "tool_state_transitions": monitor.tool_state_transitions,
4503
+ "tool_state_review_cycles": monitor.tool_state_review_cycles,
4504
+ "finalize_turn_active": monitor.finalize_turn_active,
4076
4505
  "tool_call_history_len": len(monitor.tool_call_history),
4077
4506
  "is_looping": monitor.detect_tool_loop(window=PROXY_LOOP_WINDOW)[0],
4078
4507
  "loop_repeat_count": monitor.detect_tool_loop(window=PROXY_LOOP_WINDOW)[1],
4508
+ "is_cycle_looping": monitor.detect_tool_cycle(
4509
+ window=max(2, PROXY_TOOL_STATE_CYCLE_WINDOW)
4510
+ )[0],
4511
+ "cycle_repeat_count": monitor.detect_tool_cycle(
4512
+ window=max(2, PROXY_TOOL_STATE_CYCLE_WINDOW)
4513
+ )[1],
4079
4514
  "recent_tool_patterns": monitor.tool_call_history[-5:],
4080
4515
  },
4081
4516
  }