@miller-tech/uap 1.15.12 → 1.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -122,6 +122,41 @@ PROXY_NO_PROGRESS_THRESHOLD = int(os.environ.get("PROXY_NO_PROGRESS_THRESHOLD",
|
|
|
122
122
|
PROXY_CONTEXT_RELEASE_THRESHOLD = float(
|
|
123
123
|
os.environ.get("PROXY_CONTEXT_RELEASE_THRESHOLD", "0.90")
|
|
124
124
|
)
|
|
125
|
+
# Master switch for the tool-turn state machine; any value other than the
# falsy spellings below leaves it enabled.
PROXY_TOOL_STATE_MACHINE = (
    os.environ.get("PROXY_TOOL_STATE_MACHINE", "on").lower()
    not in {"0", "false", "off", "no"}
)
# Message count (floored at 3 by the state machine) required before a
# conversation is treated as an active tool loop.
PROXY_TOOL_STATE_MIN_MESSAGES = int(os.environ.get("PROXY_TOOL_STATE_MIN_MESSAGES", "6"))
# Number of tool_choice="required" turns granted when the act phase starts;
# half of this is granted again after each review.
PROXY_TOOL_STATE_FORCED_BUDGET = int(os.environ.get("PROXY_TOOL_STATE_FORCED_BUDGET", "24"))
# Number of tool_choice="auto" turns granted when the review phase starts.
PROXY_TOOL_STATE_AUTO_BUDGET = int(os.environ.get("PROXY_TOOL_STATE_AUTO_BUDGET", "2"))
# Stagnation streak at which the act phase hands over to review.
PROXY_TOOL_STATE_STAGNATION_THRESHOLD = int(
    os.environ.get("PROXY_TOOL_STATE_STAGNATION_THRESHOLD", "12")
)
# History window handed to SessionMonitor.detect_tool_cycle.
PROXY_TOOL_STATE_CYCLE_WINDOW = int(os.environ.get("PROXY_TOOL_STATE_CYCLE_WINDOW", "8"))
# Stagnation streak at which a detected cycle forces a finalize turn (the
# state machine raises this to at least twice the stagnation threshold).
PROXY_TOOL_STATE_FINALIZE_THRESHOLD = int(
    os.environ.get("PROXY_TOOL_STATE_FINALIZE_THRESHOLD", "24")
)
# Number of review entries tolerated before a finalize turn is forced.
PROXY_TOOL_STATE_REVIEW_CYCLE_LIMIT = int(
    os.environ.get("PROXY_TOOL_STATE_REVIEW_CYCLE_LIMIT", "3")
)
# When enabled, tool narrowing keeps the full tool list if no query tokens are
# available while the conversation already contains tool results.
PROXY_TOOL_NARROWING_EXPAND_ON_LOOP = (
    os.environ.get("PROXY_TOOL_NARROWING_EXPAND_ON_LOOP", "on").lower()
    not in {"0", "false", "off", "no"}
)
|
|
125
160
|
PROXY_GUARDRAIL_RETRY = os.environ.get("PROXY_GUARDRAIL_RETRY", "on").lower() not in {
|
|
126
161
|
"0",
|
|
127
162
|
"false",
|
|
@@ -404,6 +439,14 @@ class SessionMonitor:
|
|
|
404
439
|
forced_dampener_triggers: int = 0 # number of dampener activations
|
|
405
440
|
arg_preflight_rejections: int = 0 # rejected tool calls from arg preflight
|
|
406
441
|
arg_preflight_repairs: int = 0 # sanitized tool call args accepted
|
|
442
|
+
tool_turn_phase: str = "bootstrap" # bootstrap -> act -> review
|
|
443
|
+
tool_state_forced_budget_remaining: int = 0
|
|
444
|
+
tool_state_auto_budget_remaining: int = 0
|
|
445
|
+
tool_state_stagnation_streak: int = 0
|
|
446
|
+
tool_state_transitions: int = 0
|
|
447
|
+
tool_state_review_cycles: int = 0
|
|
448
|
+
last_tool_fingerprint: str = ""
|
|
449
|
+
finalize_turn_active: bool = False
|
|
407
450
|
last_seen_ts: float = 0.0
|
|
408
451
|
|
|
409
452
|
def record_request(self, estimated_tokens: int):
|
|
@@ -540,6 +583,69 @@ class SessionMonitor:
|
|
|
540
583
|
|
|
541
584
|
return False, 0
|
|
542
585
|
|
|
586
|
+
def detect_tool_cycle(self, window: int = 8) -> tuple[bool, int]:
    """Detect low-entropy tool cycles (A/B oscillation style loops).

    Inspects the last ``window`` entries of ``self.tool_call_history`` and
    reports a cycle when they all share one fingerprint, or when exactly two
    fingerprints alternate (each seen at least twice, with at least
    ``window // 2`` switches between them).

    Args:
        window: Number of trailing history entries to inspect. Values below
            1 never report a cycle.

    Returns:
        ``(is_cycle, run_length)``. ``run_length`` is the number of trailing
        history entries that belong to the cycling fingerprint(s); it is 0
        whenever ``is_cycle`` is False.
    """
    # A non-positive window would make ``history[-window:]`` pick the wrong
    # slice (``[-0:]`` is the whole list) and would report an empty history
    # as a zero-length cycle, so reject it up front.
    if window < 1:
        return False, 0

    history = self.tool_call_history
    if len(history) < window:
        return False, 0

    # Empty fingerprints are dropped; a window containing any of them is too
    # sparse to judge.
    recent = [fp for fp in history[-window:] if fp]
    if len(recent) < window:
        return False, 0

    counts: dict[str, int] = {}
    for fp in recent:
        counts[fp] = counts.get(fp, 0) + 1

    if len(counts) > 2:
        return False, 0

    if len(counts) == 2:
        # Both fingerprints must recur and must actually alternate; a single
        # switch (e.g. A A A A B B B B) is progress, not oscillation.
        if min(counts.values()) < 2:
            return False, 0
        transitions = sum(1 for a, b in zip(recent, recent[1:]) if a != b)
        if transitions < window // 2:
            return False, 0

    # Measure how far back the cycle extends, beyond the inspected window.
    run_length = 0
    for fp in reversed(history):
        if fp not in counts:
            break
        run_length += 1
    return True, run_length
|
|
627
|
+
|
|
628
|
+
def set_tool_turn_phase(self, phase: str, reason: str = ""):
    """Move the tool-turn state machine to ``phase`` and log the change.

    A request for the phase that is already active is a no-op: nothing is
    counted and nothing is logged. Otherwise the transition counter is bumped
    and the old/new phase (plus ``reason`` when given) is written to the log.
    """
    previous = self.tool_turn_phase
    if previous == phase:
        return

    self.tool_turn_phase = phase
    self.tool_state_transitions += 1

    reason_suffix = f" reason={reason}" if reason else ""
    logger.info(
        "TOOL STATE MACHINE: phase %s -> %s%s", previous, phase, reason_suffix
    )
|
|
640
|
+
|
|
641
|
+
def reset_tool_turn_state(self, reason: str = ""):
    """Return the tool-turn state machine to its initial ``bootstrap`` state.

    The phase change goes through ``set_tool_turn_phase`` (so it is counted
    and logged there), then the budgets, stagnation streak, review-cycle
    counter and last fingerprint are cleared. The transition counter itself
    is not reset here.
    """
    self.set_tool_turn_phase("bootstrap", reason=reason)
    for counter_name in (
        "tool_state_forced_budget_remaining",
        "tool_state_auto_budget_remaining",
        "tool_state_stagnation_streak",
        "tool_state_review_cycles",
    ):
        setattr(self, counter_name, 0)
    self.last_tool_fingerprint = ""
|
|
648
|
+
|
|
543
649
|
def guardrail_streak(self) -> int:
|
|
544
650
|
"""Highest current streak among malformed/invalid tool outputs."""
|
|
545
651
|
return max(self.malformed_tool_streak, self.invalid_tool_call_streak)
|
|
@@ -602,6 +708,9 @@ class SessionMonitor:
|
|
|
602
708
|
return False
|
|
603
709
|
|
|
604
710
|
is_looping, repeat_count = self.detect_tool_loop(window=PROXY_LOOP_WINDOW)
|
|
711
|
+
cycle_looping, cycle_repeat = self.detect_tool_cycle(
|
|
712
|
+
window=max(PROXY_LOOP_WINDOW, PROXY_TOOL_STATE_CYCLE_WINDOW)
|
|
713
|
+
)
|
|
605
714
|
|
|
606
715
|
# Pattern 1: Detected tool call loop
|
|
607
716
|
if (
|
|
@@ -618,6 +727,20 @@ class SessionMonitor:
|
|
|
618
727
|
self.loop_warnings_emitted += 1
|
|
619
728
|
return True
|
|
620
729
|
|
|
730
|
+
if (
|
|
731
|
+
cycle_looping
|
|
732
|
+
and cycle_repeat >= PROXY_LOOP_REPEAT_THRESHOLD
|
|
733
|
+
and self.tool_state_stagnation_streak >= max(1, PROXY_NO_PROGRESS_THRESHOLD)
|
|
734
|
+
):
|
|
735
|
+
logger.warning(
|
|
736
|
+
"LOOP BREAKER: low-entropy tool cycle repeated %d turns with stagnation=%d. "
|
|
737
|
+
"Releasing tool_choice to 'auto'.",
|
|
738
|
+
cycle_repeat,
|
|
739
|
+
self.tool_state_stagnation_streak,
|
|
740
|
+
)
|
|
741
|
+
self.loop_warnings_emitted += 1
|
|
742
|
+
return True
|
|
743
|
+
|
|
621
744
|
# Pattern 2: Too many consecutive forced requests
|
|
622
745
|
if (
|
|
623
746
|
self.consecutive_forced_count >= PROXY_FORCED_THRESHOLD
|
|
@@ -979,13 +1102,22 @@ async def lifespan(app: FastAPI):
|
|
|
979
1102
|
_resolve_prune_target_fraction() * 100,
|
|
980
1103
|
)
|
|
981
1104
|
logger.info(
|
|
982
|
-
"Guardrails: malformed=%s stream_strict=%s force_non_stream=%s args_preflight=%s tool_narrowing=%s thinking_off_on_tools=%s dampener=%s(%d/%d/%d/%d->%d) contamination_breaker=%s(%d forced=%d required_miss=%d) analysis_only_route=%s(min_tools=%d,max_msgs=%d) grammar=%s(required_only=%s loaded=%s tools_compatible=%s path=%s)",
|
|
1105
|
+
"Guardrails: malformed=%s stream_strict=%s force_non_stream=%s args_preflight=%s tool_narrowing=%s expand_on_loop=%s thinking_off_on_tools=%s state_machine=%s(min_msgs=%d forced=%d auto=%d stagnation=%d cycle=%d finalize=%d review_cycles=%d) dampener=%s(%d/%d/%d/%d->%d) contamination_breaker=%s(%d forced=%d required_miss=%d) analysis_only_route=%s(min_tools=%d,max_msgs=%d) grammar=%s(required_only=%s loaded=%s tools_compatible=%s path=%s)",
|
|
983
1106
|
PROXY_MALFORMED_TOOL_GUARDRAIL,
|
|
984
1107
|
PROXY_MALFORMED_TOOL_STREAM_STRICT,
|
|
985
1108
|
PROXY_FORCE_NON_STREAM,
|
|
986
1109
|
PROXY_TOOL_ARGS_PREFLIGHT,
|
|
987
1110
|
PROXY_TOOL_NARROWING,
|
|
1111
|
+
PROXY_TOOL_NARROWING_EXPAND_ON_LOOP,
|
|
988
1112
|
PROXY_DISABLE_THINKING_ON_TOOL_TURNS,
|
|
1113
|
+
PROXY_TOOL_STATE_MACHINE,
|
|
1114
|
+
PROXY_TOOL_STATE_MIN_MESSAGES,
|
|
1115
|
+
PROXY_TOOL_STATE_FORCED_BUDGET,
|
|
1116
|
+
PROXY_TOOL_STATE_AUTO_BUDGET,
|
|
1117
|
+
PROXY_TOOL_STATE_STAGNATION_THRESHOLD,
|
|
1118
|
+
PROXY_TOOL_STATE_CYCLE_WINDOW,
|
|
1119
|
+
PROXY_TOOL_STATE_FINALIZE_THRESHOLD,
|
|
1120
|
+
PROXY_TOOL_STATE_REVIEW_CYCLE_LIMIT,
|
|
989
1121
|
PROXY_FORCED_TOOL_DAMPENER,
|
|
990
1122
|
PROXY_FORCED_TOOL_DAMPENER_MIN_FORCED,
|
|
991
1123
|
PROXY_FORCED_TOOL_DAMPENER_BAD_STREAK,
|
|
@@ -1343,6 +1475,14 @@ def _last_user_has_tool_result(anthropic_body: dict) -> bool:
|
|
|
1343
1475
|
return False
|
|
1344
1476
|
|
|
1345
1477
|
|
|
1478
|
+
def _conversation_has_tool_results(anthropic_body: dict) -> bool:
|
|
1479
|
+
return any(
|
|
1480
|
+
_message_has_tool_result(msg.get("content"))
|
|
1481
|
+
for msg in anthropic_body.get("messages", [])
|
|
1482
|
+
if isinstance(msg, dict)
|
|
1483
|
+
)
|
|
1484
|
+
|
|
1485
|
+
|
|
1346
1486
|
def _sanitize_tool_schema_for_llama(schema):
|
|
1347
1487
|
"""Remove JSON Schema keywords that generate unsupported regex grammar.
|
|
1348
1488
|
|
|
@@ -1351,19 +1491,28 @@ def _sanitize_tool_schema_for_llama(schema):
|
|
|
1351
1491
|
"""
|
|
1352
1492
|
|
|
1353
1493
|
removed = 0
|
|
1494
|
+
property_map_keys = {"properties", "definitions", "$defs", "dependentSchemas"}
|
|
1354
1495
|
|
|
1355
|
-
def _walk(node):
|
|
1496
|
+
def _walk(node, parent_key=None):
|
|
1356
1497
|
nonlocal removed
|
|
1357
1498
|
if isinstance(node, dict):
|
|
1358
1499
|
cleaned = {}
|
|
1359
1500
|
for key, value in node.items():
|
|
1360
|
-
|
|
1501
|
+
key_is_property_name = parent_key in property_map_keys
|
|
1502
|
+
if (
|
|
1503
|
+
key == "pattern"
|
|
1504
|
+
and isinstance(value, str)
|
|
1505
|
+
and not key_is_property_name
|
|
1506
|
+
):
|
|
1507
|
+
removed += 1
|
|
1508
|
+
continue
|
|
1509
|
+
if key == "patternProperties" and not key_is_property_name:
|
|
1361
1510
|
removed += 1
|
|
1362
1511
|
continue
|
|
1363
|
-
cleaned[key] = _walk(value)
|
|
1512
|
+
cleaned[key] = _walk(value, key)
|
|
1364
1513
|
return cleaned
|
|
1365
1514
|
if isinstance(node, list):
|
|
1366
|
-
return [_walk(item) for item in node]
|
|
1515
|
+
return [_walk(item, parent_key) for item in node]
|
|
1367
1516
|
return node
|
|
1368
1517
|
|
|
1369
1518
|
return _walk(schema), removed
|
|
@@ -1424,6 +1573,18 @@ def _narrow_tools_for_request(
|
|
|
1424
1573
|
query_text = _latest_user_text(anthropic_body).lower()
|
|
1425
1574
|
query_tokens = _tokenize_for_tool_ranking(query_text)
|
|
1426
1575
|
if not query_tokens:
|
|
1576
|
+
n_msgs = len(anthropic_body.get("messages", []))
|
|
1577
|
+
if (
|
|
1578
|
+
PROXY_TOOL_NARROWING_EXPAND_ON_LOOP
|
|
1579
|
+
and _conversation_has_tool_results(anthropic_body)
|
|
1580
|
+
and n_msgs >= 3
|
|
1581
|
+
):
|
|
1582
|
+
logger.info(
|
|
1583
|
+
"TOOL NARROWING: %d tools retained (no query tokens during active loop)",
|
|
1584
|
+
len(openai_tools),
|
|
1585
|
+
)
|
|
1586
|
+
return openai_tools
|
|
1587
|
+
|
|
1427
1588
|
narrowed = openai_tools[:keep]
|
|
1428
1589
|
logger.info(
|
|
1429
1590
|
"TOOL NARROWING: %d -> %d tools (no query tokens)",
|
|
@@ -1461,6 +1622,182 @@ def _narrow_tools_for_request(
|
|
|
1461
1622
|
return narrowed
|
|
1462
1623
|
|
|
1463
1624
|
|
|
1625
|
+
def _update_tool_state_stagnation(
    monitor: SessionMonitor,
    latest_tool_fingerprint: str,
    last_user_has_tool_result: bool,
) -> None:
    """Update the monitor's stagnation streak from the newest tool fingerprint.

    Does nothing when the state machine is disabled. The streak grows by one
    when the fingerprint equals the previously stored one, or appears among
    the three history entries before the newest; in every other case
    (including no fingerprint, or a last user message without a tool result)
    it is reset to zero. The stored fingerprint is always refreshed.
    """
    if not PROXY_TOOL_STATE_MACHINE:
        return

    made_progress = True
    if latest_tool_fingerprint and last_user_has_tool_result:
        same_as_previous = monitor.last_tool_fingerprint == latest_tool_fingerprint
        # [-4:-1] skips the newest history entry and looks at the three before it.
        seen_recently = latest_tool_fingerprint in monitor.tool_call_history[-4:-1]
        made_progress = not (same_as_previous or seen_recently)

    if made_progress:
        monitor.tool_state_stagnation_streak = 0
    else:
        monitor.tool_state_stagnation_streak += 1

    monitor.last_tool_fingerprint = latest_tool_fingerprint
|
|
1647
|
+
|
|
1648
|
+
|
|
1649
|
+
def _tool_state_clear_session_streaks(monitor: SessionMonitor) -> None:
    """Zero the forcing and guardrail streak counters on the monitor."""
    monitor.forced_auto_cooldown_turns = 0
    monitor.consecutive_forced_count = 0
    monitor.no_progress_streak = 0
    monitor.malformed_tool_streak = 0
    monitor.invalid_tool_call_streak = 0
    monitor.required_tool_miss_streak = 0


def _tool_state_enter_review(monitor: SessionMonitor, reason: str) -> None:
    """Switch to review, count the review cycle, and refill both budgets."""
    monitor.set_tool_turn_phase("review", reason=reason)
    monitor.tool_state_review_cycles += 1
    monitor.tool_state_auto_budget_remaining = max(1, PROXY_TOOL_STATE_AUTO_BUDGET)
    monitor.tool_state_forced_budget_remaining = max(
        1, PROXY_TOOL_STATE_FORCED_BUDGET // 2
    )


def _tool_state_resume_act(monitor: SessionMonitor) -> None:
    """Leave review for act with half of the configured forced budget."""
    monitor.set_tool_turn_phase("act", reason="review_budget_spent")
    monitor.tool_state_forced_budget_remaining = max(
        1, PROXY_TOOL_STATE_FORCED_BUDGET // 2
    )


def _tool_state_enter_finalize(monitor: SessionMonitor, reason: str) -> None:
    """Switch to finalize and grant it a single-turn budget."""
    monitor.set_tool_turn_phase("finalize", reason=reason)
    monitor.tool_state_auto_budget_remaining = 1


def _resolve_state_machine_tool_choice(
    anthropic_body: dict,
    monitor: SessionMonitor,
    has_tool_results: bool,
    last_user_has_tool_result: bool,
) -> tuple[str | None, str]:
    """Advance the tool-turn state machine and pick this request's tool mode.

    Args:
        anthropic_body: Incoming request; only ``messages`` and the latest
            user text are read.
        monitor: Per-session state that is read and mutated in place.
        has_tool_results: Whether any message in the conversation carries a
            tool result.
        last_user_has_tool_result: Whether the last user message carries a
            tool result.

    Returns:
        ``(choice, reason)``. ``choice`` is ``"required"``, ``"auto"``,
        ``"finalize"``, or ``None`` when the state machine expresses no
        preference (disabled, fresh user text, inactive loop, finalize
        complete, unknown phase). ``reason`` is a short tag for logging.
    """
    if not PROXY_TOOL_STATE_MACHINE:
        return None, "disabled"

    n_msgs = len(anthropic_body.get("messages", []))
    latest_user_text = _latest_user_text(anthropic_body).strip()

    # A user message with real text (not a tool result) starts a new task:
    # drop loop history and go back to bootstrap.
    if latest_user_text and not last_user_has_tool_result:
        monitor.tool_call_history = []
        if n_msgs <= 1:
            _tool_state_clear_session_streaks(monitor)
        monitor.reset_tool_turn_state(reason="fresh_user_text")
        return None, "fresh_user_text"

    active_loop = (
        has_tool_results
        and last_user_has_tool_result
        and n_msgs >= max(3, PROXY_TOOL_STATE_MIN_MESSAGES)
    )
    if not active_loop:
        if not has_tool_results:
            monitor.tool_call_history = []
            if n_msgs <= 1:
                _tool_state_clear_session_streaks(monitor)
        monitor.reset_tool_turn_state(reason="inactive_loop")
        return None, "inactive_loop"

    if monitor.tool_turn_phase == "bootstrap":
        monitor.set_tool_turn_phase("act", reason="loop_detected")
        monitor.tool_state_forced_budget_remaining = max(
            1, PROXY_TOOL_STATE_FORCED_BUDGET
        )
        monitor.tool_state_auto_budget_remaining = 0

    cycle_looping, cycle_repeat = monitor.detect_tool_cycle(
        window=max(2, PROXY_TOOL_STATE_CYCLE_WINDOW)
    )
    stagnation_threshold = max(1, PROXY_TOOL_STATE_STAGNATION_THRESHOLD)
    stagnating = monitor.tool_state_stagnation_streak >= stagnation_threshold
    # Finalize never triggers earlier than twice the stagnation threshold.
    finalize_threshold = max(
        max(1, PROXY_TOOL_STATE_FINALIZE_THRESHOLD),
        stagnation_threshold * 2,
    )
    review_cycle_limit = max(1, PROXY_TOOL_STATE_REVIEW_CYCLE_LIMIT)

    # Escalation 1: a cycle that has stagnated past the finalize threshold.
    if cycle_looping and monitor.tool_state_stagnation_streak >= finalize_threshold:
        _tool_state_enter_finalize(monitor, "stagnation_limit")
        logger.warning(
            "TOOL STATE MACHINE: forcing finalize turn after prolonged cycle (repeat=%d stagnation=%d)",
            cycle_repeat,
            monitor.tool_state_stagnation_streak,
        )
        return "finalize", "stagnation_limit"

    # Escalation 2: too many act <-> review round trips.
    if (
        monitor.tool_turn_phase in {"act", "review"}
        and monitor.tool_state_review_cycles >= review_cycle_limit
    ):
        _tool_state_enter_finalize(monitor, "review_cycle_limit")
        logger.warning(
            "TOOL STATE MACHINE: forcing finalize turn after repeated review cycles (cycles=%d stagnation=%d)",
            monitor.tool_state_review_cycles,
            monitor.tool_state_stagnation_streak,
        )
        return "finalize", "review_cycle_limit"

    if monitor.tool_turn_phase == "act":
        if cycle_looping or stagnating:
            reason = "cycle_detected" if cycle_looping else "stagnation"
            _tool_state_enter_review(monitor, reason)
            logger.warning(
                "TOOL STATE MACHINE: entering review (cycle=%s repeat=%d stagnation=%d cycles=%d)",
                cycle_looping,
                cycle_repeat,
                monitor.tool_state_stagnation_streak,
                monitor.tool_state_review_cycles,
            )
            return "auto", reason

        if monitor.tool_state_forced_budget_remaining <= 0:
            _tool_state_enter_review(monitor, "forced_budget_exhausted")
            logger.warning(
                "TOOL STATE MACHINE: forced budget exhausted, entering review (cycles=%d)",
                monitor.tool_state_review_cycles,
            )
            return "auto", "forced_budget_exhausted"

        monitor.tool_state_forced_budget_remaining -= 1
        return "required", "act"

    if monitor.tool_turn_phase == "review":
        # Spend one auto turn if any remain; once the budget is gone (either
        # already, or by this decrement) go back to forcing tool calls.
        if monitor.tool_state_auto_budget_remaining > 0:
            monitor.tool_state_auto_budget_remaining -= 1
        if monitor.tool_state_auto_budget_remaining <= 0:
            _tool_state_resume_act(monitor)
            return "required", "review_complete"
        return "auto", "review"

    if monitor.tool_turn_phase == "finalize":
        if monitor.tool_state_auto_budget_remaining <= 0:
            monitor.reset_tool_turn_state(reason="finalize_complete")
            return None, "finalize_complete"

        monitor.tool_state_auto_budget_remaining -= 1
        if monitor.tool_state_auto_budget_remaining == 0:
            # Last finalize turn: reset now so the next request starts clean.
            monitor.reset_tool_turn_state(reason="finalize_complete")
        return "finalize", "finalize"

    monitor.reset_tool_turn_state(reason="unknown_phase")
    return None, "unknown_phase"
|
|
1799
|
+
|
|
1800
|
+
|
|
1464
1801
|
def build_openai_request(anthropic_body: dict, monitor: SessionMonitor) -> dict:
|
|
1465
1802
|
"""Build an OpenAI Chat Completions request from an Anthropic Messages request."""
|
|
1466
1803
|
openai_body = {
|
|
@@ -1489,10 +1826,26 @@ def build_openai_request(anthropic_body: dict, monitor: SessionMonitor) -> dict:
|
|
|
1489
1826
|
)
|
|
1490
1827
|
|
|
1491
1828
|
if "max_tokens" in anthropic_body:
|
|
1829
|
+
requested_raw = max(1, int(anthropic_body["max_tokens"]))
|
|
1830
|
+
|
|
1492
1831
|
# Enforce configurable minimum floor for thinking mode: model needs
|
|
1493
1832
|
# tokens for reasoning (<think>...</think>) plus actual response/tool
|
|
1494
1833
|
# calls. Set PROXY_MAX_TOKENS_FLOOR=0 to disable this floor.
|
|
1495
|
-
|
|
1834
|
+
floor_bypassed_for_tool_turn = (
|
|
1835
|
+
has_tools
|
|
1836
|
+
and PROXY_DISABLE_THINKING_ON_TOOL_TURNS
|
|
1837
|
+
and PROXY_MAX_TOKENS_FLOOR > 0
|
|
1838
|
+
)
|
|
1839
|
+
if floor_bypassed_for_tool_turn:
|
|
1840
|
+
requested_max = requested_raw
|
|
1841
|
+
if requested_raw < PROXY_MAX_TOKENS_FLOOR:
|
|
1842
|
+
logger.info(
|
|
1843
|
+
"MAX_TOKENS floor bypassed for tool turn with thinking disabled: requested=%d floor=%d",
|
|
1844
|
+
requested_raw,
|
|
1845
|
+
PROXY_MAX_TOKENS_FLOOR,
|
|
1846
|
+
)
|
|
1847
|
+
else:
|
|
1848
|
+
requested_max = _resolve_max_tokens_request(requested_raw)
|
|
1496
1849
|
|
|
1497
1850
|
# Option E: Smart max_tokens capping — prevent the response from
|
|
1498
1851
|
# consuming so many tokens that the NEXT turn's input won't fit.
|
|
@@ -1557,18 +1910,25 @@ def build_openai_request(anthropic_body: dict, monitor: SessionMonitor) -> dict:
|
|
|
1557
1910
|
# a tool call loop (same tools called repeatedly), to prevent
|
|
1558
1911
|
# runaway token consumption.
|
|
1559
1912
|
n_msgs = len(anthropic_body.get("messages", []))
|
|
1560
|
-
has_tool_results =
|
|
1561
|
-
isinstance(m.get("content"), list)
|
|
1562
|
-
and any(
|
|
1563
|
-
isinstance(b, dict) and b.get("type") == "tool_result"
|
|
1564
|
-
for b in m.get("content", [])
|
|
1565
|
-
)
|
|
1566
|
-
for m in anthropic_body.get("messages", [])
|
|
1567
|
-
)
|
|
1913
|
+
has_tool_results = _conversation_has_tool_results(anthropic_body)
|
|
1568
1914
|
|
|
1569
1915
|
# Record tool calls from the last assistant message for loop detection
|
|
1570
|
-
_record_last_assistant_tool_calls(
|
|
1916
|
+
latest_tool_fingerprint = _record_last_assistant_tool_calls(
|
|
1917
|
+
anthropic_body, monitor
|
|
1918
|
+
)
|
|
1571
1919
|
last_user_has_tool_result = _last_user_has_tool_result(anthropic_body)
|
|
1920
|
+
_update_tool_state_stagnation(
|
|
1921
|
+
monitor,
|
|
1922
|
+
latest_tool_fingerprint,
|
|
1923
|
+
last_user_has_tool_result,
|
|
1924
|
+
)
|
|
1925
|
+
monitor.finalize_turn_active = False
|
|
1926
|
+
state_choice, state_reason = _resolve_state_machine_tool_choice(
|
|
1927
|
+
anthropic_body,
|
|
1928
|
+
monitor,
|
|
1929
|
+
has_tool_results,
|
|
1930
|
+
last_user_has_tool_result,
|
|
1931
|
+
)
|
|
1572
1932
|
|
|
1573
1933
|
# Check if forced-tool dampener or loop breaker should override tool_choice
|
|
1574
1934
|
if monitor.consume_forced_auto_turn():
|
|
@@ -1579,6 +1939,47 @@ def build_openai_request(anthropic_body: dict, monitor: SessionMonitor) -> dict:
|
|
|
1579
1939
|
"tool_choice set to 'auto' by FORCED-TOOL DAMPENER (remaining=%d)",
|
|
1580
1940
|
monitor.forced_auto_cooldown_turns,
|
|
1581
1941
|
)
|
|
1942
|
+
elif state_choice == "auto":
|
|
1943
|
+
openai_body["tool_choice"] = "auto"
|
|
1944
|
+
monitor.consecutive_forced_count = 0
|
|
1945
|
+
monitor.no_progress_streak = 0
|
|
1946
|
+
logger.info(
|
|
1947
|
+
"tool_choice set to 'auto' by TOOL STATE MACHINE (phase=%s reason=%s auto_budget=%d stagnation=%d)",
|
|
1948
|
+
monitor.tool_turn_phase,
|
|
1949
|
+
state_reason,
|
|
1950
|
+
monitor.tool_state_auto_budget_remaining,
|
|
1951
|
+
monitor.tool_state_stagnation_streak,
|
|
1952
|
+
)
|
|
1953
|
+
elif state_choice == "finalize":
|
|
1954
|
+
openai_body.pop("tool_choice", None)
|
|
1955
|
+
openai_body.pop("tools", None)
|
|
1956
|
+
monitor.finalize_turn_active = True
|
|
1957
|
+
monitor.consecutive_forced_count = 0
|
|
1958
|
+
monitor.no_progress_streak = 0
|
|
1959
|
+
logger.warning(
|
|
1960
|
+
"TOOL STATE MACHINE: tools temporarily disabled for finalize turn (reason=%s)",
|
|
1961
|
+
state_reason,
|
|
1962
|
+
)
|
|
1963
|
+
elif state_choice == "required":
|
|
1964
|
+
openai_body["tool_choice"] = "required"
|
|
1965
|
+
monitor.consecutive_forced_count += 1
|
|
1966
|
+
monitor.no_progress_streak = (
|
|
1967
|
+
0 if last_user_has_tool_result else monitor.no_progress_streak + 1
|
|
1968
|
+
)
|
|
1969
|
+
logger.info(
|
|
1970
|
+
"tool_choice forced to 'required' by TOOL STATE MACHINE (phase=%s reason=%s forced_budget=%d)",
|
|
1971
|
+
monitor.tool_turn_phase,
|
|
1972
|
+
state_reason,
|
|
1973
|
+
monitor.tool_state_forced_budget_remaining,
|
|
1974
|
+
)
|
|
1975
|
+
elif state_reason in {"fresh_user_text", "inactive_loop"} and n_msgs <= 1:
|
|
1976
|
+
monitor.consecutive_forced_count = 0
|
|
1977
|
+
monitor.no_progress_streak = 0
|
|
1978
|
+
logger.info(
|
|
1979
|
+
"tool_choice left unchanged after state reset (reason=%s n_msgs=%d)",
|
|
1980
|
+
state_reason,
|
|
1981
|
+
n_msgs,
|
|
1982
|
+
)
|
|
1582
1983
|
elif monitor.should_release_tool_choice():
|
|
1583
1984
|
openai_body["tool_choice"] = "auto"
|
|
1584
1985
|
monitor.consecutive_forced_count = 0
|
|
@@ -1605,6 +2006,8 @@ def build_openai_request(anthropic_body: dict, monitor: SessionMonitor) -> dict:
|
|
|
1605
2006
|
else:
|
|
1606
2007
|
monitor.consecutive_forced_count = 0
|
|
1607
2008
|
monitor.no_progress_streak = 0
|
|
2009
|
+
if not has_tool_results:
|
|
2010
|
+
monitor.reset_tool_turn_state(reason="no_tool_results")
|
|
1608
2011
|
|
|
1609
2012
|
if PROXY_DISABLE_THINKING_ON_TOOL_TURNS:
|
|
1610
2013
|
openai_body["enable_thinking"] = False
|
|
@@ -1617,7 +2020,9 @@ def build_openai_request(anthropic_body: dict, monitor: SessionMonitor) -> dict:
|
|
|
1617
2020
|
return openai_body
|
|
1618
2021
|
|
|
1619
2022
|
|
|
1620
|
-
def _record_last_assistant_tool_calls(
|
|
2023
|
+
def _record_last_assistant_tool_calls(
|
|
2024
|
+
anthropic_body: dict, monitor: SessionMonitor
|
|
2025
|
+
) -> str:
|
|
1621
2026
|
"""Extract tool call names from the last assistant message and record
|
|
1622
2027
|
them in the session monitor for loop detection."""
|
|
1623
2028
|
messages = anthropic_body.get("messages", [])
|
|
@@ -1633,6 +2038,8 @@ def _record_last_assistant_tool_calls(anthropic_body: dict, monitor: SessionMoni
|
|
|
1633
2038
|
break
|
|
1634
2039
|
if tool_names:
|
|
1635
2040
|
monitor.record_tool_calls(tool_names)
|
|
2041
|
+
return "|".join(sorted(tool_names))
|
|
2042
|
+
return ""
|
|
1636
2043
|
|
|
1637
2044
|
|
|
1638
2045
|
def _is_unexpected_end_turn(openai_resp: dict, anthropic_body: dict) -> bool:
|
|
@@ -1652,14 +2059,7 @@ def _is_unexpected_end_turn(openai_resp: dict, anthropic_body: dict) -> bool:
|
|
|
1652
2059
|
if "tools" not in anthropic_body:
|
|
1653
2060
|
return False
|
|
1654
2061
|
|
|
1655
|
-
has_tool_results =
|
|
1656
|
-
isinstance(m.get("content"), list)
|
|
1657
|
-
and any(
|
|
1658
|
-
isinstance(b, dict) and b.get("type") == "tool_result"
|
|
1659
|
-
for b in m.get("content", [])
|
|
1660
|
-
)
|
|
1661
|
-
for m in anthropic_body.get("messages", [])
|
|
1662
|
-
)
|
|
2062
|
+
has_tool_results = _conversation_has_tool_results(anthropic_body)
|
|
1663
2063
|
|
|
1664
2064
|
return has_tool_results or _last_assistant_was_text_only(anthropic_body)
|
|
1665
2065
|
|
|
@@ -2809,6 +3209,22 @@ async def _apply_unexpected_end_turn_guardrail(
|
|
|
2809
3209
|
if not PROXY_GUARDRAIL_RETRY:
|
|
2810
3210
|
return openai_resp
|
|
2811
3211
|
|
|
3212
|
+
if monitor.finalize_turn_active:
|
|
3213
|
+
logger.info("GUARDRAIL: skipped unexpected_end_turn retry on finalize turn")
|
|
3214
|
+
return openai_resp
|
|
3215
|
+
|
|
3216
|
+
if monitor.tool_turn_phase == "act" and openai_body.get("tool_choice") == "auto":
|
|
3217
|
+
logger.info(
|
|
3218
|
+
"GUARDRAIL: skipped unexpected_end_turn retry during act auto release"
|
|
3219
|
+
)
|
|
3220
|
+
return openai_resp
|
|
3221
|
+
|
|
3222
|
+
if monitor.tool_turn_phase == "review" and openai_body.get("tool_choice") == "auto":
|
|
3223
|
+
logger.info(
|
|
3224
|
+
"GUARDRAIL: skipped unexpected_end_turn retry during review auto turn"
|
|
3225
|
+
)
|
|
3226
|
+
return openai_resp
|
|
3227
|
+
|
|
2812
3228
|
if not _is_unexpected_end_turn(openai_resp, anthropic_body):
|
|
2813
3229
|
return openai_resp
|
|
2814
3230
|
|
|
@@ -2864,6 +3280,10 @@ async def _apply_malformed_tool_guardrail(
|
|
|
2864
3280
|
if not PROXY_MALFORMED_TOOL_GUARDRAIL:
|
|
2865
3281
|
return openai_resp
|
|
2866
3282
|
|
|
3283
|
+
if monitor.finalize_turn_active:
|
|
3284
|
+
logger.info("GUARDRAIL: skipped malformed-tool retries on finalize turn")
|
|
3285
|
+
return openai_resp
|
|
3286
|
+
|
|
2867
3287
|
working_resp = openai_resp
|
|
2868
3288
|
repair_count = 0
|
|
2869
3289
|
if PROXY_TOOL_ARGS_PREFLIGHT and _openai_has_tool_calls(openai_resp):
|
|
@@ -3064,6 +3484,7 @@ def _maybe_apply_session_contamination_breaker(
|
|
|
3064
3484
|
monitor.malformed_tool_streak = 0
|
|
3065
3485
|
monitor.invalid_tool_call_streak = 0
|
|
3066
3486
|
monitor.required_tool_miss_streak = 0
|
|
3487
|
+
monitor.reset_tool_turn_state(reason="contamination_guardrail_soft_reset")
|
|
3067
3488
|
return anthropic_body
|
|
3068
3489
|
|
|
3069
3490
|
head = messages[:1]
|
|
@@ -3088,6 +3509,7 @@ def _maybe_apply_session_contamination_breaker(
|
|
|
3088
3509
|
monitor.no_progress_streak = 0
|
|
3089
3510
|
monitor.consecutive_forced_count = 0
|
|
3090
3511
|
monitor.forced_auto_cooldown_turns = 0
|
|
3512
|
+
monitor.reset_tool_turn_state(reason="contamination_guardrail_reset")
|
|
3091
3513
|
logger.warning(
|
|
3092
3514
|
"SESSION CONTAMINATION BREAKER: session=%s reset applied, kept=%d messages (bad_streak=%d forced=%d required_miss=%d)",
|
|
3093
3515
|
session_id,
|
|
@@ -4073,9 +4495,22 @@ async def context_status(request: Request):
|
|
|
4073
4495
|
"forced_auto_cooldown_turns": monitor.forced_auto_cooldown_turns,
|
|
4074
4496
|
"forced_dampener_triggers": monitor.forced_dampener_triggers,
|
|
4075
4497
|
"contamination_resets": monitor.contamination_resets,
|
|
4498
|
+
"tool_turn_phase": monitor.tool_turn_phase,
|
|
4499
|
+
"tool_state_forced_budget_remaining": monitor.tool_state_forced_budget_remaining,
|
|
4500
|
+
"tool_state_auto_budget_remaining": monitor.tool_state_auto_budget_remaining,
|
|
4501
|
+
"tool_state_stagnation_streak": monitor.tool_state_stagnation_streak,
|
|
4502
|
+
"tool_state_transitions": monitor.tool_state_transitions,
|
|
4503
|
+
"tool_state_review_cycles": monitor.tool_state_review_cycles,
|
|
4504
|
+
"finalize_turn_active": monitor.finalize_turn_active,
|
|
4076
4505
|
"tool_call_history_len": len(monitor.tool_call_history),
|
|
4077
4506
|
"is_looping": monitor.detect_tool_loop(window=PROXY_LOOP_WINDOW)[0],
|
|
4078
4507
|
"loop_repeat_count": monitor.detect_tool_loop(window=PROXY_LOOP_WINDOW)[1],
|
|
4508
|
+
"is_cycle_looping": monitor.detect_tool_cycle(
|
|
4509
|
+
window=max(2, PROXY_TOOL_STATE_CYCLE_WINDOW)
|
|
4510
|
+
)[0],
|
|
4511
|
+
"cycle_repeat_count": monitor.detect_tool_cycle(
|
|
4512
|
+
window=max(2, PROXY_TOOL_STATE_CYCLE_WINDOW)
|
|
4513
|
+
)[1],
|
|
4079
4514
|
"recent_tool_patterns": monitor.tool_call_history[-5:],
|
|
4080
4515
|
},
|
|
4081
4516
|
}
|