@miller-tech/uap 1.15.13 → 1.17.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md
CHANGED
|
@@ -13,16 +13,7 @@
|
|
|
13
13
|
|
|
14
14
|
---
|
|
15
15
|
|
|
16
|
-
##
|
|
17
|
-
|
|
18
|
-
A comprehensive deep analysis of the UAP codebase has been completed. See the analysis documentation:
|
|
19
|
-
|
|
20
|
-
| Document | Description |
|
|
21
|
-
|----------|-------------|
|
|
22
|
-
| **[FINAL_SUMMARY.md](./FINAL_SUMMARY.md)** | TL;DR and key findings |
|
|
23
|
-
| **[ANALYSIS_INDEX.md](./ANALYSIS_INDEX.md)** | Complete navigation guide |
|
|
24
|
-
| **[DEEP_ANALYSIS_REPORT.md](./DEEP_ANALYSIS_REPORT.md)** | Full technical analysis |
|
|
25
|
-
| **[OPTIMIZATION_ROADMAP.md](./OPTIMIZATION_ROADMAP.md)** | Implementation timeline |
|
|
16
|
+
## Recent Updates
|
|
26
17
|
|
|
27
18
|
**New Feature:** `uap worktree prune` - Automatically clean up stale worktrees!
|
|
28
19
|
|
package/package.json
CHANGED
|
@@ -122,6 +122,41 @@ PROXY_NO_PROGRESS_THRESHOLD = int(os.environ.get("PROXY_NO_PROGRESS_THRESHOLD",
|
|
|
122
122
|
PROXY_CONTEXT_RELEASE_THRESHOLD = float(
|
|
123
123
|
os.environ.get("PROXY_CONTEXT_RELEASE_THRESHOLD", "0.90")
|
|
124
124
|
)
|
|
125
|
+
# --- Tool-turn state machine configuration (all overridable via environment) ---

# Master switch: any value other than 0/false/off/no (case-insensitive) enables it.
PROXY_TOOL_STATE_MACHINE = os.getenv("PROXY_TOOL_STATE_MACHINE", "on").lower() not in (
    "0",
    "false",
    "off",
    "no",
)
# Message count (floored at 3 by the state machine) before a conversation is
# treated as an active tool loop.
PROXY_TOOL_STATE_MIN_MESSAGES = int(os.getenv("PROXY_TOOL_STATE_MIN_MESSAGES", "6"))
# Number of tool_choice="required" turns granted when the "act" phase starts;
# re-entries into "act" are granted half of this.
PROXY_TOOL_STATE_FORCED_BUDGET = int(os.getenv("PROXY_TOOL_STATE_FORCED_BUDGET", "24"))
# Number of tool_choice="auto" turns granted when the "review" phase starts.
PROXY_TOOL_STATE_AUTO_BUDGET = int(os.getenv("PROXY_TOOL_STATE_AUTO_BUDGET", "2"))
# Stagnation streak at which "act" hands over to "review".
PROXY_TOOL_STATE_STAGNATION_THRESHOLD = int(
    os.getenv("PROXY_TOOL_STATE_STAGNATION_THRESHOLD", "12")
)
# History window handed to SessionMonitor.detect_tool_cycle().
PROXY_TOOL_STATE_CYCLE_WINDOW = int(os.getenv("PROXY_TOOL_STATE_CYCLE_WINDOW", "8"))
# Stagnation streak at which a cycling session is pushed into "finalize"
# (the state machine never uses less than twice the stagnation threshold).
PROXY_TOOL_STATE_FINALIZE_THRESHOLD = int(
    os.getenv("PROXY_TOOL_STATE_FINALIZE_THRESHOLD", "24")
)
# Number of review cycles tolerated before "finalize" is forced.
PROXY_TOOL_STATE_REVIEW_CYCLE_LIMIT = int(
    os.getenv("PROXY_TOOL_STATE_REVIEW_CYCLE_LIMIT", "3")
)
# When enabled, tool narrowing keeps the full tool list if the latest user turn
# has no query tokens while a tool loop is in progress.
PROXY_TOOL_NARROWING_EXPAND_ON_LOOP = os.getenv(
    "PROXY_TOOL_NARROWING_EXPAND_ON_LOOP", "on"
).lower() not in ("0", "false", "off", "no")
|
|
125
160
|
PROXY_GUARDRAIL_RETRY = os.environ.get("PROXY_GUARDRAIL_RETRY", "on").lower() not in {
|
|
126
161
|
"0",
|
|
127
162
|
"false",
|
|
@@ -404,6 +439,14 @@ class SessionMonitor:
|
|
|
404
439
|
forced_dampener_triggers: int = 0 # number of dampener activations
|
|
405
440
|
arg_preflight_rejections: int = 0 # rejected tool calls from arg preflight
|
|
406
441
|
arg_preflight_repairs: int = 0 # sanitized tool call args accepted
|
|
442
|
+
tool_turn_phase: str = "bootstrap" # bootstrap -> act -> review
|
|
443
|
+
tool_state_forced_budget_remaining: int = 0
|
|
444
|
+
tool_state_auto_budget_remaining: int = 0
|
|
445
|
+
tool_state_stagnation_streak: int = 0
|
|
446
|
+
tool_state_transitions: int = 0
|
|
447
|
+
tool_state_review_cycles: int = 0
|
|
448
|
+
last_tool_fingerprint: str = ""
|
|
449
|
+
finalize_turn_active: bool = False
|
|
407
450
|
last_seen_ts: float = 0.0
|
|
408
451
|
|
|
409
452
|
def record_request(self, estimated_tokens: int):
|
|
@@ -540,6 +583,69 @@ class SessionMonitor:
|
|
|
540
583
|
|
|
541
584
|
return False, 0
|
|
542
585
|
|
|
586
|
+
def detect_tool_cycle(self, window: int = 8) -> tuple[bool, int]:
    """Detect low-entropy tool cycles (A/B oscillation style loops).

    The last ``window`` entries of ``self.tool_call_history`` count as a cycle
    when every entry is non-empty and either

    * they are all the same fingerprint, or
    * they consist of exactly two fingerprints, each occurring at least twice,
      with at least ``window // 2`` changes between adjacent entries.

    Args:
        window: Number of most recent history entries to inspect. Values
            below 1 never match.

    Returns:
        ``(True, run_length)`` when a cycle is found, where ``run_length`` is
        the number of trailing entries of the *whole* history that belong to
        the cycle's fingerprint set; ``(False, 0)`` otherwise.
    """
    history = self.tool_call_history
    # Guard non-positive windows: ``history[-0:]`` is the entire list, which
    # previously let ``window=0`` report a cycle on an empty history.
    if window < 1 or len(history) < window:
        return False, 0

    recent = history[-window:]
    # An empty fingerprint inside the window means there is no clean run.
    if not all(recent):
        return False, 0

    cycle_members = set(recent)
    if len(cycle_members) > 2:
        return False, 0

    if len(cycle_members) == 2:
        # Both fingerprints must really repeat ...
        if min(recent.count(fp) for fp in cycle_members) < 2:
            return False, 0
        # ... and alternate often enough to look like oscillation rather than
        # a single hand-over from one tool pattern to another.
        transitions = sum(1 for a, b in zip(recent, recent[1:]) if a != b)
        if transitions < window // 2:
            return False, 0

    # Length of the trailing run (may extend beyond the window).
    run_length = 0
    for fp in reversed(history):
        if fp not in cycle_members:
            break
        run_length += 1
    return True, run_length
|
|
627
|
+
|
|
628
|
+
def set_tool_turn_phase(self, phase: str, reason: str = ""):
    """Move the tool-turn state machine to ``phase``.

    A request for the phase that is already active is a no-op. Otherwise the
    new phase is stored, ``tool_state_transitions`` is incremented and the
    transition is logged, with ``reason`` appended when one is given.
    """
    previous = self.tool_turn_phase
    if previous == phase:
        return

    self.tool_turn_phase = phase
    self.tool_state_transitions += 1

    reason_suffix = f" reason={reason}" if reason else ""
    logger.info(
        "TOOL STATE MACHINE: phase %s -> %s%s", previous, phase, reason_suffix
    )
|
|
640
|
+
|
|
641
|
+
def reset_tool_turn_state(self, reason: str = ""):
    """Return the tool-turn state machine to its initial ``bootstrap`` state.

    The phase change goes through ``set_tool_turn_phase`` (passing ``reason``
    along); afterwards both budgets, the stagnation streak and the review-cycle
    counter are zeroed and the remembered tool fingerprint is cleared.
    """
    self.set_tool_turn_phase("bootstrap", reason=reason)
    # Budgets first, then the progress counters.
    self.tool_state_forced_budget_remaining = self.tool_state_auto_budget_remaining = 0
    self.tool_state_stagnation_streak = self.tool_state_review_cycles = 0
    self.last_tool_fingerprint = ""
|
|
648
|
+
|
|
543
649
|
def guardrail_streak(self) -> int:
|
|
544
650
|
"""Highest current streak among malformed/invalid tool outputs."""
|
|
545
651
|
return max(self.malformed_tool_streak, self.invalid_tool_call_streak)
|
|
@@ -602,6 +708,9 @@ class SessionMonitor:
|
|
|
602
708
|
return False
|
|
603
709
|
|
|
604
710
|
is_looping, repeat_count = self.detect_tool_loop(window=PROXY_LOOP_WINDOW)
|
|
711
|
+
cycle_looping, cycle_repeat = self.detect_tool_cycle(
|
|
712
|
+
window=max(PROXY_LOOP_WINDOW, PROXY_TOOL_STATE_CYCLE_WINDOW)
|
|
713
|
+
)
|
|
605
714
|
|
|
606
715
|
# Pattern 1: Detected tool call loop
|
|
607
716
|
if (
|
|
@@ -618,6 +727,20 @@ class SessionMonitor:
|
|
|
618
727
|
self.loop_warnings_emitted += 1
|
|
619
728
|
return True
|
|
620
729
|
|
|
730
|
+
if (
|
|
731
|
+
cycle_looping
|
|
732
|
+
and cycle_repeat >= PROXY_LOOP_REPEAT_THRESHOLD
|
|
733
|
+
and self.tool_state_stagnation_streak >= max(1, PROXY_NO_PROGRESS_THRESHOLD)
|
|
734
|
+
):
|
|
735
|
+
logger.warning(
|
|
736
|
+
"LOOP BREAKER: low-entropy tool cycle repeated %d turns with stagnation=%d. "
|
|
737
|
+
"Releasing tool_choice to 'auto'.",
|
|
738
|
+
cycle_repeat,
|
|
739
|
+
self.tool_state_stagnation_streak,
|
|
740
|
+
)
|
|
741
|
+
self.loop_warnings_emitted += 1
|
|
742
|
+
return True
|
|
743
|
+
|
|
621
744
|
# Pattern 2: Too many consecutive forced requests
|
|
622
745
|
if (
|
|
623
746
|
self.consecutive_forced_count >= PROXY_FORCED_THRESHOLD
|
|
@@ -979,13 +1102,22 @@ async def lifespan(app: FastAPI):
|
|
|
979
1102
|
_resolve_prune_target_fraction() * 100,
|
|
980
1103
|
)
|
|
981
1104
|
logger.info(
|
|
982
|
-
"Guardrails: malformed=%s stream_strict=%s force_non_stream=%s args_preflight=%s tool_narrowing=%s thinking_off_on_tools=%s dampener=%s(%d/%d/%d/%d->%d) contamination_breaker=%s(%d forced=%d required_miss=%d) analysis_only_route=%s(min_tools=%d,max_msgs=%d) grammar=%s(required_only=%s loaded=%s tools_compatible=%s path=%s)",
|
|
1105
|
+
"Guardrails: malformed=%s stream_strict=%s force_non_stream=%s args_preflight=%s tool_narrowing=%s expand_on_loop=%s thinking_off_on_tools=%s state_machine=%s(min_msgs=%d forced=%d auto=%d stagnation=%d cycle=%d finalize=%d review_cycles=%d) dampener=%s(%d/%d/%d/%d->%d) contamination_breaker=%s(%d forced=%d required_miss=%d) analysis_only_route=%s(min_tools=%d,max_msgs=%d) grammar=%s(required_only=%s loaded=%s tools_compatible=%s path=%s)",
|
|
983
1106
|
PROXY_MALFORMED_TOOL_GUARDRAIL,
|
|
984
1107
|
PROXY_MALFORMED_TOOL_STREAM_STRICT,
|
|
985
1108
|
PROXY_FORCE_NON_STREAM,
|
|
986
1109
|
PROXY_TOOL_ARGS_PREFLIGHT,
|
|
987
1110
|
PROXY_TOOL_NARROWING,
|
|
1111
|
+
PROXY_TOOL_NARROWING_EXPAND_ON_LOOP,
|
|
988
1112
|
PROXY_DISABLE_THINKING_ON_TOOL_TURNS,
|
|
1113
|
+
PROXY_TOOL_STATE_MACHINE,
|
|
1114
|
+
PROXY_TOOL_STATE_MIN_MESSAGES,
|
|
1115
|
+
PROXY_TOOL_STATE_FORCED_BUDGET,
|
|
1116
|
+
PROXY_TOOL_STATE_AUTO_BUDGET,
|
|
1117
|
+
PROXY_TOOL_STATE_STAGNATION_THRESHOLD,
|
|
1118
|
+
PROXY_TOOL_STATE_CYCLE_WINDOW,
|
|
1119
|
+
PROXY_TOOL_STATE_FINALIZE_THRESHOLD,
|
|
1120
|
+
PROXY_TOOL_STATE_REVIEW_CYCLE_LIMIT,
|
|
989
1121
|
PROXY_FORCED_TOOL_DAMPENER,
|
|
990
1122
|
PROXY_FORCED_TOOL_DAMPENER_MIN_FORCED,
|
|
991
1123
|
PROXY_FORCED_TOOL_DAMPENER_BAD_STREAK,
|
|
@@ -1343,6 +1475,14 @@ def _last_user_has_tool_result(anthropic_body: dict) -> bool:
|
|
|
1343
1475
|
return False
|
|
1344
1476
|
|
|
1345
1477
|
|
|
1478
|
+
def _conversation_has_tool_results(anthropic_body: dict) -> bool:
    """Report whether any message in the request contains a tool result.

    Every dict entry under ``anthropic_body["messages"]`` has its ``content``
    checked with ``_message_has_tool_result``; non-dict entries are ignored and
    a missing ``messages`` key is treated as an empty conversation.
    """
    for message in anthropic_body.get("messages", []):
        if not isinstance(message, dict):
            continue
        if _message_has_tool_result(message.get("content")):
            return True
    return False
|
|
1484
|
+
|
|
1485
|
+
|
|
1346
1486
|
def _sanitize_tool_schema_for_llama(schema):
|
|
1347
1487
|
"""Remove JSON Schema keywords that generate unsupported regex grammar.
|
|
1348
1488
|
|
|
@@ -1433,6 +1573,18 @@ def _narrow_tools_for_request(
|
|
|
1433
1573
|
query_text = _latest_user_text(anthropic_body).lower()
|
|
1434
1574
|
query_tokens = _tokenize_for_tool_ranking(query_text)
|
|
1435
1575
|
if not query_tokens:
|
|
1576
|
+
n_msgs = len(anthropic_body.get("messages", []))
|
|
1577
|
+
if (
|
|
1578
|
+
PROXY_TOOL_NARROWING_EXPAND_ON_LOOP
|
|
1579
|
+
and _conversation_has_tool_results(anthropic_body)
|
|
1580
|
+
and n_msgs >= 3
|
|
1581
|
+
):
|
|
1582
|
+
logger.info(
|
|
1583
|
+
"TOOL NARROWING: %d tools retained (no query tokens during active loop)",
|
|
1584
|
+
len(openai_tools),
|
|
1585
|
+
)
|
|
1586
|
+
return openai_tools
|
|
1587
|
+
|
|
1436
1588
|
narrowed = openai_tools[:keep]
|
|
1437
1589
|
logger.info(
|
|
1438
1590
|
"TOOL NARROWING: %d -> %d tools (no query tokens)",
|
|
@@ -1470,6 +1622,182 @@ def _narrow_tools_for_request(
|
|
|
1470
1622
|
return narrowed
|
|
1471
1623
|
|
|
1472
1624
|
|
|
1625
|
+
def _update_tool_state_stagnation(
    monitor: SessionMonitor,
    latest_tool_fingerprint: str,
    last_user_has_tool_result: bool,
) -> None:
    """Refresh the stagnation streak kept on ``monitor`` for this request.

    Does nothing when the tool state machine is disabled. Otherwise the streak
    grows by one when the latest fingerprint equals the previously stored one
    or occurs in ``monitor.tool_call_history[-4:-1]``; in every other case
    (including an empty fingerprint or a last user turn without a tool result)
    it drops back to zero. The latest fingerprint is always remembered.
    """
    if not PROXY_TOOL_STATE_MACHINE:
        return

    next_streak = 0
    if latest_tool_fingerprint and last_user_has_tool_result:
        same_as_previous = latest_tool_fingerprint == monitor.last_tool_fingerprint
        # [-4:-1] skips the newest history entry and looks at the three before it.
        seen_just_before = latest_tool_fingerprint in monitor.tool_call_history[-4:-1]
        if same_as_previous or seen_just_before:
            next_streak = monitor.tool_state_stagnation_streak + 1

    monitor.tool_state_stagnation_streak = next_streak
    monitor.last_tool_fingerprint = latest_tool_fingerprint
|
|
1647
|
+
|
|
1648
|
+
|
|
1649
|
+
def _clear_forced_tool_counters(monitor: SessionMonitor) -> None:
    """Zero the forced-turn cooldown and every guardrail streak on ``monitor``."""
    monitor.forced_auto_cooldown_turns = 0
    monitor.consecutive_forced_count = 0
    monitor.no_progress_streak = 0
    monitor.malformed_tool_streak = 0
    monitor.invalid_tool_call_streak = 0
    monitor.required_tool_miss_streak = 0


def _enter_review_phase(monitor: SessionMonitor, reason: str) -> None:
    """Switch to ``review``, count the cycle and grant fresh auto/forced budgets."""
    monitor.set_tool_turn_phase("review", reason=reason)
    monitor.tool_state_review_cycles += 1
    monitor.tool_state_auto_budget_remaining = max(1, PROXY_TOOL_STATE_AUTO_BUDGET)
    monitor.tool_state_forced_budget_remaining = max(
        1, PROXY_TOOL_STATE_FORCED_BUDGET // 2
    )


def _resume_act_phase(monitor: SessionMonitor) -> None:
    """Switch from ``review`` back to ``act`` with half of the forced budget."""
    monitor.set_tool_turn_phase("act", reason="review_budget_spent")
    monitor.tool_state_forced_budget_remaining = max(
        1, PROXY_TOOL_STATE_FORCED_BUDGET // 2
    )


def _resolve_state_machine_tool_choice(
    anthropic_body: dict,
    monitor: SessionMonitor,
    has_tool_results: bool,
    last_user_has_tool_result: bool,
) -> tuple[str | None, str]:
    """Advance the tool-turn state machine and pick this request's tool policy.

    Phases are ``bootstrap -> act -> review`` (looping between ``act`` and
    ``review``) with ``finalize`` as the exit once the session keeps cycling.

    Args:
        anthropic_body: Incoming Anthropic Messages request.
        monitor: Per-session state; phase, budgets and counters are mutated.
        has_tool_results: Whether any message in the conversation has a tool result.
        last_user_has_tool_result: Whether the latest user turn has a tool result.

    Returns:
        ``(choice, reason)`` where ``choice`` is ``"required"``, ``"auto"``,
        ``"finalize"`` or ``None`` (state machine has no opinion) and
        ``reason`` is a short tag used for logging.

    NOTE(review): this block was rebuilt from a rendering without indentation.
    The extent of the two ``if n_msgs <= 1:`` bodies (all six counter resets)
    is the most plausible reading - confirm against the real file.
    """
    if not PROXY_TOOL_STATE_MACHINE:
        return None, "disabled"

    n_msgs = len(anthropic_body.get("messages", []))

    # A plain-text user turn starts a new task: forget the loop history.
    latest_user_text = _latest_user_text(anthropic_body).strip()
    if latest_user_text and not last_user_has_tool_result:
        monitor.tool_call_history = []
        if n_msgs <= 1:
            _clear_forced_tool_counters(monitor)
        monitor.reset_tool_turn_state(reason="fresh_user_text")
        return None, "fresh_user_text"

    active_loop = (
        has_tool_results
        and last_user_has_tool_result
        and n_msgs >= max(3, PROXY_TOOL_STATE_MIN_MESSAGES)
    )
    if not active_loop:
        if not has_tool_results:
            monitor.tool_call_history = []
        if n_msgs <= 1:
            _clear_forced_tool_counters(monitor)
        monitor.reset_tool_turn_state(reason="inactive_loop")
        return None, "inactive_loop"

    if monitor.tool_turn_phase == "bootstrap":
        monitor.set_tool_turn_phase("act", reason="loop_detected")
        monitor.tool_state_forced_budget_remaining = max(
            1, PROXY_TOOL_STATE_FORCED_BUDGET
        )
        monitor.tool_state_auto_budget_remaining = 0

    cycle_looping, cycle_repeat = monitor.detect_tool_cycle(
        window=max(2, PROXY_TOOL_STATE_CYCLE_WINDOW)
    )
    stagnation_threshold = max(1, PROXY_TOOL_STATE_STAGNATION_THRESHOLD)
    stagnating = monitor.tool_state_stagnation_streak >= stagnation_threshold
    # Finalize never triggers before twice the stagnation threshold.
    finalize_threshold = max(
        max(1, PROXY_TOOL_STATE_FINALIZE_THRESHOLD),
        stagnation_threshold * 2,
    )
    review_cycle_limit = max(1, PROXY_TOOL_STATE_REVIEW_CYCLE_LIMIT)

    if cycle_looping and monitor.tool_state_stagnation_streak >= finalize_threshold:
        monitor.set_tool_turn_phase("finalize", reason="stagnation_limit")
        monitor.tool_state_auto_budget_remaining = 1
        logger.warning(
            "TOOL STATE MACHINE: forcing finalize turn after prolonged cycle (repeat=%d stagnation=%d)",
            cycle_repeat,
            monitor.tool_state_stagnation_streak,
        )
        return "finalize", "stagnation_limit"

    if (
        monitor.tool_turn_phase in {"act", "review"}
        and monitor.tool_state_review_cycles >= review_cycle_limit
    ):
        monitor.set_tool_turn_phase("finalize", reason="review_cycle_limit")
        monitor.tool_state_auto_budget_remaining = 1
        logger.warning(
            "TOOL STATE MACHINE: forcing finalize turn after repeated review cycles (cycles=%d stagnation=%d)",
            monitor.tool_state_review_cycles,
            monitor.tool_state_stagnation_streak,
        )
        return "finalize", "review_cycle_limit"

    if monitor.tool_turn_phase == "act":
        if cycle_looping or stagnating:
            reason = "cycle_detected" if cycle_looping else "stagnation"
            _enter_review_phase(monitor, reason)
            logger.warning(
                "TOOL STATE MACHINE: entering review (cycle=%s repeat=%d stagnation=%d cycles=%d)",
                cycle_looping,
                cycle_repeat,
                monitor.tool_state_stagnation_streak,
                monitor.tool_state_review_cycles,
            )
            return "auto", reason

        if monitor.tool_state_forced_budget_remaining <= 0:
            _enter_review_phase(monitor, "forced_budget_exhausted")
            logger.warning(
                "TOOL STATE MACHINE: forced budget exhausted, entering review (cycles=%d)",
                monitor.tool_state_review_cycles,
            )
            return "auto", "forced_budget_exhausted"

        monitor.tool_state_forced_budget_remaining -= 1
        return "required", "act"

    if monitor.tool_turn_phase == "review":
        # Spend one auto turn; while budget is left the model stays on "auto".
        if monitor.tool_state_auto_budget_remaining > 0:
            monitor.tool_state_auto_budget_remaining -= 1
            if monitor.tool_state_auto_budget_remaining > 0:
                return "auto", "review"
        _resume_act_phase(monitor)
        return "required", "review_complete"

    if monitor.tool_turn_phase == "finalize":
        if monitor.tool_state_auto_budget_remaining <= 0:
            monitor.reset_tool_turn_state(reason="finalize_complete")
            return None, "finalize_complete"

        monitor.tool_state_auto_budget_remaining -= 1
        if monitor.tool_state_auto_budget_remaining == 0:
            # State is reset for the following request, but this turn is
            # still reported as a finalize turn.
            monitor.reset_tool_turn_state(reason="finalize_complete")
        return "finalize", "finalize"

    monitor.reset_tool_turn_state(reason="unknown_phase")
    return None, "unknown_phase"
|
|
1799
|
+
|
|
1800
|
+
|
|
1473
1801
|
def build_openai_request(anthropic_body: dict, monitor: SessionMonitor) -> dict:
|
|
1474
1802
|
"""Build an OpenAI Chat Completions request from an Anthropic Messages request."""
|
|
1475
1803
|
openai_body = {
|
|
@@ -1498,10 +1826,26 @@ def build_openai_request(anthropic_body: dict, monitor: SessionMonitor) -> dict:
|
|
|
1498
1826
|
)
|
|
1499
1827
|
|
|
1500
1828
|
if "max_tokens" in anthropic_body:
|
|
1829
|
+
requested_raw = max(1, int(anthropic_body["max_tokens"]))
|
|
1830
|
+
|
|
1501
1831
|
# Enforce configurable minimum floor for thinking mode: model needs
|
|
1502
1832
|
# tokens for reasoning (<think>...</think>) plus actual response/tool
|
|
1503
1833
|
# calls. Set PROXY_MAX_TOKENS_FLOOR=0 to disable this floor.
|
|
1504
|
-
|
|
1834
|
+
floor_bypassed_for_tool_turn = (
|
|
1835
|
+
has_tools
|
|
1836
|
+
and PROXY_DISABLE_THINKING_ON_TOOL_TURNS
|
|
1837
|
+
and PROXY_MAX_TOKENS_FLOOR > 0
|
|
1838
|
+
)
|
|
1839
|
+
if floor_bypassed_for_tool_turn:
|
|
1840
|
+
requested_max = requested_raw
|
|
1841
|
+
if requested_raw < PROXY_MAX_TOKENS_FLOOR:
|
|
1842
|
+
logger.info(
|
|
1843
|
+
"MAX_TOKENS floor bypassed for tool turn with thinking disabled: requested=%d floor=%d",
|
|
1844
|
+
requested_raw,
|
|
1845
|
+
PROXY_MAX_TOKENS_FLOOR,
|
|
1846
|
+
)
|
|
1847
|
+
else:
|
|
1848
|
+
requested_max = _resolve_max_tokens_request(requested_raw)
|
|
1505
1849
|
|
|
1506
1850
|
# Option E: Smart max_tokens capping — prevent the response from
|
|
1507
1851
|
# consuming so many tokens that the NEXT turn's input won't fit.
|
|
@@ -1566,18 +1910,25 @@ def build_openai_request(anthropic_body: dict, monitor: SessionMonitor) -> dict:
|
|
|
1566
1910
|
# a tool call loop (same tools called repeatedly), to prevent
|
|
1567
1911
|
# runaway token consumption.
|
|
1568
1912
|
n_msgs = len(anthropic_body.get("messages", []))
|
|
1569
|
-
has_tool_results =
|
|
1570
|
-
isinstance(m.get("content"), list)
|
|
1571
|
-
and any(
|
|
1572
|
-
isinstance(b, dict) and b.get("type") == "tool_result"
|
|
1573
|
-
for b in m.get("content", [])
|
|
1574
|
-
)
|
|
1575
|
-
for m in anthropic_body.get("messages", [])
|
|
1576
|
-
)
|
|
1913
|
+
has_tool_results = _conversation_has_tool_results(anthropic_body)
|
|
1577
1914
|
|
|
1578
1915
|
# Record tool calls from the last assistant message for loop detection
|
|
1579
|
-
_record_last_assistant_tool_calls(
|
|
1916
|
+
latest_tool_fingerprint = _record_last_assistant_tool_calls(
|
|
1917
|
+
anthropic_body, monitor
|
|
1918
|
+
)
|
|
1580
1919
|
last_user_has_tool_result = _last_user_has_tool_result(anthropic_body)
|
|
1920
|
+
_update_tool_state_stagnation(
|
|
1921
|
+
monitor,
|
|
1922
|
+
latest_tool_fingerprint,
|
|
1923
|
+
last_user_has_tool_result,
|
|
1924
|
+
)
|
|
1925
|
+
monitor.finalize_turn_active = False
|
|
1926
|
+
state_choice, state_reason = _resolve_state_machine_tool_choice(
|
|
1927
|
+
anthropic_body,
|
|
1928
|
+
monitor,
|
|
1929
|
+
has_tool_results,
|
|
1930
|
+
last_user_has_tool_result,
|
|
1931
|
+
)
|
|
1581
1932
|
|
|
1582
1933
|
# Check if forced-tool dampener or loop breaker should override tool_choice
|
|
1583
1934
|
if monitor.consume_forced_auto_turn():
|
|
@@ -1588,6 +1939,47 @@ def build_openai_request(anthropic_body: dict, monitor: SessionMonitor) -> dict:
|
|
|
1588
1939
|
"tool_choice set to 'auto' by FORCED-TOOL DAMPENER (remaining=%d)",
|
|
1589
1940
|
monitor.forced_auto_cooldown_turns,
|
|
1590
1941
|
)
|
|
1942
|
+
elif state_choice == "auto":
|
|
1943
|
+
openai_body["tool_choice"] = "auto"
|
|
1944
|
+
monitor.consecutive_forced_count = 0
|
|
1945
|
+
monitor.no_progress_streak = 0
|
|
1946
|
+
logger.info(
|
|
1947
|
+
"tool_choice set to 'auto' by TOOL STATE MACHINE (phase=%s reason=%s auto_budget=%d stagnation=%d)",
|
|
1948
|
+
monitor.tool_turn_phase,
|
|
1949
|
+
state_reason,
|
|
1950
|
+
monitor.tool_state_auto_budget_remaining,
|
|
1951
|
+
monitor.tool_state_stagnation_streak,
|
|
1952
|
+
)
|
|
1953
|
+
elif state_choice == "finalize":
|
|
1954
|
+
openai_body.pop("tool_choice", None)
|
|
1955
|
+
openai_body.pop("tools", None)
|
|
1956
|
+
monitor.finalize_turn_active = True
|
|
1957
|
+
monitor.consecutive_forced_count = 0
|
|
1958
|
+
monitor.no_progress_streak = 0
|
|
1959
|
+
logger.warning(
|
|
1960
|
+
"TOOL STATE MACHINE: tools temporarily disabled for finalize turn (reason=%s)",
|
|
1961
|
+
state_reason,
|
|
1962
|
+
)
|
|
1963
|
+
elif state_choice == "required":
|
|
1964
|
+
openai_body["tool_choice"] = "required"
|
|
1965
|
+
monitor.consecutive_forced_count += 1
|
|
1966
|
+
monitor.no_progress_streak = (
|
|
1967
|
+
0 if last_user_has_tool_result else monitor.no_progress_streak + 1
|
|
1968
|
+
)
|
|
1969
|
+
logger.info(
|
|
1970
|
+
"tool_choice forced to 'required' by TOOL STATE MACHINE (phase=%s reason=%s forced_budget=%d)",
|
|
1971
|
+
monitor.tool_turn_phase,
|
|
1972
|
+
state_reason,
|
|
1973
|
+
monitor.tool_state_forced_budget_remaining,
|
|
1974
|
+
)
|
|
1975
|
+
elif state_reason in {"fresh_user_text", "inactive_loop"} and n_msgs <= 1:
|
|
1976
|
+
monitor.consecutive_forced_count = 0
|
|
1977
|
+
monitor.no_progress_streak = 0
|
|
1978
|
+
logger.info(
|
|
1979
|
+
"tool_choice left unchanged after state reset (reason=%s n_msgs=%d)",
|
|
1980
|
+
state_reason,
|
|
1981
|
+
n_msgs,
|
|
1982
|
+
)
|
|
1591
1983
|
elif monitor.should_release_tool_choice():
|
|
1592
1984
|
openai_body["tool_choice"] = "auto"
|
|
1593
1985
|
monitor.consecutive_forced_count = 0
|
|
@@ -1614,6 +2006,8 @@ def build_openai_request(anthropic_body: dict, monitor: SessionMonitor) -> dict:
|
|
|
1614
2006
|
else:
|
|
1615
2007
|
monitor.consecutive_forced_count = 0
|
|
1616
2008
|
monitor.no_progress_streak = 0
|
|
2009
|
+
if not has_tool_results:
|
|
2010
|
+
monitor.reset_tool_turn_state(reason="no_tool_results")
|
|
1617
2011
|
|
|
1618
2012
|
if PROXY_DISABLE_THINKING_ON_TOOL_TURNS:
|
|
1619
2013
|
openai_body["enable_thinking"] = False
|
|
@@ -1626,7 +2020,9 @@ def build_openai_request(anthropic_body: dict, monitor: SessionMonitor) -> dict:
|
|
|
1626
2020
|
return openai_body
|
|
1627
2021
|
|
|
1628
2022
|
|
|
1629
|
-
def _record_last_assistant_tool_calls(
|
|
2023
|
+
def _record_last_assistant_tool_calls(
|
|
2024
|
+
anthropic_body: dict, monitor: SessionMonitor
|
|
2025
|
+
) -> str:
|
|
1630
2026
|
"""Extract tool call names from the last assistant message and record
|
|
1631
2027
|
them in the session monitor for loop detection."""
|
|
1632
2028
|
messages = anthropic_body.get("messages", [])
|
|
@@ -1642,6 +2038,8 @@ def _record_last_assistant_tool_calls(anthropic_body: dict, monitor: SessionMoni
|
|
|
1642
2038
|
break
|
|
1643
2039
|
if tool_names:
|
|
1644
2040
|
monitor.record_tool_calls(tool_names)
|
|
2041
|
+
return "|".join(sorted(tool_names))
|
|
2042
|
+
return ""
|
|
1645
2043
|
|
|
1646
2044
|
|
|
1647
2045
|
def _is_unexpected_end_turn(openai_resp: dict, anthropic_body: dict) -> bool:
|
|
@@ -1661,14 +2059,7 @@ def _is_unexpected_end_turn(openai_resp: dict, anthropic_body: dict) -> bool:
|
|
|
1661
2059
|
if "tools" not in anthropic_body:
|
|
1662
2060
|
return False
|
|
1663
2061
|
|
|
1664
|
-
has_tool_results =
|
|
1665
|
-
isinstance(m.get("content"), list)
|
|
1666
|
-
and any(
|
|
1667
|
-
isinstance(b, dict) and b.get("type") == "tool_result"
|
|
1668
|
-
for b in m.get("content", [])
|
|
1669
|
-
)
|
|
1670
|
-
for m in anthropic_body.get("messages", [])
|
|
1671
|
-
)
|
|
2062
|
+
has_tool_results = _conversation_has_tool_results(anthropic_body)
|
|
1672
2063
|
|
|
1673
2064
|
return has_tool_results or _last_assistant_was_text_only(anthropic_body)
|
|
1674
2065
|
|
|
@@ -2818,6 +3209,22 @@ async def _apply_unexpected_end_turn_guardrail(
|
|
|
2818
3209
|
if not PROXY_GUARDRAIL_RETRY:
|
|
2819
3210
|
return openai_resp
|
|
2820
3211
|
|
|
3212
|
+
if monitor.finalize_turn_active:
|
|
3213
|
+
logger.info("GUARDRAIL: skipped unexpected_end_turn retry on finalize turn")
|
|
3214
|
+
return openai_resp
|
|
3215
|
+
|
|
3216
|
+
if monitor.tool_turn_phase == "act" and openai_body.get("tool_choice") == "auto":
|
|
3217
|
+
logger.info(
|
|
3218
|
+
"GUARDRAIL: skipped unexpected_end_turn retry during act auto release"
|
|
3219
|
+
)
|
|
3220
|
+
return openai_resp
|
|
3221
|
+
|
|
3222
|
+
if monitor.tool_turn_phase == "review" and openai_body.get("tool_choice") == "auto":
|
|
3223
|
+
logger.info(
|
|
3224
|
+
"GUARDRAIL: skipped unexpected_end_turn retry during review auto turn"
|
|
3225
|
+
)
|
|
3226
|
+
return openai_resp
|
|
3227
|
+
|
|
2821
3228
|
if not _is_unexpected_end_turn(openai_resp, anthropic_body):
|
|
2822
3229
|
return openai_resp
|
|
2823
3230
|
|
|
@@ -2873,6 +3280,10 @@ async def _apply_malformed_tool_guardrail(
|
|
|
2873
3280
|
if not PROXY_MALFORMED_TOOL_GUARDRAIL:
|
|
2874
3281
|
return openai_resp
|
|
2875
3282
|
|
|
3283
|
+
if monitor.finalize_turn_active:
|
|
3284
|
+
logger.info("GUARDRAIL: skipped malformed-tool retries on finalize turn")
|
|
3285
|
+
return openai_resp
|
|
3286
|
+
|
|
2876
3287
|
working_resp = openai_resp
|
|
2877
3288
|
repair_count = 0
|
|
2878
3289
|
if PROXY_TOOL_ARGS_PREFLIGHT and _openai_has_tool_calls(openai_resp):
|
|
@@ -3073,6 +3484,7 @@ def _maybe_apply_session_contamination_breaker(
|
|
|
3073
3484
|
monitor.malformed_tool_streak = 0
|
|
3074
3485
|
monitor.invalid_tool_call_streak = 0
|
|
3075
3486
|
monitor.required_tool_miss_streak = 0
|
|
3487
|
+
monitor.reset_tool_turn_state(reason="contamination_guardrail_soft_reset")
|
|
3076
3488
|
return anthropic_body
|
|
3077
3489
|
|
|
3078
3490
|
head = messages[:1]
|
|
@@ -3097,6 +3509,7 @@ def _maybe_apply_session_contamination_breaker(
|
|
|
3097
3509
|
monitor.no_progress_streak = 0
|
|
3098
3510
|
monitor.consecutive_forced_count = 0
|
|
3099
3511
|
monitor.forced_auto_cooldown_turns = 0
|
|
3512
|
+
monitor.reset_tool_turn_state(reason="contamination_guardrail_reset")
|
|
3100
3513
|
logger.warning(
|
|
3101
3514
|
"SESSION CONTAMINATION BREAKER: session=%s reset applied, kept=%d messages (bad_streak=%d forced=%d required_miss=%d)",
|
|
3102
3515
|
session_id,
|
|
@@ -4082,9 +4495,22 @@ async def context_status(request: Request):
|
|
|
4082
4495
|
"forced_auto_cooldown_turns": monitor.forced_auto_cooldown_turns,
|
|
4083
4496
|
"forced_dampener_triggers": monitor.forced_dampener_triggers,
|
|
4084
4497
|
"contamination_resets": monitor.contamination_resets,
|
|
4498
|
+
"tool_turn_phase": monitor.tool_turn_phase,
|
|
4499
|
+
"tool_state_forced_budget_remaining": monitor.tool_state_forced_budget_remaining,
|
|
4500
|
+
"tool_state_auto_budget_remaining": monitor.tool_state_auto_budget_remaining,
|
|
4501
|
+
"tool_state_stagnation_streak": monitor.tool_state_stagnation_streak,
|
|
4502
|
+
"tool_state_transitions": monitor.tool_state_transitions,
|
|
4503
|
+
"tool_state_review_cycles": monitor.tool_state_review_cycles,
|
|
4504
|
+
"finalize_turn_active": monitor.finalize_turn_active,
|
|
4085
4505
|
"tool_call_history_len": len(monitor.tool_call_history),
|
|
4086
4506
|
"is_looping": monitor.detect_tool_loop(window=PROXY_LOOP_WINDOW)[0],
|
|
4087
4507
|
"loop_repeat_count": monitor.detect_tool_loop(window=PROXY_LOOP_WINDOW)[1],
|
|
4508
|
+
"is_cycle_looping": monitor.detect_tool_cycle(
|
|
4509
|
+
window=max(2, PROXY_TOOL_STATE_CYCLE_WINDOW)
|
|
4510
|
+
)[0],
|
|
4511
|
+
"cycle_repeat_count": monitor.detect_tool_cycle(
|
|
4512
|
+
window=max(2, PROXY_TOOL_STATE_CYCLE_WINDOW)
|
|
4513
|
+
)[1],
|
|
4088
4514
|
"recent_tool_patterns": monitor.tool_call_history[-5:],
|
|
4089
4515
|
},
|
|
4090
4516
|
}
|