@miller-tech/uap 1.20.21 → 1.20.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -166,6 +166,9 @@ PROXY_TOOL_STATE_FINALIZE_THRESHOLD = int(
|
|
|
166
166
|
PROXY_TOOL_STATE_REVIEW_CYCLE_LIMIT = int(
|
|
167
167
|
os.environ.get("PROXY_TOOL_STATE_REVIEW_CYCLE_LIMIT", "1")
|
|
168
168
|
)
|
|
169
|
+
PROXY_COMPLETION_RECOVERY_MAX = int(
|
|
170
|
+
os.environ.get("PROXY_COMPLETION_RECOVERY_MAX", "3")
|
|
171
|
+
)
|
|
169
172
|
PROXY_CLIENT_RATE_WINDOW_SECS = int(
|
|
170
173
|
os.environ.get("PROXY_CLIENT_RATE_WINDOW_SECS", "60")
|
|
171
174
|
)
|
|
@@ -852,7 +855,9 @@ class SessionMonitor:
|
|
|
852
855
|
def update_completion_state(self, anthropic_body: dict, has_tool_results: bool):
|
|
853
856
|
self.completion_required = _should_enforce_completion_contract(anthropic_body)
|
|
854
857
|
self.completion_progress_signals = _count_completion_progress_signals(anthropic_body)
|
|
855
|
-
blockers = _completion_blockers(
|
|
858
|
+
blockers = _completion_blockers(
|
|
859
|
+
anthropic_body, has_tool_results, phase=self.tool_turn_phase
|
|
860
|
+
)
|
|
856
861
|
self.completion_blockers = blockers
|
|
857
862
|
self.completion_pending = self.completion_required and bool(blockers)
|
|
858
863
|
self.completion_verified = self.completion_required and not blockers
|
|
@@ -1860,7 +1865,9 @@ def _should_enforce_completion_contract(anthropic_body: dict) -> bool:
|
|
|
1860
1865
|
return _conversation_has_tool_results(anthropic_body) or _count_completion_progress_signals(anthropic_body) > 0
|
|
1861
1866
|
|
|
1862
1867
|
|
|
1863
|
-
def _completion_blockers(
|
|
1868
|
+
def _completion_blockers(
|
|
1869
|
+
anthropic_body: dict, has_tool_results: bool, phase: str = ""
|
|
1870
|
+
) -> list[str]:
|
|
1864
1871
|
blockers: list[str] = []
|
|
1865
1872
|
progress = _count_completion_progress_signals(anthropic_body)
|
|
1866
1873
|
if progress <= 0:
|
|
@@ -1871,7 +1878,10 @@ def _completion_blockers(anthropic_body: dict, has_tool_results: bool) -> list[s
|
|
|
1871
1878
|
if last_user_has_result:
|
|
1872
1879
|
blockers.append("awaiting_post_tool_followup")
|
|
1873
1880
|
elif _last_assistant_was_text_only(anthropic_body):
|
|
1874
|
-
|
|
1881
|
+
# Option 2: Suppress during finalize — text-only is expected behavior
|
|
1882
|
+
# for finalize turns, so blocking on it causes infinite ping-pong.
|
|
1883
|
+
if phase != "finalize":
|
|
1884
|
+
blockers.append("text_only_after_tool_results")
|
|
1875
1885
|
|
|
1876
1886
|
return blockers
|
|
1877
1887
|
|
|
@@ -2046,14 +2056,27 @@ def _resolve_state_machine_tool_choice(
|
|
|
2046
2056
|
last_user_has_tool_result: bool,
|
|
2047
2057
|
) -> tuple[str | None, str]:
|
|
2048
2058
|
if monitor.tool_turn_phase == "finalize" and monitor.completion_pending:
|
|
2059
|
+
# Option 1: Cap recovery attempts to prevent infinite finalize↔review ping-pong
|
|
2060
|
+
if monitor.completion_recovery_attempts >= PROXY_COMPLETION_RECOVERY_MAX:
|
|
2061
|
+
logger.warning(
|
|
2062
|
+
"TOOL STATE MACHINE: completion recovery exhausted (attempts=%d max=%d), "
|
|
2063
|
+
"proceeding with finalize despite blockers=%s",
|
|
2064
|
+
monitor.completion_recovery_attempts,
|
|
2065
|
+
PROXY_COMPLETION_RECOVERY_MAX,
|
|
2066
|
+
",".join(monitor.completion_blockers),
|
|
2067
|
+
)
|
|
2068
|
+
monitor.completion_pending = False
|
|
2069
|
+
monitor.completion_blockers = []
|
|
2070
|
+
return None, "completion_recovery_exhausted"
|
|
2049
2071
|
monitor.note_completion_recovery()
|
|
2050
2072
|
monitor.set_tool_turn_phase("review", reason="completion_pending")
|
|
2051
2073
|
monitor.tool_state_auto_budget_remaining = max(1, PROXY_TOOL_STATE_AUTO_BUDGET)
|
|
2052
2074
|
monitor.tool_state_forced_budget_remaining = max(1, PROXY_TOOL_STATE_FORCED_BUDGET // 2)
|
|
2053
2075
|
logger.warning(
|
|
2054
|
-
"TOOL STATE MACHINE: finalize blocked by completion contract (blockers=%s attempts=%d)",
|
|
2076
|
+
"TOOL STATE MACHINE: finalize blocked by completion contract (blockers=%s attempts=%d/%d)",
|
|
2055
2077
|
",".join(monitor.completion_blockers),
|
|
2056
2078
|
monitor.completion_recovery_attempts,
|
|
2079
|
+
PROXY_COMPLETION_RECOVERY_MAX,
|
|
2057
2080
|
)
|
|
2058
2081
|
return "auto", "completion_pending"
|
|
2059
2082
|
|
|
@@ -4197,6 +4220,11 @@ def _build_malformed_retry_body(
|
|
|
4197
4220
|
if PROXY_DISABLE_THINKING_ON_TOOL_TURNS:
|
|
4198
4221
|
retry_body["enable_thinking"] = False
|
|
4199
4222
|
|
|
4223
|
+
# Option 3: Proactively strip grammar from retry when tools are present and
|
|
4224
|
+
# grammar+tools is known to be incompatible. Prevents the 400 error
|
|
4225
|
+
# ("Cannot use custom grammar constraints with tools") on retry attempts.
|
|
4226
|
+
if retry_body.get("tools") and not TOOL_CALL_GRAMMAR_TOOLS_COMPATIBLE:
|
|
4227
|
+
retry_body.pop("grammar", None)
|
|
4200
4228
|
_apply_tool_call_grammar(retry_body, tool_choice=tool_choice)
|
|
4201
4229
|
|
|
4202
4230
|
if retry_hint:
|
|
@@ -4044,3 +4044,137 @@ class TestSpecModeLeakMarkers(unittest.TestCase):
|
|
|
4044
4044
|
"""_contains_system_prompt_leak detects leaks inside list values."""
|
|
4045
4045
|
value = {"patterns": ["**Spec mode is active. The user indicated"]}
|
|
4046
4046
|
self.assertTrue(proxy._contains_system_prompt_leak(value))
|
|
4047
|
+
|
|
4048
|
+
|
|
4049
|
+
class TestFinalizePingPongFix(unittest.TestCase):
    """Tests for the review↔finalize ping-pong infinite loop fix (PR #153).

    Three mitigations are exercised:

    * Option 1 - ``_resolve_state_machine_tool_choice`` stops demoting
      finalize to review once ``completion_recovery_attempts`` reaches
      ``PROXY_COMPLETION_RECOVERY_MAX``.
    * Option 2 - ``_completion_blockers`` does not report
      ``text_only_after_tool_results`` when ``phase == "finalize"``.
    * Option 3 - ``_build_malformed_retry_body`` drops ``grammar`` from the
      retry request when tools are present and grammar+tools is flagged as
      incompatible.
    """

    TEXT_ONLY_BLOCKER = "text_only_after_tool_results"

    # ------------------------------------------------------------------ helpers

    def _make_monitor(self):
        """Return a fresh ``SessionMonitor`` already switched to the finalize phase."""
        m = proxy.SessionMonitor()
        m.set_tool_turn_phase("finalize", reason="test")
        return m

    def _override(self, name, value):
        """Set ``proxy.<name>`` for the current test only.

        The previous value is restored through ``addCleanup``, so it is put
        back even when the test body raises.
        """
        self.addCleanup(setattr, proxy, name, getattr(proxy, name))
        setattr(proxy, name, value)

    @staticmethod
    def _post_tool_text_only_body():
        """Conversation: tool_use -> tool_result -> text-only assistant -> plain user."""
        return {
            "messages": [
                {"role": "user", "content": "do stuff"},
                {"role": "assistant", "content": [{"type": "tool_use", "id": "t1", "name": "Bash", "input": {}}]},
                {"role": "user", "content": [{"type": "tool_result", "tool_use_id": "t1", "content": "ok"}]},
                {"role": "assistant", "content": "All done."},
                {"role": "user", "content": "thanks"},
            ],
        }

    @staticmethod
    def _retry_inputs(grammar):
        """Return ``(openai_body, anthropic_body)`` for the malformed-retry tests."""
        openai_body = {
            "messages": [{"role": "user", "content": "test"}],
            "tools": [{"type": "function", "function": {"name": "Read", "parameters": {}}}],
            "grammar": grammar,
            "stream": True,
            "max_tokens": 8192,
        }
        anthropic_body = {
            "messages": [{"role": "user", "content": "test"}],
            "tools": [{"name": "Read", "input_schema": {"type": "object"}}],
        }
        return openai_body, anthropic_body

    # ----------------------------------------------------- Option 1: recovery cap

    def test_completion_recovery_cap_breaks_loop(self):
        """Option 1: After PROXY_COMPLETION_RECOVERY_MAX attempts, finalize proceeds."""
        m = self._make_monitor()
        m.completion_pending = True
        m.completion_blockers = ["no_progress_evidence", "text_only_after_tool_results"]
        m.completion_recovery_attempts = proxy.PROXY_COMPLETION_RECOVERY_MAX

        body = {
            "messages": [
                {"role": "user", "content": "hello"},
                {"role": "assistant", "content": "I'll help"},
                {"role": "user", "content": [{"type": "tool_result", "tool_use_id": "t1", "content": "ok"}]},
                {"role": "assistant", "content": "Done."},
            ],
            "tools": [{"name": "Read"}],
        }
        choice, reason = proxy._resolve_state_machine_tool_choice(body, m, True, False)

        self.assertIsNone(choice)
        self.assertEqual(reason, "completion_recovery_exhausted")
        self.assertFalse(m.completion_pending)
        self.assertEqual(m.completion_blockers, [])
        # The exhausted path returns before another recovery attempt is recorded.
        self.assertEqual(m.completion_recovery_attempts, proxy.PROXY_COMPLETION_RECOVERY_MAX)

    def test_completion_recovery_below_cap_demotes_to_review(self):
        """Below the cap, finalize is still demoted to review."""
        # Pin the cap so the outcome does not depend on the process environment
        # (a cap of 0 would make zero attempts count as "exhausted").
        self._override("PROXY_COMPLETION_RECOVERY_MAX", 3)
        m = self._make_monitor()
        m.completion_pending = True
        m.completion_blockers = ["no_progress_evidence"]
        m.completion_recovery_attempts = 0

        body = {
            "messages": [
                {"role": "user", "content": "hello"},
                {"role": "assistant", "content": "text"},
            ],
            "tools": [{"name": "Read"}],
        }
        choice, reason = proxy._resolve_state_machine_tool_choice(body, m, True, False)

        self.assertEqual(reason, "completion_pending")
        self.assertEqual(choice, "auto")
        self.assertEqual(m.tool_turn_phase, "review")

    # ------------------------------------------- Option 2: finalize suppression

    def test_text_only_blocker_suppressed_during_finalize(self):
        """Option 2: text_only_after_tool_results not reported when phase=finalize."""
        body = self._post_tool_text_only_body()
        blockers_finalize = proxy._completion_blockers(body, True, phase="finalize")
        blockers_normal = proxy._completion_blockers(body, True, phase="act")

        self.assertNotIn(self.TEXT_ONLY_BLOCKER, blockers_finalize)
        # The phase may only remove the text-only blocker: every other blocker
        # must be reported identically, and in the same order, in both phases.
        self.assertEqual(
            [b for b in blockers_normal if b != self.TEXT_ONLY_BLOCKER],
            blockers_finalize,
        )

    def test_text_only_blocker_still_fires_in_act_phase(self):
        """Option 2: text_only_after_tool_results still reported in act/review phases."""
        body = self._post_tool_text_only_body()
        # Whether the blocker appears at all depends on helpers that are not
        # exercised directly here, so compare against the default-phase result:
        # non-finalize phases must report exactly what an unspecified phase does.
        baseline = proxy._completion_blockers(body, True)
        for phase in ("act", "review"):
            with self.subTest(phase=phase):
                self.assertEqual(
                    proxy._completion_blockers(body, True, phase=phase),
                    baseline,
                )

    # ------------------------------------------ Option 3: grammar on retry body

    def test_grammar_stripped_from_retry_when_incompatible(self):
        """Option 3: Grammar is removed from retry when tools+grammar known incompatible."""
        self._override("TOOL_CALL_GRAMMAR_TOOLS_COMPATIBLE", False)
        openai_body, anthropic_body = self._retry_inputs("root ::= ...")

        retry_body = proxy._build_malformed_retry_body(openai_body, anthropic_body)

        self.assertNotIn("grammar", retry_body)
        self.assertTrue(retry_body.get("tools"), "retry body must keep its tools")

    def test_grammar_kept_when_tools_compatible(self):
        """Option 3: Grammar preserved when tools+grammar is compatible."""
        self._override("TOOL_CALL_GRAMMAR_TOOLS_COMPATIBLE", True)
        self._override("PROXY_TOOL_CALL_GRAMMAR", True)
        self._override("TOOL_CALL_GBNF", "root ::= test")
        openai_body, anthropic_body = self._retry_inputs("root ::= test")

        retry_body = proxy._build_malformed_retry_body(openai_body, anthropic_body)

        # When compatible, grammar should be present (applied by _apply_tool_call_grammar)
        self.assertIn("grammar", retry_body)
|