@miller-tech/uap 1.20.34 → 1.20.35
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -2140,8 +2140,12 @@ class TestToolTurnControls(unittest.TestCase):
|
|
|
2140
2140
|
}
|
|
2141
2141
|
|
|
2142
2142
|
openai = proxy.build_openai_request(body, monitor)
|
|
2143
|
-
|
|
2144
|
-
|
|
2143
|
+
# Finalize turn keeps tools available but switches tool_choice to
|
|
2144
|
+
# 'auto' so the model can complete with a tool call or summarise.
|
|
2145
|
+
# Earlier behaviour stripped tools entirely, which caused Anthropic
|
|
2146
|
+
# clients to see end_turn with no action and halt.
|
|
2147
|
+
self.assertIn("tools", openai)
|
|
2148
|
+
self.assertEqual(openai.get("tool_choice"), "auto")
|
|
2145
2149
|
self.assertEqual(monitor.tool_turn_phase, "finalize")
|
|
2146
2150
|
self.assertTrue(monitor.finalize_turn_active)
|
|
2147
2151
|
finally:
|
|
@@ -2229,7 +2233,7 @@ class TestToolTurnControls(unittest.TestCase):
|
|
|
2229
2233
|
finally:
|
|
2230
2234
|
setattr(proxy, "PROXY_TOOL_STATE_MACHINE", old_state)
|
|
2231
2235
|
|
|
2232
|
-
def
|
|
2236
|
+
def test_state_machine_finalize_keeps_tools_with_auto_choice(self):
|
|
2233
2237
|
old_state = getattr(proxy, "PROXY_TOOL_STATE_MACHINE")
|
|
2234
2238
|
old_min_msgs = getattr(proxy, "PROXY_TOOL_STATE_MIN_MESSAGES")
|
|
2235
2239
|
old_stagnation = getattr(proxy, "PROXY_TOOL_STATE_STAGNATION_THRESHOLD")
|
|
@@ -2293,8 +2297,10 @@ class TestToolTurnControls(unittest.TestCase):
|
|
|
2293
2297
|
}
|
|
2294
2298
|
|
|
2295
2299
|
openai = proxy.build_openai_request(body, monitor)
|
|
2296
|
-
|
|
2297
|
-
|
|
2300
|
+
# Finalize keeps tools + tool_choice=auto so the model can either
|
|
2301
|
+
# complete with a tool call or emit a plain-text summary.
|
|
2302
|
+
self.assertIn("tools", openai)
|
|
2303
|
+
self.assertEqual(openai.get("tool_choice"), "auto")
|
|
2298
2304
|
finally:
|
|
2299
2305
|
setattr(proxy, "PROXY_TOOL_STATE_MACHINE", old_state)
|
|
2300
2306
|
setattr(proxy, "PROXY_TOOL_STATE_MIN_MESSAGES", old_min_msgs)
|
|
@@ -3512,28 +3518,78 @@ class TestDegenerateRepetitionDetection(unittest.TestCase):
|
|
|
3512
3518
|
setattr(proxy, "PROXY_DISABLE_THINKING_ON_TOOL_TURNS", old_disable)
|
|
3513
3519
|
|
|
3514
3520
|
def test_max_tokens_floor_applied_when_thinking_active(self):
|
|
3515
|
-
"""
|
|
3521
|
+
"""Floor applies on non-preflight tool turns with thinking enabled."""
|
|
3516
3522
|
old_floor = getattr(proxy, "PROXY_MAX_TOKENS_FLOOR")
|
|
3517
3523
|
old_disable = getattr(proxy, "PROXY_DISABLE_THINKING_ON_TOOL_TURNS")
|
|
3518
3524
|
try:
|
|
3519
3525
|
setattr(proxy, "PROXY_MAX_TOKENS_FLOOR", 4096)
|
|
3520
3526
|
setattr(proxy, "PROXY_DISABLE_THINKING_ON_TOOL_TURNS", False)
|
|
3521
3527
|
|
|
3528
|
+
# max_tokens=1536 is above SMALL_PREFLIGHT_THRESHOLD (1024), so the
|
|
3529
|
+
# request does NOT take the preflight carveout and the regular
|
|
3530
|
+
# floor path applies. Small-preflight bypass is covered separately
|
|
3531
|
+
# in test_max_tokens_floor_bypassed_for_small_preflight.
|
|
3522
3532
|
body = {
|
|
3523
3533
|
"model": "test",
|
|
3524
|
-
"max_tokens":
|
|
3534
|
+
"max_tokens": 1536,
|
|
3525
3535
|
"messages": [{"role": "user", "content": "run command"}],
|
|
3526
3536
|
"tools": [{"name": "Bash", "description": "run", "input_schema": {"type": "object"}}],
|
|
3527
3537
|
}
|
|
3528
3538
|
openai = proxy.build_openai_request(
|
|
3529
3539
|
body, proxy.SessionMonitor(context_window=0)
|
|
3530
3540
|
)
|
|
3531
|
-
# Tools + thinking enabled = floor applied
|
|
3532
3541
|
self.assertEqual(openai.get("max_tokens"), 4096)
|
|
3533
3542
|
finally:
|
|
3534
3543
|
setattr(proxy, "PROXY_MAX_TOKENS_FLOOR", old_floor)
|
|
3535
3544
|
setattr(proxy, "PROXY_DISABLE_THINKING_ON_TOOL_TURNS", old_disable)
|
|
3536
3545
|
|
|
3546
|
+
def test_max_tokens_floor_bypassed_for_small_preflight(self):
|
|
3547
|
+
"""Small preflight requests (max_tokens <= SMALL_PREFLIGHT_THRESHOLD)
|
|
3548
|
+
bypass the big floor and instead get THINKING_MIN_FOR_TOOLS=2048
|
|
3549
|
+
bump so Qwen's mandatory thinking has room before the tool call."""
|
|
3550
|
+
old_floor = getattr(proxy, "PROXY_MAX_TOKENS_FLOOR")
|
|
3551
|
+
old_disable = getattr(proxy, "PROXY_DISABLE_THINKING_ON_TOOL_TURNS")
|
|
3552
|
+
try:
|
|
3553
|
+
setattr(proxy, "PROXY_MAX_TOKENS_FLOOR", 4096)
|
|
3554
|
+
setattr(proxy, "PROXY_DISABLE_THINKING_ON_TOOL_TURNS", False)
|
|
3555
|
+
|
|
3556
|
+
body = {
|
|
3557
|
+
"model": "test",
|
|
3558
|
+
"max_tokens": 512,
|
|
3559
|
+
"messages": [{"role": "user", "content": "run command"}],
|
|
3560
|
+
"tools": [{"name": "Bash", "description": "run", "input_schema": {"type": "object"}}],
|
|
3561
|
+
}
|
|
3562
|
+
openai = proxy.build_openai_request(
|
|
3563
|
+
body, proxy.SessionMonitor(context_window=0)
|
|
3564
|
+
)
|
|
3565
|
+
self.assertEqual(openai.get("max_tokens"), 2048)
|
|
3566
|
+
finally:
|
|
3567
|
+
setattr(proxy, "PROXY_MAX_TOKENS_FLOOR", old_floor)
|
|
3568
|
+
setattr(proxy, "PROXY_DISABLE_THINKING_ON_TOOL_TURNS", old_disable)
|
|
3569
|
+
|
|
3570
|
+
def test_max_tokens_true_preflight_left_alone(self):
|
|
3571
|
+
"""True preflight requests (max_tokens <= 16) are not inflated, even
|
|
3572
|
+
with tools present, so plan-generation latency stays low."""
|
|
3573
|
+
old_floor = getattr(proxy, "PROXY_MAX_TOKENS_FLOOR")
|
|
3574
|
+
old_disable = getattr(proxy, "PROXY_DISABLE_THINKING_ON_TOOL_TURNS")
|
|
3575
|
+
try:
|
|
3576
|
+
setattr(proxy, "PROXY_MAX_TOKENS_FLOOR", 4096)
|
|
3577
|
+
setattr(proxy, "PROXY_DISABLE_THINKING_ON_TOOL_TURNS", False)
|
|
3578
|
+
|
|
3579
|
+
body = {
|
|
3580
|
+
"model": "test",
|
|
3581
|
+
"max_tokens": 1,
|
|
3582
|
+
"messages": [{"role": "user", "content": "ping"}],
|
|
3583
|
+
"tools": [{"name": "Bash", "description": "run", "input_schema": {"type": "object"}}],
|
|
3584
|
+
}
|
|
3585
|
+
openai = proxy.build_openai_request(
|
|
3586
|
+
body, proxy.SessionMonitor(context_window=0)
|
|
3587
|
+
)
|
|
3588
|
+
self.assertEqual(openai.get("max_tokens"), 1)
|
|
3589
|
+
finally:
|
|
3590
|
+
setattr(proxy, "PROXY_MAX_TOKENS_FLOOR", old_floor)
|
|
3591
|
+
setattr(proxy, "PROXY_DISABLE_THINKING_ON_TOOL_TURNS", old_disable)
|
|
3592
|
+
|
|
3537
3593
|
|
|
3538
3594
|
class TestGenerationHangRecovery(unittest.TestCase):
|
|
3539
3595
|
"""Tests for generation hang recovery: timeouts, slot hang detection."""
|