@miller-tech/uap 1.20.27 → 1.20.32
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -656,6 +656,8 @@ class SessionMonitor:
|
|
|
656
656
|
tool_state_review_cycles: int = 0
|
|
657
657
|
last_tool_fingerprint: str = ""
|
|
658
658
|
cycling_tool_names: list = field(default_factory=list)
|
|
659
|
+
session_banned_tools: set = field(default_factory=set) # tools banned for entire session after repeated cycling
|
|
660
|
+
tool_cycle_counts: dict = field(default_factory=dict) # {tool_name: cycle_count} across resets
|
|
659
661
|
last_response_garbled: bool = False # previous turn had garbled/malformed output
|
|
660
662
|
finalize_turn_active: bool = False
|
|
661
663
|
finalize_continuation_count: int = 0
|
|
@@ -1346,6 +1348,43 @@ def prune_conversation(
|
|
|
1346
1348
|
http_client: httpx.AsyncClient | None = None
|
|
1347
1349
|
|
|
1348
1350
|
|
|
1351
|
+
def _is_loading_model_503(resp: httpx.Response) -> bool:
    """Tell whether *resp* is a 503 whose body mentions "loading model".

    The body match is case-insensitive. Any status other than 503 yields
    False without touching the body, and so does any error raised while
    reading or lower-casing the body text.
    """
    is_loading = False
    if resp.status_code == 503:
        try:
            body_text = resp.text.lower()
            is_loading = "loading model" in body_text
        except Exception:
            # Best-effort probe: an unreadable body counts as "not loading".
            is_loading = False
    return is_loading
|
|
1359
|
+
|
|
1360
|
+
|
|
1361
|
+
async def _wait_for_upstream_health(
    client: httpx.AsyncClient,
    max_wait: float = 60.0,
    poll_interval: float = 5.0,
) -> bool:
    """Poll the upstream health URL until it answers HTTP 200 or time runs out.

    Args:
        client: HTTP client used for the GET probes (5 second timeout each).
        max_wait: Upper bound, in seconds, on the accumulated sleep time.
        poll_interval: Seconds slept between two probes.

    Returns:
        True as soon as a probe returns status 200, False once the
        accumulated sleep time reaches ``max_wait``.

    Only the sleeps are counted towards ``max_wait``; time spent inside the
    probe requests themselves is not, so the wall-clock duration can exceed
    ``max_wait``.
    """
    # NOTE(review): str.replace swaps every "/v1" occurrence and leaves the URL
    # untouched when LLAMA_CPP_BASE has no "/v1" — confirm the base URL shape.
    health_url = LLAMA_CPP_BASE.replace("/v1", "/health")
    elapsed = 0.0
    while elapsed < max_wait:
        try:
            resp = await client.get(health_url, timeout=5.0)
        except Exception as exc:
            # A failed probe just means "not ready yet"; keep polling, but
            # leave a trace instead of swallowing the error silently.
            logger.debug("UPSTREAM HEALTH: probe failed (%s)", exc)
        else:
            # A 200 status is the sole readiness criterion. The body is not
            # parsed: a malformed or non-object body must not turn a healthy
            # answer into a failed probe.
            if resp.status_code == 200:
                if elapsed > 0:
                    logger.info(
                        "UPSTREAM HEALTH: recovered after %.0fs wait", elapsed
                    )
                return True
        await asyncio.sleep(poll_interval)
        elapsed += poll_interval
    logger.error("UPSTREAM HEALTH: not ready after %.0fs", max_wait)
    return False
|
|
1386
|
+
|
|
1387
|
+
|
|
1349
1388
|
async def _post_with_retry(
|
|
1350
1389
|
client: httpx.AsyncClient,
|
|
1351
1390
|
url: str,
|
|
@@ -1355,7 +1394,19 @@ async def _post_with_retry(
|
|
|
1355
1394
|
last_exc: Exception | None = None
|
|
1356
1395
|
for attempt in range(PROXY_UPSTREAM_RETRY_MAX):
|
|
1357
1396
|
try:
|
|
1358
|
-
|
|
1397
|
+
resp = await client.post(url, json=payload, headers=headers)
|
|
1398
|
+
# Cycle 19 Option 1: if 503 "Loading model", wait for health then retry
|
|
1399
|
+
if _is_loading_model_503(resp):
|
|
1400
|
+
logger.warning(
|
|
1401
|
+
"Upstream 503 Loading model (attempt %d/%d) – waiting for health",
|
|
1402
|
+
attempt + 1,
|
|
1403
|
+
PROXY_UPSTREAM_RETRY_MAX,
|
|
1404
|
+
)
|
|
1405
|
+
healthy = await _wait_for_upstream_health(client, max_wait=60.0)
|
|
1406
|
+
if healthy and attempt < PROXY_UPSTREAM_RETRY_MAX - 1:
|
|
1407
|
+
continue # retry the request now that upstream is healthy
|
|
1408
|
+
return resp # return the 503 if health wait timed out
|
|
1409
|
+
return resp
|
|
1359
1410
|
except (httpx.ConnectError, httpx.RemoteProtocolError, httpx.ReadTimeout) as exc:
|
|
1360
1411
|
last_exc = exc
|
|
1361
1412
|
if attempt < PROXY_UPSTREAM_RETRY_MAX - 1:
|
|
@@ -2240,6 +2291,16 @@ def _resolve_state_machine_tool_choice(
|
|
|
2240
2291
|
for part in fp.split("|"):
|
|
2241
2292
|
raw_names.append(part.split(":")[0])
|
|
2242
2293
|
monitor.cycling_tool_names = list(dict.fromkeys(raw_names))
|
|
2294
|
+
# Cycle 18 Option 2: track per-tool cycle counts and ban after 3 cycles
|
|
2295
|
+
for name in monitor.cycling_tool_names:
|
|
2296
|
+
monitor.tool_cycle_counts[name] = monitor.tool_cycle_counts.get(name, 0) + 1
|
|
2297
|
+
if monitor.tool_cycle_counts[name] >= 3 and name not in monitor.session_banned_tools:
|
|
2298
|
+
monitor.session_banned_tools.add(name)
|
|
2299
|
+
logger.warning(
|
|
2300
|
+
"TOOL BAN: '%s' banned for session after %d cycle detections",
|
|
2301
|
+
name,
|
|
2302
|
+
monitor.tool_cycle_counts[name],
|
|
2303
|
+
)
|
|
2243
2304
|
logger.warning(
|
|
2244
2305
|
"TOOL STATE MACHINE: entering review (cycle=%s repeat=%d stagnation=%d cycles=%d cycling_tools=%s)",
|
|
2245
2306
|
cycle_looping,
|
|
@@ -2629,14 +2690,15 @@ def build_openai_request(
|
|
|
2629
2690
|
cycling_names,
|
|
2630
2691
|
cycles,
|
|
2631
2692
|
)
|
|
2632
|
-
# Narrow tools to exclude cycling tools
|
|
2693
|
+
# Narrow tools to exclude cycling tools + session-banned tools
|
|
2633
2694
|
# Option 1 (Cycle 13): if any cycling tool is read-only, exclude entire class
|
|
2634
2695
|
# Option 1 (Cycle 14): persist exclusion during act phase too, not just review
|
|
2696
|
+
# Option 2 (Cycle 18): always exclude session-banned tools
|
|
2635
2697
|
if (
|
|
2636
|
-
monitor.cycling_tool_names
|
|
2698
|
+
(monitor.cycling_tool_names or monitor.session_banned_tools)
|
|
2637
2699
|
and "tools" in openai_body
|
|
2638
2700
|
):
|
|
2639
|
-
exclude_set = set(monitor.cycling_tool_names)
|
|
2701
|
+
exclude_set = set(monitor.cycling_tool_names) | monitor.session_banned_tools
|
|
2640
2702
|
# Expand to full read-only class if any cycling tool is read-only
|
|
2641
2703
|
if any(n.lower() in {c.lower() for c in _READ_ONLY_TOOL_CLASS} for n in exclude_set):
|
|
2642
2704
|
exclude_set |= _READ_ONLY_TOOL_CLASS
|
|
@@ -2648,13 +2710,15 @@ def build_openai_request(
|
|
|
2648
2710
|
]
|
|
2649
2711
|
if narrowed:
|
|
2650
2712
|
openai_body["tools"] = narrowed
|
|
2651
|
-
|
|
2652
|
-
|
|
2653
|
-
|
|
2654
|
-
|
|
2655
|
-
|
|
2656
|
-
|
|
2657
|
-
|
|
2713
|
+
# Only log on first activation or phase transitions to reduce noise
|
|
2714
|
+
if state_reason in {"cycle_detected", "stagnation"}:
|
|
2715
|
+
logger.warning(
|
|
2716
|
+
"CYCLE BREAK: narrowed tools from %d to %d (excluded %s, read_only_class=%s)",
|
|
2717
|
+
original_count,
|
|
2718
|
+
len(narrowed),
|
|
2719
|
+
monitor.cycling_tool_names,
|
|
2720
|
+
any(n.lower() in {c.lower() for c in _READ_ONLY_TOOL_CLASS} for n in monitor.cycling_tool_names),
|
|
2721
|
+
)
|
|
2658
2722
|
else:
|
|
2659
2723
|
logger.warning(
|
|
2660
2724
|
"CYCLE BREAK: cannot narrow tools — all tools are cycling, keeping original set",
|
|
@@ -3092,6 +3156,47 @@ _TOOL_CALL_XML_RE = re.compile(
|
|
|
3092
3156
|
)
|
|
3093
3157
|
|
|
3094
3158
|
|
|
3159
|
+
def _repair_tool_call_json(raw: str) -> str | None:
|
|
3160
|
+
"""Attempt to repair common garbled JSON in tool call payloads.
|
|
3161
|
+
|
|
3162
|
+
Returns repaired JSON string, or None if repair is not possible.
|
|
3163
|
+
Handles: trailing braces, unbalanced brackets, truncated strings.
|
|
3164
|
+
"""
|
|
3165
|
+
s = raw.strip()
|
|
3166
|
+
if not s.startswith("{"):
|
|
3167
|
+
return None
|
|
3168
|
+
# Strip trailing garbage (runaway braces/brackets)
|
|
3169
|
+
while s.endswith("}}") and s.count("{") < s.count("}"):
|
|
3170
|
+
s = s[:-1]
|
|
3171
|
+
while s.endswith("]]") and s.count("[") < s.count("]"):
|
|
3172
|
+
s = s[:-1]
|
|
3173
|
+
# Balance braces
|
|
3174
|
+
open_b = s.count("{") - s.count("}")
|
|
3175
|
+
if open_b > 0:
|
|
3176
|
+
s += "}" * open_b
|
|
3177
|
+
elif open_b < 0:
|
|
3178
|
+
# Too many closing braces — trim from end
|
|
3179
|
+
for _ in range(-open_b):
|
|
3180
|
+
idx = s.rfind("}")
|
|
3181
|
+
if idx > 0:
|
|
3182
|
+
s = s[:idx] + s[idx + 1:]
|
|
3183
|
+
# Try to parse
|
|
3184
|
+
try:
|
|
3185
|
+
json.loads(s)
|
|
3186
|
+
return s
|
|
3187
|
+
except json.JSONDecodeError:
|
|
3188
|
+
pass
|
|
3189
|
+
# Try truncating at last valid comma + closing
|
|
3190
|
+
for end in range(len(s) - 1, max(0, len(s) - 200), -1):
|
|
3191
|
+
candidate = s[:end].rstrip().rstrip(",") + "}" * max(0, s[:end].count("{") - s[:end].count("}"))
|
|
3192
|
+
try:
|
|
3193
|
+
json.loads(candidate)
|
|
3194
|
+
return candidate
|
|
3195
|
+
except json.JSONDecodeError:
|
|
3196
|
+
continue
|
|
3197
|
+
return None
|
|
3198
|
+
|
|
3199
|
+
|
|
3095
3200
|
def _extract_tool_calls_from_text(text: str) -> tuple[list[dict], str]:
|
|
3096
3201
|
"""Parse ``<tool_call>{...}</tool_call>`` blocks out of *text*.
|
|
3097
3202
|
|
|
@@ -3112,7 +3217,18 @@ def _extract_tool_calls_from_text(text: str) -> tuple[list[dict], str]:
|
|
|
3112
3217
|
try:
|
|
3113
3218
|
payload = json.loads(raw_json)
|
|
3114
3219
|
except json.JSONDecodeError:
|
|
3115
|
-
|
|
3220
|
+
# Cycle 15 Option 1: attempt JSON repair before giving up
|
|
3221
|
+
repaired = _repair_tool_call_json(raw_json)
|
|
3222
|
+
if repaired:
|
|
3223
|
+
try:
|
|
3224
|
+
payload = json.loads(repaired)
|
|
3225
|
+
logger.info(
|
|
3226
|
+
"TOOL CALL EXTRACTION: repaired garbled JSON in <tool_call> block"
|
|
3227
|
+
)
|
|
3228
|
+
except json.JSONDecodeError:
|
|
3229
|
+
continue
|
|
3230
|
+
else:
|
|
3231
|
+
continue
|
|
3116
3232
|
if not isinstance(payload, dict):
|
|
3117
3233
|
continue
|
|
3118
3234
|
|
|
@@ -4380,9 +4496,11 @@ def _build_malformed_retry_body(
|
|
|
4380
4496
|
retry_body = dict(openai_body)
|
|
4381
4497
|
retry_body["stream"] = False
|
|
4382
4498
|
retry_body["tool_choice"] = tool_choice
|
|
4383
|
-
#
|
|
4499
|
+
# Cycle 15 Option 3: vary temperature across retries to break degenerate patterns.
|
|
4500
|
+
# Attempt 1: use configured retry temp (default 0.0) for deterministic first try.
|
|
4501
|
+
# Attempt 2+: increase to 0.5 to escape the degenerate local minimum.
|
|
4384
4502
|
if total_attempts > 1 and attempt > 1:
|
|
4385
|
-
retry_body["temperature"] = 0.
|
|
4503
|
+
retry_body["temperature"] = 0.5
|
|
4386
4504
|
else:
|
|
4387
4505
|
retry_body["temperature"] = PROXY_MALFORMED_TOOL_RETRY_TEMPERATURE
|
|
4388
4506
|
|
|
@@ -5922,6 +6040,27 @@ async def messages(request: Request):
|
|
|
5922
6040
|
|
|
5923
6041
|
if strict_resp.status_code != 200:
|
|
5924
6042
|
error_text = strict_resp.text[:1000]
|
|
6043
|
+
# Cycle 19 Option 2: For 503 "Loading model", don't advance state
|
|
6044
|
+
# machine — return retriable 503 with Retry-After header so the
|
|
6045
|
+
# client can retry without wasting state machine budget.
|
|
6046
|
+
if _is_loading_model_503(strict_resp):
|
|
6047
|
+
logger.warning(
|
|
6048
|
+
"Upstream 503 Loading model (strict-stream) — returning retriable 503 without advancing state",
|
|
6049
|
+
)
|
|
6050
|
+
return Response(
|
|
6051
|
+
content=json.dumps(
|
|
6052
|
+
{
|
|
6053
|
+
"type": "error",
|
|
6054
|
+
"error": {
|
|
6055
|
+
"type": "overloaded_error",
|
|
6056
|
+
"message": "Upstream model is loading. Retry in 10 seconds.",
|
|
6057
|
+
},
|
|
6058
|
+
}
|
|
6059
|
+
),
|
|
6060
|
+
status_code=503,
|
|
6061
|
+
headers={"Retry-After": "10"},
|
|
6062
|
+
media_type="application/json",
|
|
6063
|
+
)
|
|
5925
6064
|
logger.error(
|
|
5926
6065
|
"Upstream HTTP %d (strict-stream): %s",
|
|
5927
6066
|
strict_resp.status_code,
|
|
@@ -4662,3 +4662,159 @@ class TestMalformedPayloadLoopFix(unittest.TestCase):
|
|
|
4662
4662
|
}
|
|
4663
4663
|
openai = proxy.build_openai_request(body, monitor)
|
|
4664
4664
|
self.assertAlmostEqual(openai.get("temperature", 1.0), 0.3, places=1)
|
|
4665
|
+
|
|
4666
|
+
|
|
4667
|
+
class TestToolCallJsonRepair(unittest.TestCase):
    """Cycle 15 Option 1: repairing garbled JSON found in tool call payloads."""

    def _repair_and_parse(self, garbled):
        """Repair *garbled*, require a non-None result, and return it decoded."""
        fixed = proxy._repair_tool_call_json(garbled)
        self.assertIsNotNone(fixed)
        return json.loads(fixed)

    def test_repairs_trailing_braces(self):
        """Surplus closing braces at the end are dropped before parsing."""
        payload = self._repair_and_parse(
            '{"name":"bash","arguments":{"command":"ls"}}}}'
        )
        self.assertEqual(payload["name"], "bash")

    def test_repairs_unbalanced_open_braces(self):
        """A payload missing its final closing brace is completed."""
        payload = self._repair_and_parse(
            '{"name":"read","arguments":{"file_path":"/foo"}'
        )
        self.assertEqual(payload["name"], "read")

    def test_returns_none_for_total_garbage(self):
        """Text that is not JSON at all cannot be repaired."""
        self.assertIsNone(proxy._repair_tool_call_json("not json at all"))

    def test_extracts_repaired_tool_call_from_text(self):
        """End-to-end: a garbled <tool_call> block still yields its tool call."""
        garbled_block = '<tool_call>\n{"name":"bash","arguments":{"command":"pwd"}}}\n</tool_call>'
        tool_calls, _leftover = proxy._extract_tool_calls_from_text(garbled_block)
        self.assertEqual(len(tool_calls), 1)
        self.assertEqual(tool_calls[0]["function"]["name"], "bash")
|
|
4697
|
+
|
|
4698
|
+
|
|
4699
|
+
class TestRetryTemperatureVariance(unittest.TestCase):
    """Cycle 15 Option 3: the retry temperature changes with the attempt number."""

    @staticmethod
    def _retry_body_for(attempt):
        """Build a malformed-retry body for *attempt* out of three attempts."""

        def fresh_request():
            # Each positional argument gets its own dict, never a shared one.
            return {"messages": [{"role": "user", "content": "test"}], "tools": []}

        return proxy._build_malformed_retry_body(
            fresh_request(),
            fresh_request(),
            retry_hint="fix it",
            tool_choice="required",
            attempt=attempt,
            total_attempts=3,
            is_garbled=False,
        )

    def test_retry_attempt_1_uses_configured_temp(self):
        """Attempt 1 keeps PROXY_MALFORMED_TOOL_RETRY_TEMPERATURE."""
        retry_body = self._retry_body_for(1)
        self.assertEqual(
            retry_body["temperature"],
            proxy.PROXY_MALFORMED_TOOL_RETRY_TEMPERATURE,
        )

    def test_retry_attempt_2_uses_higher_temp(self):
        """Attempt 2 switches to temperature 0.5."""
        retry_body = self._retry_body_for(2)
        self.assertEqual(retry_body["temperature"], 0.5)
|
|
4727
|
+
|
|
4728
|
+
|
|
4729
|
+
class TestCycle18SessionBanAndLogNoise(unittest.TestCase):
    """Cycle 18: session-level tool bans (the tests below cover banning only)."""

    def test_tool_banned_after_3_cycle_detections(self):
        """Option 2: a tool whose cycle count reaches 3 ends up session-banned."""
        monitor = proxy.SessionMonitor(context_window=262144)
        # Two earlier detections for 'task' are already on record.
        monitor.tool_cycle_counts["task"] = 2
        monitor.cycling_tool_names = ["task"]

        # NOTE(review): the counting/banning step is replayed by hand here
        # rather than driven through proxy code, so this test checks the
        # replay, not the production path.
        for tool in monitor.cycling_tool_names:
            count = monitor.tool_cycle_counts.get(tool, 0) + 1
            monitor.tool_cycle_counts[tool] = count
            if count >= 3:
                monitor.session_banned_tools.add(tool)

        self.assertIn("task", monitor.session_banned_tools)
        self.assertEqual(monitor.tool_cycle_counts["task"], 3)

    def test_session_ban_survives_state_reset(self):
        """Option 2: reset_tool_turn_state leaves bans and cycle counts alone."""
        monitor = proxy.SessionMonitor(context_window=262144)
        monitor.session_banned_tools.add("task")
        monitor.tool_cycle_counts["task"] = 3

        monitor.reset_tool_turn_state(reason="test")

        # Both pieces of session-level state must still be there afterwards.
        self.assertIn("task", monitor.session_banned_tools)
        self.assertEqual(monitor.tool_cycle_counts["task"], 3)

    def test_banned_tools_excluded_even_without_cycling(self):
        """Option 2: a banned tool is filtered out with no active cycling."""
        overrides = {
            "PROXY_TOOL_STATE_MACHINE": True,
            "PROXY_TOOL_STATE_MIN_MESSAGES": 3,
            "PROXY_TOOL_STATE_FORCED_BUDGET": 6,
        }
        # Remember the module settings so they can be restored afterwards.
        saved = {key: getattr(proxy, key) for key in overrides}
        try:
            for key, value in overrides.items():
                setattr(proxy, key, value)

            tool_use = {"type": "tool_use", "id": "t1", "name": "bash", "input": {"command": "ls"}}
            tool_result = {"type": "tool_result", "tool_use_id": "t1", "content": "ok"}
            body = {
                "model": "test",
                "messages": [
                    {"role": "user", "content": "do"},
                    {"role": "assistant", "content": [tool_use]},
                    {"role": "user", "content": [tool_result]},
                ],
                "tools": [
                    {"name": "task", "description": "Task", "input_schema": {"type": "object"}},
                    {"name": "bash", "description": "Bash", "input_schema": {"type": "object"}},
                    {"name": "read", "description": "Read", "input_schema": {"type": "object"}},
                ],
            }
            monitor = proxy.SessionMonitor(context_window=262144)
            monitor.session_banned_tools.add("task")
            monitor.cycling_tool_names = []  # no active cycling

            openai = proxy.build_openai_request(body, monitor)
            offered = [tool["function"]["name"] for tool in openai.get("tools", [])]
            self.assertNotIn("task", offered)
            self.assertIn("bash", offered)
        finally:
            for key, value in saved.items():
                setattr(proxy, key, value)
|
|
4799
|
+
|
|
4800
|
+
|
|
4801
|
+
class TestUpstream503Resilience(unittest.TestCase):
|
|
4802
|
+
"""Tests for Cycle 19: upstream 503 Loading model resilience."""
|
|
4803
|
+
|
|
4804
|
+
def test_is_loading_model_503_detects_loading(self):
|
|
4805
|
+
"""Detects 503 Loading model response."""
|
|
4806
|
+
resp = httpx.Response(
|
|
4807
|
+
503,
|
|
4808
|
+
text='{"error":{"message":"Loading model","type":"unavailable_error","code":503}}',
|
|
4809
|
+
)
|
|
4810
|
+
self.assertTrue(proxy._is_loading_model_503(resp))
|
|
4811
|
+
|
|
4812
|
+
def test_is_loading_model_503_ignores_other_503(self):
|
|
4813
|
+
"""Does not match 503 with different message."""
|
|
4814
|
+
resp = httpx.Response(503, text='{"error":{"message":"Server busy"}}')
|
|
4815
|
+
self.assertFalse(proxy._is_loading_model_503(resp))
|
|
4816
|
+
|
|
4817
|
+
def test_is_loading_model_503_ignores_200(self):
|
|
4818
|
+
"""Does not match 200 even with loading text."""
|
|
4819
|
+
resp = httpx.Response(200, text='{"status":"loading model"}')
|
|
4820
|
+
self.assertFalse(proxy._is_loading_model_503(resp))
|