@miller-tech/uap 1.15.8 → 1.15.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -306,6 +306,44 @@ def _load_tool_call_grammar(path: str) -> str:
|
|
|
306
306
|
|
|
307
307
|
|
|
308
308
|
TOOL_CALL_GBNF = _load_tool_call_grammar(PROXY_TOOL_CALL_GRAMMAR_PATH)
|
|
309
|
+
# Module-level flag, initially True. _maybe_disable_grammar_for_tools_error sets it
# to False once the upstream rejects a request that combines a grammar with tools;
# while it is False, _apply_tool_call_grammar returns early for tool requests.
TOOL_CALL_GRAMMAR_TOOLS_COMPATIBLE = True
|
|
310
|
+
|
|
311
|
+
|
|
312
|
+
def _is_grammar_tools_incompatibility(status_code: int, error_text: str) -> bool:
|
|
313
|
+
if status_code != 400:
|
|
314
|
+
return False
|
|
315
|
+
lowered = (error_text or "").lower()
|
|
316
|
+
return "custom grammar constraints" in lowered and "with tools" in lowered
|
|
317
|
+
|
|
318
|
+
|
|
319
|
+
def _maybe_disable_grammar_for_tools_error(
|
|
320
|
+
request_body: dict,
|
|
321
|
+
status_code: int,
|
|
322
|
+
error_text: str,
|
|
323
|
+
source: str,
|
|
324
|
+
) -> bool:
|
|
325
|
+
global TOOL_CALL_GRAMMAR_TOOLS_COMPATIBLE
|
|
326
|
+
|
|
327
|
+
if "grammar" not in request_body or not request_body.get("tools"):
|
|
328
|
+
return False
|
|
329
|
+
if not _is_grammar_tools_incompatibility(status_code, error_text):
|
|
330
|
+
return False
|
|
331
|
+
|
|
332
|
+
request_body.pop("grammar", None)
|
|
333
|
+
if TOOL_CALL_GRAMMAR_TOOLS_COMPATIBLE:
|
|
334
|
+
TOOL_CALL_GRAMMAR_TOOLS_COMPATIBLE = False
|
|
335
|
+
logger.warning(
|
|
336
|
+
"Tool-call grammar rejected by upstream for tool turns; "
|
|
337
|
+
"disabling grammar-on-tools for this proxy process (%s)",
|
|
338
|
+
source,
|
|
339
|
+
)
|
|
340
|
+
else:
|
|
341
|
+
logger.warning(
|
|
342
|
+
"Tool-call grammar already disabled for tool turns; retrying %s without grammar",
|
|
343
|
+
source,
|
|
344
|
+
)
|
|
345
|
+
|
|
346
|
+
return True
|
|
309
347
|
|
|
310
348
|
|
|
311
349
|
def _apply_tool_call_grammar(
|
|
@@ -319,6 +357,9 @@ def _apply_tool_call_grammar(
|
|
|
319
357
|
if not request_body.get("tools"):
|
|
320
358
|
return
|
|
321
359
|
|
|
360
|
+
if not TOOL_CALL_GRAMMAR_TOOLS_COMPATIBLE:
|
|
361
|
+
return
|
|
362
|
+
|
|
322
363
|
effective_tool_choice = (
|
|
323
364
|
tool_choice if tool_choice is not None else request_body.get("tool_choice")
|
|
324
365
|
)
|
|
@@ -938,7 +979,7 @@ async def lifespan(app: FastAPI):
|
|
|
938
979
|
_resolve_prune_target_fraction() * 100,
|
|
939
980
|
)
|
|
940
981
|
logger.info(
|
|
941
|
-
"Guardrails: malformed=%s stream_strict=%s force_non_stream=%s args_preflight=%s tool_narrowing=%s thinking_off_on_tools=%s dampener=%s(%d/%d/%d/%d->%d) contamination_breaker=%s(%d forced=%d required_miss=%d) analysis_only_route=%s(min_tools=%d,max_msgs=%d) grammar=%s(required_only=%s loaded=%s path=%s)",
|
|
982
|
+
"Guardrails: malformed=%s stream_strict=%s force_non_stream=%s args_preflight=%s tool_narrowing=%s thinking_off_on_tools=%s dampener=%s(%d/%d/%d/%d->%d) contamination_breaker=%s(%d forced=%d required_miss=%d) analysis_only_route=%s(min_tools=%d,max_msgs=%d) grammar=%s(required_only=%s loaded=%s tools_compatible=%s path=%s)",
|
|
942
983
|
PROXY_MALFORMED_TOOL_GUARDRAIL,
|
|
943
984
|
PROXY_MALFORMED_TOOL_STREAM_STRICT,
|
|
944
985
|
PROXY_FORCE_NON_STREAM,
|
|
@@ -961,6 +1002,7 @@ async def lifespan(app: FastAPI):
|
|
|
961
1002
|
PROXY_TOOL_CALL_GRAMMAR,
|
|
962
1003
|
PROXY_TOOL_CALL_GRAMMAR_REQUIRED_ONLY,
|
|
963
1004
|
bool(TOOL_CALL_GBNF),
|
|
1005
|
+
TOOL_CALL_GRAMMAR_TOOLS_COMPATIBLE,
|
|
964
1006
|
PROXY_TOOL_CALL_GRAMMAR_PATH,
|
|
965
1007
|
)
|
|
966
1008
|
|
|
@@ -3481,6 +3523,20 @@ async def messages(request: Request):
|
|
|
3481
3523
|
headers={"Content-Type": "application/json"},
|
|
3482
3524
|
)
|
|
3483
3525
|
|
|
3526
|
+
if strict_resp.status_code != 200:
|
|
3527
|
+
error_text = strict_resp.text[:1000]
|
|
3528
|
+
if _maybe_disable_grammar_for_tools_error(
|
|
3529
|
+
strict_body,
|
|
3530
|
+
strict_resp.status_code,
|
|
3531
|
+
error_text,
|
|
3532
|
+
"strict-stream",
|
|
3533
|
+
):
|
|
3534
|
+
strict_resp = await client.post(
|
|
3535
|
+
f"{LLAMA_CPP_BASE}/chat/completions",
|
|
3536
|
+
json=strict_body,
|
|
3537
|
+
headers={"Content-Type": "application/json"},
|
|
3538
|
+
)
|
|
3539
|
+
|
|
3484
3540
|
if strict_resp.status_code != 200:
|
|
3485
3541
|
error_text = strict_resp.text[:1000]
|
|
3486
3542
|
logger.error(
|
|
@@ -3621,6 +3677,35 @@ async def messages(request: Request):
|
|
|
3621
3677
|
error_body = await resp.aread()
|
|
3622
3678
|
await resp.aclose()
|
|
3623
3679
|
error_text = error_body.decode("utf-8", errors="replace")[:1000]
|
|
3680
|
+
if _maybe_disable_grammar_for_tools_error(
|
|
3681
|
+
openai_body,
|
|
3682
|
+
resp.status_code,
|
|
3683
|
+
error_text,
|
|
3684
|
+
"stream",
|
|
3685
|
+
):
|
|
3686
|
+
resp = await client.send(
|
|
3687
|
+
client.build_request(
|
|
3688
|
+
"POST",
|
|
3689
|
+
f"{LLAMA_CPP_BASE}/chat/completions",
|
|
3690
|
+
json=openai_body,
|
|
3691
|
+
headers={"Content-Type": "application/json"},
|
|
3692
|
+
),
|
|
3693
|
+
stream=True,
|
|
3694
|
+
)
|
|
3695
|
+
if resp.status_code == 200:
|
|
3696
|
+
return StreamingResponse(
|
|
3697
|
+
stream_anthropic_response(resp, model, monitor, body),
|
|
3698
|
+
media_type="text/event-stream",
|
|
3699
|
+
headers={
|
|
3700
|
+
"Cache-Control": "no-cache",
|
|
3701
|
+
"Connection": "keep-alive",
|
|
3702
|
+
},
|
|
3703
|
+
)
|
|
3704
|
+
|
|
3705
|
+
error_body = await resp.aread()
|
|
3706
|
+
await resp.aclose()
|
|
3707
|
+
error_text = error_body.decode("utf-8", errors="replace")[:1000]
|
|
3708
|
+
|
|
3624
3709
|
logger.error("Upstream HTTP %d: %s", resp.status_code, error_text)
|
|
3625
3710
|
|
|
3626
3711
|
# Parse the error for a user-friendly message
|
|
@@ -3708,6 +3793,20 @@ async def messages(request: Request):
|
|
|
3708
3793
|
headers={"Content-Type": "application/json"},
|
|
3709
3794
|
)
|
|
3710
3795
|
|
|
3796
|
+
if resp.status_code != 200:
|
|
3797
|
+
error_text = resp.text[:1000]
|
|
3798
|
+
if _maybe_disable_grammar_for_tools_error(
|
|
3799
|
+
openai_body,
|
|
3800
|
+
resp.status_code,
|
|
3801
|
+
error_text,
|
|
3802
|
+
"non-stream",
|
|
3803
|
+
):
|
|
3804
|
+
resp = await client.post(
|
|
3805
|
+
f"{LLAMA_CPP_BASE}/chat/completions",
|
|
3806
|
+
json=openai_body,
|
|
3807
|
+
headers={"Content-Type": "application/json"},
|
|
3808
|
+
)
|
|
3809
|
+
|
|
3711
3810
|
# Option B: Handle non-streaming errors too
|
|
3712
3811
|
if resp.status_code != 200:
|
|
3713
3812
|
error_text = resp.text[:1000]
|
|
@@ -3851,6 +3950,7 @@ async def context_status(request: Request):
|
|
|
3851
3950
|
"required_only": PROXY_TOOL_CALL_GRAMMAR_REQUIRED_ONLY,
|
|
3852
3951
|
"path": PROXY_TOOL_CALL_GRAMMAR_PATH,
|
|
3853
3952
|
"loaded": bool(TOOL_CALL_GBNF),
|
|
3953
|
+
"tools_compatible": TOOL_CALL_GRAMMAR_TOOLS_COMPATIBLE,
|
|
3854
3954
|
},
|
|
3855
3955
|
# Loop protection stats
|
|
3856
3956
|
"loop_protection": {
|
|
@@ -518,10 +518,12 @@ class TestMalformedToolGuardrail(unittest.TestCase):
|
|
|
518
518
|
old_enabled = getattr(proxy, "PROXY_TOOL_CALL_GRAMMAR")
|
|
519
519
|
old_required_only = getattr(proxy, "PROXY_TOOL_CALL_GRAMMAR_REQUIRED_ONLY")
|
|
520
520
|
old_grammar = getattr(proxy, "TOOL_CALL_GBNF")
|
|
521
|
+
old_tools_compatible = getattr(proxy, "TOOL_CALL_GRAMMAR_TOOLS_COMPATIBLE")
|
|
521
522
|
try:
|
|
522
523
|
setattr(proxy, "PROXY_TOOL_CALL_GRAMMAR", True)
|
|
523
524
|
setattr(proxy, "PROXY_TOOL_CALL_GRAMMAR_REQUIRED_ONLY", True)
|
|
524
525
|
setattr(proxy, "TOOL_CALL_GBNF", 'root ::= "<tool_call>"')
|
|
526
|
+
setattr(proxy, "TOOL_CALL_GRAMMAR_TOOLS_COMPATIBLE", True)
|
|
525
527
|
|
|
526
528
|
openai_body = {
|
|
527
529
|
"model": "test",
|
|
@@ -548,6 +550,56 @@ class TestMalformedToolGuardrail(unittest.TestCase):
|
|
|
548
550
|
setattr(proxy, "PROXY_TOOL_CALL_GRAMMAR", old_enabled)
|
|
549
551
|
setattr(proxy, "PROXY_TOOL_CALL_GRAMMAR_REQUIRED_ONLY", old_required_only)
|
|
550
552
|
setattr(proxy, "TOOL_CALL_GBNF", old_grammar)
|
|
553
|
+
setattr(proxy, "TOOL_CALL_GRAMMAR_TOOLS_COMPATIBLE", old_tools_compatible)
|
|
554
|
+
|
|
555
|
+
def test_apply_tool_call_grammar_skips_when_upstream_tools_are_incompatible(self):
    """No grammar is attached while TOOL_CALL_GRAMMAR_TOOLS_COMPATIBLE is False."""
    # Save the module-level settings so the finally block can restore them.
    old_enabled = getattr(proxy, "PROXY_TOOL_CALL_GRAMMAR")
    old_required_only = getattr(proxy, "PROXY_TOOL_CALL_GRAMMAR_REQUIRED_ONLY")
    old_grammar = getattr(proxy, "TOOL_CALL_GBNF")
    old_tools_compatible = getattr(proxy, "TOOL_CALL_GRAMMAR_TOOLS_COMPATIBLE")
    try:
        setattr(proxy, "PROXY_TOOL_CALL_GRAMMAR", True)
        setattr(proxy, "PROXY_TOOL_CALL_GRAMMAR_REQUIRED_ONLY", True)
        setattr(proxy, "TOOL_CALL_GBNF", 'root ::= "<tool_call>"')
        setattr(proxy, "TOOL_CALL_GRAMMAR_TOOLS_COMPATIBLE", False)

        request = {
            "tools": [{"type": "function", "function": {"name": "Read"}}],
            "tool_choice": "required",
        }
        proxy._apply_tool_call_grammar(request)

        self.assertNotIn("grammar", request)
    finally:
        setattr(proxy, "PROXY_TOOL_CALL_GRAMMAR", old_enabled)
        setattr(proxy, "PROXY_TOOL_CALL_GRAMMAR_REQUIRED_ONLY", old_required_only)
        setattr(proxy, "TOOL_CALL_GBNF", old_grammar)
        setattr(proxy, "TOOL_CALL_GRAMMAR_TOOLS_COMPATIBLE", old_tools_compatible)
|
|
578
|
+
|
|
579
|
+
def test_maybe_disable_grammar_for_tools_error_strips_grammar_and_disables_flag(
    self,
):
    """A grammar+tools 400 removes the grammar, returns True, and clears the flag."""
    # Save the module-level flag so the finally block can restore it.
    old_tools_compatible = getattr(proxy, "TOOL_CALL_GRAMMAR_TOOLS_COMPATIBLE")
    try:
        setattr(proxy, "TOOL_CALL_GRAMMAR_TOOLS_COMPATIBLE", True)

        request = {
            "tools": [{"type": "function", "function": {"name": "Read"}}],
            "grammar": 'root ::= "<tool_call>"',
        }

        retried = proxy._maybe_disable_grammar_for_tools_error(
            request,
            400,
            '{"error":{"message":"Cannot use custom grammar constraints with tools."}}',
            "unit-test",
        )

        self.assertTrue(retried)
        self.assertNotIn("grammar", request)
        self.assertFalse(getattr(proxy, "TOOL_CALL_GRAMMAR_TOOLS_COMPATIBLE"))
    finally:
        setattr(proxy, "TOOL_CALL_GRAMMAR_TOOLS_COMPATIBLE", old_tools_compatible)
|
|
551
603
|
|
|
552
604
|
def test_clean_guardrail_response_does_not_promise_future_tool_call(self):
|
|
553
605
|
guardrail = proxy._build_clean_guardrail_openai_response(
|