@miller-tech/uap 1.15.8 → 1.15.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -306,6 +306,44 @@ def _load_tool_call_grammar(path: str) -> str:
|
|
|
306
306
|
|
|
307
307
|
|
|
308
308
|
TOOL_CALL_GBNF = _load_tool_call_grammar(PROXY_TOOL_CALL_GRAMMAR_PATH)
|
|
309
|
+
TOOL_CALL_GRAMMAR_TOOLS_COMPATIBLE = True
|
|
310
|
+
|
|
311
|
+
|
|
312
|
+
def _is_grammar_tools_incompatibility(status_code: int, error_text: str) -> bool:
|
|
313
|
+
if status_code != 400:
|
|
314
|
+
return False
|
|
315
|
+
lowered = (error_text or "").lower()
|
|
316
|
+
return "custom grammar constraints" in lowered and "with tools" in lowered
|
|
317
|
+
|
|
318
|
+
|
|
319
|
+
def _maybe_disable_grammar_for_tools_error(
|
|
320
|
+
request_body: dict,
|
|
321
|
+
status_code: int,
|
|
322
|
+
error_text: str,
|
|
323
|
+
source: str,
|
|
324
|
+
) -> bool:
|
|
325
|
+
global TOOL_CALL_GRAMMAR_TOOLS_COMPATIBLE
|
|
326
|
+
|
|
327
|
+
if "grammar" not in request_body or not request_body.get("tools"):
|
|
328
|
+
return False
|
|
329
|
+
if not _is_grammar_tools_incompatibility(status_code, error_text):
|
|
330
|
+
return False
|
|
331
|
+
|
|
332
|
+
request_body.pop("grammar", None)
|
|
333
|
+
if TOOL_CALL_GRAMMAR_TOOLS_COMPATIBLE:
|
|
334
|
+
TOOL_CALL_GRAMMAR_TOOLS_COMPATIBLE = False
|
|
335
|
+
logger.warning(
|
|
336
|
+
"Tool-call grammar rejected by upstream for tool turns; "
|
|
337
|
+
"disabling grammar-on-tools for this proxy process (%s)",
|
|
338
|
+
source,
|
|
339
|
+
)
|
|
340
|
+
else:
|
|
341
|
+
logger.warning(
|
|
342
|
+
"Tool-call grammar already disabled for tool turns; retrying %s without grammar",
|
|
343
|
+
source,
|
|
344
|
+
)
|
|
345
|
+
|
|
346
|
+
return True
|
|
309
347
|
|
|
310
348
|
|
|
311
349
|
def _apply_tool_call_grammar(
|
|
@@ -319,6 +357,9 @@ def _apply_tool_call_grammar(
|
|
|
319
357
|
if not request_body.get("tools"):
|
|
320
358
|
return
|
|
321
359
|
|
|
360
|
+
if not TOOL_CALL_GRAMMAR_TOOLS_COMPATIBLE:
|
|
361
|
+
return
|
|
362
|
+
|
|
322
363
|
effective_tool_choice = (
|
|
323
364
|
tool_choice if tool_choice is not None else request_body.get("tool_choice")
|
|
324
365
|
)
|
|
@@ -938,7 +979,7 @@ async def lifespan(app: FastAPI):
|
|
|
938
979
|
_resolve_prune_target_fraction() * 100,
|
|
939
980
|
)
|
|
940
981
|
logger.info(
|
|
941
|
-
"Guardrails: malformed=%s stream_strict=%s force_non_stream=%s args_preflight=%s tool_narrowing=%s thinking_off_on_tools=%s dampener=%s(%d/%d/%d/%d->%d) contamination_breaker=%s(%d forced=%d required_miss=%d) analysis_only_route=%s(min_tools=%d,max_msgs=%d) grammar=%s(required_only=%s loaded=%s path=%s)",
|
|
982
|
+
"Guardrails: malformed=%s stream_strict=%s force_non_stream=%s args_preflight=%s tool_narrowing=%s thinking_off_on_tools=%s dampener=%s(%d/%d/%d/%d->%d) contamination_breaker=%s(%d forced=%d required_miss=%d) analysis_only_route=%s(min_tools=%d,max_msgs=%d) grammar=%s(required_only=%s loaded=%s tools_compatible=%s path=%s)",
|
|
942
983
|
PROXY_MALFORMED_TOOL_GUARDRAIL,
|
|
943
984
|
PROXY_MALFORMED_TOOL_STREAM_STRICT,
|
|
944
985
|
PROXY_FORCE_NON_STREAM,
|
|
@@ -961,6 +1002,7 @@ async def lifespan(app: FastAPI):
|
|
|
961
1002
|
PROXY_TOOL_CALL_GRAMMAR,
|
|
962
1003
|
PROXY_TOOL_CALL_GRAMMAR_REQUIRED_ONLY,
|
|
963
1004
|
bool(TOOL_CALL_GBNF),
|
|
1005
|
+
TOOL_CALL_GRAMMAR_TOOLS_COMPATIBLE,
|
|
964
1006
|
PROXY_TOOL_CALL_GRAMMAR_PATH,
|
|
965
1007
|
)
|
|
966
1008
|
|
|
@@ -2437,16 +2479,14 @@ def _classify_tool_response_issue(
|
|
|
2437
2479
|
has_tool_calls = _openai_has_tool_calls(openai_resp)
|
|
2438
2480
|
if not has_tool_calls:
|
|
2439
2481
|
if required_tool_choice:
|
|
2440
|
-
|
|
2441
|
-
|
|
2442
|
-
|
|
2443
|
-
|
|
2444
|
-
|
|
2445
|
-
|
|
2446
|
-
|
|
2447
|
-
|
|
2448
|
-
),
|
|
2449
|
-
)
|
|
2482
|
+
return ToolResponseIssue(
|
|
2483
|
+
kind="required_tool_miss",
|
|
2484
|
+
reason="required tool turn returned no tool calls",
|
|
2485
|
+
retry_hint=(
|
|
2486
|
+
"A tool call is mandatory for this turn. Emit exactly one valid tool call now "
|
|
2487
|
+
"with a strict JSON object in `arguments`."
|
|
2488
|
+
),
|
|
2489
|
+
)
|
|
2450
2490
|
return ToolResponseIssue()
|
|
2451
2491
|
|
|
2452
2492
|
if not PROXY_TOOL_ARGS_PREFLIGHT:
|
|
@@ -2524,6 +2564,49 @@ def _looks_malformed_tool_payload(text: str) -> bool:
|
|
|
2524
2564
|
return True
|
|
2525
2565
|
if lowered.count("</parameter") >= 1 and lowered.count('{"description"') >= 1:
|
|
2526
2566
|
return True
|
|
2567
|
+
if _looks_repetitive_policy_echo(text):
|
|
2568
|
+
return True
|
|
2569
|
+
return False
|
|
2570
|
+
|
|
2571
|
+
|
|
2572
|
+
def _looks_repetitive_policy_echo(text: str) -> bool:
|
|
2573
|
+
if not text:
|
|
2574
|
+
return False
|
|
2575
|
+
|
|
2576
|
+
lowered = text.lower()
|
|
2577
|
+
compact = re.sub(r"\s+", " ", lowered).strip()
|
|
2578
|
+
if not compact:
|
|
2579
|
+
return False
|
|
2580
|
+
|
|
2581
|
+
policy_phrase_markers = (
|
|
2582
|
+
"at least 2 new test cases",
|
|
2583
|
+
"tests must be in test/",
|
|
2584
|
+
"describe/it/expect using vitest",
|
|
2585
|
+
)
|
|
2586
|
+
if any(compact.count(marker) >= 4 for marker in policy_phrase_markers):
|
|
2587
|
+
return True
|
|
2588
|
+
|
|
2589
|
+
lines = [
|
|
2590
|
+
re.sub(r"\s+", " ", line.strip().lower())
|
|
2591
|
+
for line in text.splitlines()
|
|
2592
|
+
if line.strip()
|
|
2593
|
+
]
|
|
2594
|
+
if lines:
|
|
2595
|
+
line_counts: dict[str, int] = {}
|
|
2596
|
+
for line in lines:
|
|
2597
|
+
if len(line) < 24:
|
|
2598
|
+
continue
|
|
2599
|
+
line_counts[line] = line_counts.get(line, 0) + 1
|
|
2600
|
+
if line_counts and max(line_counts.values()) >= 8:
|
|
2601
|
+
return True
|
|
2602
|
+
|
|
2603
|
+
repeated_phrase_match = re.search(
|
|
2604
|
+
r"((?:[a-z0-9_./-]+\s+){2,8}[a-z0-9_./-]+)(?:\s+\1){7,}",
|
|
2605
|
+
compact,
|
|
2606
|
+
)
|
|
2607
|
+
if repeated_phrase_match:
|
|
2608
|
+
return True
|
|
2609
|
+
|
|
2527
2610
|
return False
|
|
2528
2611
|
|
|
2529
2612
|
|
|
@@ -3481,6 +3564,20 @@ async def messages(request: Request):
|
|
|
3481
3564
|
headers={"Content-Type": "application/json"},
|
|
3482
3565
|
)
|
|
3483
3566
|
|
|
3567
|
+
if strict_resp.status_code != 200:
|
|
3568
|
+
error_text = strict_resp.text[:1000]
|
|
3569
|
+
if _maybe_disable_grammar_for_tools_error(
|
|
3570
|
+
strict_body,
|
|
3571
|
+
strict_resp.status_code,
|
|
3572
|
+
error_text,
|
|
3573
|
+
"strict-stream",
|
|
3574
|
+
):
|
|
3575
|
+
strict_resp = await client.post(
|
|
3576
|
+
f"{LLAMA_CPP_BASE}/chat/completions",
|
|
3577
|
+
json=strict_body,
|
|
3578
|
+
headers={"Content-Type": "application/json"},
|
|
3579
|
+
)
|
|
3580
|
+
|
|
3484
3581
|
if strict_resp.status_code != 200:
|
|
3485
3582
|
error_text = strict_resp.text[:1000]
|
|
3486
3583
|
logger.error(
|
|
@@ -3621,6 +3718,35 @@ async def messages(request: Request):
|
|
|
3621
3718
|
error_body = await resp.aread()
|
|
3622
3719
|
await resp.aclose()
|
|
3623
3720
|
error_text = error_body.decode("utf-8", errors="replace")[:1000]
|
|
3721
|
+
if _maybe_disable_grammar_for_tools_error(
|
|
3722
|
+
openai_body,
|
|
3723
|
+
resp.status_code,
|
|
3724
|
+
error_text,
|
|
3725
|
+
"stream",
|
|
3726
|
+
):
|
|
3727
|
+
resp = await client.send(
|
|
3728
|
+
client.build_request(
|
|
3729
|
+
"POST",
|
|
3730
|
+
f"{LLAMA_CPP_BASE}/chat/completions",
|
|
3731
|
+
json=openai_body,
|
|
3732
|
+
headers={"Content-Type": "application/json"},
|
|
3733
|
+
),
|
|
3734
|
+
stream=True,
|
|
3735
|
+
)
|
|
3736
|
+
if resp.status_code == 200:
|
|
3737
|
+
return StreamingResponse(
|
|
3738
|
+
stream_anthropic_response(resp, model, monitor, body),
|
|
3739
|
+
media_type="text/event-stream",
|
|
3740
|
+
headers={
|
|
3741
|
+
"Cache-Control": "no-cache",
|
|
3742
|
+
"Connection": "keep-alive",
|
|
3743
|
+
},
|
|
3744
|
+
)
|
|
3745
|
+
|
|
3746
|
+
error_body = await resp.aread()
|
|
3747
|
+
await resp.aclose()
|
|
3748
|
+
error_text = error_body.decode("utf-8", errors="replace")[:1000]
|
|
3749
|
+
|
|
3624
3750
|
logger.error("Upstream HTTP %d: %s", resp.status_code, error_text)
|
|
3625
3751
|
|
|
3626
3752
|
# Parse the error for a user-friendly message
|
|
@@ -3708,6 +3834,20 @@ async def messages(request: Request):
|
|
|
3708
3834
|
headers={"Content-Type": "application/json"},
|
|
3709
3835
|
)
|
|
3710
3836
|
|
|
3837
|
+
if resp.status_code != 200:
|
|
3838
|
+
error_text = resp.text[:1000]
|
|
3839
|
+
if _maybe_disable_grammar_for_tools_error(
|
|
3840
|
+
openai_body,
|
|
3841
|
+
resp.status_code,
|
|
3842
|
+
error_text,
|
|
3843
|
+
"non-stream",
|
|
3844
|
+
):
|
|
3845
|
+
resp = await client.post(
|
|
3846
|
+
f"{LLAMA_CPP_BASE}/chat/completions",
|
|
3847
|
+
json=openai_body,
|
|
3848
|
+
headers={"Content-Type": "application/json"},
|
|
3849
|
+
)
|
|
3850
|
+
|
|
3711
3851
|
# Option B: Handle non-streaming errors too
|
|
3712
3852
|
if resp.status_code != 200:
|
|
3713
3853
|
error_text = resp.text[:1000]
|
|
@@ -3851,6 +3991,7 @@ async def context_status(request: Request):
|
|
|
3851
3991
|
"required_only": PROXY_TOOL_CALL_GRAMMAR_REQUIRED_ONLY,
|
|
3852
3992
|
"path": PROXY_TOOL_CALL_GRAMMAR_PATH,
|
|
3853
3993
|
"loaded": bool(TOOL_CALL_GBNF),
|
|
3994
|
+
"tools_compatible": TOOL_CALL_GRAMMAR_TOOLS_COMPATIBLE,
|
|
3854
3995
|
},
|
|
3855
3996
|
# Loop protection stats
|
|
3856
3997
|
"loop_protection": {
|
|
@@ -518,10 +518,12 @@ class TestMalformedToolGuardrail(unittest.TestCase):
|
|
|
518
518
|
old_enabled = getattr(proxy, "PROXY_TOOL_CALL_GRAMMAR")
|
|
519
519
|
old_required_only = getattr(proxy, "PROXY_TOOL_CALL_GRAMMAR_REQUIRED_ONLY")
|
|
520
520
|
old_grammar = getattr(proxy, "TOOL_CALL_GBNF")
|
|
521
|
+
old_tools_compatible = getattr(proxy, "TOOL_CALL_GRAMMAR_TOOLS_COMPATIBLE")
|
|
521
522
|
try:
|
|
522
523
|
setattr(proxy, "PROXY_TOOL_CALL_GRAMMAR", True)
|
|
523
524
|
setattr(proxy, "PROXY_TOOL_CALL_GRAMMAR_REQUIRED_ONLY", True)
|
|
524
525
|
setattr(proxy, "TOOL_CALL_GBNF", 'root ::= "<tool_call>"')
|
|
526
|
+
setattr(proxy, "TOOL_CALL_GRAMMAR_TOOLS_COMPATIBLE", True)
|
|
525
527
|
|
|
526
528
|
openai_body = {
|
|
527
529
|
"model": "test",
|
|
@@ -548,6 +550,56 @@ class TestMalformedToolGuardrail(unittest.TestCase):
|
|
|
548
550
|
setattr(proxy, "PROXY_TOOL_CALL_GRAMMAR", old_enabled)
|
|
549
551
|
setattr(proxy, "PROXY_TOOL_CALL_GRAMMAR_REQUIRED_ONLY", old_required_only)
|
|
550
552
|
setattr(proxy, "TOOL_CALL_GBNF", old_grammar)
|
|
553
|
+
setattr(proxy, "TOOL_CALL_GRAMMAR_TOOLS_COMPATIBLE", old_tools_compatible)
|
|
554
|
+
|
|
555
|
+
def test_apply_tool_call_grammar_skips_when_upstream_tools_are_incompatible(self):
    """No grammar is attached once the tools-compatible flag is False."""
    overrides = {
        "PROXY_TOOL_CALL_GRAMMAR": True,
        "PROXY_TOOL_CALL_GRAMMAR_REQUIRED_ONLY": True,
        "TOOL_CALL_GBNF": 'root ::= "<tool_call>"',
        "TOOL_CALL_GRAMMAR_TOOLS_COMPATIBLE": False,
    }
    # Snapshot the module settings so they can be restored afterwards.
    saved = {name: getattr(proxy, name) for name in overrides}
    try:
        for name, value in overrides.items():
            setattr(proxy, name, value)

        request = {
            "tools": [{"type": "function", "function": {"name": "Read"}}],
            "tool_choice": "required",
        }
        proxy._apply_tool_call_grammar(request)

        self.assertNotIn("grammar", request)
    finally:
        for name, value in saved.items():
            setattr(proxy, name, value)
|
|
578
|
+
|
|
579
|
+
def test_maybe_disable_grammar_for_tools_error_strips_grammar_and_disables_flag(
    self,
):
    """A matching upstream 400 removes the grammar and clears the flag."""
    flag_name = "TOOL_CALL_GRAMMAR_TOOLS_COMPATIBLE"
    saved_flag = getattr(proxy, flag_name)
    try:
        setattr(proxy, flag_name, True)

        request = {
            "tools": [{"type": "function", "function": {"name": "Read"}}],
            "grammar": 'root ::= "<tool_call>"',
        }
        upstream_error = (
            '{"error":{"message":"Cannot use custom grammar constraints with tools."}}'
        )

        retried = proxy._maybe_disable_grammar_for_tools_error(
            request, 400, upstream_error, "unit-test"
        )

        self.assertTrue(retried)
        self.assertNotIn("grammar", request)
        self.assertFalse(getattr(proxy, flag_name))
    finally:
        # Restore the flag so this test leaves the module as it found it.
        setattr(proxy, flag_name, saved_flag)
|
|
551
603
|
|
|
552
604
|
def test_clean_guardrail_response_does_not_promise_future_tool_call(self):
|
|
553
605
|
guardrail = proxy._build_clean_guardrail_openai_response(
|
|
@@ -753,6 +805,54 @@ class TestMalformedToolGuardrail(unittest.TestCase):
|
|
|
753
805
|
)
|
|
754
806
|
self.assertEqual(issue.kind, "required_tool_miss")
|
|
755
807
|
|
|
808
|
+
def test_required_tool_turn_with_long_text_without_tool_call_is_flagged(self):
    """A required-tool turn answered with plain text is a required_tool_miss."""
    prose_only_message = {
        "content": (
            "I reviewed the repository and here is a long explanation that still "
            "does not include any valid tool call payload for this required turn."
        ),
        "tool_calls": [],
    }
    openai_resp = {
        "choices": [{"finish_reason": "stop", "message": prose_only_message}]
    }
    anthropic_body = {
        "tools": [{"name": "Edit", "input_schema": {"type": "object"}}],
    }

    issue = proxy._classify_tool_response_issue(
        openai_resp, anthropic_body, required_tool_choice=True
    )

    self.assertEqual(issue.kind, "required_tool_miss")
|
|
831
|
+
|
|
832
|
+
def test_preflight_flags_repetitive_policy_echo_without_tool_call(self):
    """Text that echoes a policy phrase many times is a malformed_payload."""
    echoed_tail = " (describe/it/expect using vitest" * 24
    content = (
        "- At least 2 new test cases before claiming done. "
        "- Tests must be in test/ following existing patterns."
    ) + echoed_tail
    echo_message = {"content": content, "tool_calls": []}
    openai_resp = {
        "choices": [{"finish_reason": "stop", "message": echo_message}]
    }
    anthropic_body = {
        "tools": [{"name": "Read", "input_schema": {"type": "object"}}],
    }

    issue = proxy._classify_tool_response_issue(openai_resp, anthropic_body)

    self.assertEqual(issue.kind, "malformed_payload")
|
|
855
|
+
|
|
756
856
|
def test_markup_repair_sanitizes_tool_arguments(self):
|
|
757
857
|
openai_resp = {
|
|
758
858
|
"choices": [
|