@miller-tech/uap 1.15.6 → 1.15.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -76,6 +76,7 @@ Dependencies
|
|
|
76
76
|
"""
|
|
77
77
|
|
|
78
78
|
import asyncio
|
|
79
|
+
import copy
|
|
79
80
|
import hashlib
|
|
80
81
|
import json
|
|
81
82
|
import logging
|
|
@@ -254,6 +255,28 @@ PROXY_ANALYSIS_ONLY_MIN_TOOLS = int(
|
|
|
254
255
|
PROXY_ANALYSIS_ONLY_MAX_MESSAGES = int(
|
|
255
256
|
os.environ.get("PROXY_ANALYSIS_ONLY_MAX_MESSAGES", "2")
|
|
256
257
|
)
|
|
258
|
+
# Tool-call grammar switches. Both flags default to "on"; a flag counts as
# disabled only when its variable is one of 0/false/off/no (any letter case).
# PROXY_TOOL_CALL_GRAMMAR gates grammar loading and injection altogether.
PROXY_TOOL_CALL_GRAMMAR = (
    os.getenv("PROXY_TOOL_CALL_GRAMMAR", "on").lower()
    not in {"0", "false", "off", "no"}
)
# When enabled, the grammar is attached only to requests whose effective
# tool_choice is "required".
PROXY_TOOL_CALL_GRAMMAR_REQUIRED_ONLY = (
    os.getenv("PROXY_TOOL_CALL_GRAMMAR_REQUIRED_ONLY", "on").lower()
    not in {"0", "false", "off", "no"}
)
# Absolute location of the grammar file. Without an override this resolves to
# ../config/tool-call.gbnf relative to the directory holding this module.
PROXY_TOOL_CALL_GRAMMAR_PATH = os.path.abspath(
    os.getenv(
        "PROXY_TOOL_CALL_GRAMMAR_PATH",
        os.path.join(os.path.dirname(__file__), "..", "config", "tool-call.gbnf"),
    )
)
|
|
257
280
|
|
|
258
281
|
# ---------------------------------------------------------------------------
|
|
259
282
|
# Logging
|
|
@@ -266,6 +289,45 @@ logging.basicConfig(
|
|
|
266
289
|
logger = logging.getLogger("uap.anthropic_proxy")
|
|
267
290
|
|
|
268
291
|
|
|
292
|
+
def _load_tool_call_grammar(path: str) -> str:
    """Read the tool-call grammar text from *path*.

    Args:
        path: Filesystem path of the grammar file, read as UTF-8.

    Returns:
        The file content with leading/trailing whitespace stripped, or ""
        when PROXY_TOOL_CALL_GRAMMAR is off or the file cannot be read or
        decoded. Failures are logged as a warning rather than raised.
    """
    if not PROXY_TOOL_CALL_GRAMMAR:
        return ""

    try:
        with open(path, "r", encoding="utf-8") as fh:
            return fh.read().strip()
    # UnicodeDecodeError derives from ValueError, not OSError. This function
    # runs at import time (see the assignment below), so a mis-encoded grammar
    # file must degrade to "grammar disabled" instead of aborting the import.
    except (OSError, UnicodeDecodeError) as exc:
        logger.warning(
            "Tool-call grammar disabled: failed to read %s (%s)",
            path,
            exc,
        )
        return ""


# Loaded once at import; an empty string means no grammar is available.
TOOL_CALL_GBNF = _load_tool_call_grammar(PROXY_TOOL_CALL_GRAMMAR_PATH)
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
def _apply_tool_call_grammar(
    request_body: dict, tool_choice: str | None = None
) -> None:
    """Set or clear the "grammar" field of *request_body* in place.

    Any existing "grammar" entry is removed first. The loaded grammar is then
    attached only when the feature is enabled, grammar text was loaded, and
    the request carries a non-empty "tools" value. In required-only mode the
    effective tool choice must also equal "required".

    Args:
        request_body: Outgoing request payload; mutated in place.
        tool_choice: Overrides the payload's own "tool_choice" when not None.
    """
    # Drop whatever grammar an earlier pass (or a copied body) left behind.
    request_body.pop("grammar", None)

    grammar_ready = PROXY_TOOL_CALL_GRAMMAR and TOOL_CALL_GBNF
    if not (grammar_ready and request_body.get("tools")):
        return

    if tool_choice is None:
        choice = request_body.get("tool_choice")
    else:
        choice = tool_choice

    if not PROXY_TOOL_CALL_GRAMMAR_REQUIRED_ONLY or choice == "required":
        request_body["grammar"] = TOOL_CALL_GBNF
|
|
329
|
+
|
|
330
|
+
|
|
269
331
|
# ---------------------------------------------------------------------------
|
|
270
332
|
# Option F: Session-level Context Window Monitor
|
|
271
333
|
# ---------------------------------------------------------------------------
|
|
@@ -876,7 +938,7 @@ async def lifespan(app: FastAPI):
|
|
|
876
938
|
_resolve_prune_target_fraction() * 100,
|
|
877
939
|
)
|
|
878
940
|
logger.info(
|
|
879
|
-
"Guardrails: malformed=%s stream_strict=%s force_non_stream=%s args_preflight=%s tool_narrowing=%s thinking_off_on_tools=%s dampener=%s(%d/%d/%d/%d->%d) contamination_breaker=%s(%d forced=%d required_miss=%d) analysis_only_route=%s(min_tools=%d,max_msgs=%d)",
|
|
941
|
+
"Guardrails: malformed=%s stream_strict=%s force_non_stream=%s args_preflight=%s tool_narrowing=%s thinking_off_on_tools=%s dampener=%s(%d/%d/%d/%d->%d) contamination_breaker=%s(%d forced=%d required_miss=%d) analysis_only_route=%s(min_tools=%d,max_msgs=%d) grammar=%s(required_only=%s loaded=%s path=%s)",
|
|
880
942
|
PROXY_MALFORMED_TOOL_GUARDRAIL,
|
|
881
943
|
PROXY_MALFORMED_TOOL_STREAM_STRICT,
|
|
882
944
|
PROXY_FORCE_NON_STREAM,
|
|
@@ -896,6 +958,10 @@ async def lifespan(app: FastAPI):
|
|
|
896
958
|
PROXY_ANALYSIS_ONLY_ROUTE,
|
|
897
959
|
PROXY_ANALYSIS_ONLY_MIN_TOOLS,
|
|
898
960
|
PROXY_ANALYSIS_ONLY_MAX_MESSAGES,
|
|
961
|
+
PROXY_TOOL_CALL_GRAMMAR,
|
|
962
|
+
PROXY_TOOL_CALL_GRAMMAR_REQUIRED_ONLY,
|
|
963
|
+
bool(TOOL_CALL_GBNF),
|
|
964
|
+
PROXY_TOOL_CALL_GRAMMAR_PATH,
|
|
899
965
|
)
|
|
900
966
|
|
|
901
967
|
yield
|
|
@@ -1445,6 +1511,8 @@ def build_openai_request(anthropic_body: dict, monitor: SessionMonitor) -> dict:
|
|
|
1445
1511
|
"Thinking disabled for tool turn (PROXY_DISABLE_THINKING_ON_TOOL_TURNS=on)"
|
|
1446
1512
|
)
|
|
1447
1513
|
|
|
1514
|
+
_apply_tool_call_grammar(openai_body)
|
|
1515
|
+
|
|
1448
1516
|
return openai_body
|
|
1449
1517
|
|
|
1450
1518
|
|
|
@@ -1848,6 +1916,20 @@ def _sanitize_markup_value(value):
|
|
|
1848
1916
|
return value, False
|
|
1849
1917
|
|
|
1850
1918
|
|
|
1919
|
+
_REQUIRED_PLACEHOLDER = "__uap_required__"
|
|
1920
|
+
_MISSING_REQUIRED_VALUE = object()
|
|
1921
|
+
|
|
1922
|
+
|
|
1923
|
+
def _contains_required_placeholder(value) -> bool:
|
|
1924
|
+
if isinstance(value, str):
|
|
1925
|
+
return value.strip() == _REQUIRED_PLACEHOLDER
|
|
1926
|
+
if isinstance(value, list):
|
|
1927
|
+
return any(_contains_required_placeholder(item) for item in value)
|
|
1928
|
+
if isinstance(value, dict):
|
|
1929
|
+
return any(_contains_required_placeholder(item) for item in value.values())
|
|
1930
|
+
return False
|
|
1931
|
+
|
|
1932
|
+
|
|
1851
1933
|
def _repair_tool_call_markup(openai_resp: dict) -> tuple[dict, int]:
|
|
1852
1934
|
if not _openai_has_tool_calls(openai_resp):
|
|
1853
1935
|
return openai_resp, 0
|
|
@@ -1919,33 +2001,30 @@ def _repair_tool_call_markup(openai_resp: dict) -> tuple[dict, int]:
|
|
|
1919
2001
|
|
|
1920
2002
|
|
|
1921
2003
|
def _default_required_value(field_name: str, field_schema: dict):
    """Derive a fallback for a missing required field from its schema only.

    Candidates are tried in this order: the schema's "default", the first
    "enum" member that is neither empty nor a placeholder, then the schema's
    "const". Returned values are deep copies, so the schema is never shared
    with the caller.

    Args:
        field_name: Accepted for signature compatibility; not consulted.
        field_schema: Property schema of the field.

    Returns:
        The fallback value, or the _MISSING_REQUIRED_VALUE sentinel when the
        schema is not a dict or offers no usable candidate.
    """
    del field_name  # intentionally unused

    if not isinstance(field_schema, dict):
        return _MISSING_REQUIRED_VALUE

    # 1) Explicit schema default.
    if "default" in field_schema:
        declared_default = copy.deepcopy(field_schema["default"])
        if not _contains_required_placeholder(declared_default):
            return declared_default

    # 2) First usable enum member.
    choices = field_schema.get("enum")
    if isinstance(choices, list):
        for option in choices:
            if not (
                _required_value_is_empty(option)
                or _contains_required_placeholder(option)
            ):
                return copy.deepcopy(option)

    # 3) Fixed const value.
    if "const" in field_schema:
        fixed_value = copy.deepcopy(field_schema["const"])
        if not _contains_required_placeholder(fixed_value):
            return fixed_value

    return _MISSING_REQUIRED_VALUE
|
|
1949
2028
|
|
|
1950
2029
|
|
|
1951
2030
|
def _repair_required_tool_args(
|
|
@@ -2008,7 +2087,10 @@ def _repair_required_tool_args(
|
|
|
2008
2087
|
if isinstance(properties.get(field), dict)
|
|
2009
2088
|
else {}
|
|
2010
2089
|
)
|
|
2011
|
-
|
|
2090
|
+
fallback_value = _default_required_value(field, field_schema)
|
|
2091
|
+
if fallback_value is _MISSING_REQUIRED_VALUE:
|
|
2092
|
+
continue
|
|
2093
|
+
parsed_args[field] = fallback_value
|
|
2012
2094
|
changed = True
|
|
2013
2095
|
|
|
2014
2096
|
if not changed:
|
|
@@ -2231,6 +2313,18 @@ def _validate_tool_call_arguments(
|
|
|
2231
2313
|
),
|
|
2232
2314
|
)
|
|
2233
2315
|
|
|
2316
|
+
if _contains_required_placeholder(parsed):
|
|
2317
|
+
return ToolResponseIssue(
|
|
2318
|
+
kind="invalid_tool_args",
|
|
2319
|
+
reason=(
|
|
2320
|
+
f"arguments for '{tool_name}' contain unresolved placeholder values"
|
|
2321
|
+
),
|
|
2322
|
+
retry_hint=(
|
|
2323
|
+
f"Emit exactly one `{tool_name}` tool call with real schema-valid arguments. "
|
|
2324
|
+
f"Never emit `{_REQUIRED_PLACEHOLDER}` placeholders."
|
|
2325
|
+
),
|
|
2326
|
+
)
|
|
2327
|
+
|
|
2234
2328
|
if not isinstance(tool_schema, dict):
|
|
2235
2329
|
tool_schema = {}
|
|
2236
2330
|
|
|
@@ -2245,6 +2339,7 @@ def _validate_tool_call_arguments(
|
|
|
2245
2339
|
missing: list[str] = []
|
|
2246
2340
|
empty: list[str] = []
|
|
2247
2341
|
wrong_type: list[str] = []
|
|
2342
|
+
enum_mismatch: list[str] = []
|
|
2248
2343
|
|
|
2249
2344
|
for field in required:
|
|
2250
2345
|
if not isinstance(field, str):
|
|
@@ -2267,6 +2362,15 @@ def _validate_tool_call_arguments(
|
|
|
2267
2362
|
wrong_type.append(field)
|
|
2268
2363
|
continue
|
|
2269
2364
|
|
|
2365
|
+
enum_values = schema.get("enum")
|
|
2366
|
+
if isinstance(enum_values, list) and enum_values and value not in enum_values:
|
|
2367
|
+
enum_mismatch.append(field)
|
|
2368
|
+
continue
|
|
2369
|
+
|
|
2370
|
+
if "const" in schema and value != schema.get("const"):
|
|
2371
|
+
enum_mismatch.append(field)
|
|
2372
|
+
continue
|
|
2373
|
+
|
|
2270
2374
|
min_length = schema.get("minLength")
|
|
2271
2375
|
if (
|
|
2272
2376
|
isinstance(min_length, int)
|
|
@@ -2284,7 +2388,7 @@ def _validate_tool_call_arguments(
|
|
|
2284
2388
|
):
|
|
2285
2389
|
empty.append(field)
|
|
2286
2390
|
|
|
2287
|
-
if missing or empty or wrong_type:
|
|
2391
|
+
if missing or empty or wrong_type or enum_mismatch:
|
|
2288
2392
|
details = []
|
|
2289
2393
|
if missing:
|
|
2290
2394
|
details.append(f"missing: {', '.join(missing)}")
|
|
@@ -2292,6 +2396,8 @@ def _validate_tool_call_arguments(
|
|
|
2292
2396
|
details.append(f"empty: {', '.join(empty)}")
|
|
2293
2397
|
if wrong_type:
|
|
2294
2398
|
details.append(f"type mismatch: {', '.join(wrong_type)}")
|
|
2399
|
+
if enum_mismatch:
|
|
2400
|
+
details.append(f"enum mismatch: {', '.join(enum_mismatch)}")
|
|
2295
2401
|
required_fields = ", ".join(str(f) for f in required if isinstance(f, str))
|
|
2296
2402
|
required_hint = (
|
|
2297
2403
|
f"Required fields must be non-empty: {required_fields}. "
|
|
@@ -2485,6 +2591,8 @@ def _build_malformed_retry_body(
|
|
|
2485
2591
|
if PROXY_DISABLE_THINKING_ON_TOOL_TURNS:
|
|
2486
2592
|
retry_body["enable_thinking"] = False
|
|
2487
2593
|
|
|
2594
|
+
_apply_tool_call_grammar(retry_body, tool_choice=tool_choice)
|
|
2595
|
+
|
|
2488
2596
|
if retry_hint:
|
|
2489
2597
|
repair_prompt = (
|
|
2490
2598
|
f"[TOOL CALL REPAIR attempt {attempt}/{total_attempts}]\n"
|
|
@@ -2571,6 +2679,7 @@ async def _apply_unexpected_end_turn_guardrail(
|
|
|
2571
2679
|
retry_body = dict(openai_body)
|
|
2572
2680
|
retry_body["tool_choice"] = "required"
|
|
2573
2681
|
retry_body["stream"] = False
|
|
2682
|
+
_apply_tool_call_grammar(retry_body, tool_choice="required")
|
|
2574
2683
|
|
|
2575
2684
|
retry_resp = await client.post(
|
|
2576
2685
|
f"{LLAMA_CPP_BASE}/chat/completions",
|
|
@@ -3737,6 +3846,12 @@ async def context_status(request: Request):
|
|
|
3737
3846
|
"overflow_count": monitor.overflow_count,
|
|
3738
3847
|
"prune_threshold": PROXY_CONTEXT_PRUNE_THRESHOLD,
|
|
3739
3848
|
"recent_history": monitor.context_history[-10:],
|
|
3849
|
+
"tool_call_grammar": {
|
|
3850
|
+
"enabled": PROXY_TOOL_CALL_GRAMMAR,
|
|
3851
|
+
"required_only": PROXY_TOOL_CALL_GRAMMAR_REQUIRED_ONLY,
|
|
3852
|
+
"path": PROXY_TOOL_CALL_GRAMMAR_PATH,
|
|
3853
|
+
"loaded": bool(TOOL_CALL_GBNF),
|
|
3854
|
+
},
|
|
3740
3855
|
# Loop protection stats
|
|
3741
3856
|
"loop_protection": {
|
|
3742
3857
|
"enabled": PROXY_LOOP_BREAKER,
|
|
@@ -514,6 +514,41 @@ class TestMalformedToolGuardrail(unittest.TestCase):
|
|
|
514
514
|
self.assertEqual(proxy._retry_tool_choice_for_attempt(True, 2, 3), "auto")
|
|
515
515
|
self.assertEqual(proxy._retry_tool_choice_for_attempt(False, 0, 3), "auto")
|
|
516
516
|
|
|
517
|
+
def test_malformed_retry_body_applies_grammar_only_for_required_tool_choice(self):
    """Retry bodies carry the grammar for "required" but not for "auto"."""
    grammar_text = 'root ::= "<tool_call>"'
    overrides = {
        "PROXY_TOOL_CALL_GRAMMAR": True,
        "PROXY_TOOL_CALL_GRAMMAR_REQUIRED_ONLY": True,
        "TOOL_CALL_GBNF": grammar_text,
    }
    # Remember the module-level settings so they can be put back afterwards.
    saved = {name: getattr(proxy, name) for name in overrides}
    try:
        for name, value in overrides.items():
            setattr(proxy, name, value)

        openai_body = {
            "model": "test",
            "messages": [{"role": "user", "content": "fix"}],
        }
        anthropic_body = {
            "tools": [{"name": "Read", "input_schema": {"type": "object"}}]
        }

        retries = {
            choice: proxy._build_malformed_retry_body(
                openai_body,
                anthropic_body,
                tool_choice=choice,
            )
            for choice in ("required", "auto")
        }

        self.assertEqual(retries["required"].get("grammar"), grammar_text)
        self.assertNotIn("grammar", retries["auto"])
    finally:
        for name, value in saved.items():
            setattr(proxy, name, value)
|
|
551
|
+
|
|
517
552
|
def test_clean_guardrail_response_does_not_promise_future_tool_call(self):
|
|
518
553
|
guardrail = proxy._build_clean_guardrail_openai_response(
|
|
519
554
|
{"model": "test-model"}
|
|
@@ -608,7 +643,11 @@ class TestMalformedToolGuardrail(unittest.TestCase):
|
|
|
608
643
|
"type": "object",
|
|
609
644
|
"required": ["cron", "command"],
|
|
610
645
|
"properties": {
|
|
611
|
-
"cron": {
|
|
646
|
+
"cron": {
|
|
647
|
+
"type": "string",
|
|
648
|
+
"minLength": 1,
|
|
649
|
+
"default": "* * * * *",
|
|
650
|
+
},
|
|
612
651
|
"command": {"type": "string", "minLength": 1},
|
|
613
652
|
},
|
|
614
653
|
},
|
|
@@ -921,9 +960,21 @@ class TestMalformedToolGuardrail(unittest.TestCase):
|
|
|
921
960
|
"type": "object",
|
|
922
961
|
"required": ["cron", "pattern", "subject"],
|
|
923
962
|
"properties": {
|
|
924
|
-
"cron": {
|
|
925
|
-
|
|
926
|
-
|
|
963
|
+
"cron": {
|
|
964
|
+
"type": "string",
|
|
965
|
+
"minLength": 1,
|
|
966
|
+
"default": "* * * * *",
|
|
967
|
+
},
|
|
968
|
+
"pattern": {
|
|
969
|
+
"type": "string",
|
|
970
|
+
"minLength": 1,
|
|
971
|
+
"default": "*",
|
|
972
|
+
},
|
|
973
|
+
"subject": {
|
|
974
|
+
"type": "string",
|
|
975
|
+
"minLength": 1,
|
|
976
|
+
"default": "task",
|
|
977
|
+
},
|
|
927
978
|
},
|
|
928
979
|
},
|
|
929
980
|
}
|
|
@@ -973,9 +1024,21 @@ class TestMalformedToolGuardrail(unittest.TestCase):
|
|
|
973
1024
|
"type": "object",
|
|
974
1025
|
"required": ["cron", "pattern", "subject"],
|
|
975
1026
|
"properties": {
|
|
976
|
-
"cron": {
|
|
977
|
-
|
|
978
|
-
|
|
1027
|
+
"cron": {
|
|
1028
|
+
"type": "string",
|
|
1029
|
+
"minLength": 1,
|
|
1030
|
+
"default": "* * * * *",
|
|
1031
|
+
},
|
|
1032
|
+
"pattern": {
|
|
1033
|
+
"type": "string",
|
|
1034
|
+
"minLength": 1,
|
|
1035
|
+
"default": "*",
|
|
1036
|
+
},
|
|
1037
|
+
"subject": {
|
|
1038
|
+
"type": "string",
|
|
1039
|
+
"minLength": 1,
|
|
1040
|
+
"default": "task",
|
|
1041
|
+
},
|
|
979
1042
|
},
|
|
980
1043
|
},
|
|
981
1044
|
}
|
|
@@ -1099,10 +1162,7 @@ class TestMalformedToolGuardrail(unittest.TestCase):
|
|
|
1099
1162
|
)
|
|
1100
1163
|
self.assertTrue(args["cron"].strip())
|
|
1101
1164
|
self.assertTrue(args["command"].strip())
|
|
1102
|
-
self.
|
|
1103
|
-
monitor.arg_preflight_repairs >= 1
|
|
1104
|
-
or monitor.arg_preflight_rejections >= 1
|
|
1105
|
-
)
|
|
1165
|
+
self.assertGreaterEqual(len(fake_client.requests), 1)
|
|
1106
1166
|
if fake_client.requests:
|
|
1107
1167
|
retry_payload = fake_client.requests[0]["kwargs"]["json"]
|
|
1108
1168
|
repair_message = retry_payload["messages"][-1]["content"]
|
|
@@ -1269,6 +1329,81 @@ class TestToolTurnControls(unittest.TestCase):
|
|
|
1269
1329
|
setattr(proxy, "PROXY_FORCED_TOOL_DAMPENER_REJECTIONS", old_rejections)
|
|
1270
1330
|
setattr(proxy, "PROXY_FORCED_TOOL_DAMPENER_AUTO_TURNS", old_auto_turns)
|
|
1271
1331
|
|
|
1332
|
+
def test_build_request_applies_grammar_when_tool_choice_required(self):
    """A built request whose tool_choice is "required" includes the grammar."""
    grammar_text = 'root ::= "<tool_call>"'
    overrides = {
        "PROXY_TOOL_CALL_GRAMMAR": True,
        "PROXY_TOOL_CALL_GRAMMAR_REQUIRED_ONLY": True,
        "TOOL_CALL_GBNF": grammar_text,
    }
    # Remember the module-level settings so they can be put back afterwards.
    saved = {name: getattr(proxy, name) for name in overrides}
    try:
        for name, value in overrides.items():
            setattr(proxy, name, value)

        anthropic_request = {
            "model": "test",
            "messages": [
                {
                    "role": "assistant",
                    "content": [{"type": "text", "text": "I will continue."}],
                },
                {"role": "user", "content": "continue"},
            ],
            "tools": [
                {
                    "name": "Read",
                    "description": "Read file",
                    "input_schema": {"type": "object"},
                }
            ],
        }

        session = proxy.SessionMonitor(context_window=262144)
        converted = proxy.build_openai_request(anthropic_request, session)

        self.assertEqual(converted.get("tool_choice"), "required")
        self.assertEqual(converted.get("grammar"), grammar_text)
    finally:
        for name, value in saved.items():
            setattr(proxy, name, value)
|
|
1368
|
+
|
|
1369
|
+
def test_build_request_omits_grammar_when_tool_choice_released_to_auto(self):
    """With one forced-auto cooldown turn pending, no grammar is attached."""
    grammar_text = 'root ::= "<tool_call>"'
    overrides = {
        "PROXY_TOOL_CALL_GRAMMAR": True,
        "PROXY_TOOL_CALL_GRAMMAR_REQUIRED_ONLY": True,
        "TOOL_CALL_GBNF": grammar_text,
    }
    # Remember the module-level settings so they can be put back afterwards.
    saved = {name: getattr(proxy, name) for name in overrides}
    try:
        for name, value in overrides.items():
            setattr(proxy, name, value)

        session = proxy.SessionMonitor(context_window=262144)
        session.forced_auto_cooldown_turns = 1

        anthropic_request = {
            "model": "test",
            "messages": [
                {
                    "role": "assistant",
                    "content": [{"type": "text", "text": "I will continue."}],
                },
                {"role": "user", "content": "continue"},
            ],
            "tools": [
                {
                    "name": "Read",
                    "description": "Read file",
                    "input_schema": {"type": "object"},
                }
            ],
        }

        converted = proxy.build_openai_request(anthropic_request, session)

        self.assertEqual(converted.get("tool_choice"), "auto")
        self.assertNotIn("grammar", converted)
    finally:
        for name, value in saved.items():
            setattr(proxy, name, value)
|
|
1406
|
+
|
|
1272
1407
|
def test_no_tools_does_not_inject_agentic_system_message(self):
|
|
1273
1408
|
body = {
|
|
1274
1409
|
"model": "test",
|
|
@@ -1378,6 +1513,139 @@ class TestToolTurnControls(unittest.TestCase):
|
|
|
1378
1513
|
setattr(proxy, "PROXY_ANALYSIS_ONLY_MAX_MESSAGES", old_max_messages)
|
|
1379
1514
|
|
|
1380
1515
|
|
|
1516
|
+
class TestRequiredArgRepair(unittest.TestCase):
    """Checks required-argument repair and validation for an `omp_task` tool."""

    @staticmethod
    def _response_without_agent() -> dict:
        """Build a fresh response whose single tool call omits `agent`."""
        return {
            "choices": [
                {
                    "message": {
                        "tool_calls": [
                            {
                                "id": "call_1",
                                "function": {
                                    "name": "omp_task",
                                    "arguments": '{"prompt":"analyze"}',
                                },
                            }
                        ]
                    }
                }
            ]
        }

    @staticmethod
    def _task_schema(agent_schema: dict) -> dict:
        """Build the tool input schema with the given `agent` property schema."""
        return {
            "type": "object",
            "required": ["agent", "prompt"],
            "properties": {
                "agent": agent_schema,
                "prompt": {"type": "string"},
            },
        }

    @classmethod
    def _request_with_tool(cls, agent_schema: dict) -> dict:
        """Build a request body declaring `omp_task` with that schema."""
        return {
            "tools": [
                {
                    "name": "omp_task",
                    "input_schema": cls._task_schema(agent_schema),
                }
            ]
        }

    @staticmethod
    def _first_call_args(response: dict) -> dict:
        """Decode the JSON arguments of the first tool call in *response*."""
        call = response["choices"][0]["message"]["tool_calls"][0]
        return json.loads(call["function"]["arguments"])

    def test_repair_required_args_uses_schema_enum_value(self):
        openai_resp = self._response_without_agent()
        anthropic_body = self._request_with_tool(
            {"type": "string", "enum": ["task", "explore", "plan"]}
        )

        repaired, repaired_count = proxy._repair_required_tool_args(
            openai_resp, anthropic_body
        )

        self.assertEqual(repaired_count, 1)
        self.assertEqual(self._first_call_args(repaired)["agent"], "task")

    def test_repair_required_args_does_not_inject_placeholder_without_schema_defaults(
        self,
    ):
        openai_resp = self._response_without_agent()
        anthropic_body = self._request_with_tool({"type": "string"})

        repaired, repaired_count = proxy._repair_required_tool_args(
            openai_resp, anthropic_body
        )

        self.assertEqual(repaired_count, 0)
        self.assertNotIn("agent", self._first_call_args(repaired))

    def test_validate_tool_args_rejects_placeholder_values(self):
        issue = proxy._validate_tool_call_arguments(
            "omp_task",
            '{"agent":"__uap_required__","prompt":"analyze"}',
            self._task_schema({"type": "string", "enum": ["task", "explore"]}),
            {"omp_task"},
        )

        self.assertTrue(issue.has_issue())
        self.assertEqual(issue.kind, "invalid_tool_args")
        self.assertIn("placeholder", issue.reason)

    def test_validate_tool_args_rejects_enum_mismatch(self):
        issue = proxy._validate_tool_call_arguments(
            "omp_task",
            '{"agent":"planner","prompt":"analyze"}',
            self._task_schema({"type": "string", "enum": ["task", "explore"]}),
            {"omp_task"},
        )

        self.assertTrue(issue.has_issue())
        self.assertEqual(issue.kind, "invalid_tool_args")
        self.assertIn("enum mismatch", issue.reason)
|
|
1647
|
+
|
|
1648
|
+
|
|
1381
1649
|
class TestSessionContaminationBreaker(unittest.TestCase):
|
|
1382
1650
|
def test_contamination_breaker_trims_and_resets_streak(self):
|
|
1383
1651
|
old_enabled = getattr(proxy, "PROXY_SESSION_CONTAMINATION_BREAKER")
|