@miller-tech/uap 1.15.6 → 1.15.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -254,6 +254,28 @@ PROXY_ANALYSIS_ONLY_MIN_TOOLS = int(
|
|
|
254
254
|
PROXY_ANALYSIS_ONLY_MAX_MESSAGES = int(
|
|
255
255
|
os.environ.get("PROXY_ANALYSIS_ONLY_MAX_MESSAGES", "2")
|
|
256
256
|
)
|
|
257
|
+
# Master switch for the tool-call grammar. Enabled unless the environment
# variable is set to an explicit "off" spelling (0/false/off/no), compared
# case-insensitively. Surrounding whitespace is stripped first so a value
# such as "off " is still recognized as disabled instead of silently
# leaving the feature on.
PROXY_TOOL_CALL_GRAMMAR = os.environ.get(
    "PROXY_TOOL_CALL_GRAMMAR", "on"
).strip().lower() not in {
    "0",
    "false",
    "off",
    "no",
}

# When enabled (the default), the grammar is attached only to requests whose
# effective tool_choice is "required". Parsed with the same rules as the
# master switch above.
PROXY_TOOL_CALL_GRAMMAR_REQUIRED_ONLY = os.environ.get(
    "PROXY_TOOL_CALL_GRAMMAR_REQUIRED_ONLY", "on"
).strip().lower() not in {
    "0",
    "false",
    "off",
    "no",
}

# Absolute path of the grammar file. Defaults to ../config/tool-call.gbnf
# relative to this module's directory; abspath() also collapses the "..".
PROXY_TOOL_CALL_GRAMMAR_PATH = os.path.abspath(
    os.environ.get(
        "PROXY_TOOL_CALL_GRAMMAR_PATH",
        os.path.join(os.path.dirname(__file__), "..", "config", "tool-call.gbnf"),
    )
)
|
|
257
279
|
|
|
258
280
|
# ---------------------------------------------------------------------------
|
|
259
281
|
# Logging
|
|
@@ -266,6 +288,45 @@ logging.basicConfig(
|
|
|
266
288
|
logger = logging.getLogger("uap.anthropic_proxy")
|
|
267
289
|
|
|
268
290
|
|
|
291
|
+
def _load_tool_call_grammar(path: str) -> str:
    """Read the GBNF tool-call grammar text from *path*.

    Args:
        path: Location of the grammar file, decoded as UTF-8.

    Returns:
        The file contents with surrounding whitespace stripped, or ``""``
        when the feature is switched off via ``PROXY_TOOL_CALL_GRAMMAR``,
        the file cannot be read or decoded, or the file is blank.

    Failures are logged as warnings rather than raised: this function is
    called at module level, so an exception here would abort the import
    instead of merely disabling the grammar.
    """
    if not PROXY_TOOL_CALL_GRAMMAR:
        return ""

    try:
        with open(path, "r", encoding="utf-8") as fh:
            grammar = fh.read().strip()
    # UnicodeDecodeError derives from ValueError, not OSError, so it must be
    # listed explicitly for a mis-encoded file to degrade gracefully.
    except (OSError, UnicodeDecodeError) as exc:
        logger.warning(
            "Tool-call grammar disabled: failed to read %s (%s)",
            path,
            exc,
        )
        return ""

    if not grammar:
        # A blank file yields a falsy grammar, which callers treat as
        # "not loaded"; say so instead of disabling the feature silently.
        logger.warning("Tool-call grammar disabled: %s is empty", path)
    return grammar
|
|
305
|
+
|
|
306
|
+
|
|
307
|
+
TOOL_CALL_GBNF = _load_tool_call_grammar(PROXY_TOOL_CALL_GRAMMAR_PATH)
|
|
308
|
+
|
|
309
|
+
|
|
310
|
+
def _apply_tool_call_grammar(
    request_body: dict, tool_choice: str | None = None
) -> None:
    """Set or clear the ``grammar`` key of *request_body* in place.

    Any existing ``grammar`` entry is removed first. The loaded grammar is
    then attached only if all of the following hold:

    * the feature is enabled and a non-empty grammar was loaded,
    * the request carries a non-empty ``tools`` entry,
    * the effective tool choice is ``"required"``, or the
      ``PROXY_TOOL_CALL_GRAMMAR_REQUIRED_ONLY`` restriction is off.

    Args:
        request_body: Outgoing request payload; mutated in place.
        tool_choice: Explicit tool choice to evaluate. When ``None`` the
            body's own ``tool_choice`` value is used instead.
    """
    # Drop whatever grammar the body already had (e.g. inherited from a
    # copied request) so the decision below is the only source of truth.
    request_body.pop("grammar", None)

    grammar_ready = PROXY_TOOL_CALL_GRAMMAR and TOOL_CALL_GBNF
    if not grammar_ready or not request_body.get("tools"):
        return

    choice = tool_choice
    if choice is None:
        choice = request_body.get("tool_choice")

    # NOTE(review): confirm the upstream server accepts a custom `grammar`
    # alongside `tools` in the same request.
    if choice == "required" or not PROXY_TOOL_CALL_GRAMMAR_REQUIRED_ONLY:
        request_body["grammar"] = TOOL_CALL_GBNF
|
|
328
|
+
|
|
329
|
+
|
|
269
330
|
# ---------------------------------------------------------------------------
|
|
270
331
|
# Option F: Session-level Context Window Monitor
|
|
271
332
|
# ---------------------------------------------------------------------------
|
|
@@ -876,7 +937,7 @@ async def lifespan(app: FastAPI):
|
|
|
876
937
|
_resolve_prune_target_fraction() * 100,
|
|
877
938
|
)
|
|
878
939
|
logger.info(
|
|
879
|
-
"Guardrails: malformed=%s stream_strict=%s force_non_stream=%s args_preflight=%s tool_narrowing=%s thinking_off_on_tools=%s dampener=%s(%d/%d/%d/%d->%d) contamination_breaker=%s(%d forced=%d required_miss=%d) analysis_only_route=%s(min_tools=%d,max_msgs=%d)",
|
|
940
|
+
"Guardrails: malformed=%s stream_strict=%s force_non_stream=%s args_preflight=%s tool_narrowing=%s thinking_off_on_tools=%s dampener=%s(%d/%d/%d/%d->%d) contamination_breaker=%s(%d forced=%d required_miss=%d) analysis_only_route=%s(min_tools=%d,max_msgs=%d) grammar=%s(required_only=%s loaded=%s path=%s)",
|
|
880
941
|
PROXY_MALFORMED_TOOL_GUARDRAIL,
|
|
881
942
|
PROXY_MALFORMED_TOOL_STREAM_STRICT,
|
|
882
943
|
PROXY_FORCE_NON_STREAM,
|
|
@@ -896,6 +957,10 @@ async def lifespan(app: FastAPI):
|
|
|
896
957
|
PROXY_ANALYSIS_ONLY_ROUTE,
|
|
897
958
|
PROXY_ANALYSIS_ONLY_MIN_TOOLS,
|
|
898
959
|
PROXY_ANALYSIS_ONLY_MAX_MESSAGES,
|
|
960
|
+
PROXY_TOOL_CALL_GRAMMAR,
|
|
961
|
+
PROXY_TOOL_CALL_GRAMMAR_REQUIRED_ONLY,
|
|
962
|
+
bool(TOOL_CALL_GBNF),
|
|
963
|
+
PROXY_TOOL_CALL_GRAMMAR_PATH,
|
|
899
964
|
)
|
|
900
965
|
|
|
901
966
|
yield
|
|
@@ -1445,6 +1510,8 @@ def build_openai_request(anthropic_body: dict, monitor: SessionMonitor) -> dict:
|
|
|
1445
1510
|
"Thinking disabled for tool turn (PROXY_DISABLE_THINKING_ON_TOOL_TURNS=on)"
|
|
1446
1511
|
)
|
|
1447
1512
|
|
|
1513
|
+
_apply_tool_call_grammar(openai_body)
|
|
1514
|
+
|
|
1448
1515
|
return openai_body
|
|
1449
1516
|
|
|
1450
1517
|
|
|
@@ -2485,6 +2552,8 @@ def _build_malformed_retry_body(
|
|
|
2485
2552
|
if PROXY_DISABLE_THINKING_ON_TOOL_TURNS:
|
|
2486
2553
|
retry_body["enable_thinking"] = False
|
|
2487
2554
|
|
|
2555
|
+
_apply_tool_call_grammar(retry_body, tool_choice=tool_choice)
|
|
2556
|
+
|
|
2488
2557
|
if retry_hint:
|
|
2489
2558
|
repair_prompt = (
|
|
2490
2559
|
f"[TOOL CALL REPAIR attempt {attempt}/{total_attempts}]\n"
|
|
@@ -2571,6 +2640,7 @@ async def _apply_unexpected_end_turn_guardrail(
|
|
|
2571
2640
|
retry_body = dict(openai_body)
|
|
2572
2641
|
retry_body["tool_choice"] = "required"
|
|
2573
2642
|
retry_body["stream"] = False
|
|
2643
|
+
_apply_tool_call_grammar(retry_body, tool_choice="required")
|
|
2574
2644
|
|
|
2575
2645
|
retry_resp = await client.post(
|
|
2576
2646
|
f"{LLAMA_CPP_BASE}/chat/completions",
|
|
@@ -3737,6 +3807,12 @@ async def context_status(request: Request):
|
|
|
3737
3807
|
"overflow_count": monitor.overflow_count,
|
|
3738
3808
|
"prune_threshold": PROXY_CONTEXT_PRUNE_THRESHOLD,
|
|
3739
3809
|
"recent_history": monitor.context_history[-10:],
|
|
3810
|
+
"tool_call_grammar": {
|
|
3811
|
+
"enabled": PROXY_TOOL_CALL_GRAMMAR,
|
|
3812
|
+
"required_only": PROXY_TOOL_CALL_GRAMMAR_REQUIRED_ONLY,
|
|
3813
|
+
"path": PROXY_TOOL_CALL_GRAMMAR_PATH,
|
|
3814
|
+
"loaded": bool(TOOL_CALL_GBNF),
|
|
3815
|
+
},
|
|
3740
3816
|
# Loop protection stats
|
|
3741
3817
|
"loop_protection": {
|
|
3742
3818
|
"enabled": PROXY_LOOP_BREAKER,
|
|
@@ -514,6 +514,41 @@ class TestMalformedToolGuardrail(unittest.TestCase):
|
|
|
514
514
|
self.assertEqual(proxy._retry_tool_choice_for_attempt(True, 2, 3), "auto")
|
|
515
515
|
self.assertEqual(proxy._retry_tool_choice_for_attempt(False, 0, 3), "auto")
|
|
516
516
|
|
|
517
|
+
def test_malformed_retry_body_applies_grammar_only_for_required_tool_choice(self):
    """Retry bodies carry the grammar for "required" but not for "auto"."""
    stub_grammar = 'root ::= "<tool_call>"'
    overrides = {
        "PROXY_TOOL_CALL_GRAMMAR": True,
        "PROXY_TOOL_CALL_GRAMMAR_REQUIRED_ONLY": True,
        "TOOL_CALL_GBNF": stub_grammar,
    }
    # Snapshot the module globals before patching so they can be restored.
    saved = {name: getattr(proxy, name) for name in overrides}
    try:
        for name, value in overrides.items():
            setattr(proxy, name, value)

        openai_body = {
            "model": "test",
            "messages": [{"role": "user", "content": "fix"}],
        }
        anthropic_body = {
            "tools": [{"name": "Read", "input_schema": {"type": "object"}}]
        }

        # Build one retry body per tool_choice, "required" first.
        retries = {
            choice: proxy._build_malformed_retry_body(
                openai_body,
                anthropic_body,
                tool_choice=choice,
            )
            for choice in ("required", "auto")
        }

        self.assertEqual(retries["required"].get("grammar"), stub_grammar)
        self.assertNotIn("grammar", retries["auto"])
    finally:
        for name, value in saved.items():
            setattr(proxy, name, value)
|
|
551
|
+
|
|
517
552
|
def test_clean_guardrail_response_does_not_promise_future_tool_call(self):
|
|
518
553
|
guardrail = proxy._build_clean_guardrail_openai_response(
|
|
519
554
|
{"model": "test-model"}
|
|
@@ -1269,6 +1304,81 @@ class TestToolTurnControls(unittest.TestCase):
|
|
|
1269
1304
|
setattr(proxy, "PROXY_FORCED_TOOL_DAMPENER_REJECTIONS", old_rejections)
|
|
1270
1305
|
setattr(proxy, "PROXY_FORCED_TOOL_DAMPENER_AUTO_TURNS", old_auto_turns)
|
|
1271
1306
|
|
|
1307
|
+
def test_build_request_applies_grammar_when_tool_choice_required(self):
    """A request built with tool_choice "required" includes the grammar."""
    stub_grammar = 'root ::= "<tool_call>"'
    overrides = {
        "PROXY_TOOL_CALL_GRAMMAR": True,
        "PROXY_TOOL_CALL_GRAMMAR_REQUIRED_ONLY": True,
        "TOOL_CALL_GBNF": stub_grammar,
    }
    # Snapshot the module globals before patching so they can be restored.
    saved = {name: getattr(proxy, name) for name in overrides}
    try:
        for name, value in overrides.items():
            setattr(proxy, name, value)

        assistant_turn = {
            "role": "assistant",
            "content": [{"type": "text", "text": "I will continue."}],
        }
        read_tool = {
            "name": "Read",
            "description": "Read file",
            "input_schema": {"type": "object"},
        }
        body = {
            "model": "test",
            "messages": [assistant_turn, {"role": "user", "content": "continue"}],
            "tools": [read_tool],
        }

        built = proxy.build_openai_request(
            body, proxy.SessionMonitor(context_window=262144)
        )

        self.assertEqual(built.get("tool_choice"), "required")
        self.assertEqual(built.get("grammar"), stub_grammar)
    finally:
        for name, value in saved.items():
            setattr(proxy, name, value)
|
|
1343
|
+
|
|
1344
|
+
def test_build_request_omits_grammar_when_tool_choice_released_to_auto(self):
    """A request whose tool_choice ends up "auto" carries no grammar."""
    overrides = {
        "PROXY_TOOL_CALL_GRAMMAR": True,
        "PROXY_TOOL_CALL_GRAMMAR_REQUIRED_ONLY": True,
        "TOOL_CALL_GBNF": 'root ::= "<tool_call>"',
    }
    # Snapshot the module globals before patching so they can be restored.
    saved = {name: getattr(proxy, name) for name in overrides}
    try:
        for name, value in overrides.items():
            setattr(proxy, name, value)

        monitor = proxy.SessionMonitor(context_window=262144)
        # Presumably an active cooldown is what makes the builder fall back
        # to "auto"; the first assertion below checks that outcome.
        monitor.forced_auto_cooldown_turns = 1

        assistant_turn = {
            "role": "assistant",
            "content": [{"type": "text", "text": "I will continue."}],
        }
        read_tool = {
            "name": "Read",
            "description": "Read file",
            "input_schema": {"type": "object"},
        }
        body = {
            "model": "test",
            "messages": [assistant_turn, {"role": "user", "content": "continue"}],
            "tools": [read_tool],
        }

        built = proxy.build_openai_request(body, monitor)

        self.assertEqual(built.get("tool_choice"), "auto")
        self.assertNotIn("grammar", built)
    finally:
        for name, value in saved.items():
            setattr(proxy, name, value)
|
|
1381
|
+
|
|
1272
1382
|
def test_no_tools_does_not_inject_agentic_system_message(self):
|
|
1273
1383
|
body = {
|
|
1274
1384
|
"model": "test",
|