@miller-tech/uap 1.15.8 → 1.15.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@miller-tech/uap",
3
- "version": "1.15.8",
3
+ "version": "1.15.9",
4
4
  "description": "Autonomous AI agent memory system with CLAUDE.md protocol enforcement",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -306,6 +306,44 @@ def _load_tool_call_grammar(path: str) -> str:
306
306
 
307
307
 
308
308
  TOOL_CALL_GBNF = _load_tool_call_grammar(PROXY_TOOL_CALL_GRAMMAR_PATH)
309
+ TOOL_CALL_GRAMMAR_TOOLS_COMPATIBLE = True  # Process-wide flag: set to False once upstream rejects grammar combined with tools; _apply_tool_call_grammar then returns early for tool requests.
310
+
311
+
312
+ def _is_grammar_tools_incompatibility(status_code: int, error_text: str) -> bool:
313
+ if status_code != 400:
314
+ return False
315
+ lowered = (error_text or "").lower()
316
+ return "custom grammar constraints" in lowered and "with tools" in lowered
317
+
318
+
319
def _maybe_disable_grammar_for_tools_error(
    request_body: dict,
    status_code: int,
    error_text: str,
    source: str,
) -> bool:
    """Drop the grammar from a rejected tool request so the caller can retry.

    Args:
        request_body: Upstream request payload; mutated in place (its
            "grammar" key is removed) when the rejection matches.
        status_code: HTTP status returned by the upstream.
        error_text: Upstream error body (may be empty).
        source: Label for the calling path, used only in log messages.

    Returns:
        True when the grammar was removed and the request should be resent;
        False when the request had no grammar, had no tools, or the error is
        not a grammar-with-tools incompatibility.
    """
    global TOOL_CALL_GRAMMAR_TOOLS_COMPATIBLE

    # Only requests that carried both a grammar and tools can trigger this
    # rejection; anything else is left untouched.
    should_strip = (
        "grammar" in request_body
        and request_body.get("tools")
        and _is_grammar_tools_incompatibility(status_code, error_text)
    )
    if not should_strip:
        return False

    request_body.pop("grammar", None)

    if not TOOL_CALL_GRAMMAR_TOOLS_COMPATIBLE:
        # The flag was already cleared by an earlier rejection.
        logger.warning(
            "Tool-call grammar already disabled for tool turns; retrying %s without grammar",
            source,
        )
        return True

    # First rejection seen by this process: remember it in the module flag.
    TOOL_CALL_GRAMMAR_TOOLS_COMPATIBLE = False
    logger.warning(
        "Tool-call grammar rejected by upstream for tool turns; "
        "disabling grammar-on-tools for this proxy process (%s)",
        source,
    )
    return True
309
347
 
310
348
 
311
349
  def _apply_tool_call_grammar(
@@ -319,6 +357,9 @@ def _apply_tool_call_grammar(
319
357
  if not request_body.get("tools"):
320
358
  return
321
359
 
360
+ if not TOOL_CALL_GRAMMAR_TOOLS_COMPATIBLE:
361
+ return
362
+
322
363
  effective_tool_choice = (
323
364
  tool_choice if tool_choice is not None else request_body.get("tool_choice")
324
365
  )
@@ -938,7 +979,7 @@ async def lifespan(app: FastAPI):
938
979
  _resolve_prune_target_fraction() * 100,
939
980
  )
940
981
  logger.info(
941
- "Guardrails: malformed=%s stream_strict=%s force_non_stream=%s args_preflight=%s tool_narrowing=%s thinking_off_on_tools=%s dampener=%s(%d/%d/%d/%d->%d) contamination_breaker=%s(%d forced=%d required_miss=%d) analysis_only_route=%s(min_tools=%d,max_msgs=%d) grammar=%s(required_only=%s loaded=%s path=%s)",
982
+ "Guardrails: malformed=%s stream_strict=%s force_non_stream=%s args_preflight=%s tool_narrowing=%s thinking_off_on_tools=%s dampener=%s(%d/%d/%d/%d->%d) contamination_breaker=%s(%d forced=%d required_miss=%d) analysis_only_route=%s(min_tools=%d,max_msgs=%d) grammar=%s(required_only=%s loaded=%s tools_compatible=%s path=%s)",
942
983
  PROXY_MALFORMED_TOOL_GUARDRAIL,
943
984
  PROXY_MALFORMED_TOOL_STREAM_STRICT,
944
985
  PROXY_FORCE_NON_STREAM,
@@ -961,6 +1002,7 @@ async def lifespan(app: FastAPI):
961
1002
  PROXY_TOOL_CALL_GRAMMAR,
962
1003
  PROXY_TOOL_CALL_GRAMMAR_REQUIRED_ONLY,
963
1004
  bool(TOOL_CALL_GBNF),
1005
+ TOOL_CALL_GRAMMAR_TOOLS_COMPATIBLE,
964
1006
  PROXY_TOOL_CALL_GRAMMAR_PATH,
965
1007
  )
966
1008
 
@@ -3481,6 +3523,20 @@ async def messages(request: Request):
3481
3523
  headers={"Content-Type": "application/json"},
3482
3524
  )
3483
3525
 
3526
+ if strict_resp.status_code != 200:
3527
+ error_text = strict_resp.text[:1000]
3528
+ if _maybe_disable_grammar_for_tools_error(
3529
+ strict_body,
3530
+ strict_resp.status_code,
3531
+ error_text,
3532
+ "strict-stream",
3533
+ ):
3534
+ strict_resp = await client.post(
3535
+ f"{LLAMA_CPP_BASE}/chat/completions",
3536
+ json=strict_body,
3537
+ headers={"Content-Type": "application/json"},
3538
+ )
3539
+
3484
3540
  if strict_resp.status_code != 200:
3485
3541
  error_text = strict_resp.text[:1000]
3486
3542
  logger.error(
@@ -3621,6 +3677,35 @@ async def messages(request: Request):
3621
3677
  error_body = await resp.aread()
3622
3678
  await resp.aclose()
3623
3679
  error_text = error_body.decode("utf-8", errors="replace")[:1000]
3680
+ if _maybe_disable_grammar_for_tools_error(
3681
+ openai_body,
3682
+ resp.status_code,
3683
+ error_text,
3684
+ "stream",
3685
+ ):
3686
+ resp = await client.send(
3687
+ client.build_request(
3688
+ "POST",
3689
+ f"{LLAMA_CPP_BASE}/chat/completions",
3690
+ json=openai_body,
3691
+ headers={"Content-Type": "application/json"},
3692
+ ),
3693
+ stream=True,
3694
+ )
3695
+ if resp.status_code == 200:
3696
+ return StreamingResponse(
3697
+ stream_anthropic_response(resp, model, monitor, body),
3698
+ media_type="text/event-stream",
3699
+ headers={
3700
+ "Cache-Control": "no-cache",
3701
+ "Connection": "keep-alive",
3702
+ },
3703
+ )
3704
+
3705
+ error_body = await resp.aread()
3706
+ await resp.aclose()
3707
+ error_text = error_body.decode("utf-8", errors="replace")[:1000]
3708
+
3624
3709
  logger.error("Upstream HTTP %d: %s", resp.status_code, error_text)
3625
3710
 
3626
3711
  # Parse the error for a user-friendly message
@@ -3708,6 +3793,20 @@ async def messages(request: Request):
3708
3793
  headers={"Content-Type": "application/json"},
3709
3794
  )
3710
3795
 
3796
+ if resp.status_code != 200:
3797
+ error_text = resp.text[:1000]
3798
+ if _maybe_disable_grammar_for_tools_error(
3799
+ openai_body,
3800
+ resp.status_code,
3801
+ error_text,
3802
+ "non-stream",
3803
+ ):
3804
+ resp = await client.post(
3805
+ f"{LLAMA_CPP_BASE}/chat/completions",
3806
+ json=openai_body,
3807
+ headers={"Content-Type": "application/json"},
3808
+ )
3809
+
3711
3810
  # Option B: Handle non-streaming errors too
3712
3811
  if resp.status_code != 200:
3713
3812
  error_text = resp.text[:1000]
@@ -3851,6 +3950,7 @@ async def context_status(request: Request):
3851
3950
  "required_only": PROXY_TOOL_CALL_GRAMMAR_REQUIRED_ONLY,
3852
3951
  "path": PROXY_TOOL_CALL_GRAMMAR_PATH,
3853
3952
  "loaded": bool(TOOL_CALL_GBNF),
3953
+ "tools_compatible": TOOL_CALL_GRAMMAR_TOOLS_COMPATIBLE,
3854
3954
  },
3855
3955
  # Loop protection stats
3856
3956
  "loop_protection": {
@@ -518,10 +518,12 @@ class TestMalformedToolGuardrail(unittest.TestCase):
518
518
  old_enabled = getattr(proxy, "PROXY_TOOL_CALL_GRAMMAR")
519
519
  old_required_only = getattr(proxy, "PROXY_TOOL_CALL_GRAMMAR_REQUIRED_ONLY")
520
520
  old_grammar = getattr(proxy, "TOOL_CALL_GBNF")
521
+ old_tools_compatible = getattr(proxy, "TOOL_CALL_GRAMMAR_TOOLS_COMPATIBLE")
521
522
  try:
522
523
  setattr(proxy, "PROXY_TOOL_CALL_GRAMMAR", True)
523
524
  setattr(proxy, "PROXY_TOOL_CALL_GRAMMAR_REQUIRED_ONLY", True)
524
525
  setattr(proxy, "TOOL_CALL_GBNF", 'root ::= "<tool_call>"')
526
+ setattr(proxy, "TOOL_CALL_GRAMMAR_TOOLS_COMPATIBLE", True)
525
527
 
526
528
  openai_body = {
527
529
  "model": "test",
@@ -548,6 +550,56 @@ class TestMalformedToolGuardrail(unittest.TestCase):
548
550
  setattr(proxy, "PROXY_TOOL_CALL_GRAMMAR", old_enabled)
549
551
  setattr(proxy, "PROXY_TOOL_CALL_GRAMMAR_REQUIRED_ONLY", old_required_only)
550
552
  setattr(proxy, "TOOL_CALL_GBNF", old_grammar)
553
+ setattr(proxy, "TOOL_CALL_GRAMMAR_TOOLS_COMPATIBLE", old_tools_compatible)
554
+
555
def test_apply_tool_call_grammar_skips_when_upstream_tools_are_incompatible(self):
    # With grammar enabled and loaded but the tools-compatibility flag off,
    # _apply_tool_call_grammar must leave the request without a grammar.
    overrides = {
        "PROXY_TOOL_CALL_GRAMMAR": True,
        "PROXY_TOOL_CALL_GRAMMAR_REQUIRED_ONLY": True,
        "TOOL_CALL_GBNF": 'root ::= "<tool_call>"',
        "TOOL_CALL_GRAMMAR_TOOLS_COMPATIBLE": False,
    }
    # Snapshot the module settings so they can be restored afterwards.
    saved = {name: getattr(proxy, name) for name in overrides}
    try:
        for name, value in overrides.items():
            setattr(proxy, name, value)

        request = dict(
            tools=[{"type": "function", "function": {"name": "Read"}}],
            tool_choice="required",
        )
        proxy._apply_tool_call_grammar(request)

        self.assertNotIn("grammar", request)
    finally:
        for name, value in saved.items():
            setattr(proxy, name, value)
578
+
579
def test_maybe_disable_grammar_for_tools_error_strips_grammar_and_disables_flag(
    self,
):
    # A 400 "custom grammar constraints with tools" rejection must remove the
    # grammar from the request, clear the module flag, and report a retry.
    flag_name = "TOOL_CALL_GRAMMAR_TOOLS_COMPATIBLE"
    previous_flag = getattr(proxy, flag_name)
    try:
        setattr(proxy, flag_name, True)

        request = dict(
            tools=[{"type": "function", "function": {"name": "Read"}}],
            grammar='root ::= "<tool_call>"',
        )
        upstream_error = (
            '{"error":{"message":"Cannot use custom grammar constraints with tools."}}'
        )

        retried = proxy._maybe_disable_grammar_for_tools_error(
            request, 400, upstream_error, "unit-test"
        )

        self.assertTrue(retried)
        self.assertNotIn("grammar", request)
        self.assertFalse(getattr(proxy, flag_name))
    finally:
        setattr(proxy, flag_name, previous_flag)
551
603
 
552
604
  def test_clean_guardrail_response_does_not_promise_future_tool_call(self):
553
605
  guardrail = proxy._build_clean_guardrail_openai_response(