@miller-tech/uap 1.15.8 → 1.15.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@miller-tech/uap",
3
- "version": "1.15.8",
3
+ "version": "1.15.10",
4
4
  "description": "Autonomous AI agent memory system with CLAUDE.md protocol enforcement",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -306,6 +306,44 @@ def _load_tool_call_grammar(path: str) -> str:
306
306
 
307
307
 
308
308
  TOOL_CALL_GBNF = _load_tool_call_grammar(PROXY_TOOL_CALL_GRAMMAR_PATH)
309
+ TOOL_CALL_GRAMMAR_TOOLS_COMPATIBLE = True
310
+
311
+
312
+ def _is_grammar_tools_incompatibility(status_code: int, error_text: str) -> bool:
313
+ if status_code != 400:
314
+ return False
315
+ lowered = (error_text or "").lower()
316
+ return "custom grammar constraints" in lowered and "with tools" in lowered
317
+
318
+
319
+ def _maybe_disable_grammar_for_tools_error(
320
+ request_body: dict,
321
+ status_code: int,
322
+ error_text: str,
323
+ source: str,
324
+ ) -> bool:
325
+ global TOOL_CALL_GRAMMAR_TOOLS_COMPATIBLE
326
+
327
+ if "grammar" not in request_body or not request_body.get("tools"):
328
+ return False
329
+ if not _is_grammar_tools_incompatibility(status_code, error_text):
330
+ return False
331
+
332
+ request_body.pop("grammar", None)
333
+ if TOOL_CALL_GRAMMAR_TOOLS_COMPATIBLE:
334
+ TOOL_CALL_GRAMMAR_TOOLS_COMPATIBLE = False
335
+ logger.warning(
336
+ "Tool-call grammar rejected by upstream for tool turns; "
337
+ "disabling grammar-on-tools for this proxy process (%s)",
338
+ source,
339
+ )
340
+ else:
341
+ logger.warning(
342
+ "Tool-call grammar already disabled for tool turns; retrying %s without grammar",
343
+ source,
344
+ )
345
+
346
+ return True
309
347
 
310
348
 
311
349
  def _apply_tool_call_grammar(
@@ -319,6 +357,9 @@ def _apply_tool_call_grammar(
319
357
  if not request_body.get("tools"):
320
358
  return
321
359
 
360
+ if not TOOL_CALL_GRAMMAR_TOOLS_COMPATIBLE:
361
+ return
362
+
322
363
  effective_tool_choice = (
323
364
  tool_choice if tool_choice is not None else request_body.get("tool_choice")
324
365
  )
@@ -938,7 +979,7 @@ async def lifespan(app: FastAPI):
938
979
  _resolve_prune_target_fraction() * 100,
939
980
  )
940
981
  logger.info(
941
- "Guardrails: malformed=%s stream_strict=%s force_non_stream=%s args_preflight=%s tool_narrowing=%s thinking_off_on_tools=%s dampener=%s(%d/%d/%d/%d->%d) contamination_breaker=%s(%d forced=%d required_miss=%d) analysis_only_route=%s(min_tools=%d,max_msgs=%d) grammar=%s(required_only=%s loaded=%s path=%s)",
982
+ "Guardrails: malformed=%s stream_strict=%s force_non_stream=%s args_preflight=%s tool_narrowing=%s thinking_off_on_tools=%s dampener=%s(%d/%d/%d/%d->%d) contamination_breaker=%s(%d forced=%d required_miss=%d) analysis_only_route=%s(min_tools=%d,max_msgs=%d) grammar=%s(required_only=%s loaded=%s tools_compatible=%s path=%s)",
942
983
  PROXY_MALFORMED_TOOL_GUARDRAIL,
943
984
  PROXY_MALFORMED_TOOL_STREAM_STRICT,
944
985
  PROXY_FORCE_NON_STREAM,
@@ -961,6 +1002,7 @@ async def lifespan(app: FastAPI):
961
1002
  PROXY_TOOL_CALL_GRAMMAR,
962
1003
  PROXY_TOOL_CALL_GRAMMAR_REQUIRED_ONLY,
963
1004
  bool(TOOL_CALL_GBNF),
1005
+ TOOL_CALL_GRAMMAR_TOOLS_COMPATIBLE,
964
1006
  PROXY_TOOL_CALL_GRAMMAR_PATH,
965
1007
  )
966
1008
 
@@ -2437,16 +2479,14 @@ def _classify_tool_response_issue(
2437
2479
  has_tool_calls = _openai_has_tool_calls(openai_resp)
2438
2480
  if not has_tool_calls:
2439
2481
  if required_tool_choice:
2440
- text = _openai_message_text(openai_resp).strip()
2441
- if not text or len(text) <= 48:
2442
- return ToolResponseIssue(
2443
- kind="required_tool_miss",
2444
- reason="required tool turn returned no tool calls",
2445
- retry_hint=(
2446
- "A tool call is mandatory for this turn. Emit exactly one valid tool call now "
2447
- "with a strict JSON object in `arguments`."
2448
- ),
2449
- )
2482
+ return ToolResponseIssue(
2483
+ kind="required_tool_miss",
2484
+ reason="required tool turn returned no tool calls",
2485
+ retry_hint=(
2486
+ "A tool call is mandatory for this turn. Emit exactly one valid tool call now "
2487
+ "with a strict JSON object in `arguments`."
2488
+ ),
2489
+ )
2450
2490
  return ToolResponseIssue()
2451
2491
 
2452
2492
  if not PROXY_TOOL_ARGS_PREFLIGHT:
@@ -2524,6 +2564,49 @@ def _looks_malformed_tool_payload(text: str) -> bool:
2524
2564
  return True
2525
2565
  if lowered.count("</parameter") >= 1 and lowered.count('{"description"') >= 1:
2526
2566
  return True
2567
+ if _looks_repetitive_policy_echo(text):
2568
+ return True
2569
+ return False
2570
+
2571
+
2572
+ def _looks_repetitive_policy_echo(text: str) -> bool:
2573
+ if not text:
2574
+ return False
2575
+
2576
+ lowered = text.lower()
2577
+ compact = re.sub(r"\s+", " ", lowered).strip()
2578
+ if not compact:
2579
+ return False
2580
+
2581
+ policy_phrase_markers = (
2582
+ "at least 2 new test cases",
2583
+ "tests must be in test/",
2584
+ "describe/it/expect using vitest",
2585
+ )
2586
+ if any(compact.count(marker) >= 4 for marker in policy_phrase_markers):
2587
+ return True
2588
+
2589
+ lines = [
2590
+ re.sub(r"\s+", " ", line.strip().lower())
2591
+ for line in text.splitlines()
2592
+ if line.strip()
2593
+ ]
2594
+ if lines:
2595
+ line_counts: dict[str, int] = {}
2596
+ for line in lines:
2597
+ if len(line) < 24:
2598
+ continue
2599
+ line_counts[line] = line_counts.get(line, 0) + 1
2600
+ if line_counts and max(line_counts.values()) >= 8:
2601
+ return True
2602
+
2603
+ repeated_phrase_match = re.search(
2604
+ r"((?:[a-z0-9_./-]+\s+){2,8}[a-z0-9_./-]+)(?:\s+\1){7,}",
2605
+ compact,
2606
+ )
2607
+ if repeated_phrase_match:
2608
+ return True
2609
+
2527
2610
  return False
2528
2611
 
2529
2612
 
@@ -3481,6 +3564,20 @@ async def messages(request: Request):
3481
3564
  headers={"Content-Type": "application/json"},
3482
3565
  )
3483
3566
 
3567
+ if strict_resp.status_code != 200:
3568
+ error_text = strict_resp.text[:1000]
3569
+ if _maybe_disable_grammar_for_tools_error(
3570
+ strict_body,
3571
+ strict_resp.status_code,
3572
+ error_text,
3573
+ "strict-stream",
3574
+ ):
3575
+ strict_resp = await client.post(
3576
+ f"{LLAMA_CPP_BASE}/chat/completions",
3577
+ json=strict_body,
3578
+ headers={"Content-Type": "application/json"},
3579
+ )
3580
+
3484
3581
  if strict_resp.status_code != 200:
3485
3582
  error_text = strict_resp.text[:1000]
3486
3583
  logger.error(
@@ -3621,6 +3718,35 @@ async def messages(request: Request):
3621
3718
  error_body = await resp.aread()
3622
3719
  await resp.aclose()
3623
3720
  error_text = error_body.decode("utf-8", errors="replace")[:1000]
3721
+ if _maybe_disable_grammar_for_tools_error(
3722
+ openai_body,
3723
+ resp.status_code,
3724
+ error_text,
3725
+ "stream",
3726
+ ):
3727
+ resp = await client.send(
3728
+ client.build_request(
3729
+ "POST",
3730
+ f"{LLAMA_CPP_BASE}/chat/completions",
3731
+ json=openai_body,
3732
+ headers={"Content-Type": "application/json"},
3733
+ ),
3734
+ stream=True,
3735
+ )
3736
+ if resp.status_code == 200:
3737
+ return StreamingResponse(
3738
+ stream_anthropic_response(resp, model, monitor, body),
3739
+ media_type="text/event-stream",
3740
+ headers={
3741
+ "Cache-Control": "no-cache",
3742
+ "Connection": "keep-alive",
3743
+ },
3744
+ )
3745
+
3746
+ error_body = await resp.aread()
3747
+ await resp.aclose()
3748
+ error_text = error_body.decode("utf-8", errors="replace")[:1000]
3749
+
3624
3750
  logger.error("Upstream HTTP %d: %s", resp.status_code, error_text)
3625
3751
 
3626
3752
  # Parse the error for a user-friendly message
@@ -3708,6 +3834,20 @@ async def messages(request: Request):
3708
3834
  headers={"Content-Type": "application/json"},
3709
3835
  )
3710
3836
 
3837
+ if resp.status_code != 200:
3838
+ error_text = resp.text[:1000]
3839
+ if _maybe_disable_grammar_for_tools_error(
3840
+ openai_body,
3841
+ resp.status_code,
3842
+ error_text,
3843
+ "non-stream",
3844
+ ):
3845
+ resp = await client.post(
3846
+ f"{LLAMA_CPP_BASE}/chat/completions",
3847
+ json=openai_body,
3848
+ headers={"Content-Type": "application/json"},
3849
+ )
3850
+
3711
3851
  # Option B: Handle non-streaming errors too
3712
3852
  if resp.status_code != 200:
3713
3853
  error_text = resp.text[:1000]
@@ -3851,6 +3991,7 @@ async def context_status(request: Request):
3851
3991
  "required_only": PROXY_TOOL_CALL_GRAMMAR_REQUIRED_ONLY,
3852
3992
  "path": PROXY_TOOL_CALL_GRAMMAR_PATH,
3853
3993
  "loaded": bool(TOOL_CALL_GBNF),
3994
+ "tools_compatible": TOOL_CALL_GRAMMAR_TOOLS_COMPATIBLE,
3854
3995
  },
3855
3996
  # Loop protection stats
3856
3997
  "loop_protection": {
@@ -518,10 +518,12 @@ class TestMalformedToolGuardrail(unittest.TestCase):
518
518
  old_enabled = getattr(proxy, "PROXY_TOOL_CALL_GRAMMAR")
519
519
  old_required_only = getattr(proxy, "PROXY_TOOL_CALL_GRAMMAR_REQUIRED_ONLY")
520
520
  old_grammar = getattr(proxy, "TOOL_CALL_GBNF")
521
+ old_tools_compatible = getattr(proxy, "TOOL_CALL_GRAMMAR_TOOLS_COMPATIBLE")
521
522
  try:
522
523
  setattr(proxy, "PROXY_TOOL_CALL_GRAMMAR", True)
523
524
  setattr(proxy, "PROXY_TOOL_CALL_GRAMMAR_REQUIRED_ONLY", True)
524
525
  setattr(proxy, "TOOL_CALL_GBNF", 'root ::= "<tool_call>"')
526
+ setattr(proxy, "TOOL_CALL_GRAMMAR_TOOLS_COMPATIBLE", True)
525
527
 
526
528
  openai_body = {
527
529
  "model": "test",
@@ -548,6 +550,56 @@ class TestMalformedToolGuardrail(unittest.TestCase):
548
550
  setattr(proxy, "PROXY_TOOL_CALL_GRAMMAR", old_enabled)
549
551
  setattr(proxy, "PROXY_TOOL_CALL_GRAMMAR_REQUIRED_ONLY", old_required_only)
550
552
  setattr(proxy, "TOOL_CALL_GBNF", old_grammar)
553
+ setattr(proxy, "TOOL_CALL_GRAMMAR_TOOLS_COMPATIBLE", old_tools_compatible)
554
+
555
+ def test_apply_tool_call_grammar_skips_when_upstream_tools_are_incompatible(self):
556
+ old_enabled = getattr(proxy, "PROXY_TOOL_CALL_GRAMMAR")
557
+ old_required_only = getattr(proxy, "PROXY_TOOL_CALL_GRAMMAR_REQUIRED_ONLY")
558
+ old_grammar = getattr(proxy, "TOOL_CALL_GBNF")
559
+ old_tools_compatible = getattr(proxy, "TOOL_CALL_GRAMMAR_TOOLS_COMPATIBLE")
560
+ try:
561
+ setattr(proxy, "PROXY_TOOL_CALL_GRAMMAR", True)
562
+ setattr(proxy, "PROXY_TOOL_CALL_GRAMMAR_REQUIRED_ONLY", True)
563
+ setattr(proxy, "TOOL_CALL_GBNF", 'root ::= "<tool_call>"')
564
+ setattr(proxy, "TOOL_CALL_GRAMMAR_TOOLS_COMPATIBLE", False)
565
+
566
+ request = {
567
+ "tools": [{"type": "function", "function": {"name": "Read"}}],
568
+ "tool_choice": "required",
569
+ }
570
+ proxy._apply_tool_call_grammar(request)
571
+
572
+ self.assertNotIn("grammar", request)
573
+ finally:
574
+ setattr(proxy, "PROXY_TOOL_CALL_GRAMMAR", old_enabled)
575
+ setattr(proxy, "PROXY_TOOL_CALL_GRAMMAR_REQUIRED_ONLY", old_required_only)
576
+ setattr(proxy, "TOOL_CALL_GBNF", old_grammar)
577
+ setattr(proxy, "TOOL_CALL_GRAMMAR_TOOLS_COMPATIBLE", old_tools_compatible)
578
+
579
+ def test_maybe_disable_grammar_for_tools_error_strips_grammar_and_disables_flag(
580
+ self,
581
+ ):
582
+ old_tools_compatible = getattr(proxy, "TOOL_CALL_GRAMMAR_TOOLS_COMPATIBLE")
583
+ try:
584
+ setattr(proxy, "TOOL_CALL_GRAMMAR_TOOLS_COMPATIBLE", True)
585
+
586
+ request = {
587
+ "tools": [{"type": "function", "function": {"name": "Read"}}],
588
+ "grammar": 'root ::= "<tool_call>"',
589
+ }
590
+
591
+ retried = proxy._maybe_disable_grammar_for_tools_error(
592
+ request,
593
+ 400,
594
+ '{"error":{"message":"Cannot use custom grammar constraints with tools."}}',
595
+ "unit-test",
596
+ )
597
+
598
+ self.assertTrue(retried)
599
+ self.assertNotIn("grammar", request)
600
+ self.assertFalse(getattr(proxy, "TOOL_CALL_GRAMMAR_TOOLS_COMPATIBLE"))
601
+ finally:
602
+ setattr(proxy, "TOOL_CALL_GRAMMAR_TOOLS_COMPATIBLE", old_tools_compatible)
551
603
 
552
604
  def test_clean_guardrail_response_does_not_promise_future_tool_call(self):
553
605
  guardrail = proxy._build_clean_guardrail_openai_response(
@@ -753,6 +805,54 @@ class TestMalformedToolGuardrail(unittest.TestCase):
753
805
  )
754
806
  self.assertEqual(issue.kind, "required_tool_miss")
755
807
 
808
+ def test_required_tool_turn_with_long_text_without_tool_call_is_flagged(self):
809
+ openai_resp = {
810
+ "choices": [
811
+ {
812
+ "finish_reason": "stop",
813
+ "message": {
814
+ "content": (
815
+ "I reviewed the repository and here is a long explanation that still "
816
+ "does not include any valid tool call payload for this required turn."
817
+ ),
818
+ "tool_calls": [],
819
+ },
820
+ }
821
+ ]
822
+ }
823
+ anthropic_body = {
824
+ "tools": [{"name": "Edit", "input_schema": {"type": "object"}}],
825
+ }
826
+
827
+ issue = proxy._classify_tool_response_issue(
828
+ openai_resp, anthropic_body, required_tool_choice=True
829
+ )
830
+ self.assertEqual(issue.kind, "required_tool_miss")
831
+
832
+ def test_preflight_flags_repetitive_policy_echo_without_tool_call(self):
833
+ repeated = " (describe/it/expect using vitest" * 24
834
+ openai_resp = {
835
+ "choices": [
836
+ {
837
+ "finish_reason": "stop",
838
+ "message": {
839
+ "content": (
840
+ "- At least 2 new test cases before claiming done. "
841
+ "- Tests must be in test/ following existing patterns."
842
+ f"{repeated}"
843
+ ),
844
+ "tool_calls": [],
845
+ },
846
+ }
847
+ ]
848
+ }
849
+ anthropic_body = {
850
+ "tools": [{"name": "Read", "input_schema": {"type": "object"}}],
851
+ }
852
+
853
+ issue = proxy._classify_tool_response_issue(openai_resp, anthropic_body)
854
+ self.assertEqual(issue.kind, "malformed_payload")
855
+
756
856
  def test_markup_repair_sanitizes_tool_arguments(self):
757
857
  openai_resp = {
758
858
  "choices": [