@miller-tech/uap 1.20.6 → 1.20.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@miller-tech/uap",
3
- "version": "1.20.6",
3
+ "version": "1.20.8",
4
4
  "description": "Autonomous AI agent memory system with CLAUDE.md protocol enforcement",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -227,6 +227,9 @@ PROXY_MALFORMED_TOOL_RETRY_MAX_TOKENS = int(
227
227
  PROXY_MALFORMED_TOOL_RETRY_TEMPERATURE = float(
228
228
  os.environ.get("PROXY_MALFORMED_TOOL_RETRY_TEMPERATURE", "0")
229
229
  )
230
+ PROXY_TOOL_TURN_TEMPERATURE = float(
231
+ os.environ.get("PROXY_TOOL_TURN_TEMPERATURE", "0.3")
232
+ )
230
233
  PROXY_MALFORMED_TOOL_STREAM_STRICT = os.environ.get(
231
234
  "PROXY_MALFORMED_TOOL_STREAM_STRICT", "off"
232
235
  ).lower() not in {
@@ -1623,8 +1626,14 @@ _AGENTIC_SYSTEM_SUPPLEMENT_CLEAN = (
1623
1626
  "</agentic-protocol>"
1624
1627
  )
1625
1628
 
1629
+ _AGENTIC_SYSTEM_SUPPLEMENT_MINIMAL = (
1630
+ "\n\nUse tools for all actions. Respond with tool calls, not descriptions of what to do."
1631
+ )
1632
+
1626
1633
  if PROXY_AGENTIC_SUPPLEMENT_MODE == "legacy":
1627
1634
  _AGENTIC_SYSTEM_SUPPLEMENT = _AGENTIC_SYSTEM_SUPPLEMENT_LEGACY
1635
+ elif PROXY_AGENTIC_SUPPLEMENT_MODE == "minimal":
1636
+ _AGENTIC_SYSTEM_SUPPLEMENT = _AGENTIC_SYSTEM_SUPPLEMENT_MINIMAL
1628
1637
  elif PROXY_AGENTIC_SUPPLEMENT_MODE == "clean":
1629
1638
  _AGENTIC_SYSTEM_SUPPLEMENT = _AGENTIC_SYSTEM_SUPPLEMENT_CLEAN
1630
1639
  else:
@@ -2109,19 +2118,26 @@ def build_openai_request(
2109
2118
  has_tools = _has_tool_definitions(anthropic_body)
2110
2119
 
2111
2120
  # Inject agentic protocol instructions only for tool-enabled turns.
2121
+ # Use minimal supplement for qwen models to reduce prompt leak surface.
2112
2122
  if has_tools:
2123
+ model_name = anthropic_body.get("model", "").lower()
2124
+ supplement = (
2125
+ _AGENTIC_SYSTEM_SUPPLEMENT_MINIMAL
2126
+ if "qwen" in model_name and PROXY_AGENTIC_SUPPLEMENT_MODE != "legacy"
2127
+ else _AGENTIC_SYSTEM_SUPPLEMENT
2128
+ )
2113
2129
  if (
2114
2130
  openai_body["messages"]
2115
2131
  and openai_body["messages"][0].get("role") == "system"
2116
2132
  ):
2117
- openai_body["messages"][0]["content"] += _AGENTIC_SYSTEM_SUPPLEMENT
2133
+ openai_body["messages"][0]["content"] += supplement
2118
2134
  else:
2119
2135
  # No system message from the client; inject one.
2120
2136
  openai_body["messages"].insert(
2121
2137
  0,
2122
2138
  {
2123
2139
  "role": "system",
2124
- "content": _AGENTIC_SYSTEM_SUPPLEMENT.strip(),
2140
+ "content": supplement.strip(),
2125
2141
  },
2126
2142
  )
2127
2143
  if profile_prompt_suffix:
@@ -2208,6 +2224,17 @@ def build_openai_request(
2208
2224
  if "stop_sequences" in anthropic_body:
2209
2225
  openai_body["stop"] = anthropic_body["stop_sequences"]
2210
2226
 
2227
+ # Force controlled temperature for tool-call turns to reduce garbled output
2228
+ if has_tools:
2229
+ client_temp = openai_body.get("temperature")
2230
+ if client_temp is None or client_temp > PROXY_TOOL_TURN_TEMPERATURE:
2231
+ openai_body["temperature"] = PROXY_TOOL_TURN_TEMPERATURE
2232
+ logger.info(
2233
+ "TOOL TURN TEMP: forcing temperature=%.2f (was %s) for tool-enabled request",
2234
+ PROXY_TOOL_TURN_TEMPERATURE,
2235
+ client_temp,
2236
+ )
2237
+
2211
2238
  # Convert Anthropic tools to OpenAI function-calling tools
2212
2239
  if has_tools:
2213
2240
  openai_body["tools"] = _convert_anthropic_tools_to_openai(
@@ -2655,6 +2682,221 @@ def _extract_tool_calls_from_text(text: str) -> tuple[list[dict], str]:
2655
2682
  return extracted, remaining
2656
2683
 
2657
2684
 
2685
+ # Pattern: runaway closing braces like }}}}}
2686
+ _GARBLED_RUNAWAY_BRACES_RE = re.compile(r"\}{4,}")
2687
+ # Pattern: repetitive digit sequences like 000000 or 398859738398859738
2688
+ _GARBLED_REPETITIVE_DIGITS_RE = re.compile(r"(\d{3,})\1{2,}")
2689
+ # Pattern: long runs of zeros
2690
+ _GARBLED_ZEROS_RE = re.compile(r"0{8,}")
2691
+ # Pattern: extremely long unbroken digit strings (>30 digits)
2692
+ _GARBLED_LONG_DIGITS_RE = re.compile(r"\d{30,}")
2693
+
2694
+
2695
+ def _is_garbled_tool_arguments(arguments_str: str) -> bool:
2696
+ """Detect garbled/degenerate tool call arguments.
2697
+
2698
+ Returns True if the arguments string shows signs of degenerate generation:
2699
+ - Runaway closing braces (}}}}})
2700
+ - Repetitive digit patterns (000000, 398859738398859738)
2701
+ - Extremely long digit strings
2702
+ - Unbalanced braces suggesting truncated/corrupt JSON
2703
+ """
2704
+ if not arguments_str or arguments_str == "{}":
2705
+ return False
2706
+
2707
+ if _GARBLED_RUNAWAY_BRACES_RE.search(arguments_str):
2708
+ return True
2709
+ if _GARBLED_REPETITIVE_DIGITS_RE.search(arguments_str):
2710
+ return True
2711
+ if _GARBLED_ZEROS_RE.search(arguments_str):
2712
+ return True
2713
+ if _GARBLED_LONG_DIGITS_RE.search(arguments_str):
2714
+ return True
2715
+
2716
+ # Check brace balance — more than 2 unmatched braces suggests corruption
2717
+ open_count = arguments_str.count("{")
2718
+ close_count = arguments_str.count("}")
2719
+ if abs(open_count - close_count) > 2:
2720
+ return True
2721
+
2722
+ return False
2723
+
2724
+
2725
def _sanitize_garbled_tool_calls(openai_resp: dict) -> bool:
    """Drop tool calls whose arguments look garbled from an OpenAI response.

    Only the first entry of ``openai_resp["choices"]`` is inspected, and the
    response is mutated in place: garbled tool calls are removed from that
    choice's ``message["tool_calls"]``. When every tool call is garbled the
    ``tool_calls`` key is removed and the choice's ``finish_reason`` is set
    to ``"stop"``.

    Args:
        openai_resp: Chat-completions style response dict.

    Returns:
        True if at least one tool call was removed, False otherwise
        (including when the response has no usable message or tool calls).
    """
    choice = (openai_resp.get("choices") or [{}])[0]
    if not isinstance(choice, dict):
        return False
    message = choice.get("message")
    if not isinstance(message, dict):
        # Missing or null message: nothing to sanitize.
        return False
    tool_calls = message.get("tool_calls")
    if not tool_calls:
        return False

    clean = []
    garbled_count = 0
    for tc in tool_calls:
        fn = tc.get("function") if isinstance(tc, dict) else None
        if not isinstance(fn, dict):
            fn = {}
        args_str = fn.get("arguments", "{}")
        # Only raw text is handed to the detector; malformed entries and
        # non-string arguments are passed through untouched instead of raising.
        if isinstance(args_str, str) and _is_garbled_tool_arguments(args_str):
            garbled_count += 1
            logger.warning(
                "GARBLED TOOL ARGS: name=%s args_preview=%.120s",
                fn.get("name", "?"),
                args_str,
            )
        else:
            clean.append(tc)

    if garbled_count == 0:
        return False

    if clean:
        message["tool_calls"] = clean
    else:
        # All tool calls were garbled — remove tool_calls entirely
        message.pop("tool_calls", None)
        choice["finish_reason"] = "stop"

    logger.warning(
        "GARBLED TOOL ARGS: removed %d garbled tool call(s), %d clean remaining",
        garbled_count,
        len(clean),
    )
    return True
2768
+
2769
+
2770
+ # Distinctive phrases from the agentic system supplement that Qwen3.5 leaks
2771
+ # into tool call arguments. Keep lowercase for case-insensitive matching.
2772
+ _SYSTEM_PROMPT_LEAK_MARKERS = (
2773
+ "agentic-protocol",
2774
+ "agentic coding loop",
2775
+ "follow these rules",
2776
+ "function signatures within",
2777
+ "provided with function signatures",
2778
+ "you are provided with function",
2779
+ "call one or more functions",
2780
+ "xml tags:",
2781
+ "do not summarize the issue",
2782
+ "you must call a tool",
2783
+ "proceed immediately to make the fix",
2784
+ "do not ask for permission or confirmation",
2785
+ "do not give up after one failure",
2786
+ "emit a valid tool call object",
2787
+ "never output protocol fragments",
2788
+ "never emit literal tag artifacts",
2789
+ "use tools for concrete work",
2790
+ "stopping at analysis",
2791
+ )
2792
+
2793
+
2794
+ def _contains_system_prompt_leak(value) -> bool:
2795
+ """Check if any string leaf in *value* contains system prompt fragments."""
2796
+ for text in _iter_string_leaves(value):
2797
+ lowered = text.lower()
2798
+ if any(marker in lowered for marker in _SYSTEM_PROMPT_LEAK_MARKERS):
2799
+ return True
2800
+ return False
2801
+
2802
+
2803
+ def _find_earliest_leak_position(text: str) -> int | None:
2804
+ """Return the character index where the first system prompt leak starts, or None."""
2805
+ lowered = text.lower()
2806
+ earliest = None
2807
+ for marker in _SYSTEM_PROMPT_LEAK_MARKERS:
2808
+ idx = lowered.find(marker)
2809
+ if idx != -1 and (earliest is None or idx < earliest):
2810
+ earliest = idx
2811
+ return earliest
2812
+
2813
+
2814
def _strip_leak_from_args(tool_name: str, parsed_args: dict) -> tuple[dict, bool]:
    """Return a cleaned copy of *parsed_args* and whether anything changed.

    Only top-level string values are examined. A value is cut at the first
    leak marker (trailing whitespace removed); a value that starts with a
    marker is replaced by the empty string. Both outcomes are logged.
    """
    cleaned_args = {}
    changed = False
    for key, val in parsed_args.items():
        pos = _find_earliest_leak_position(val) if isinstance(val, str) else None
        if pos is None:
            cleaned_args[key] = val
            continue

        changed = True
        if pos > 0:
            cleaned_args[key] = val[:pos].rstrip()
            logger.warning(
                "PROMPT LEAK REPAIR: tool=%s field=%s truncated at pos=%d",
                tool_name,
                key,
                pos,
            )
        else:
            # Entire value is leaked content — clear it
            cleaned_args[key] = ""
            logger.warning(
                "PROMPT LEAK REPAIR: tool=%s field=%s cleared (value starts with leaked text)",
                tool_name,
                key,
            )
    return cleaned_args, changed


def _repair_system_prompt_leak(openai_resp: dict) -> tuple[dict, int]:
    """Strip system prompt leak fragments from tool call argument values.

    Each tool call's ``function.arguments`` is parsed (a dict is used as-is,
    anything else is parsed as JSON text). Top-level string values are
    truncated at the first detected leak marker. Tool calls whose arguments
    cannot be parsed, are not a JSON object, or contain no leak are passed
    through unchanged.

    Args:
        openai_resp: Chat-completions style response dict. It is not mutated;
            shallow copies of the response, choice, message, and affected
            tool calls are made when a repair happens.

    Returns:
        ``(response, repair_count)``. When nothing was repaired the original
        object is returned with a count of 0. A repaired response carries a
        single-element ``choices`` list, and repaired arguments are always
        re-serialized as compact JSON text.
    """
    if not _openai_has_tool_calls(openai_resp):
        return openai_resp, 0

    choice, message = _extract_openai_choice(openai_resp)
    tool_calls = message.get("tool_calls") or []
    if not tool_calls:
        return openai_resp, 0

    repaired_tool_calls = []
    repaired_count = 0

    for tool_call in tool_calls:
        fn = tool_call.get("function") if isinstance(tool_call, dict) else {}
        if not isinstance(fn, dict):
            fn = {}

        raw_args = fn.get("arguments", "{}")
        if isinstance(raw_args, dict):
            parsed_args = dict(raw_args)
        else:
            try:
                parsed_args = json.loads(str(raw_args))
            except json.JSONDecodeError:
                # Unparseable arguments are left alone.
                parsed_args = None

        if not isinstance(parsed_args, dict):
            repaired_tool_calls.append(tool_call)
            continue

        cleaned_args, changed = _strip_leak_from_args(fn.get("name", "?"), parsed_args)
        if not changed:
            repaired_tool_calls.append(tool_call)
            continue

        new_fn = dict(fn)
        new_fn["arguments"] = json.dumps(cleaned_args, separators=(",", ":"))
        new_tool_call = dict(tool_call)
        new_tool_call["function"] = new_fn
        repaired_tool_calls.append(new_tool_call)
        repaired_count += 1

    if repaired_count == 0:
        return openai_resp, 0

    repaired_message = dict(message)
    repaired_message["tool_calls"] = repaired_tool_calls
    repaired_choice = dict(choice)
    repaired_choice["message"] = repaired_message
    repaired_response = dict(openai_resp)
    repaired_response["choices"] = [repaired_choice]
    logger.warning(
        "PROMPT LEAK REPAIR: repaired %d tool call(s)",
        repaired_count,
    )
    return repaired_response, repaired_count
2898
+
2899
+
2658
2900
  def _tool_schema_map_from_anthropic_body(anthropic_body: dict) -> dict[str, dict]:
2659
2901
  schema_map: dict[str, dict] = {}
2660
2902
  for tool in anthropic_body.get("tools", []) or []:
@@ -3206,6 +3448,16 @@ def _validate_tool_call_arguments(
3206
3448
  ),
3207
3449
  )
3208
3450
 
3451
+ if _contains_system_prompt_leak(parsed):
3452
+ return ToolResponseIssue(
3453
+ kind="invalid_tool_args",
3454
+ reason=f"arguments for '{tool_name}' contain leaked system prompt fragments",
3455
+ retry_hint=(
3456
+ f"Emit exactly one `{tool_name}` tool call with only the requested arguments. "
3457
+ "Do not include any system instructions or protocol text in argument values."
3458
+ ),
3459
+ )
3460
+
3209
3461
  if _contains_required_placeholder(parsed):
3210
3462
  return ToolResponseIssue(
3211
3463
  kind="invalid_tool_args",
@@ -3761,7 +4013,8 @@ async def _apply_malformed_tool_guardrail(
3761
4013
  working_resp, anthropic_body
3762
4014
  )
3763
4015
  working_resp, bash_repairs = _repair_bash_command_artifacts(working_resp)
3764
- repair_count = markup_repairs + required_repairs + bash_repairs
4016
+ working_resp, leak_repairs = _repair_system_prompt_leak(working_resp)
4017
+ repair_count = markup_repairs + required_repairs + bash_repairs + leak_repairs
3765
4018
 
3766
4019
  required_tool_choice = openai_body.get("tool_choice") == "required"
3767
4020
  has_tool_calls = _openai_has_tool_calls(working_resp)
@@ -3850,8 +4103,11 @@ async def _apply_malformed_tool_guardrail(
3850
4103
  retry_working, retry_bash_repairs = _repair_bash_command_artifacts(
3851
4104
  retry_working
3852
4105
  )
4106
+ retry_working, retry_leak_repairs = _repair_system_prompt_leak(
4107
+ retry_working
4108
+ )
3853
4109
  retry_repairs = (
3854
- retry_markup_repairs + retry_required_repairs + retry_bash_repairs
4110
+ retry_markup_repairs + retry_required_repairs + retry_bash_repairs + retry_leak_repairs
3855
4111
  )
3856
4112
 
3857
4113
  working_resp = retry_working
@@ -4048,6 +4304,8 @@ def openai_to_anthropic_response(openai_resp: dict, model: str) -> dict:
4048
4304
  """Convert an OpenAI Chat Completions response to Anthropic Messages format."""
4049
4305
  # First: try to recover tool calls trapped in text XML tags
4050
4306
  _maybe_extract_text_tool_calls(openai_resp)
4307
+ # Second: strip garbled/degenerate tool call arguments
4308
+ _sanitize_garbled_tool_calls(openai_resp)
4051
4309
 
4052
4310
  choice = openai_resp.get("choices", [{}])[0]
4053
4311
  message = choice.get("message", {})
@@ -2925,6 +2925,216 @@ class TestToolCallXMLExtraction(unittest.TestCase):
2925
2925
  self.assertEqual(anthropic["stop_reason"], "tool_use")
2926
2926
 
2927
2927
 
2928
class TestGarbledToolArgDetection(unittest.TestCase):
    """Tests for detecting and sanitizing garbled tool call arguments."""

    @staticmethod
    def _response(*argument_strings, name="Bash"):
        """Build a minimal OpenAI response with one tool call per argument string."""
        return {
            "choices": [{
                "finish_reason": "tool_calls",
                "message": {
                    "tool_calls": [
                        {"function": {"name": name, "arguments": args}}
                        for args in argument_strings
                    ],
                },
            }]
        }

    def test_runaway_braces_detected(self):
        self.assertTrue(proxy._is_garbled_tool_arguments('{"command":"echo test}}}}}'))

    def test_repetitive_digits_detected(self):
        self.assertTrue(proxy._is_garbled_tool_arguments('{"command":"echo 398398398398398398"}'))

    def test_long_zeros_detected(self):
        self.assertTrue(proxy._is_garbled_tool_arguments('{"command":"echo 00000000000"}'))

    def test_extremely_long_digits_detected(self):
        self.assertTrue(proxy._is_garbled_tool_arguments('{"x":"' + "1" * 35 + '"}'))

    def test_unbalanced_braces_detected(self):
        self.assertTrue(proxy._is_garbled_tool_arguments('{"a":{"b":{"c":"d"'))

    def test_normal_args_not_flagged(self):
        self.assertFalse(proxy._is_garbled_tool_arguments('{"command":"ls -la /tmp"}'))
        self.assertFalse(proxy._is_garbled_tool_arguments('{"file_path":"/home/user/test.py"}'))

    def test_empty_args_not_flagged(self):
        self.assertFalse(proxy._is_garbled_tool_arguments("{}"))
        self.assertFalse(proxy._is_garbled_tool_arguments(""))

    def test_sanitize_removes_garbled_calls(self):
        openai_resp = self._response(
            '{"command":"ls"}',
            '{"command":"echo test}}}}}}',
        )
        removed = proxy._sanitize_garbled_tool_calls(openai_resp)
        self.assertTrue(removed)
        msg = openai_resp["choices"][0]["message"]
        self.assertEqual(len(msg["tool_calls"]), 1)
        self.assertEqual(msg["tool_calls"][0]["function"]["name"], "Bash")
        # Both calls share the name "Bash"; the arguments prove that the
        # clean call is the one that survived.
        self.assertEqual(
            msg["tool_calls"][0]["function"]["arguments"], '{"command":"ls"}'
        )
        # A surviving tool call means the turn is still a tool-call turn.
        self.assertEqual(openai_resp["choices"][0]["finish_reason"], "tool_calls")

    def test_sanitize_all_garbled_removes_tool_calls(self):
        openai_resp = self._response('{"command":"echo }}}}}}')
        removed = proxy._sanitize_garbled_tool_calls(openai_resp)
        self.assertTrue(removed)
        msg = openai_resp["choices"][0]["message"]
        self.assertNotIn("tool_calls", msg)
        self.assertEqual(openai_resp["choices"][0]["finish_reason"], "stop")

    def test_sanitize_clean_args_noop(self):
        openai_resp = self._response('{"file_path":"/x.py"}', name="Read")
        removed = proxy._sanitize_garbled_tool_calls(openai_resp)
        self.assertFalse(removed)
        # A no-op must leave the response exactly as it was built.
        self.assertEqual(
            openai_resp, self._response('{"file_path":"/x.py"}', name="Read")
        )
3002
+
3003
+
3004
class TestToolTurnTemperature(unittest.TestCase):
    """Checks that tool-enabled requests have their temperature capped."""

    def _make_monitor(self):
        return proxy.SessionMonitor()

    def _convert(self, body):
        """Run *body* through the proxy's Anthropic-to-OpenAI request builder."""
        return proxy.build_openai_request(body, self._make_monitor())

    def test_tool_turn_forces_temperature(self):
        bash_tool = {
            "name": "Bash",
            "input_schema": {
                "type": "object",
                "properties": {"command": {"type": "string"}},
            },
        }
        converted = self._convert(
            {
                "model": "qwen3.5",
                "messages": [{"role": "user", "content": "hello"}],
                "tools": [bash_tool],
                "temperature": 0.8,
            }
        )
        self.assertLessEqual(
            converted["temperature"], proxy.PROXY_TOOL_TURN_TEMPERATURE
        )

    def test_no_tools_preserves_temperature(self):
        converted = self._convert(
            {
                "model": "qwen3.5",
                "messages": [{"role": "user", "content": "hello"}],
                "temperature": 0.8,
            }
        )
        self.assertEqual(converted["temperature"], 0.8)
3028
+
3029
+
3030
class TestSystemPromptLeakDetection(unittest.TestCase):
    """Covers detection and repair of system prompt text leaked into tool args."""

    @staticmethod
    def _bash_response(arguments):
        """Wrap one Bash tool call with the given raw arguments in a response."""
        tool_call = {"function": {"name": "Bash", "arguments": arguments}}
        return {
            "choices": [{
                "finish_reason": "tool_calls",
                "message": {"tool_calls": [tool_call]},
            }]
        }

    def test_detects_agentic_protocol_leak(self):
        leaked = {"command": "echo test call one or more functions to assist"}
        self.assertTrue(proxy._contains_system_prompt_leak(leaked))

    def test_detects_follow_rules_leak(self):
        leaked = {"command": "ls Follow these rules: 1. Use tools"}
        self.assertTrue(proxy._contains_system_prompt_leak(leaked))

    def test_detects_xml_tags_leak(self):
        leaked = {"command": "echo function signatures within <tools></tools> XML tags:"}
        self.assertTrue(proxy._contains_system_prompt_leak(leaked))

    def test_clean_args_not_flagged(self):
        harmless = (
            {"command": "echo hello world"},
            {"file_path": "/home/user/test.py"},
        )
        for args in harmless:
            self.assertFalse(proxy._contains_system_prompt_leak(args))

    def test_find_earliest_leak_position(self):
        text = "echo test-1 call one or more functions to assist"
        pos = proxy._find_earliest_leak_position(text)
        self.assertIsNotNone(pos)
        prefix = text[:pos]
        self.assertEqual(prefix.strip(), "echo test-1")

    def test_find_no_leak_returns_none(self):
        pos = proxy._find_earliest_leak_position("echo hello")
        self.assertIsNone(pos)

    def test_repair_truncates_at_leak(self):
        openai_resp = self._bash_response(
            '{"command":"echo test-1 call one or more functions to assist"}'
        )
        repaired, count = proxy._repair_system_prompt_leak(openai_resp)
        self.assertEqual(count, 1)
        first_call = repaired["choices"][0]["message"]["tool_calls"][0]
        args = json.loads(first_call["function"]["arguments"])
        self.assertEqual(args["command"], "echo test-1")

    def test_repair_noop_on_clean_args(self):
        openai_resp = self._bash_response('{"command":"ls -la"}')
        repaired, count = proxy._repair_system_prompt_leak(openai_resp)
        self.assertEqual(count, 0)

    def test_validate_rejects_leaked_args(self):
        schema = {
            "type": "object",
            "properties": {"command": {"type": "string"}},
            "required": ["command"],
        }
        result = proxy._validate_tool_call_arguments(
            "Bash",
            '{"command":"echo test follow these rules"}',
            schema,
            {"Bash"},
        )
        self.assertTrue(result.has_issue())
        self.assertIn("leaked system prompt", result.reason)
3108
+
3109
+
3110
class TestMinimalSupplementForQwen(unittest.TestCase):
    """Checks which agentic supplement is injected for a given model name."""

    def _make_monitor(self):
        return proxy.SessionMonitor()

    def _system_prompt_for(self, model):
        """Build a tool-enabled request for *model* and return its first message content."""
        bash_tool = {
            "name": "Bash",
            "input_schema": {
                "type": "object",
                "properties": {"command": {"type": "string"}},
            },
        }
        request = {
            "model": model,
            "messages": [{"role": "user", "content": "hello"}],
            "tools": [bash_tool],
        }
        converted = proxy.build_openai_request(request, self._make_monitor())
        return converted["messages"][0]["content"]

    def test_qwen_model_gets_minimal_supplement(self):
        system_msg = self._system_prompt_for("qwen3.5")
        self.assertNotIn("agentic-protocol", system_msg)
        self.assertIn("Use tools for all actions", system_msg)

    def test_non_qwen_model_gets_full_supplement(self):
        system_msg = self._system_prompt_for("claude-3")
        self.assertIn("agentic-protocol", system_msg)
+
3137
+
2928
3138
  if __name__ == "__main__":
2929
3139
  unittest.main()
2930
3140