@miller-tech/uap 1.20.7 → 1.20.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -51,7 +51,7 @@ Configuration (Environment Variables)
|
|
|
51
51
|
|
|
52
52
|
PROXY_CONTEXT_PRUNE_THRESHOLD Fraction of context window at which
|
|
53
53
|
conversation pruning activates (0.0-1.0)
|
|
54
|
-
Default: 0.
|
|
54
|
+
Default: 0.85
|
|
55
55
|
|
|
56
56
|
Usage
|
|
57
57
|
-----
|
|
@@ -113,10 +113,10 @@ PROXY_UPSTREAM_RETRY_DELAY_SECS = float(os.environ.get("PROXY_UPSTREAM_RETRY_DEL
|
|
|
113
113
|
PROXY_MAX_CONNECTIONS = int(os.environ.get("PROXY_MAX_CONNECTIONS", "20"))
|
|
114
114
|
PROXY_CONTEXT_WINDOW = int(os.environ.get("PROXY_CONTEXT_WINDOW", "0"))
|
|
115
115
|
PROXY_CONTEXT_PRUNE_THRESHOLD = float(
|
|
116
|
-
os.environ.get("PROXY_CONTEXT_PRUNE_THRESHOLD", "0.
|
|
116
|
+
os.environ.get("PROXY_CONTEXT_PRUNE_THRESHOLD", "0.85")
|
|
117
117
|
)
|
|
118
118
|
PROXY_CONTEXT_PRUNE_TARGET_FRACTION = float(
|
|
119
|
-
os.environ.get("PROXY_CONTEXT_PRUNE_TARGET_FRACTION", "0.
|
|
119
|
+
os.environ.get("PROXY_CONTEXT_PRUNE_TARGET_FRACTION", "0.50")
|
|
120
120
|
)
|
|
121
121
|
PROXY_LOOP_BREAKER = os.environ.get("PROXY_LOOP_BREAKER", "on").lower() not in {
|
|
122
122
|
"0",
|
|
@@ -277,6 +277,12 @@ PROXY_FORCED_TOOL_DAMPENER_AUTO_TURNS = int(
|
|
|
277
277
|
PROXY_FORCED_TOOL_DAMPENER_REJECTIONS = int(
|
|
278
278
|
os.environ.get("PROXY_FORCED_TOOL_DAMPENER_REJECTIONS", "2")
|
|
279
279
|
)
|
|
280
|
+
PROXY_TOOL_STARVATION_THRESHOLD = int(
|
|
281
|
+
os.environ.get("PROXY_TOOL_STARVATION_THRESHOLD", "5")
|
|
282
|
+
)
|
|
283
|
+
PROXY_CONTEXT_HIGH_RELAXATION_THRESHOLD = float(
|
|
284
|
+
os.environ.get("PROXY_CONTEXT_HIGH_RELAXATION_THRESHOLD", "0.70")
|
|
285
|
+
)
|
|
280
286
|
PROXY_SESSION_CONTAMINATION_BREAKER = os.environ.get(
|
|
281
287
|
"PROXY_SESSION_CONTAMINATION_BREAKER", "on"
|
|
282
288
|
).lower() not in {
|
|
@@ -609,6 +615,7 @@ class SessionMonitor:
|
|
|
609
615
|
loop_warnings_emitted: int = 0 # How many loop warnings sent to the model
|
|
610
616
|
no_progress_streak: int = 0 # Forced tool turns without new tool_result
|
|
611
617
|
unexpected_end_turn_count: int = 0 # end_turn without tool_use in active loop
|
|
618
|
+
tool_starvation_streak: int = 0 # Consecutive forced turns with no tool_calls produced
|
|
612
619
|
malformed_tool_streak: int = 0 # consecutive malformed pseudo tool payloads
|
|
613
620
|
invalid_tool_call_streak: int = 0 # consecutive invalid tool arg payloads
|
|
614
621
|
required_tool_miss_streak: int = 0 # required tool turns with no tool call
|
|
@@ -1626,8 +1633,14 @@ _AGENTIC_SYSTEM_SUPPLEMENT_CLEAN = (
|
|
|
1626
1633
|
"</agentic-protocol>"
|
|
1627
1634
|
)
|
|
1628
1635
|
|
|
1636
|
+
_AGENTIC_SYSTEM_SUPPLEMENT_MINIMAL = (
|
|
1637
|
+
"\n\nUse tools for all actions. Respond with tool calls, not descriptions of what to do."
|
|
1638
|
+
)
|
|
1639
|
+
|
|
1629
1640
|
if PROXY_AGENTIC_SUPPLEMENT_MODE == "legacy":
|
|
1630
1641
|
_AGENTIC_SYSTEM_SUPPLEMENT = _AGENTIC_SYSTEM_SUPPLEMENT_LEGACY
|
|
1642
|
+
elif PROXY_AGENTIC_SUPPLEMENT_MODE == "minimal":
|
|
1643
|
+
_AGENTIC_SYSTEM_SUPPLEMENT = _AGENTIC_SYSTEM_SUPPLEMENT_MINIMAL
|
|
1631
1644
|
elif PROXY_AGENTIC_SUPPLEMENT_MODE == "clean":
|
|
1632
1645
|
_AGENTIC_SYSTEM_SUPPLEMENT = _AGENTIC_SYSTEM_SUPPLEMENT_CLEAN
|
|
1633
1646
|
else:
|
|
@@ -2112,19 +2125,26 @@ def build_openai_request(
|
|
|
2112
2125
|
has_tools = _has_tool_definitions(anthropic_body)
|
|
2113
2126
|
|
|
2114
2127
|
# Inject agentic protocol instructions only for tool-enabled turns.
|
|
2128
|
+
# Use minimal supplement for qwen models to reduce prompt leak surface.
|
|
2115
2129
|
if has_tools:
|
|
2130
|
+
model_name = anthropic_body.get("model", "").lower()
|
|
2131
|
+
supplement = (
|
|
2132
|
+
_AGENTIC_SYSTEM_SUPPLEMENT_MINIMAL
|
|
2133
|
+
if "qwen" in model_name and PROXY_AGENTIC_SUPPLEMENT_MODE != "legacy"
|
|
2134
|
+
else _AGENTIC_SYSTEM_SUPPLEMENT
|
|
2135
|
+
)
|
|
2116
2136
|
if (
|
|
2117
2137
|
openai_body["messages"]
|
|
2118
2138
|
and openai_body["messages"][0].get("role") == "system"
|
|
2119
2139
|
):
|
|
2120
|
-
openai_body["messages"][0]["content"] += _AGENTIC_SYSTEM_SUPPLEMENT
|
|
2140
|
+
openai_body["messages"][0]["content"] += supplement
|
|
2121
2141
|
else:
|
|
2122
2142
|
# No system message from the client; inject one.
|
|
2123
2143
|
openai_body["messages"].insert(
|
|
2124
2144
|
0,
|
|
2125
2145
|
{
|
|
2126
2146
|
"role": "system",
|
|
2127
|
-
"content": _AGENTIC_SYSTEM_SUPPLEMENT.strip(),
|
|
2147
|
+
"content": supplement.strip(),
|
|
2128
2148
|
},
|
|
2129
2149
|
)
|
|
2130
2150
|
if profile_prompt_suffix:
|
|
@@ -2266,6 +2286,29 @@ def build_openai_request(
|
|
|
2266
2286
|
last_user_has_tool_result,
|
|
2267
2287
|
)
|
|
2268
2288
|
|
|
2289
|
+
# TOOL STARVATION BREAKER: if model repeatedly fails to produce tool
|
|
2290
|
+
# calls despite required, strip tools to let it generate text and break
|
|
2291
|
+
# the forcing loop.
|
|
2292
|
+
if (
|
|
2293
|
+
monitor.consecutive_forced_count >= PROXY_TOOL_STARVATION_THRESHOLD
|
|
2294
|
+
and _last_assistant_was_text_only(anthropic_body)
|
|
2295
|
+
):
|
|
2296
|
+
openai_body.pop("tool_choice", None)
|
|
2297
|
+
openai_body.pop("tools", None)
|
|
2298
|
+
monitor.tool_starvation_streak += 1
|
|
2299
|
+
monitor.consecutive_forced_count = 0
|
|
2300
|
+
monitor.no_progress_streak = 0
|
|
2301
|
+
monitor.reset_tool_turn_state(reason="tool_starvation_breaker")
|
|
2302
|
+
logger.warning(
|
|
2303
|
+
"TOOL STARVATION BREAKER: stripped tools after %d forced turns with no tool output (starvation_streak=%d)",
|
|
2304
|
+
PROXY_TOOL_STARVATION_THRESHOLD,
|
|
2305
|
+
monitor.tool_starvation_streak,
|
|
2306
|
+
)
|
|
2307
|
+
# Skip all further tool_choice logic — no tools this turn
|
|
2308
|
+
if PROXY_DISABLE_THINKING_ON_TOOL_TURNS:
|
|
2309
|
+
openai_body["enable_thinking"] = False
|
|
2310
|
+
return openai_body
|
|
2311
|
+
|
|
2269
2312
|
# Check if forced-tool dampener or loop breaker should override tool_choice
|
|
2270
2313
|
if monitor.consume_forced_auto_turn():
|
|
2271
2314
|
openai_body["tool_choice"] = "auto"
|
|
@@ -2345,6 +2388,23 @@ def build_openai_request(
|
|
|
2345
2388
|
if not has_tool_results:
|
|
2346
2389
|
monitor.reset_tool_turn_state(reason="no_tool_results")
|
|
2347
2390
|
|
|
2391
|
+
# CONTEXT-AWARE RELAXATION: when context utilization is high and
|
|
2392
|
+
# tool_choice was forced to required, relax to auto to let the model
|
|
2393
|
+
# emit shorter text responses instead of consuming more tokens.
|
|
2394
|
+
if openai_body.get("tool_choice") == "required":
|
|
2395
|
+
ctx_utilization = (
|
|
2396
|
+
monitor.last_input_tokens / monitor.context_window
|
|
2397
|
+
if monitor.context_window > 0
|
|
2398
|
+
else 0.0
|
|
2399
|
+
)
|
|
2400
|
+
if ctx_utilization >= PROXY_CONTEXT_HIGH_RELAXATION_THRESHOLD:
|
|
2401
|
+
openai_body["tool_choice"] = "auto"
|
|
2402
|
+
logger.warning(
|
|
2403
|
+
"CONTEXT-AWARE RELAXATION: tool_choice=auto (utilization=%.1f%% >= %.0f%% threshold)",
|
|
2404
|
+
ctx_utilization * 100,
|
|
2405
|
+
PROXY_CONTEXT_HIGH_RELAXATION_THRESHOLD * 100,
|
|
2406
|
+
)
|
|
2407
|
+
|
|
2348
2408
|
if PROXY_DISABLE_THINKING_ON_TOOL_TURNS:
|
|
2349
2409
|
openai_body["enable_thinking"] = False
|
|
2350
2410
|
logger.info(
|
|
@@ -2754,6 +2814,145 @@ def _sanitize_garbled_tool_calls(openai_resp: dict) -> bool:
|
|
|
2754
2814
|
return True
|
|
2755
2815
|
|
|
2756
2816
|
|
|
2817
|
+
# Distinctive phrases from the agentic system supplement that Qwen3.5 leaks
|
|
2818
|
+
# into tool call arguments. Keep lowercase for case-insensitive matching.
|
|
2819
|
+
_SYSTEM_PROMPT_LEAK_MARKERS = (
|
|
2820
|
+
"agentic-protocol",
|
|
2821
|
+
"agentic coding loop",
|
|
2822
|
+
"follow these rules",
|
|
2823
|
+
"function signatures within",
|
|
2824
|
+
"provided with function signatures",
|
|
2825
|
+
"you are provided with function",
|
|
2826
|
+
"call one or more functions",
|
|
2827
|
+
"xml tags:",
|
|
2828
|
+
"do not summarize the issue",
|
|
2829
|
+
"you must call a tool",
|
|
2830
|
+
"proceed immediately to make the fix",
|
|
2831
|
+
"do not ask for permission or confirmation",
|
|
2832
|
+
"do not give up after one failure",
|
|
2833
|
+
"emit a valid tool call object",
|
|
2834
|
+
"never output protocol fragments",
|
|
2835
|
+
"never emit literal tag artifacts",
|
|
2836
|
+
"use tools for concrete work",
|
|
2837
|
+
"stopping at analysis",
|
|
2838
|
+
# Client system prompt phrases that also leak into tool args
|
|
2839
|
+
"only produce a final text response without tool calls",
|
|
2840
|
+
"the entire task is fully complete",
|
|
2841
|
+
"always use tools to read, edit, write",
|
|
2842
|
+
"after reading files and identifying an issue",
|
|
2843
|
+
"do not output raw protocol tags",
|
|
2844
|
+
"valid tool call with strict json",
|
|
2845
|
+
"return exactly one valid tool call",
|
|
2846
|
+
"invalid tool call format",
|
|
2847
|
+
)
|
|
2848
|
+
|
|
2849
|
+
|
|
2850
|
+
def _contains_system_prompt_leak(value) -> bool:
|
|
2851
|
+
"""Check if any string leaf in *value* contains system prompt fragments."""
|
|
2852
|
+
for text in _iter_string_leaves(value):
|
|
2853
|
+
lowered = text.lower()
|
|
2854
|
+
if any(marker in lowered for marker in _SYSTEM_PROMPT_LEAK_MARKERS):
|
|
2855
|
+
return True
|
|
2856
|
+
return False
|
|
2857
|
+
|
|
2858
|
+
|
|
2859
|
+
def _find_earliest_leak_position(text: str) -> int | None:
|
|
2860
|
+
"""Return the character index where the first system prompt leak starts, or None."""
|
|
2861
|
+
lowered = text.lower()
|
|
2862
|
+
earliest = None
|
|
2863
|
+
for marker in _SYSTEM_PROMPT_LEAK_MARKERS:
|
|
2864
|
+
idx = lowered.find(marker)
|
|
2865
|
+
if idx != -1 and (earliest is None or idx < earliest):
|
|
2866
|
+
earliest = idx
|
|
2867
|
+
return earliest
|
|
2868
|
+
|
|
2869
|
+
|
|
2870
|
+
def _repair_system_prompt_leak(openai_resp: dict) -> tuple[dict, int]:
|
|
2871
|
+
"""Strip system prompt leak fragments from tool call argument values.
|
|
2872
|
+
|
|
2873
|
+
Truncates string values at the first detected leak marker.
|
|
2874
|
+
Returns (possibly-mutated response, repair count).
|
|
2875
|
+
"""
|
|
2876
|
+
if not _openai_has_tool_calls(openai_resp):
|
|
2877
|
+
return openai_resp, 0
|
|
2878
|
+
|
|
2879
|
+
choice, message = _extract_openai_choice(openai_resp)
|
|
2880
|
+
tool_calls = message.get("tool_calls") or []
|
|
2881
|
+
if not tool_calls:
|
|
2882
|
+
return openai_resp, 0
|
|
2883
|
+
|
|
2884
|
+
repaired_tool_calls = []
|
|
2885
|
+
repaired_count = 0
|
|
2886
|
+
|
|
2887
|
+
for tool_call in tool_calls:
|
|
2888
|
+
fn = tool_call.get("function") if isinstance(tool_call, dict) else {}
|
|
2889
|
+
if not isinstance(fn, dict):
|
|
2890
|
+
fn = {}
|
|
2891
|
+
|
|
2892
|
+
raw_args = fn.get("arguments", "{}")
|
|
2893
|
+
if isinstance(raw_args, dict):
|
|
2894
|
+
parsed_args = dict(raw_args)
|
|
2895
|
+
else:
|
|
2896
|
+
try:
|
|
2897
|
+
parsed_args = json.loads(str(raw_args))
|
|
2898
|
+
except json.JSONDecodeError:
|
|
2899
|
+
repaired_tool_calls.append(tool_call)
|
|
2900
|
+
continue
|
|
2901
|
+
|
|
2902
|
+
if not isinstance(parsed_args, dict):
|
|
2903
|
+
repaired_tool_calls.append(tool_call)
|
|
2904
|
+
continue
|
|
2905
|
+
|
|
2906
|
+
changed = False
|
|
2907
|
+
cleaned_args = {}
|
|
2908
|
+
for key, val in parsed_args.items():
|
|
2909
|
+
if isinstance(val, str):
|
|
2910
|
+
pos = _find_earliest_leak_position(val)
|
|
2911
|
+
if pos is not None and pos > 0:
|
|
2912
|
+
cleaned_args[key] = val[:pos].rstrip()
|
|
2913
|
+
changed = True
|
|
2914
|
+
logger.warning(
|
|
2915
|
+
"PROMPT LEAK REPAIR: tool=%s field=%s truncated at pos=%d",
|
|
2916
|
+
fn.get("name", "?"),
|
|
2917
|
+
key,
|
|
2918
|
+
pos,
|
|
2919
|
+
)
|
|
2920
|
+
elif pos == 0:
|
|
2921
|
+
# Entire value is leaked content — clear it
|
|
2922
|
+
cleaned_args[key] = ""
|
|
2923
|
+
changed = True
|
|
2924
|
+
else:
|
|
2925
|
+
cleaned_args[key] = val
|
|
2926
|
+
else:
|
|
2927
|
+
cleaned_args[key] = val
|
|
2928
|
+
|
|
2929
|
+
if not changed:
|
|
2930
|
+
repaired_tool_calls.append(tool_call)
|
|
2931
|
+
continue
|
|
2932
|
+
|
|
2933
|
+
new_tool_call = dict(tool_call)
|
|
2934
|
+
new_fn = dict(fn)
|
|
2935
|
+
new_fn["arguments"] = json.dumps(cleaned_args, separators=(",", ":"))
|
|
2936
|
+
new_tool_call["function"] = new_fn
|
|
2937
|
+
repaired_tool_calls.append(new_tool_call)
|
|
2938
|
+
repaired_count += 1
|
|
2939
|
+
|
|
2940
|
+
if repaired_count > 0:
|
|
2941
|
+
repaired_response = dict(openai_resp)
|
|
2942
|
+
repaired_choice = dict(choice)
|
|
2943
|
+
repaired_message = dict(message)
|
|
2944
|
+
repaired_message["tool_calls"] = repaired_tool_calls
|
|
2945
|
+
repaired_choice["message"] = repaired_message
|
|
2946
|
+
repaired_response["choices"] = [repaired_choice]
|
|
2947
|
+
logger.warning(
|
|
2948
|
+
"PROMPT LEAK REPAIR: repaired %d tool call(s)",
|
|
2949
|
+
repaired_count,
|
|
2950
|
+
)
|
|
2951
|
+
return repaired_response, repaired_count
|
|
2952
|
+
|
|
2953
|
+
return openai_resp, 0
|
|
2954
|
+
|
|
2955
|
+
|
|
2757
2956
|
def _tool_schema_map_from_anthropic_body(anthropic_body: dict) -> dict[str, dict]:
|
|
2758
2957
|
schema_map: dict[str, dict] = {}
|
|
2759
2958
|
for tool in anthropic_body.get("tools", []) or []:
|
|
@@ -3305,6 +3504,16 @@ def _validate_tool_call_arguments(
|
|
|
3305
3504
|
),
|
|
3306
3505
|
)
|
|
3307
3506
|
|
|
3507
|
+
if _contains_system_prompt_leak(parsed):
|
|
3508
|
+
return ToolResponseIssue(
|
|
3509
|
+
kind="invalid_tool_args",
|
|
3510
|
+
reason=f"arguments for '{tool_name}' contain leaked system prompt fragments",
|
|
3511
|
+
retry_hint=(
|
|
3512
|
+
f"Emit exactly one `{tool_name}` tool call with only the requested arguments. "
|
|
3513
|
+
"Do not include any system instructions or protocol text in argument values."
|
|
3514
|
+
),
|
|
3515
|
+
)
|
|
3516
|
+
|
|
3308
3517
|
if _contains_required_placeholder(parsed):
|
|
3309
3518
|
return ToolResponseIssue(
|
|
3310
3519
|
kind="invalid_tool_args",
|
|
@@ -3860,7 +4069,8 @@ async def _apply_malformed_tool_guardrail(
|
|
|
3860
4069
|
working_resp, anthropic_body
|
|
3861
4070
|
)
|
|
3862
4071
|
working_resp, bash_repairs = _repair_bash_command_artifacts(working_resp)
|
|
3863
|
-
repair_count = markup_repairs + required_repairs + bash_repairs
|
4072
|
+
working_resp, leak_repairs = _repair_system_prompt_leak(working_resp)
|
|
4073
|
+
repair_count = markup_repairs + required_repairs + bash_repairs + leak_repairs
|
|
3864
4074
|
|
|
3865
4075
|
required_tool_choice = openai_body.get("tool_choice") == "required"
|
|
3866
4076
|
has_tool_calls = _openai_has_tool_calls(working_resp)
|
|
@@ -3949,8 +4159,11 @@ async def _apply_malformed_tool_guardrail(
|
|
|
3949
4159
|
retry_working, retry_bash_repairs = _repair_bash_command_artifacts(
|
|
3950
4160
|
retry_working
|
|
3951
4161
|
)
|
|
4162
|
+
retry_working, retry_leak_repairs = _repair_system_prompt_leak(
|
|
4163
|
+
retry_working
|
|
4164
|
+
)
|
|
3952
4165
|
retry_repairs = (
|
|
3953
|
-
retry_markup_repairs + retry_required_repairs + retry_bash_repairs
|
|
4166
|
+
retry_markup_repairs + retry_required_repairs + retry_bash_repairs + retry_leak_repairs
|
|
3954
4167
|
)
|
|
3955
4168
|
|
|
3956
4169
|
working_resp = retry_working
|
|
@@ -3027,6 +3027,183 @@ class TestToolTurnTemperature(unittest.TestCase):
|
|
|
3027
3027
|
self.assertEqual(result["temperature"], 0.8)
|
|
3028
3028
|
|
|
3029
3029
|
|
|
3030
|
+
class TestSystemPromptLeakDetection(unittest.TestCase):
|
|
3031
|
+
"""Tests for detecting and repairing system prompt leaks in tool args."""
|
|
3032
|
+
|
|
3033
|
+
def test_detects_agentic_protocol_leak(self):
|
|
3034
|
+
self.assertTrue(proxy._contains_system_prompt_leak(
|
|
3035
|
+
{"command": "echo test call one or more functions to assist"}
|
|
3036
|
+
))
|
|
3037
|
+
|
|
3038
|
+
def test_detects_follow_rules_leak(self):
|
|
3039
|
+
self.assertTrue(proxy._contains_system_prompt_leak(
|
|
3040
|
+
{"command": "ls Follow these rules: 1. Use tools"}
|
|
3041
|
+
))
|
|
3042
|
+
|
|
3043
|
+
def test_detects_xml_tags_leak(self):
|
|
3044
|
+
self.assertTrue(proxy._contains_system_prompt_leak(
|
|
3045
|
+
{"command": "echo function signatures within <tools></tools> XML tags:"}
|
|
3046
|
+
))
|
|
3047
|
+
|
|
3048
|
+
def test_clean_args_not_flagged(self):
|
|
3049
|
+
self.assertFalse(proxy._contains_system_prompt_leak(
|
|
3050
|
+
{"command": "echo hello world"}
|
|
3051
|
+
))
|
|
3052
|
+
self.assertFalse(proxy._contains_system_prompt_leak(
|
|
3053
|
+
{"file_path": "/home/user/test.py"}
|
|
3054
|
+
))
|
|
3055
|
+
|
|
3056
|
+
def test_find_earliest_leak_position(self):
|
|
3057
|
+
text = "echo test-1 call one or more functions to assist"
|
|
3058
|
+
pos = proxy._find_earliest_leak_position(text)
|
|
3059
|
+
self.assertIsNotNone(pos)
|
|
3060
|
+
self.assertEqual(text[:pos].strip(), "echo test-1")
|
|
3061
|
+
|
|
3062
|
+
def test_find_no_leak_returns_none(self):
|
|
3063
|
+
self.assertIsNone(proxy._find_earliest_leak_position("echo hello"))
|
|
3064
|
+
|
|
3065
|
+
def test_repair_truncates_at_leak(self):
|
|
3066
|
+
openai_resp = {
|
|
3067
|
+
"choices": [{
|
|
3068
|
+
"finish_reason": "tool_calls",
|
|
3069
|
+
"message": {
|
|
3070
|
+
"tool_calls": [{
|
|
3071
|
+
"function": {
|
|
3072
|
+
"name": "Bash",
|
|
3073
|
+
"arguments": '{"command":"echo test-1 call one or more functions to assist"}'
|
|
3074
|
+
}
|
|
3075
|
+
}],
|
|
3076
|
+
},
|
|
3077
|
+
}]
|
|
3078
|
+
}
|
|
3079
|
+
repaired, count = proxy._repair_system_prompt_leak(openai_resp)
|
|
3080
|
+
self.assertEqual(count, 1)
|
|
3081
|
+
fn = repaired["choices"][0]["message"]["tool_calls"][0]["function"]
|
|
3082
|
+
args = json.loads(fn["arguments"])
|
|
3083
|
+
self.assertEqual(args["command"], "echo test-1")
|
|
3084
|
+
|
|
3085
|
+
def test_repair_noop_on_clean_args(self):
|
|
3086
|
+
openai_resp = {
|
|
3087
|
+
"choices": [{
|
|
3088
|
+
"finish_reason": "tool_calls",
|
|
3089
|
+
"message": {
|
|
3090
|
+
"tool_calls": [{
|
|
3091
|
+
"function": {"name": "Bash", "arguments": '{"command":"ls -la"}'}
|
|
3092
|
+
}],
|
|
3093
|
+
},
|
|
3094
|
+
}]
|
|
3095
|
+
}
|
|
3096
|
+
repaired, count = proxy._repair_system_prompt_leak(openai_resp)
|
|
3097
|
+
self.assertEqual(count, 0)
|
|
3098
|
+
|
|
3099
|
+
def test_validate_rejects_leaked_args(self):
|
|
3100
|
+
result = proxy._validate_tool_call_arguments(
|
|
3101
|
+
"Bash",
|
|
3102
|
+
'{"command":"echo test follow these rules"}',
|
|
3103
|
+
{"type": "object", "properties": {"command": {"type": "string"}}, "required": ["command"]},
|
|
3104
|
+
{"Bash"},
|
|
3105
|
+
)
|
|
3106
|
+
self.assertTrue(result.has_issue())
|
|
3107
|
+
self.assertIn("leaked system prompt", result.reason)
|
|
3108
|
+
|
|
3109
|
+
|
|
3110
|
+
class TestMinimalSupplementForQwen(unittest.TestCase):
|
|
3111
|
+
"""Tests for model-based supplement selection."""
|
|
3112
|
+
|
|
3113
|
+
def _make_monitor(self):
|
|
3114
|
+
return proxy.SessionMonitor()
|
|
3115
|
+
|
|
3116
|
+
def test_qwen_model_gets_minimal_supplement(self):
|
|
3117
|
+
body = {
|
|
3118
|
+
"model": "qwen3.5",
|
|
3119
|
+
"messages": [{"role": "user", "content": "hello"}],
|
|
3120
|
+
"tools": [{"name": "Bash", "input_schema": {"type": "object", "properties": {"command": {"type": "string"}}}}],
|
|
3121
|
+
}
|
|
3122
|
+
result = proxy.build_openai_request(body, self._make_monitor())
|
|
3123
|
+
system_msg = result["messages"][0]["content"]
|
|
3124
|
+
self.assertNotIn("agentic-protocol", system_msg)
|
|
3125
|
+
self.assertIn("Use tools for all actions", system_msg)
|
|
3126
|
+
|
|
3127
|
+
def test_non_qwen_model_gets_full_supplement(self):
|
|
3128
|
+
body = {
|
|
3129
|
+
"model": "claude-3",
|
|
3130
|
+
"messages": [{"role": "user", "content": "hello"}],
|
|
3131
|
+
"tools": [{"name": "Bash", "input_schema": {"type": "object", "properties": {"command": {"type": "string"}}}}],
|
|
3132
|
+
}
|
|
3133
|
+
result = proxy.build_openai_request(body, self._make_monitor())
|
|
3134
|
+
system_msg = result["messages"][0]["content"]
|
|
3135
|
+
self.assertIn("agentic-protocol", system_msg)
|
|
3136
|
+
|
|
3137
|
+
|
|
3138
|
+
class TestToolStarvationBreaker(unittest.TestCase):
|
|
3139
|
+
"""Tests for tool-call starvation breaker."""
|
|
3140
|
+
|
|
3141
|
+
def _make_body_with_tools(self):
|
|
3142
|
+
return {
|
|
3143
|
+
"model": "qwen3.5",
|
|
3144
|
+
"messages": [
|
|
3145
|
+
{"role": "user", "content": "hello"},
|
|
3146
|
+
{"role": "assistant", "content": "I will help you."},
|
|
3147
|
+
{"role": "user", "content": [{"type": "tool_result", "tool_use_id": "x", "content": "ok"}]},
|
|
3148
|
+
],
|
|
3149
|
+
"tools": [{"name": "Bash", "input_schema": {"type": "object", "properties": {"command": {"type": "string"}}}}],
|
|
3150
|
+
}
|
|
3151
|
+
|
|
3152
|
+
def test_starvation_breaker_strips_tools(self):
|
|
3153
|
+
monitor = proxy.SessionMonitor()
|
|
3154
|
+
monitor.consecutive_forced_count = proxy.PROXY_TOOL_STARVATION_THRESHOLD
|
|
3155
|
+
body = self._make_body_with_tools()
|
|
3156
|
+
result = proxy.build_openai_request(body, monitor)
|
|
3157
|
+
self.assertNotIn("tools", result)
|
|
3158
|
+
self.assertNotIn("tool_choice", result)
|
|
3159
|
+
self.assertEqual(monitor.tool_starvation_streak, 1)
|
|
3160
|
+
|
|
3161
|
+
def test_no_starvation_below_threshold(self):
|
|
3162
|
+
monitor = proxy.SessionMonitor()
|
|
3163
|
+
monitor.consecutive_forced_count = proxy.PROXY_TOOL_STARVATION_THRESHOLD - 1
|
|
3164
|
+
body = self._make_body_with_tools()
|
|
3165
|
+
result = proxy.build_openai_request(body, monitor)
|
|
3166
|
+
self.assertIn("tools", result)
|
|
3167
|
+
|
|
3168
|
+
|
|
3169
|
+
class TestContextAwareRelaxation(unittest.TestCase):
|
|
3170
|
+
"""Tests for context-aware tool_choice relaxation."""
|
|
3171
|
+
|
|
3172
|
+
def test_relaxes_at_high_utilization(self):
|
|
3173
|
+
monitor = proxy.SessionMonitor()
|
|
3174
|
+
monitor.context_window = 100000
|
|
3175
|
+
monitor.last_input_tokens = 75000 # 75% > 70% threshold
|
|
3176
|
+
body = {
|
|
3177
|
+
"model": "qwen3.5",
|
|
3178
|
+
"messages": [
|
|
3179
|
+
{"role": "user", "content": "hello"},
|
|
3180
|
+
{"role": "assistant", "content": "text only"},
|
|
3181
|
+
{"role": "user", "content": [{"type": "tool_result", "tool_use_id": "x", "content": "ok"}]},
|
|
3182
|
+
],
|
|
3183
|
+
"tools": [{"name": "Bash", "input_schema": {"type": "object", "properties": {"command": {"type": "string"}}}}],
|
|
3184
|
+
}
|
|
3185
|
+
result = proxy.build_openai_request(body, monitor)
|
|
3186
|
+
# Should be auto, not required
|
|
3187
|
+
self.assertEqual(result.get("tool_choice"), "auto")
|
|
3188
|
+
|
|
3189
|
+
def test_no_relaxation_below_threshold(self):
|
|
3190
|
+
monitor = proxy.SessionMonitor()
|
|
3191
|
+
monitor.context_window = 100000
|
|
3192
|
+
monitor.last_input_tokens = 50000 # 50% < 70%
|
|
3193
|
+
body = {
|
|
3194
|
+
"model": "qwen3.5",
|
|
3195
|
+
"messages": [
|
|
3196
|
+
{"role": "user", "content": "hello"},
|
|
3197
|
+
{"role": "assistant", "content": "text only"},
|
|
3198
|
+
{"role": "user", "content": [{"type": "tool_result", "tool_use_id": "x", "content": "ok"}]},
|
|
3199
|
+
],
|
|
3200
|
+
"tools": [{"name": "Bash", "input_schema": {"type": "object", "properties": {"command": {"type": "string"}}}}],
|
|
3201
|
+
}
|
|
3202
|
+
result = proxy.build_openai_request(body, monitor)
|
|
3203
|
+
# Should still be required (state machine forces it)
|
|
3204
|
+
self.assertEqual(result.get("tool_choice"), "required")
|
|
3205
|
+
|
|
3206
|
+
|
|
3030
3207
|
if __name__ == "__main__":
|
|
3031
3208
|
unittest.main()
|
|
3032
3209
|
|