@miller-tech/uap 1.20.12 → 1.20.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -143,7 +143,7 @@ PROXY_TOOL_STATE_MIN_MESSAGES = int(
|
|
|
143
143
|
os.environ.get("PROXY_TOOL_STATE_MIN_MESSAGES", "6")
|
|
144
144
|
)
|
|
145
145
|
PROXY_TOOL_STATE_FORCED_BUDGET = int(
|
|
146
|
-
os.environ.get("PROXY_TOOL_STATE_FORCED_BUDGET", "24")
|
|
146
|
+
os.environ.get("PROXY_TOOL_STATE_FORCED_BUDGET", "12")
|
|
147
147
|
)
|
|
148
148
|
PROXY_TOOL_STATE_AUTO_BUDGET = int(os.environ.get("PROXY_TOOL_STATE_AUTO_BUDGET", "2"))
|
|
149
149
|
PROXY_TOOL_STATE_STAGNATION_THRESHOLD = int(
|
|
@@ -156,7 +156,7 @@ PROXY_TOOL_STATE_FINALIZE_THRESHOLD = int(
|
|
|
156
156
|
os.environ.get("PROXY_TOOL_STATE_FINALIZE_THRESHOLD", "18")
|
|
157
157
|
)
|
|
158
158
|
PROXY_TOOL_STATE_REVIEW_CYCLE_LIMIT = int(
|
|
159
|
-
os.environ.get("PROXY_TOOL_STATE_REVIEW_CYCLE_LIMIT", "2")
|
|
159
|
+
os.environ.get("PROXY_TOOL_STATE_REVIEW_CYCLE_LIMIT", "1")
|
|
160
160
|
)
|
|
161
161
|
PROXY_CLIENT_RATE_WINDOW_SECS = int(
|
|
162
162
|
os.environ.get("PROXY_CLIENT_RATE_WINDOW_SECS", "60")
|
|
@@ -219,7 +219,7 @@ PROXY_MALFORMED_TOOL_GUARDRAIL = os.environ.get(
|
|
|
219
219
|
"no",
|
|
220
220
|
}
|
|
221
221
|
PROXY_MALFORMED_TOOL_RETRY_MAX = int(
|
|
222
|
-
os.environ.get("PROXY_MALFORMED_TOOL_RETRY_MAX", "2")
|
|
222
|
+
os.environ.get("PROXY_MALFORMED_TOOL_RETRY_MAX", "3")
|
|
223
223
|
)
|
|
224
224
|
PROXY_MALFORMED_TOOL_RETRY_MAX_TOKENS = int(
|
|
225
225
|
os.environ.get("PROXY_MALFORMED_TOOL_RETRY_MAX_TOKENS", "2048")
|
|
@@ -628,6 +628,7 @@ class SessionMonitor:
|
|
|
628
628
|
tool_state_transitions: int = 0
|
|
629
629
|
tool_state_review_cycles: int = 0
|
|
630
630
|
last_tool_fingerprint: str = ""
|
|
631
|
+
cycling_tool_names: list = field(default_factory=list)
|
|
631
632
|
finalize_turn_active: bool = False
|
|
632
633
|
completion_required: bool = False
|
|
633
634
|
completion_pending: bool = False
|
|
@@ -832,6 +833,7 @@ class SessionMonitor:
|
|
|
832
833
|
self.tool_state_auto_budget_remaining = 0
|
|
833
834
|
self.tool_state_stagnation_streak = 0
|
|
834
835
|
self.tool_state_review_cycles = 0
|
|
836
|
+
self.cycling_tool_names = []
|
|
835
837
|
self.last_tool_fingerprint = ""
|
|
836
838
|
|
|
837
839
|
def update_completion_state(self, anthropic_body: dict, has_tool_results: bool):
|
|
@@ -2053,12 +2055,17 @@ def _resolve_state_machine_tool_choice(
|
|
|
2053
2055
|
monitor.tool_state_forced_budget_remaining = max(
|
|
2054
2056
|
1, PROXY_TOOL_STATE_FORCED_BUDGET // 2
|
|
2055
2057
|
)
|
|
2058
|
+
# Capture which tools are cycling for narrowing/hint injection
|
|
2059
|
+
window = max(2, PROXY_TOOL_STATE_CYCLE_WINDOW)
|
|
2060
|
+
recent = [fp for fp in monitor.tool_call_history[-window:] if fp]
|
|
2061
|
+
monitor.cycling_tool_names = list(dict.fromkeys(recent))
|
|
2056
2062
|
logger.warning(
|
|
2057
|
-
"TOOL STATE MACHINE: entering review (cycle=%s repeat=%d stagnation=%d cycles=%d)",
|
|
2063
|
+
"TOOL STATE MACHINE: entering review (cycle=%s repeat=%d stagnation=%d cycles=%d cycling_tools=%s)",
|
|
2058
2064
|
cycle_looping,
|
|
2059
2065
|
cycle_repeat,
|
|
2060
2066
|
monitor.tool_state_stagnation_streak,
|
|
2061
2067
|
monitor.tool_state_review_cycles,
|
|
2068
|
+
monitor.cycling_tool_names,
|
|
2062
2069
|
)
|
|
2063
2070
|
return "required", reason
|
|
2064
2071
|
|
|
@@ -2349,6 +2356,49 @@ def build_openai_request(
|
|
|
2349
2356
|
monitor.no_progress_streak = (
|
|
2350
2357
|
0 if last_user_has_tool_result else monitor.no_progress_streak + 1
|
|
2351
2358
|
)
|
|
2359
|
+
# Option 1: Inject cycle-break instruction when entering review
|
|
2360
|
+
if (
|
|
2361
|
+
monitor.tool_turn_phase == "review"
|
|
2362
|
+
and state_reason in {"cycle_detected", "stagnation"}
|
|
2363
|
+
and monitor.cycling_tool_names
|
|
2364
|
+
):
|
|
2365
|
+
cycling_names = ", ".join(monitor.cycling_tool_names)
|
|
2366
|
+
cycle_hint = (
|
|
2367
|
+
f"You have been repeatedly calling the same tool(s): {cycling_names}. "
|
|
2368
|
+
"This is not making progress. Use a DIFFERENT tool to advance the task, "
|
|
2369
|
+
"or call a tool that produces your final answer."
|
|
2370
|
+
)
|
|
2371
|
+
messages = openai_body.get("messages", [])
|
|
2372
|
+
messages.append({"role": "user", "content": cycle_hint})
|
|
2373
|
+
openai_body["messages"] = messages
|
|
2374
|
+
logger.warning(
|
|
2375
|
+
"CYCLE BREAK: injected hint about cycling tools: %s",
|
|
2376
|
+
cycling_names,
|
|
2377
|
+
)
|
|
2378
|
+
# Option 2: Narrow tools during review to exclude cycling tools
|
|
2379
|
+
if (
|
|
2380
|
+
monitor.tool_turn_phase == "review"
|
|
2381
|
+
and monitor.cycling_tool_names
|
|
2382
|
+
and "tools" in openai_body
|
|
2383
|
+
):
|
|
2384
|
+
original_count = len(openai_body["tools"])
|
|
2385
|
+
narrowed = [
|
|
2386
|
+
t
|
|
2387
|
+
for t in openai_body["tools"]
|
|
2388
|
+
if t.get("function", {}).get("name") not in monitor.cycling_tool_names
|
|
2389
|
+
]
|
|
2390
|
+
if narrowed:
|
|
2391
|
+
openai_body["tools"] = narrowed
|
|
2392
|
+
logger.warning(
|
|
2393
|
+
"CYCLE BREAK: narrowed tools from %d to %d (excluded %s)",
|
|
2394
|
+
original_count,
|
|
2395
|
+
len(narrowed),
|
|
2396
|
+
monitor.cycling_tool_names,
|
|
2397
|
+
)
|
|
2398
|
+
else:
|
|
2399
|
+
logger.warning(
|
|
2400
|
+
"CYCLE BREAK: cannot narrow tools — all tools are cycling, keeping original set",
|
|
2401
|
+
)
|
|
2352
2402
|
logger.info(
|
|
2353
2403
|
"tool_choice forced to 'required' by TOOL STATE MACHINE (phase=%s reason=%s forced_budget=%d)",
|
|
2354
2404
|
monitor.tool_turn_phase,
|
|
@@ -3840,6 +3890,40 @@ async def _apply_completion_contract_guardrail(
|
|
|
3840
3890
|
return retried
|
|
3841
3891
|
|
|
3842
3892
|
|
|
3893
|
+
def _sanitize_assistant_messages_for_retry(messages: list[dict]) -> list[dict]:
|
|
3894
|
+
"""Strip malformed tool-like text from assistant messages to prevent copy-contamination.
|
|
3895
|
+
|
|
3896
|
+
Only sanitizes the last 4 assistant messages to avoid excessive processing.
|
|
3897
|
+
"""
|
|
3898
|
+
import re
|
|
3899
|
+
|
|
3900
|
+
# Patterns that indicate malformed tool call text in assistant content
|
|
3901
|
+
_TOOL_LIKE_PATTERNS = re.compile(
|
|
3902
|
+
r"<tool_call>.*?</tool_call>"
|
|
3903
|
+
r"|<function_call>.*?</function_call>"
|
|
3904
|
+
r'|\{"name"\s*:\s*"[^"]+"\s*,\s*"arguments"\s*:'
|
|
3905
|
+
r"|```json\s*\{[^}]*\"name\"\s*:",
|
|
3906
|
+
re.DOTALL,
|
|
3907
|
+
)
|
|
3908
|
+
|
|
3909
|
+
result = list(messages)
|
|
3910
|
+
sanitized_count = 0
|
|
3911
|
+
for i in range(len(result) - 1, -1, -1):
|
|
3912
|
+
if sanitized_count >= 4:
|
|
3913
|
+
break
|
|
3914
|
+
msg = result[i]
|
|
3915
|
+
if msg.get("role") != "assistant":
|
|
3916
|
+
continue
|
|
3917
|
+
content = msg.get("content", "")
|
|
3918
|
+
if isinstance(content, str) and _TOOL_LIKE_PATTERNS.search(content):
|
|
3919
|
+
cleaned = _TOOL_LIKE_PATTERNS.sub("", content).strip()
|
|
3920
|
+
if not cleaned:
|
|
3921
|
+
cleaned = "I will use the appropriate tool."
|
|
3922
|
+
result[i] = {**msg, "content": cleaned}
|
|
3923
|
+
sanitized_count += 1
|
|
3924
|
+
return result
|
|
3925
|
+
|
|
3926
|
+
|
|
3843
3927
|
def _build_malformed_retry_body(
|
|
3844
3928
|
openai_body: dict,
|
|
3845
3929
|
anthropic_body: dict,
|
|
@@ -3851,7 +3935,11 @@ def _build_malformed_retry_body(
|
|
|
3851
3935
|
retry_body = dict(openai_body)
|
|
3852
3936
|
retry_body["stream"] = False
|
|
3853
3937
|
retry_body["tool_choice"] = tool_choice
|
|
3854
|
-
|
|
3938
|
+
# Escalate temperature down on successive retries for more deterministic output
|
|
3939
|
+
if total_attempts > 1 and attempt > 1:
|
|
3940
|
+
retry_body["temperature"] = 0.0
|
|
3941
|
+
else:
|
|
3942
|
+
retry_body["temperature"] = PROXY_MALFORMED_TOOL_RETRY_TEMPERATURE
|
|
3855
3943
|
|
|
3856
3944
|
if tool_choice == "required":
|
|
3857
3945
|
retry_instruction = (
|
|
@@ -3872,7 +3960,10 @@ def _build_malformed_retry_body(
|
|
|
3872
3960
|
}
|
|
3873
3961
|
existing_messages = retry_body.get("messages")
|
|
3874
3962
|
if isinstance(existing_messages, list) and existing_messages:
|
|
3875
|
-
|
|
3963
|
+
# Strip malformed tool-like text from assistant messages to prevent
|
|
3964
|
+
# the model from copying contaminated patterns on retry
|
|
3965
|
+
sanitized = _sanitize_assistant_messages_for_retry(existing_messages)
|
|
3966
|
+
retry_body["messages"] = [*sanitized, malformed_retry_instruction]
|
|
3876
3967
|
|
|
3877
3968
|
if PROXY_MALFORMED_TOOL_RETRY_MAX_TOKENS > 0:
|
|
3878
3969
|
current_max = int(
|
|
@@ -3236,6 +3236,207 @@ class TestPruningImprovements(unittest.TestCase):
|
|
|
3236
3236
|
self.assertEqual(monitor.tool_turn_phase, "finalize")
|
|
3237
3237
|
|
|
3238
3238
|
|
|
3239
|
+
class TestCycleBreakOptions(unittest.TestCase):
    """Tests for cycle-break options: hint injection, tool narrowing, reduced budgets."""

    # Module-level ``proxy`` settings that both request-driven tests override
    # before calling ``build_openai_request``.
    _CYCLE_SETTINGS = {
        "PROXY_TOOL_STATE_MACHINE": True,
        "PROXY_TOOL_STATE_MIN_MESSAGES": 3,
        "PROXY_TOOL_STATE_FORCED_BUDGET": 20,
        "PROXY_TOOL_STATE_AUTO_BUDGET": 2,
        "PROXY_TOOL_STATE_STAGNATION_THRESHOLD": 99,
        "PROXY_TOOL_STATE_CYCLE_WINDOW": 4,
    }

    # Descriptions used when building Anthropic-style tool definitions.
    _TOOL_DESCRIPTIONS = {
        "Bash": "Run command",
        "Read": "Read file",
        "Write": "Write file",
    }

    def _override_proxy_settings(self):
        """Apply ``_CYCLE_SETTINGS`` to ``proxy`` and restore the old values after the test."""
        for name, value in self._CYCLE_SETTINGS.items():
            # addCleanup runs even when the test fails, like the try/finally
            # save-and-restore it replaces.
            self.addCleanup(setattr, proxy, name, getattr(proxy, name))
            setattr(proxy, name, value)

    def _build_request_after_bash_cycle(self, tool_names):
        """Build an OpenAI request for a session whose history is four ``Bash`` calls.

        Args:
            tool_names: Names of the tools offered in the Anthropic request body.

        Returns:
            ``(monitor, request)`` where ``request`` is the result of
            ``proxy.build_openai_request``.
        """
        self._override_proxy_settings()

        monitor = proxy.SessionMonitor(context_window=262144)
        monitor.tool_turn_phase = "act"
        monitor.tool_state_forced_budget_remaining = 20
        monitor.tool_call_history = ["Bash", "Bash", "Bash", "Bash"]
        monitor.last_tool_fingerprint = "Bash"

        body = {
            "model": "test",
            "messages": [
                {"role": "user", "content": "start"},
                {
                    "role": "assistant",
                    "content": [
                        {"type": "tool_use", "id": "t1", "name": "Bash", "input": {"command": "ls"}},
                    ],
                },
                {
                    "role": "user",
                    "content": [
                        {"type": "tool_result", "tool_use_id": "t1", "content": "ok"},
                    ],
                },
            ],
            "tools": [
                {
                    "name": name,
                    "description": self._TOOL_DESCRIPTIONS[name],
                    "input_schema": {"type": "object"},
                }
                for name in tool_names
            ],
        }
        return monitor, proxy.build_openai_request(body, monitor)

    def test_cycle_break_injects_hint_message(self):
        """Option 1: cycle detection injects a user hint about the cycling tools."""
        monitor, request = self._build_request_after_bash_cycle(["Bash", "Read"])
        self.assertEqual(monitor.tool_turn_phase, "review")
        # Check that a cycle-break hint was injected as the final message
        messages = request.get("messages", [])
        last_msg = messages[-1] if messages else {}
        self.assertEqual(last_msg.get("role"), "user")
        self.assertIn("Bash", last_msg.get("content", ""))
        self.assertIn("DIFFERENT tool", last_msg.get("content", ""))

    def test_cycle_break_narrows_tools(self):
        """Option 2: cycling tools are excluded from the tools array during review."""
        monitor, request = self._build_request_after_bash_cycle(["Bash", "Read", "Write"])
        self.assertEqual(monitor.tool_turn_phase, "review")
        # Bash should be excluded, Read and Write should remain
        tool_names = [t["function"]["name"] for t in request.get("tools", [])]
        self.assertNotIn("Bash", tool_names)
        self.assertIn("Read", tool_names)
        self.assertIn("Write", tool_names)

    def test_forced_budget_default_is_12(self):
        """Option 3: default forced budget reduced from 24 to 12."""
        self.assertEqual(proxy.PROXY_TOOL_STATE_FORCED_BUDGET, 12)

    def test_review_cycle_limit_default_is_1(self):
        """Option 4: default review cycle limit reduced from 2 to 1."""
        self.assertEqual(proxy.PROXY_TOOL_STATE_REVIEW_CYCLE_LIMIT, 1)

    def test_cycling_tool_names_cleared_on_reset(self):
        """cycling_tool_names is cleared when tool turn state resets."""
        monitor = proxy.SessionMonitor(context_window=262144)
        monitor.cycling_tool_names = ["Bash", "Read"]
        monitor.reset_tool_turn_state(reason="test")
        self.assertEqual(monitor.cycling_tool_names, [])
|
|
3378
|
+
|
|
3379
|
+
|
|
3380
|
+
class TestMalformedRetryHardening(unittest.TestCase):
    """Tests for malformed retry improvements: budget, temp escalation, message sanitization."""

    def test_retry_max_default_is_3(self):
        """Option 1: default retry budget increased from 2 to 3."""
        self.assertEqual(proxy.PROXY_MALFORMED_TOOL_RETRY_MAX, 3)

    def test_sanitize_assistant_messages_strips_tool_like_text(self):
        """Option 3: malformed tool-like text stripped from assistant messages on retry."""
        messages = [
            {"role": "system", "content": "You are helpful."},
            {"role": "user", "content": "Run a command"},
            {
                "role": "assistant",
                "content": 'Here is the result <tool_call>{"name": "Bash", "arguments": {"command": "ls"}}</tool_call>',
            },
            {"role": "user", "content": "ok"},
        ]
        sanitized = proxy._sanitize_assistant_messages_for_retry(messages)
        # System and user messages unchanged
        self.assertEqual(sanitized[0]["content"], "You are helpful.")
        self.assertEqual(sanitized[1]["content"], "Run a command")
        self.assertEqual(sanitized[3]["content"], "ok")
        # Assistant message should have tool_call stripped
        self.assertNotIn("<tool_call>", sanitized[2]["content"])
        self.assertNotIn("Bash", sanitized[2]["content"])

    def test_sanitize_preserves_clean_assistant_messages(self):
        """Clean assistant messages are not modified by sanitization."""
        messages = [
            {"role": "assistant", "content": "I will read the file for you."},
        ]
        sanitized = proxy._sanitize_assistant_messages_for_retry(messages)
        self.assertEqual(sanitized[0]["content"], "I will read the file for you.")

    def test_sanitize_replaces_empty_content_with_placeholder(self):
        """If stripping leaves empty content, a placeholder is used."""
        messages = [
            {"role": "assistant", "content": '<tool_call>{"name": "Bash", "arguments": {}}</tool_call>'},
        ]
        sanitized = proxy._sanitize_assistant_messages_for_retry(messages)
        self.assertEqual(sanitized[0]["content"], "I will use the appropriate tool.")

    def test_retry_body_uses_sanitized_messages(self):
        """Retry body messages are sanitized before adding retry instruction."""
        openai_body = {
            "messages": [
                {"role": "system", "content": "sys"},
                {"role": "user", "content": "do it"},
                {"role": "assistant", "content": '<tool_call>{"name":"X","arguments":{}}</tool_call>'},
            ],
            "tools": [{"type": "function", "function": {"name": "X", "parameters": {}}}],
        }
        anthropic_body = {"tools": [{"name": "X", "input_schema": {"type": "object"}}]}
        retry = proxy._build_malformed_retry_body(
            openai_body, anthropic_body, attempt=1, total_attempts=3,
        )
        assistant_msgs = [m for m in retry["messages"] if m.get("role") == "assistant"]
        # Guard against a vacuous pass: without this, the loop below would
        # succeed even if the retry body dropped the assistant message entirely.
        self.assertTrue(assistant_msgs, "retry body should keep the assistant message")
        # The assistant message should be sanitized
        for m in assistant_msgs:
            self.assertNotIn("<tool_call>", m.get("content", ""))
|
|
3438
|
+
|
|
3439
|
+
|
|
3239
3440
|
if __name__ == "__main__":
|
|
3240
3441
|
unittest.main()
|
|
3241
3442
|
|