@miller-tech/uap 1.20.19 → 1.20.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -4103,6 +4103,8 @@ def _build_malformed_retry_body(
|
|
|
4103
4103
|
tool_choice: str = "required",
|
|
4104
4104
|
attempt: int = 1,
|
|
4105
4105
|
total_attempts: int = 1,
|
|
4106
|
+
is_garbled: bool = False,
|
|
4107
|
+
exclude_tools: list[str] | None = None,
|
|
4106
4108
|
) -> dict:
|
|
4107
4109
|
retry_body = dict(openai_body)
|
|
4108
4110
|
retry_body["stream"] = False
|
|
@@ -4137,7 +4139,16 @@ def _build_malformed_retry_body(
|
|
|
4137
4139
|
sanitized = _sanitize_assistant_messages_for_retry(existing_messages)
|
|
4138
4140
|
retry_body["messages"] = [*sanitized, malformed_retry_instruction]
|
|
4139
4141
|
|
|
4140
|
-
|
|
4142
|
+
# Option 1: Progressive garbled-cap within retries — use smaller max_tokens
|
|
4143
|
+
# when the issue involves garbled/degenerate args to limit degeneration room.
|
|
4144
|
+
if is_garbled and PROXY_TOOL_TURN_MAX_TOKENS_GARBLED > 0:
|
|
4145
|
+
retry_body["max_tokens"] = PROXY_TOOL_TURN_MAX_TOKENS_GARBLED
|
|
4146
|
+
logger.info(
|
|
4147
|
+
"RETRY GARBLED CAP: max_tokens=%d for garbled retry attempt=%d",
|
|
4148
|
+
PROXY_TOOL_TURN_MAX_TOKENS_GARBLED,
|
|
4149
|
+
attempt,
|
|
4150
|
+
)
|
|
4151
|
+
elif PROXY_MALFORMED_TOOL_RETRY_MAX_TOKENS > 0:
|
|
4141
4152
|
current_max = int(
|
|
4142
4153
|
retry_body.get("max_tokens", PROXY_MALFORMED_TOOL_RETRY_MAX_TOKENS)
|
|
4143
4154
|
)
|
|
@@ -4151,6 +4162,23 @@ def _build_malformed_retry_body(
|
|
|
4151
4162
|
anthropic_body.get("tools", [])
|
|
4152
4163
|
)
|
|
4153
4164
|
|
|
4165
|
+
# Option 3: Exclude specific failing tools from retry to let the model
|
|
4166
|
+
# pick an alternative when a tool consistently produces garbled args.
|
|
4167
|
+
if exclude_tools and retry_body.get("tools"):
|
|
4168
|
+
exclude_lower = {t.lower() for t in exclude_tools}
|
|
4169
|
+
original_count = len(retry_body["tools"])
|
|
4170
|
+
retry_body["tools"] = [
|
|
4171
|
+
t for t in retry_body["tools"]
|
|
4172
|
+
if t.get("function", {}).get("name", "").lower() not in exclude_lower
|
|
4173
|
+
]
|
|
4174
|
+
if len(retry_body["tools"]) < original_count:
|
|
4175
|
+
logger.info(
|
|
4176
|
+
"RETRY TOOL NARROWING: excluded %s, tools %d -> %d",
|
|
4177
|
+
exclude_tools,
|
|
4178
|
+
original_count,
|
|
4179
|
+
len(retry_body["tools"]),
|
|
4180
|
+
)
|
|
4181
|
+
|
|
4154
4182
|
if PROXY_DISABLE_THINKING_ON_TOOL_TURNS:
|
|
4155
4183
|
retry_body["enable_thinking"] = False
|
|
4156
4184
|
|
|
@@ -4373,8 +4401,16 @@ async def _apply_malformed_tool_guardrail(
|
|
|
4373
4401
|
|
|
4374
4402
|
monitor.maybe_activate_forced_tool_dampener(issue.kind)
|
|
4375
4403
|
excerpt = _openai_message_text(working_resp)[:220].replace("\n", " ")
|
|
4404
|
+
# Option 2: Log garbled argument content for diagnostics
|
|
4405
|
+
arg_excerpt = ""
|
|
4406
|
+
if issue.kind == "invalid_tool_args":
|
|
4407
|
+
for tc in (working_resp.get("choices", [{}])[0].get("message", {}).get("tool_calls", [])):
|
|
4408
|
+
raw_args = tc.get("function", {}).get("arguments", "")
|
|
4409
|
+
if raw_args and _is_garbled_tool_arguments(raw_args):
|
|
4410
|
+
arg_excerpt = raw_args[:200].replace("\n", " ")
|
|
4411
|
+
break
|
|
4376
4412
|
logger.warning(
|
|
4377
|
-
"TOOL RESPONSE ISSUE: session=%s kind=%s reason=%s malformed=%d invalid=%d required_miss=%d excerpt=%.220s",
|
|
4413
|
+
"TOOL RESPONSE ISSUE: session=%s kind=%s reason=%s malformed=%d invalid=%d required_miss=%d excerpt=%.220s args=%.200s",
|
|
4378
4414
|
session_id,
|
|
4379
4415
|
issue.kind,
|
|
4380
4416
|
issue.reason,
|
|
@@ -4382,16 +4418,27 @@ async def _apply_malformed_tool_guardrail(
|
|
|
4382
4418
|
monitor.invalid_tool_call_streak,
|
|
4383
4419
|
monitor.required_tool_miss_streak,
|
|
4384
4420
|
excerpt,
|
|
4421
|
+
arg_excerpt,
|
|
4385
4422
|
)
|
|
4386
4423
|
|
|
4387
4424
|
attempts = max(0, PROXY_MALFORMED_TOOL_RETRY_MAX)
|
|
4388
4425
|
current_issue = issue
|
|
4426
|
+
# Track failing tool names for Option 3 (tool narrowing on retry)
|
|
4427
|
+
failing_tools: set[str] = set()
|
|
4428
|
+
if issue.kind == "invalid_tool_args":
|
|
4429
|
+
for tc in (working_resp.get("choices", [{}])[0].get("message", {}).get("tool_calls", [])):
|
|
4430
|
+
fn_name = tc.get("function", {}).get("name", "")
|
|
4431
|
+
raw_args = tc.get("function", {}).get("arguments", "")
|
|
4432
|
+
if fn_name and raw_args and _is_garbled_tool_arguments(raw_args):
|
|
4433
|
+
failing_tools.add(fn_name)
|
|
4389
4434
|
for attempt in range(attempts):
|
|
4390
4435
|
attempt_tool_choice = _retry_tool_choice_for_attempt(
|
|
4391
4436
|
required_tool_choice,
|
|
4392
4437
|
attempt,
|
|
4393
4438
|
attempts,
|
|
4394
4439
|
)
|
|
4440
|
+
# Option 3: From the second retry onward (0-based loop index attempt >= 1), exclude tools whose arguments were garbled so far
|
|
4441
|
+
exclude = list(failing_tools) if attempt >= 1 and failing_tools else None
|
|
4395
4442
|
retry_body = _build_malformed_retry_body(
|
|
4396
4443
|
openai_body,
|
|
4397
4444
|
anthropic_body,
|
|
@@ -4399,6 +4446,8 @@ async def _apply_malformed_tool_guardrail(
|
|
|
4399
4446
|
tool_choice=attempt_tool_choice,
|
|
4400
4447
|
attempt=attempt + 1,
|
|
4401
4448
|
total_attempts=attempts,
|
|
4449
|
+
is_garbled=current_issue.kind == "invalid_tool_args",
|
|
4450
|
+
exclude_tools=exclude,
|
|
4402
4451
|
)
|
|
4403
4452
|
retry_resp = await client.post(
|
|
4404
4453
|
f"{LLAMA_CPP_BASE}/chat/completions",
|
|
@@ -4471,6 +4520,12 @@ async def _apply_malformed_tool_guardrail(
|
|
|
4471
4520
|
elif retry_issue.kind == "invalid_tool_args":
|
|
4472
4521
|
monitor.invalid_tool_call_streak += 1
|
|
4473
4522
|
monitor.arg_preflight_rejections += 1
|
|
4523
|
+
# Track failing tools from retries for progressive narrowing
|
|
4524
|
+
for tc in (retry_working.get("choices", [{}])[0].get("message", {}).get("tool_calls", [])):
|
|
4525
|
+
fn_name = tc.get("function", {}).get("name", "")
|
|
4526
|
+
raw_args = tc.get("function", {}).get("arguments", "")
|
|
4527
|
+
if fn_name and raw_args and _is_garbled_tool_arguments(raw_args):
|
|
4528
|
+
failing_tools.add(fn_name)
|
|
4474
4529
|
|
|
4475
4530
|
monitor.maybe_activate_forced_tool_dampener(retry_issue.kind)
|
|
4476
4531
|
logger.warning(
|
|
@@ -3872,3 +3872,119 @@ class TestFinalizeTurnToolCallLeak(unittest.TestCase):
|
|
|
3872
3872
|
for block in text_blocks:
|
|
3873
3873
|
self.assertNotIn("<tool_call>", block["text"])
|
|
3874
3874
|
self.assertNotIn("</tool_call>", block["text"])
|
|
3875
|
+
|
|
3876
|
+
|
|
3877
|
+
class TestRetryGarbledImprovements(unittest.TestCase):
    """Tests for progressive garbled cap, arg logging, and tool narrowing on retries.

    The token-cap tests pin the relevant ``proxy`` module constants with
    ``mock.patch.object`` so they exercise the same branch regardless of how
    the proxy is configured in the environment running the suite.
    """

    # (name, description) pairs used to build matching OpenAI/Anthropic tool lists.
    _ALL_TOOLS = (("Grep", "search"), ("Read", "read"), ("Bash", "run"))

    @staticmethod
    def _make_bodies(tool_specs=()):
        """Return ``(openai_body, anthropic_body)`` request fixtures.

        ``tool_specs`` is an iterable of ``(name, description)`` pairs. When it
        is empty the OpenAI body carries an empty ``tools`` list and the
        Anthropic body has no ``tools`` key at all.
        """
        tool_specs = tuple(tool_specs)
        openai_body = {
            "model": "test-model",
            "max_tokens": 8192,
            "messages": [{"role": "user", "content": "test"}],
            "tools": [
                {
                    "type": "function",
                    "function": {
                        "name": name,
                        "description": description,
                        "parameters": {"type": "object"},
                    },
                }
                for name, description in tool_specs
            ],
        }
        anthropic_body = {"messages": [{"role": "user", "content": "test"}]}
        if tool_specs:
            anthropic_body["tools"] = [
                {
                    "name": name,
                    "description": description,
                    "input_schema": {"type": "object"},
                }
                for name, description in tool_specs
            ]
        return openai_body, anthropic_body

    @staticmethod
    def _build_retry(openai_body, anthropic_body, **overrides):
        """Call ``proxy._build_malformed_retry_body`` with the shared defaults.

        Only the keyword arguments given in ``overrides`` are added on top of
        the defaults, so parameters a test does not mention keep the
        function's own default values.
        """
        kwargs = {
            "retry_hint": "fix it",
            "tool_choice": "required",
            "attempt": 1,
            "total_attempts": 3,
        }
        kwargs.update(overrides)
        return proxy._build_malformed_retry_body(openai_body, anthropic_body, **kwargs)

    @staticmethod
    def _tool_names(retry_body):
        """Return the function names of the tools present in ``retry_body``."""
        return [t["function"]["name"] for t in retry_body.get("tools", [])]

    def test_garbled_cap_applied_in_retry_body(self):
        """When is_garbled=True, retry body uses PROXY_TOOL_TURN_MAX_TOKENS_GARBLED."""
        from unittest import mock

        openai_body, anthropic_body = self._make_bodies()
        # Pin a positive cap: the builder skips the garbled branch when the
        # configured value is <= 0, which would make this test config-dependent.
        with mock.patch.object(proxy, "PROXY_TOOL_TURN_MAX_TOKENS_GARBLED", 512):
            retry_body = self._build_retry(
                openai_body, anthropic_body, is_garbled=True
            )
        self.assertEqual(retry_body["max_tokens"], 512)

    def test_non_garbled_uses_standard_retry_max(self):
        """When is_garbled=False, retry body uses PROXY_MALFORMED_TOOL_RETRY_MAX_TOKENS."""
        from unittest import mock

        openai_body, anthropic_body = self._make_bodies()
        # Pin a positive limit so the assertion always runs instead of being
        # silently skipped when the setting is disabled in the environment.
        with mock.patch.object(proxy, "PROXY_MALFORMED_TOOL_RETRY_MAX_TOKENS", 1024):
            retry_body = self._build_retry(
                openai_body, anthropic_body, is_garbled=False
            )
        self.assertLessEqual(retry_body["max_tokens"], 1024)

    def test_exclude_tools_removes_from_retry(self):
        """exclude_tools parameter removes specified tools from retry body."""
        openai_body, anthropic_body = self._make_bodies(self._ALL_TOOLS)
        retry_body = self._build_retry(
            openai_body, anthropic_body, attempt=2, exclude_tools=["Grep"]
        )
        tool_names = self._tool_names(retry_body)
        self.assertNotIn("Grep", tool_names)
        self.assertIn("Read", tool_names)
        self.assertIn("Bash", tool_names)

    def test_exclude_tools_is_case_insensitive(self):
        """Excluded names are matched against tool names ignoring case."""
        openai_body, anthropic_body = self._make_bodies(self._ALL_TOOLS)
        retry_body = self._build_retry(
            openai_body, anthropic_body, attempt=2, exclude_tools=["grep"]
        )
        tool_names = self._tool_names(retry_body)
        self.assertNotIn("Grep", tool_names)
        self.assertIn("Read", tool_names)
        self.assertIn("Bash", tool_names)

    def test_exclude_tools_none_keeps_all(self):
        """When exclude_tools is None, all tools are retained."""
        openai_body, anthropic_body = self._make_bodies(self._ALL_TOOLS[:1])
        retry_body = self._build_retry(
            openai_body, anthropic_body, attempt=2, exclude_tools=None
        )
        self.assertIn("Grep", self._tool_names(retry_body))

    def test_garbled_args_excerpt_in_issue(self):
        """_is_garbled_tool_arguments detects garbled content for logging."""
        # Garbled pattern: runaway braces
        garbled = '{"pattern": "test}}}}}}}}}}}}}}"}'
        self.assertTrue(proxy._is_garbled_tool_arguments(garbled))
        # Clean pattern
        clean = '{"pattern": "hello", "path": "/src"}'
        self.assertFalse(proxy._is_garbled_tool_arguments(clean))
|