@miller-tech/uap 1.15.1 → 1.15.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/templates/hooks/forgecode/session-start.sh +6 -22
- package/templates/hooks/session-start.sh +7 -31
- package/tools/agents/plugin/session-start.sh +6 -22
- package/tools/agents/scripts/anthropic_proxy.py +815 -51
- package/tools/agents/tests/test_anthropic_proxy_streaming.py +661 -0
|
@@ -178,6 +178,14 @@ PROXY_MALFORMED_TOOL_STREAM_STRICT = os.environ.get(
|
|
|
178
178
|
"off",
|
|
179
179
|
"no",
|
|
180
180
|
}
|
|
181
|
+
PROXY_TOOL_ARGS_PREFLIGHT = os.environ.get(
|
|
182
|
+
"PROXY_TOOL_ARGS_PREFLIGHT", "on"
|
|
183
|
+
).lower() not in {
|
|
184
|
+
"0",
|
|
185
|
+
"false",
|
|
186
|
+
"off",
|
|
187
|
+
"no",
|
|
188
|
+
}
|
|
181
189
|
PROXY_FORCE_NON_STREAM = os.environ.get(
|
|
182
190
|
"PROXY_FORCE_NON_STREAM", "off"
|
|
183
191
|
).lower() not in {
|
|
@@ -186,6 +194,29 @@ PROXY_FORCE_NON_STREAM = os.environ.get(
|
|
|
186
194
|
"off",
|
|
187
195
|
"no",
|
|
188
196
|
}
|
|
197
|
+
PROXY_FORCED_TOOL_DAMPENER = os.environ.get(
|
|
198
|
+
"PROXY_FORCED_TOOL_DAMPENER", "on"
|
|
199
|
+
).lower() not in {
|
|
200
|
+
"0",
|
|
201
|
+
"false",
|
|
202
|
+
"off",
|
|
203
|
+
"no",
|
|
204
|
+
}
|
|
205
|
+
PROXY_FORCED_TOOL_DAMPENER_MIN_FORCED = int(
|
|
206
|
+
os.environ.get("PROXY_FORCED_TOOL_DAMPENER_MIN_FORCED", "4")
|
|
207
|
+
)
|
|
208
|
+
PROXY_FORCED_TOOL_DAMPENER_BAD_STREAK = int(
|
|
209
|
+
os.environ.get("PROXY_FORCED_TOOL_DAMPENER_BAD_STREAK", "1")
|
|
210
|
+
)
|
|
211
|
+
PROXY_FORCED_TOOL_DAMPENER_EMPTY_STREAK = int(
|
|
212
|
+
os.environ.get("PROXY_FORCED_TOOL_DAMPENER_EMPTY_STREAK", "2")
|
|
213
|
+
)
|
|
214
|
+
PROXY_FORCED_TOOL_DAMPENER_AUTO_TURNS = int(
|
|
215
|
+
os.environ.get("PROXY_FORCED_TOOL_DAMPENER_AUTO_TURNS", "2")
|
|
216
|
+
)
|
|
217
|
+
PROXY_FORCED_TOOL_DAMPENER_REJECTIONS = int(
|
|
218
|
+
os.environ.get("PROXY_FORCED_TOOL_DAMPENER_REJECTIONS", "2")
|
|
219
|
+
)
|
|
189
220
|
PROXY_SESSION_CONTAMINATION_BREAKER = os.environ.get(
|
|
190
221
|
"PROXY_SESSION_CONTAMINATION_BREAKER", "on"
|
|
191
222
|
).lower() not in {
|
|
@@ -200,6 +231,12 @@ PROXY_SESSION_CONTAMINATION_THRESHOLD = int(
|
|
|
200
231
|
PROXY_SESSION_CONTAMINATION_KEEP_LAST = int(
|
|
201
232
|
os.environ.get("PROXY_SESSION_CONTAMINATION_KEEP_LAST", "8")
|
|
202
233
|
)
|
|
234
|
+
PROXY_SESSION_CONTAMINATION_FORCED_THRESHOLD = int(
|
|
235
|
+
os.environ.get("PROXY_SESSION_CONTAMINATION_FORCED_THRESHOLD", "8")
|
|
236
|
+
)
|
|
237
|
+
PROXY_SESSION_CONTAMINATION_REQUIRED_MISS_THRESHOLD = int(
|
|
238
|
+
os.environ.get("PROXY_SESSION_CONTAMINATION_REQUIRED_MISS_THRESHOLD", "2")
|
|
239
|
+
)
|
|
203
240
|
PROXY_AGENTIC_SUPPLEMENT_MODE = (
|
|
204
241
|
os.environ.get("PROXY_AGENTIC_SUPPLEMENT_MODE", "clean").strip().lower()
|
|
205
242
|
)
|
|
@@ -257,7 +294,13 @@ class SessionMonitor:
|
|
|
257
294
|
no_progress_streak: int = 0 # Forced tool turns without new tool_result
|
|
258
295
|
unexpected_end_turn_count: int = 0 # end_turn without tool_use in active loop
|
|
259
296
|
malformed_tool_streak: int = 0 # consecutive malformed pseudo tool payloads
|
|
297
|
+
invalid_tool_call_streak: int = 0 # consecutive invalid tool arg payloads
|
|
298
|
+
required_tool_miss_streak: int = 0 # required tool turns with no tool call
|
|
260
299
|
contamination_resets: int = 0 # how many contamination resets were applied
|
|
300
|
+
forced_auto_cooldown_turns: int = 0 # temporary auto override turns remaining
|
|
301
|
+
forced_dampener_triggers: int = 0 # number of dampener activations
|
|
302
|
+
arg_preflight_rejections: int = 0 # rejected tool calls from arg preflight
|
|
303
|
+
arg_preflight_repairs: int = 0 # sanitized tool call args accepted
|
|
261
304
|
last_seen_ts: float = 0.0
|
|
262
305
|
|
|
263
306
|
def record_request(self, estimated_tokens: int):
|
|
@@ -394,6 +437,55 @@ class SessionMonitor:
|
|
|
394
437
|
|
|
395
438
|
return False, 0
|
|
396
439
|
|
|
440
|
+
def guardrail_streak(self) -> int:
    """Return the larger of the malformed-tool and invalid-tool-call streaks."""
    malformed = self.malformed_tool_streak
    invalid = self.invalid_tool_call_streak
    return malformed if malformed >= invalid else invalid
|
|
443
|
+
|
|
444
|
+
def consume_forced_auto_turn(self) -> bool:
    """Spend one pending dampener turn, if any remain.

    Returns True after decrementing ``forced_auto_cooldown_turns`` when the
    counter was positive; returns False and leaves it untouched otherwise.
    """
    turns_left = self.forced_auto_cooldown_turns
    if turns_left > 0:
        self.forced_auto_cooldown_turns = turns_left - 1
        return True
    return False
|
|
450
|
+
|
|
451
|
+
def maybe_activate_forced_tool_dampener(self, reason: str) -> bool:
    """Open a temporary tool_choice=auto window when tool-call quality degrades.

    Nothing happens (returns False) when the dampener is disabled, when a
    window is already open, when fewer than the configured minimum of
    consecutive forced turns have occurred, or when none of the three
    pressure signals (guardrail streak, required-tool misses, arg-preflight
    rejections) has reached its threshold. Every threshold is clamped to at
    least 1.

    On activation the cooldown counter is set, the trigger counter is
    incremented, the rejection counter is reset if it contributed, a warning
    is logged with *reason*, and True is returned.
    """
    if not PROXY_FORCED_TOOL_DAMPENER or self.forced_auto_cooldown_turns > 0:
        return False
    if self.consecutive_forced_count < max(1, PROXY_FORCED_TOOL_DAMPENER_MIN_FORCED):
        return False

    bad_streak = self.guardrail_streak()
    bad_limit = max(1, PROXY_FORCED_TOOL_DAMPENER_BAD_STREAK)
    miss_limit = max(1, PROXY_FORCED_TOOL_DAMPENER_EMPTY_STREAK)
    rejection_limit = max(1, PROXY_FORCED_TOOL_DAMPENER_REJECTIONS)
    rejection_pressure = self.arg_preflight_rejections >= rejection_limit

    triggered = (
        bad_streak >= bad_limit
        or self.required_tool_miss_streak >= miss_limit
        or rejection_pressure
    )
    if not triggered:
        return False

    self.forced_auto_cooldown_turns = max(1, PROXY_FORCED_TOOL_DAMPENER_AUTO_TURNS)
    self.forced_dampener_triggers += 1
    if rejection_pressure:
        # The rejection counter is consumed by this activation.
        self.arg_preflight_rejections = 0
    logger.warning(
        "FORCED-TOOL DAMPENER: activated reason=%s forced=%d bad_streak=%d required_miss=%d rejection_pressure=%s auto_turns=%d",
        reason,
        self.consecutive_forced_count,
        bad_streak,
        self.required_tool_miss_streak,
        rejection_pressure,
        self.forced_auto_cooldown_turns,
    )
    return True
|
|
488
|
+
|
|
397
489
|
def should_release_tool_choice(self) -> bool:
|
|
398
490
|
"""Determine if tool_choice should be relaxed to 'auto' to break a loop.
|
|
399
491
|
|
|
@@ -784,14 +876,23 @@ async def lifespan(app: FastAPI):
|
|
|
784
876
|
_resolve_prune_target_fraction() * 100,
|
|
785
877
|
)
|
|
786
878
|
logger.info(
|
|
787
|
-
"Guardrails: malformed=%s stream_strict=%s force_non_stream=%s tool_narrowing=%s thinking_off_on_tools=%s contamination_breaker=%s(%d) analysis_only_route=%s(min_tools=%d,max_msgs=%d)",
|
|
879
|
+
"Guardrails: malformed=%s stream_strict=%s force_non_stream=%s args_preflight=%s tool_narrowing=%s thinking_off_on_tools=%s dampener=%s(%d/%d/%d/%d->%d) contamination_breaker=%s(%d forced=%d required_miss=%d) analysis_only_route=%s(min_tools=%d,max_msgs=%d)",
|
|
788
880
|
PROXY_MALFORMED_TOOL_GUARDRAIL,
|
|
789
881
|
PROXY_MALFORMED_TOOL_STREAM_STRICT,
|
|
790
882
|
PROXY_FORCE_NON_STREAM,
|
|
883
|
+
PROXY_TOOL_ARGS_PREFLIGHT,
|
|
791
884
|
PROXY_TOOL_NARROWING,
|
|
792
885
|
PROXY_DISABLE_THINKING_ON_TOOL_TURNS,
|
|
886
|
+
PROXY_FORCED_TOOL_DAMPENER,
|
|
887
|
+
PROXY_FORCED_TOOL_DAMPENER_MIN_FORCED,
|
|
888
|
+
PROXY_FORCED_TOOL_DAMPENER_BAD_STREAK,
|
|
889
|
+
PROXY_FORCED_TOOL_DAMPENER_EMPTY_STREAK,
|
|
890
|
+
PROXY_FORCED_TOOL_DAMPENER_REJECTIONS,
|
|
891
|
+
PROXY_FORCED_TOOL_DAMPENER_AUTO_TURNS,
|
|
793
892
|
PROXY_SESSION_CONTAMINATION_BREAKER,
|
|
794
893
|
PROXY_SESSION_CONTAMINATION_THRESHOLD,
|
|
894
|
+
PROXY_SESSION_CONTAMINATION_FORCED_THRESHOLD,
|
|
895
|
+
PROXY_SESSION_CONTAMINATION_REQUIRED_MISS_THRESHOLD,
|
|
795
896
|
PROXY_ANALYSIS_ONLY_ROUTE,
|
|
796
897
|
PROXY_ANALYSIS_ONLY_MIN_TOOLS,
|
|
797
898
|
PROXY_ANALYSIS_ONLY_MAX_MESSAGES,
|
|
@@ -898,6 +999,28 @@ def _extract_text(content) -> str:
|
|
|
898
999
|
return str(content)
|
|
899
1000
|
|
|
900
1001
|
|
|
1002
|
+
# Lower-case phrases that identify a canned "could not format tool call" apology.
_TOOL_CALL_APOLOGY_MARKERS = (
    "i could not produce a valid tool-call format in this turn",
    "i will issue exactly one valid tool call next",
)

# Replacement text used in place of an apology message.
_TOOL_CALL_RETRY_MESSAGE = (
    "Tool-call formatting failed after automatic retries. "
    "Please retry the same request."
)


def _contains_tool_call_apology(text: str) -> bool:
    """Return True when *text* contains any apology marker (case-insensitive).

    Empty or None input yields False.
    """
    if text:
        haystack = text.lower()
        for phrase in _TOOL_CALL_APOLOGY_MARKERS:
            if phrase in haystack:
                return True
    return False


def _sanitize_tool_call_apology_text(text: str) -> str:
    """Replace *text* with the retry message if it contains an apology marker."""
    if _contains_tool_call_apology(text):
        return _TOOL_CALL_RETRY_MESSAGE
    return text
|
|
1022
|
+
|
|
1023
|
+
|
|
901
1024
|
def _has_tool_definitions(anthropic_body: dict) -> bool:
|
|
902
1025
|
tools = anthropic_body.get("tools")
|
|
903
1026
|
return isinstance(tools, list) and len(tools) > 0
|
|
@@ -1302,8 +1425,16 @@ def build_openai_request(anthropic_body: dict, monitor: SessionMonitor) -> dict:
|
|
|
1302
1425
|
_record_last_assistant_tool_calls(anthropic_body, monitor)
|
|
1303
1426
|
last_user_has_tool_result = _last_user_has_tool_result(anthropic_body)
|
|
1304
1427
|
|
|
1305
|
-
# Check if loop breaker should override tool_choice
|
|
1306
|
-
if monitor.
|
|
1428
|
+
# Check if forced-tool dampener or loop breaker should override tool_choice
|
|
1429
|
+
if monitor.consume_forced_auto_turn():
|
|
1430
|
+
openai_body["tool_choice"] = "auto"
|
|
1431
|
+
monitor.consecutive_forced_count = 0
|
|
1432
|
+
monitor.no_progress_streak = 0
|
|
1433
|
+
logger.warning(
|
|
1434
|
+
"tool_choice set to 'auto' by FORCED-TOOL DAMPENER (remaining=%d)",
|
|
1435
|
+
monitor.forced_auto_cooldown_turns,
|
|
1436
|
+
)
|
|
1437
|
+
elif monitor.should_release_tool_choice():
|
|
1307
1438
|
openai_body["tool_choice"] = "auto"
|
|
1308
1439
|
monitor.consecutive_forced_count = 0
|
|
1309
1440
|
monitor.no_progress_streak = 0
|
|
@@ -1642,16 +1773,515 @@ def _openai_has_valid_tool_calls(openai_resp: dict, anthropic_body: dict) -> boo
|
|
|
1642
1773
|
)
|
|
1643
1774
|
|
|
1644
1775
|
|
|
1776
|
+
@dataclass
class ToolResponseIssue:
    """Result of inspecting a model response for tool-call problems.

    A default-constructed instance (empty ``kind``) means no problem was found.
    """

    # Issue category; empty string when there is no issue.
    kind: str = ""
    # Description of what was wrong with the response.
    reason: str = ""
    # Corrective instruction text associated with the issue.
    retry_hint: str = ""

    def has_issue(self) -> bool:
        """Report whether ``kind`` is set to a truthy value."""
        return True if self.kind else False
|
|
1784
|
+
|
|
1785
|
+
|
|
1786
|
+
# Lower-case tag fragments whose presence inside a tool argument marks it as
# contaminated with tool-protocol markup.
_TOOL_ARG_MARKERS = (
    "</parameter",
    "<parameter",
    "<tool_call",
    "</tool_call",
    "<function=",
    "</think>",
)


def _iter_string_leaves(value):
    """Yield every string nested inside *value*.

    Strings are yielded directly; lists and dict values are walked
    recursively. Dict keys and any other value types are ignored.
    """
    if isinstance(value, str):
        yield value
        return
    if isinstance(value, list):
        children = value
    elif isinstance(value, dict):
        children = value.values()
    else:
        return
    for child in children:
        yield from _iter_string_leaves(child)


def _contains_tool_markup(value) -> bool:
    """Return True if any string leaf of *value* contains a markup marker.

    Matching is case-insensitive: each leaf is lower-cased before the
    substring test against ``_TOOL_ARG_MARKERS``.
    """
    lowered_leaves = (leaf.lower() for leaf in _iter_string_leaves(value))
    return any(
        marker in lowered
        for lowered in lowered_leaves
        for marker in _TOOL_ARG_MARKERS
    )
|
|
1813
|
+
|
|
1814
|
+
|
|
1815
|
+
def _strip_tool_markup_artifacts(text: str) -> str:
    """Remove leaked tool-protocol tags from *text* and trim outer whitespace.

    Complete ``<parameter ...>``/``</parameter>``, ``<tool_call ...>``/
    ``</tool_call>``, ``<think>``/``</think>`` and ``<function=...>`` tags are
    deleted case-insensitively, one pattern after another. Fragments without
    a closing ``>`` are left in place.
    """
    tag_patterns = (
        r"</?parameter[^>]*>",
        r"</?tool_call[^>]*>",
        r"</?think>",
        r"<function=[^>]*>",
    )
    cleaned = text
    for pattern in tag_patterns:
        cleaned = re.sub(pattern, "", cleaned, flags=re.IGNORECASE)
    return cleaned.strip()


def _sanitize_markup_value(value):
    """Strip tool-protocol tags from every string nested in *value*.

    Walks strings, lists and dict values recursively and returns a
    ``(cleaned_value, changed)`` pair. Dict keys and non-string scalars are
    passed through untouched; new list/dict containers are always built.

    A string is only rewritten when a tag was actually removed from it.
    The stripper also trims whitespace, so a string with no markup keeps its
    original leading/trailing whitespace (indentation, trailing newlines)
    and is reported as unchanged.
    """
    if isinstance(value, str):
        cleaned = _strip_tool_markup_artifacts(value)
        if cleaned == value.strip():
            # Only whitespace would have changed: no tag was removed.
            return value, False
        return cleaned, True

    if isinstance(value, list):
        results = [_sanitize_markup_value(item) for item in value]
        return (
            [item for item, _ in results],
            any(flag for _, flag in results),
        )

    if isinstance(value, dict):
        results = {key: _sanitize_markup_value(item) for key, item in value.items()}
        return (
            {key: item for key, (item, _) in results.items()},
            any(flag for _, flag in results.values()),
        )

    return value, False
|
|
1844
|
+
|
|
1845
|
+
|
|
1846
|
+
def _repair_tool_call_markup(openai_resp: dict) -> tuple[dict, int]:
    """Clean tool-protocol markup out of tool-call arguments.

    For every tool call in the response the arguments are parsed (dict/list
    values are used as-is, anything else is JSON-decoded from its string
    form). When decoding fails, markup tags are stripped from the text, the
    outermost ``{...}`` span is extracted if present, and decoding is retried;
    calls that still fail to decode are kept unchanged. Decoded arguments are
    then passed through ``_sanitize_markup_value``.

    Returns ``(response, repaired_count)``. When nothing was repaired the
    original response object is returned with a count of 0. Otherwise a
    shallow copy is returned whose first choice carries the rewritten
    ``tool_calls``; rewritten calls store ``arguments`` as compact JSON text.
    The input response and its nested dicts are not modified.
    """
    if not _openai_has_tool_calls(openai_resp):
        return openai_resp, 0

    choice, message = _extract_openai_choice(openai_resp)
    original_calls = message.get("tool_calls") or []
    if not original_calls:
        return openai_resp, 0

    rebuilt_calls = []
    fixed = 0

    for call in original_calls:
        function = call.get("function") if isinstance(call, dict) else {}
        if not isinstance(function, dict):
            function = {}
        raw = function.get("arguments", "{}")

        salvaged = False  # True when JSON only decoded after tag stripping
        if isinstance(raw, (dict, list)):
            arguments = raw
        else:
            raw_text = str(raw)
            try:
                arguments = json.loads(raw_text)
            except json.JSONDecodeError:
                snippet = _strip_tool_markup_artifacts(raw_text)
                if "{" in snippet and "}" in snippet:
                    # Keep only the outermost brace-delimited span.
                    snippet = snippet[snippet.find("{") : snippet.rfind("}") + 1]
                try:
                    arguments = json.loads(snippet)
                except json.JSONDecodeError:
                    # Unrecoverable here; leave the call as it was.
                    rebuilt_calls.append(call)
                    continue
                salvaged = True

        sanitized, touched = _sanitize_markup_value(arguments)
        if not (salvaged or touched):
            rebuilt_calls.append(call)
            continue

        patched_call = dict(call)
        patched_fn = dict(function)
        patched_fn["arguments"] = json.dumps(sanitized, separators=(",", ":"))
        patched_call["function"] = patched_fn
        rebuilt_calls.append(patched_call)
        fixed += 1

    if fixed == 0:
        return openai_resp, 0

    result = dict(openai_resp)
    choices = list(openai_resp.get("choices") or [])
    if not choices:
        return openai_resp, 0

    # NOTE(review): writes back to choices[0]; presumably
    # _extract_openai_choice returns the first choice -- confirm.
    new_choice = dict(choice)
    new_message = dict(message)
    new_message["tool_calls"] = rebuilt_calls
    new_choice["message"] = new_message
    choices[0] = new_choice
    result["choices"] = choices
    return result, fixed
|
|
1914
|
+
|
|
1915
|
+
|
|
1916
|
+
def _default_required_value(field_name: str, field_schema: dict):
    """Return a placeholder value for a required tool argument.

    The value is chosen from the schema ``type`` first: ``0`` for
    integer/number, ``False`` for boolean, and a one-element placeholder
    container for object/array. For any other (or missing) type a string is
    returned, picked from the lower-cased field name for a few well-known
    names and falling back to ``"__uap_required__"``.

    When ``type`` is a list (a union such as ``["null", "integer"]``), the
    first entry other than ``"null"`` is used, so a nullable field gets a
    default of its concrete type rather than a string placeholder. An empty
    or null-only list is treated as ``"string"``.

    Args:
        field_name: Name of the required field; may be empty or None.
        field_schema: JSON-schema fragment for the field; non-dict values are
            treated as having no type.
    """
    expected_type = field_schema.get("type") if isinstance(field_schema, dict) else None
    if isinstance(expected_type, list):
        concrete_types = [entry for entry in expected_type if entry != "null"]
        expected_type = concrete_types[0] if concrete_types else "string"

    if expected_type == "integer" or expected_type == "number":
        return 0
    if expected_type == "boolean":
        return False
    if expected_type == "object":
        return {"value": "__uap_required__"}
    if expected_type == "array":
        return ["__uap_required__"]

    # String-like field: pick a placeholder from the field name.
    key = (field_name or "").lower()
    if key in {"command", "cmd"}:
        return "pwd"
    if key == "cron":
        return "* * * * *"
    if key in {"pattern", "glob"}:
        return "*"
    if key == "subject":
        return "task"
    if key in {"path", "file", "filepath", "file_path"} or key.endswith("_path"):
        return "."
    return "__uap_required__"
|
|
1944
|
+
|
|
1945
|
+
|
|
1946
|
+
def _repair_required_tool_args(
    openai_resp: dict, anthropic_body: dict
) -> tuple[dict, int]:
    """Fill in missing or empty required arguments on tool calls.

    Each tool call is matched by function name against the request's tool
    schemas. For tools that declare a non-empty ``required`` list, the
    arguments are decoded to a dict (undecodable or non-dict arguments are
    replaced by an empty dict) and every required string-named field that is
    absent or empty receives a value from ``_default_required_value``.

    Returns ``(response, repaired_count)``. With no repairs the original
    response object is returned with a count of 0; otherwise a shallow copy
    is returned whose first choice carries the rewritten ``tool_calls``, with
    repaired ``arguments`` stored as compact JSON text.
    """
    if not _openai_has_tool_calls(openai_resp):
        return openai_resp, 0

    schemas = _anthropic_tools_by_name(anthropic_body)
    if not schemas:
        return openai_resp, 0

    choice, message = _extract_openai_choice(openai_resp)
    original_calls = message.get("tool_calls") or []
    if not original_calls:
        return openai_resp, 0

    rebuilt_calls = []
    fixed = 0

    for call in original_calls:
        function = call.get("function") if isinstance(call, dict) else {}
        if not isinstance(function, dict):
            function = {}

        schema = schemas.get(function.get("name", ""), {})
        required = schema.get("required", []) if isinstance(schema, dict) else []
        if not (isinstance(required, list) and required):
            # Nothing is required for this tool (or it is unknown).
            rebuilt_calls.append(call)
            continue

        properties = schema.get("properties", {}) if isinstance(schema, dict) else {}
        if not isinstance(properties, dict):
            properties = {}

        raw = function.get("arguments", "{}")
        if isinstance(raw, dict):
            arguments = dict(raw)
        else:
            try:
                arguments = json.loads(str(raw))
            except json.JSONDecodeError:
                arguments = None

        # Undecodable or non-object arguments are rebuilt from scratch and
        # always count as a repair.
        dirty = not isinstance(arguments, dict)
        if dirty:
            arguments = {}

        for name in required:
            if not isinstance(name, str):
                continue
            if name in arguments and not _required_value_is_empty(arguments[name]):
                continue
            spec = properties.get(name)
            arguments[name] = _default_required_value(
                name, spec if isinstance(spec, dict) else {}
            )
            dirty = True

        if not dirty:
            rebuilt_calls.append(call)
            continue

        patched_call = dict(call)
        patched_fn = dict(function)
        patched_fn["arguments"] = json.dumps(arguments, separators=(",", ":"))
        patched_call["function"] = patched_fn
        rebuilt_calls.append(patched_call)
        fixed += 1

    if fixed == 0:
        return openai_resp, 0

    result = dict(openai_resp)
    choices = list(openai_resp.get("choices") or [])
    if not choices:
        return openai_resp, 0

    new_choice = dict(choice)
    new_message = dict(message)
    new_message["tool_calls"] = rebuilt_calls
    new_choice["message"] = new_message
    choices[0] = new_choice
    result["choices"] = choices
    return result, fixed
|
|
2035
|
+
|
|
2036
|
+
|
|
2037
|
+
def _required_value_is_empty(value) -> bool:
    """Return True when *value* counts as empty for a required field.

    Empty means: ``None``, a string that is blank after stripping, or a
    list/dict with no elements. Every other value (including ``0`` and
    ``False``) is considered present.
    """
    if isinstance(value, str):
        return value.strip() == ""
    if isinstance(value, (list, dict)):
        return not value
    return value is None
|
|
2045
|
+
|
|
2046
|
+
|
|
2047
|
+
def _matches_json_schema_type(value, expected_type) -> bool:
    """Check *value* against a JSON-schema ``type`` keyword.

    Args:
        value: Decoded JSON value to test.
        expected_type: A type name, a list of type names (union; any match
            suffices), or a falsy value meaning "no constraint".

    Returns:
        True when the value satisfies the type. Booleans are not accepted as
        ``integer``/``number``. ``"null"`` matches only ``None``, so a union
        such as ``["string", "null"]`` no longer accepts arbitrary values
        through its null member. Unrecognised type names are treated as
        matching.
    """
    if not expected_type:
        return True

    if isinstance(expected_type, list):
        return any(
            _matches_json_schema_type(value, candidate) for candidate in expected_type
        )

    # bool is a subclass of int in Python but is a distinct JSON type.
    is_plain_int = isinstance(value, int) and not isinstance(value, bool)

    if expected_type == "string":
        return isinstance(value, str)
    if expected_type == "integer":
        return is_plain_int
    if expected_type == "number":
        return is_plain_int or isinstance(value, float)
    if expected_type == "boolean":
        return isinstance(value, bool)
    if expected_type == "object":
        return isinstance(value, dict)
    if expected_type == "array":
        return isinstance(value, list)
    if expected_type == "null":
        return value is None
    return True
|
|
2071
|
+
|
|
2072
|
+
|
|
2073
|
+
def _anthropic_tools_by_name(anthropic_body: dict) -> dict[str, dict]:
    """Build a ``{tool name: schema}`` map from the request's ``tools`` list.

    Entries that are not dicts or have no (truthy) ``name`` are skipped. The
    schema is taken from ``input_schema`` when that is a dict, otherwise from
    ``parameters`` when that is a dict, otherwise an empty dict. A later
    entry with the same name overwrites an earlier one.
    """
    by_name: dict[str, dict] = {}
    for entry in anthropic_body.get("tools", []) or []:
        tool_name = entry.get("name", "") if isinstance(entry, dict) else ""
        if not tool_name:
            continue

        schema = entry.get("input_schema")
        if not isinstance(schema, dict):
            fallback = entry.get("parameters")
            schema = fallback if isinstance(fallback, dict) else {}

        by_name[tool_name] = schema or {}
    return by_name
|
|
2090
|
+
|
|
2091
|
+
|
|
2092
|
+
def _validate_tool_call_arguments(
    tool_name: str,
    raw_arguments,
    tool_schema: dict,
    allowed_tools: set[str],
) -> ToolResponseIssue:
    """Validate one tool call's name and arguments against its schema.

    Checks run in order and the first failure is returned:

    1. the tool name must be in *allowed_tools* (skipped when that set is empty);
    2. the arguments must decode as JSON (dict/list inputs are serialised
       first, ``None`` is treated as ``{}``);
    3. the decoded value must be a JSON object;
    4. no string inside it may contain tool-protocol markup;
    5. every required field must be present, non-empty, of the declared
       type, and satisfy ``minLength`` / ``minItems`` when declared.

    Returns:
        A ``ToolResponseIssue`` of kind ``"invalid_tool_args"`` describing the
        failure, or an empty ``ToolResponseIssue`` when everything passes.
    """

    def reject(reason: str, retry_hint: str) -> ToolResponseIssue:
        return ToolResponseIssue(
            kind="invalid_tool_args", reason=reason, retry_hint=retry_hint
        )

    if allowed_tools and tool_name not in allowed_tools:
        return reject(
            f"unknown tool '{tool_name}'",
            "Use exactly one tool from the provided tool list.",
        )

    # Normalise whatever we were given into JSON text, then decode it once.
    if raw_arguments is None:
        arg_text = "{}"
    elif isinstance(raw_arguments, (dict, list)):
        arg_text = json.dumps(raw_arguments)
    else:
        arg_text = str(raw_arguments)

    try:
        parsed = json.loads(arg_text)
    except json.JSONDecodeError as exc:
        return reject(
            f"invalid JSON arguments for '{tool_name}': {exc.msg}",
            f"Emit exactly one `{tool_name}` tool call with `arguments` as a strict JSON object. "
            "Do not include prose before or after JSON.",
        )

    if not isinstance(parsed, dict):
        return reject(
            f"arguments for '{tool_name}' must be a JSON object",
            f"Emit exactly one `{tool_name}` tool call with `arguments` set to a JSON object (not a string or list).",
        )

    if _contains_tool_markup(parsed):
        return reject(
            f"arguments for '{tool_name}' contain malformed markup fragments",
            f"Remove tag fragments from `{tool_name}` arguments and emit only plain JSON key/value pairs.",
        )

    schema = tool_schema if isinstance(tool_schema, dict) else {}
    required = schema.get("required", [])
    if not isinstance(required, list):
        required = []
    properties = schema.get("properties", {})
    if not isinstance(properties, dict):
        properties = {}

    missing: list[str] = []
    empty: list[str] = []
    wrong_type: list[str] = []

    for field in required:
        if not isinstance(field, str):
            continue

        if field not in parsed:
            missing.append(field)
            continue

        value = parsed[field]
        if _required_value_is_empty(value):
            empty.append(field)
            continue

        field_schema = properties.get(field)
        if not isinstance(field_schema, dict):
            field_schema = {}

        expected_type = field_schema.get("type")
        if expected_type and not _matches_json_schema_type(value, expected_type):
            wrong_type.append(field)
            continue

        # Length constraints are reported under "empty" as well.
        min_length = field_schema.get("minLength")
        too_short = (
            isinstance(min_length, int)
            and isinstance(value, str)
            and len(value.strip()) < min_length
        )
        if too_short:
            empty.append(field)
            continue

        min_items = field_schema.get("minItems")
        too_few = (
            isinstance(min_items, int)
            and isinstance(value, list)
            and len(value) < min_items
        )
        if too_few:
            empty.append(field)

    if not (missing or empty or wrong_type):
        return ToolResponseIssue()

    labelled = (
        ("missing", missing),
        ("empty", empty),
        ("type mismatch", wrong_type),
    )
    details = [f"{label}: {', '.join(names)}" for label, names in labelled if names]

    required_fields = ", ".join(str(name) for name in required if isinstance(name, str))
    if required_fields:
        required_hint = f"Required fields must be non-empty: {required_fields}. "
    else:
        required_hint = ""

    return reject(
        f"invalid arguments for '{tool_name}' ({'; '.join(details)})",
        (
            f"Emit exactly one `{tool_name}` tool call with strict JSON arguments. "
            f"{required_hint}Do not include protocol tags or commentary."
        ).strip(),
    )
|
|
2219
|
+
|
|
2220
|
+
|
|
2221
|
+
def _classify_tool_response_issue(
    openai_resp: dict,
    anthropic_body: dict,
    required_tool_choice: bool = False,
) -> ToolResponseIssue:
    """Classify what, if anything, is wrong with a tool-turn response.

    Returns an empty ``ToolResponseIssue`` when the request has no ``tools``
    key or nothing is wrong. Otherwise the first applicable issue is returned:

    * ``"malformed_payload"`` when ``_is_malformed_tool_response`` flags the
      response;
    * ``"required_tool_miss"`` when there are no tool calls, a tool call was
      required, and the stripped message text is at most 48 characters long;
    * the first ``"invalid_tool_args"`` issue reported by
      ``_validate_tool_call_arguments`` for any tool call (only when
      ``PROXY_TOOL_ARGS_PREFLIGHT`` is enabled).
    """
    if "tools" not in anthropic_body:
        return ToolResponseIssue()

    if _is_malformed_tool_response(openai_resp, anthropic_body):
        return ToolResponseIssue(
            kind="malformed_payload",
            reason="malformed pseudo tool payload detected in assistant text",
            retry_hint=(
                "Return exactly one valid tool call with strict JSON arguments. "
                "Do not output raw protocol tags, schema fragments, or apologies about formatting."
            ),
        )

    if not _openai_has_tool_calls(openai_resp):
        # Only a (near-)empty reply on a required turn counts as a miss;
        # longer text replies are let through.
        if (
            required_tool_choice
            and len(_openai_message_text(openai_resp).strip()) <= 48
        ):
            return ToolResponseIssue(
                kind="required_tool_miss",
                reason="required tool turn returned no tool calls",
                retry_hint=(
                    "A tool call is mandatory for this turn. Emit exactly one valid tool call now "
                    "with a strict JSON object in `arguments`."
                ),
            )
        return ToolResponseIssue()

    if not PROXY_TOOL_ARGS_PREFLIGHT:
        return ToolResponseIssue()

    _, message = _extract_openai_choice(openai_resp)
    calls = message.get("tool_calls") or []
    schemas = _anthropic_tools_by_name(anthropic_body)
    allowed = set(schemas.keys())

    for call in calls:
        function = call.get("function") if isinstance(call, dict) else {}
        if not isinstance(function, dict):
            function = {}
        name = function.get("name", "")
        verdict = _validate_tool_call_arguments(
            name,
            function.get("arguments", "{}"),
            schemas.get(name, {}),
            allowed,
        )
        if verdict.has_issue():
            return verdict

    return ToolResponseIssue()
|
|
2277
|
+
|
|
2278
|
+
|
|
1645
2279
|
def _looks_malformed_tool_payload(text: str) -> bool:
|
|
1646
2280
|
if not text:
|
|
1647
2281
|
return False
|
|
1648
2282
|
|
|
1649
2283
|
lowered = text.lower()
|
|
1650
|
-
|
|
1651
|
-
"i could not produce a valid tool-call format in this turn",
|
|
1652
|
-
"i will issue exactly one valid tool call next",
|
|
1653
|
-
)
|
|
1654
|
-
if any(marker in lowered for marker in apology_markers):
|
|
2284
|
+
if _contains_tool_call_apology(text):
|
|
1655
2285
|
return True
|
|
1656
2286
|
|
|
1657
2287
|
primary_markers = ("</parameter", "<parameter", "<tool_call", "<function=")
|
|
@@ -1706,7 +2336,9 @@ def _is_malformed_tool_response(openai_resp: dict, anthropic_body: dict) -> bool
|
|
|
1706
2336
|
return _looks_malformed_tool_payload(_openai_message_text(openai_resp))
|
|
1707
2337
|
|
|
1708
2338
|
|
|
1709
|
-
def _build_malformed_retry_body(
|
|
2339
|
+
def _build_malformed_retry_body(
|
|
2340
|
+
openai_body: dict, anthropic_body: dict, retry_hint: str = ""
|
|
2341
|
+
) -> dict:
|
|
1710
2342
|
retry_body = dict(openai_body)
|
|
1711
2343
|
retry_body["stream"] = False
|
|
1712
2344
|
retry_body["tool_choice"] = "required"
|
|
@@ -1741,6 +2373,16 @@ def _build_malformed_retry_body(openai_body: dict, anthropic_body: dict) -> dict
|
|
|
1741
2373
|
if PROXY_DISABLE_THINKING_ON_TOOL_TURNS:
|
|
1742
2374
|
retry_body["enable_thinking"] = False
|
|
1743
2375
|
|
|
2376
|
+
if retry_hint:
|
|
2377
|
+
repair_prompt = (
|
|
2378
|
+
"[TOOL CALL REPAIR]\n"
|
|
2379
|
+
f"{retry_hint}\n"
|
|
2380
|
+
"Return exactly one valid tool call object and no explanatory prose."
|
|
2381
|
+
)
|
|
2382
|
+
retry_messages = list(retry_body.get("messages", []))
|
|
2383
|
+
retry_messages.append({"role": "system", "content": repair_prompt})
|
|
2384
|
+
retry_body["messages"] = retry_messages
|
|
2385
|
+
|
|
1744
2386
|
return retry_body
|
|
1745
2387
|
|
|
1746
2388
|
|
|
@@ -1756,10 +2398,7 @@ def _build_clean_guardrail_openai_response(openai_resp: dict) -> dict:
|
|
|
1756
2398
|
"finish_reason": "stop",
|
|
1757
2399
|
"message": {
|
|
1758
2400
|
"role": "assistant",
|
|
1759
|
-
"content":
|
|
1760
|
-
"Tool-call formatting failed after automatic retries. "
|
|
1761
|
-
"Please retry the same request."
|
|
1762
|
-
),
|
|
2401
|
+
"content": _TOOL_CALL_RETRY_MESSAGE,
|
|
1763
2402
|
},
|
|
1764
2403
|
}
|
|
1765
2404
|
],
|
|
@@ -1832,27 +2471,68 @@ async def _apply_malformed_tool_guardrail(
|
|
|
1832
2471
|
if not PROXY_MALFORMED_TOOL_GUARDRAIL:
|
|
1833
2472
|
return openai_resp
|
|
1834
2473
|
|
|
1835
|
-
|
|
1836
|
-
|
|
2474
|
+
working_resp = openai_resp
|
|
2475
|
+
repair_count = 0
|
|
2476
|
+
if PROXY_TOOL_ARGS_PREFLIGHT and _openai_has_tool_calls(openai_resp):
|
|
2477
|
+
working_resp, markup_repairs = _repair_tool_call_markup(openai_resp)
|
|
2478
|
+
working_resp, required_repairs = _repair_required_tool_args(
|
|
2479
|
+
working_resp, anthropic_body
|
|
2480
|
+
)
|
|
2481
|
+
repair_count = markup_repairs + required_repairs
|
|
2482
|
+
|
|
2483
|
+
required_tool_choice = openai_body.get("tool_choice") == "required"
|
|
2484
|
+
has_tool_calls = _openai_has_tool_calls(working_resp)
|
|
2485
|
+
if required_tool_choice and not has_tool_calls:
|
|
2486
|
+
monitor.required_tool_miss_streak += 1
|
|
2487
|
+
|
|
2488
|
+
issue = _classify_tool_response_issue(
|
|
2489
|
+
working_resp,
|
|
2490
|
+
anthropic_body,
|
|
2491
|
+
required_tool_choice=required_tool_choice,
|
|
2492
|
+
)
|
|
2493
|
+
if not issue.has_issue():
|
|
2494
|
+
if required_tool_choice and not has_tool_calls:
|
|
2495
|
+
monitor.maybe_activate_forced_tool_dampener("required_tool_miss")
|
|
2496
|
+
if has_tool_calls:
|
|
1837
2497
|
monitor.malformed_tool_streak = 0
|
|
1838
|
-
|
|
2498
|
+
monitor.invalid_tool_call_streak = 0
|
|
2499
|
+
monitor.required_tool_miss_streak = 0
|
|
2500
|
+
if repair_count > 0:
|
|
2501
|
+
monitor.arg_preflight_repairs += repair_count
|
|
2502
|
+
logger.info(
|
|
2503
|
+
"TOOL ARG REPAIR: session=%s repaired=%d source=initial",
|
|
2504
|
+
session_id,
|
|
2505
|
+
repair_count,
|
|
2506
|
+
)
|
|
2507
|
+
return working_resp
|
|
1839
2508
|
|
|
1840
|
-
|
|
1841
|
-
|
|
1842
|
-
|
|
1843
|
-
|
|
1844
|
-
|
|
1845
|
-
|
|
2509
|
+
if issue.kind == "malformed_payload":
|
|
2510
|
+
monitor.malformed_tool_streak += 1
|
|
2511
|
+
elif issue.kind == "invalid_tool_args":
|
|
2512
|
+
monitor.invalid_tool_call_streak += 1
|
|
2513
|
+
monitor.arg_preflight_rejections += 1
|
|
2514
|
+
|
|
2515
|
+
monitor.maybe_activate_forced_tool_dampener(issue.kind)
|
|
2516
|
+
excerpt = _openai_message_text(working_resp)[:220].replace("\n", " ")
|
|
1846
2517
|
logger.warning(
|
|
1847
|
-
"
|
|
2518
|
+
"TOOL RESPONSE ISSUE: session=%s kind=%s reason=%s malformed=%d invalid=%d required_miss=%d excerpt=%.220s",
|
|
1848
2519
|
session_id,
|
|
2520
|
+
issue.kind,
|
|
2521
|
+
issue.reason,
|
|
1849
2522
|
monitor.malformed_tool_streak,
|
|
2523
|
+
monitor.invalid_tool_call_streak,
|
|
2524
|
+
monitor.required_tool_miss_streak,
|
|
1850
2525
|
excerpt,
|
|
1851
2526
|
)
|
|
1852
2527
|
|
|
1853
2528
|
attempts = max(0, PROXY_MALFORMED_TOOL_RETRY_MAX)
|
|
2529
|
+
current_issue = issue
|
|
1854
2530
|
for attempt in range(attempts):
|
|
1855
|
-
retry_body = _build_malformed_retry_body(
|
|
2531
|
+
retry_body = _build_malformed_retry_body(
|
|
2532
|
+
openai_body,
|
|
2533
|
+
anthropic_body,
|
|
2534
|
+
retry_hint=current_issue.retry_hint,
|
|
2535
|
+
)
|
|
1856
2536
|
retry_resp = await client.post(
|
|
1857
2537
|
f"{LLAMA_CPP_BASE}/chat/completions",
|
|
1858
2538
|
json=retry_body,
|
|
@@ -1868,40 +2548,71 @@ async def _apply_malformed_tool_guardrail(
|
|
|
1868
2548
|
continue
|
|
1869
2549
|
|
|
1870
2550
|
retry_json = retry_resp.json()
|
|
1871
|
-
|
|
1872
|
-
|
|
1873
|
-
|
|
1874
|
-
|
|
1875
|
-
|
|
1876
|
-
|
|
2551
|
+
retry_working = retry_json
|
|
2552
|
+
retry_repairs = 0
|
|
2553
|
+
if PROXY_TOOL_ARGS_PREFLIGHT and _openai_has_tool_calls(retry_json):
|
|
2554
|
+
retry_working, retry_markup_repairs = _repair_tool_call_markup(retry_json)
|
|
2555
|
+
retry_working, retry_required_repairs = _repair_required_tool_args(
|
|
2556
|
+
retry_working, anthropic_body
|
|
1877
2557
|
)
|
|
1878
|
-
|
|
2558
|
+
retry_repairs = retry_markup_repairs + retry_required_repairs
|
|
1879
2559
|
|
|
1880
|
-
|
|
1881
|
-
|
|
1882
|
-
|
|
1883
|
-
|
|
1884
|
-
|
|
1885
|
-
|
|
1886
|
-
|
|
1887
|
-
|
|
2560
|
+
retry_has_tool_calls = _openai_has_tool_calls(retry_working)
|
|
2561
|
+
retry_required = retry_body.get("tool_choice") == "required"
|
|
2562
|
+
if retry_required and not retry_has_tool_calls:
|
|
2563
|
+
monitor.required_tool_miss_streak += 1
|
|
2564
|
+
|
|
2565
|
+
retry_issue = _classify_tool_response_issue(
|
|
2566
|
+
retry_working,
|
|
2567
|
+
anthropic_body,
|
|
2568
|
+
required_tool_choice=retry_required,
|
|
2569
|
+
)
|
|
1888
2570
|
|
|
1889
|
-
if not
|
|
2571
|
+
if not retry_issue.has_issue():
|
|
1890
2572
|
monitor.malformed_tool_streak = 0
|
|
2573
|
+
monitor.invalid_tool_call_streak = 0
|
|
2574
|
+
monitor.required_tool_miss_streak = 0
|
|
1891
2575
|
logger.info(
|
|
1892
|
-
"
|
|
2576
|
+
"TOOL RESPONSE RETRY success: kind=%s attempt=%d/%d",
|
|
2577
|
+
current_issue.kind,
|
|
1893
2578
|
attempt + 1,
|
|
1894
2579
|
attempts,
|
|
1895
2580
|
)
|
|
1896
|
-
|
|
2581
|
+
if retry_repairs > 0:
|
|
2582
|
+
monitor.arg_preflight_repairs += retry_repairs
|
|
2583
|
+
logger.info(
|
|
2584
|
+
"TOOL ARG REPAIR: session=%s repaired=%d source=retry",
|
|
2585
|
+
session_id,
|
|
2586
|
+
retry_repairs,
|
|
2587
|
+
)
|
|
2588
|
+
return retry_working
|
|
1897
2589
|
|
|
1898
|
-
|
|
2590
|
+
if retry_issue.kind == "malformed_payload":
|
|
2591
|
+
monitor.malformed_tool_streak += 1
|
|
2592
|
+
elif retry_issue.kind == "invalid_tool_args":
|
|
2593
|
+
monitor.invalid_tool_call_streak += 1
|
|
2594
|
+
monitor.arg_preflight_rejections += 1
|
|
2595
|
+
|
|
2596
|
+
monitor.maybe_activate_forced_tool_dampener(retry_issue.kind)
|
|
2597
|
+
logger.warning(
|
|
2598
|
+
"TOOL RESPONSE RETRY invalid: session=%s attempt=%d/%d kind=%s reason=%s",
|
|
2599
|
+
session_id,
|
|
2600
|
+
attempt + 1,
|
|
2601
|
+
attempts,
|
|
2602
|
+
retry_issue.kind,
|
|
2603
|
+
retry_issue.reason,
|
|
2604
|
+
)
|
|
2605
|
+
current_issue = retry_issue
|
|
1899
2606
|
|
|
1900
2607
|
logger.error(
|
|
1901
|
-
"
|
|
2608
|
+
"TOOL RESPONSE issue persisted after retries (session=%s kind=%s malformed=%d invalid=%d required_miss=%d); returning clean guardrail response",
|
|
1902
2609
|
session_id,
|
|
2610
|
+
current_issue.kind or issue.kind,
|
|
2611
|
+
monitor.malformed_tool_streak,
|
|
2612
|
+
monitor.invalid_tool_call_streak,
|
|
2613
|
+
monitor.required_tool_miss_streak,
|
|
1903
2614
|
)
|
|
1904
|
-
return _build_clean_guardrail_openai_response(
|
|
2615
|
+
return _build_clean_guardrail_openai_response(working_resp)
|
|
1905
2616
|
|
|
1906
2617
|
|
|
1907
2618
|
def _maybe_apply_session_contamination_breaker(
|
|
@@ -1911,13 +2622,28 @@ def _maybe_apply_session_contamination_breaker(
|
|
|
1911
2622
|
return anthropic_body
|
|
1912
2623
|
|
|
1913
2624
|
threshold = max(1, PROXY_SESSION_CONTAMINATION_THRESHOLD)
|
|
1914
|
-
|
|
2625
|
+
forced_threshold = max(1, PROXY_SESSION_CONTAMINATION_FORCED_THRESHOLD)
|
|
2626
|
+
required_miss_threshold = max(
|
|
2627
|
+
1, PROXY_SESSION_CONTAMINATION_REQUIRED_MISS_THRESHOLD
|
|
2628
|
+
)
|
|
2629
|
+
bad_streak = monitor.guardrail_streak()
|
|
2630
|
+
should_reset = (
|
|
2631
|
+
bad_streak >= threshold
|
|
2632
|
+
or (
|
|
2633
|
+
bad_streak >= max(1, threshold - 1)
|
|
2634
|
+
and monitor.consecutive_forced_count >= forced_threshold
|
|
2635
|
+
)
|
|
2636
|
+
or monitor.required_tool_miss_streak >= required_miss_threshold
|
|
2637
|
+
)
|
|
2638
|
+
if not should_reset:
|
|
1915
2639
|
return anthropic_body
|
|
1916
2640
|
|
|
1917
2641
|
messages = anthropic_body.get("messages", [])
|
|
1918
2642
|
keep_last = max(2, PROXY_SESSION_CONTAMINATION_KEEP_LAST)
|
|
1919
2643
|
if len(messages) <= keep_last + 1:
|
|
1920
2644
|
monitor.malformed_tool_streak = 0
|
|
2645
|
+
monitor.invalid_tool_call_streak = 0
|
|
2646
|
+
monitor.required_tool_miss_streak = 0
|
|
1921
2647
|
return anthropic_body
|
|
1922
2648
|
|
|
1923
2649
|
head = messages[:1]
|
|
@@ -1925,22 +2651,30 @@ def _maybe_apply_session_contamination_breaker(
|
|
|
1925
2651
|
reset_marker = {
|
|
1926
2652
|
"role": "user",
|
|
1927
2653
|
"content": (
|
|
1928
|
-
"[SESSION RESET:
|
|
1929
|
-
"
|
|
2654
|
+
"[SESSION RESET: tool-call quality degraded in earlier turns. "
|
|
2655
|
+
"Continue from the recent context and emit valid tool calls with strict JSON arguments only.]"
|
|
1930
2656
|
),
|
|
1931
2657
|
}
|
|
1932
2658
|
|
|
1933
2659
|
updated_body = dict(anthropic_body)
|
|
1934
2660
|
updated_body["messages"] = head + [reset_marker] + tail
|
|
1935
2661
|
|
|
2662
|
+
forced_before = monitor.consecutive_forced_count
|
|
2663
|
+
required_miss_before = monitor.required_tool_miss_streak
|
|
1936
2664
|
monitor.contamination_resets += 1
|
|
1937
2665
|
monitor.malformed_tool_streak = 0
|
|
2666
|
+
monitor.invalid_tool_call_streak = 0
|
|
2667
|
+
monitor.required_tool_miss_streak = 0
|
|
1938
2668
|
monitor.no_progress_streak = 0
|
|
1939
2669
|
monitor.consecutive_forced_count = 0
|
|
2670
|
+
monitor.forced_auto_cooldown_turns = 0
|
|
1940
2671
|
logger.warning(
|
|
1941
|
-
"SESSION CONTAMINATION BREAKER: session=%s reset applied, kept=%d messages",
|
|
2672
|
+
"SESSION CONTAMINATION BREAKER: session=%s reset applied, kept=%d messages (bad_streak=%d forced=%d required_miss=%d)",
|
|
1942
2673
|
session_id,
|
|
1943
2674
|
len(updated_body["messages"]),
|
|
2675
|
+
bad_streak,
|
|
2676
|
+
forced_before,
|
|
2677
|
+
required_miss_before,
|
|
1944
2678
|
)
|
|
1945
2679
|
|
|
1946
2680
|
return updated_body
|
|
@@ -1959,7 +2693,17 @@ def openai_to_anthropic_response(openai_resp: dict, model: str) -> dict:
|
|
|
1959
2693
|
|
|
1960
2694
|
content = []
|
|
1961
2695
|
if message.get("content"):
|
|
1962
|
-
|
|
2696
|
+
raw_text = (
|
|
2697
|
+
message["content"]
|
|
2698
|
+
if isinstance(message["content"], str)
|
|
2699
|
+
else str(message["content"])
|
|
2700
|
+
)
|
|
2701
|
+
sanitized_text = _sanitize_tool_call_apology_text(raw_text)
|
|
2702
|
+
if sanitized_text != raw_text:
|
|
2703
|
+
logger.warning(
|
|
2704
|
+
"SANITIZE: replaced known malformed tool-call apology text in assistant response"
|
|
2705
|
+
)
|
|
2706
|
+
content.append({"type": "text", "text": sanitized_text})
|
|
1963
2707
|
|
|
1964
2708
|
# Convert tool calls
|
|
1965
2709
|
for tc in message.get("tool_calls", []):
|
|
@@ -2297,8 +3041,17 @@ async def stream_anthropic_response(
|
|
|
2297
3041
|
]
|
|
2298
3042
|
}
|
|
2299
3043
|
|
|
2300
|
-
|
|
3044
|
+
stream_issue = _classify_tool_response_issue(
|
|
3045
|
+
synthetic_openai_resp,
|
|
3046
|
+
anthropic_body,
|
|
3047
|
+
required_tool_choice=False,
|
|
3048
|
+
)
|
|
3049
|
+
|
|
3050
|
+
if stream_issue.kind == "malformed_payload":
|
|
2301
3051
|
monitor.malformed_tool_streak += 1
|
|
3052
|
+
elif stream_issue.kind == "invalid_tool_args":
|
|
3053
|
+
monitor.invalid_tool_call_streak += 1
|
|
3054
|
+
monitor.arg_preflight_rejections += 1
|
|
2302
3055
|
elif (
|
|
2303
3056
|
"tools" in anthropic_body
|
|
2304
3057
|
and not tool_calls_by_index
|
|
@@ -2310,6 +3063,8 @@ async def stream_anthropic_response(
|
|
|
2310
3063
|
monitor.malformed_tool_streak += 1
|
|
2311
3064
|
elif tool_calls_by_index:
|
|
2312
3065
|
monitor.malformed_tool_streak = 0
|
|
3066
|
+
monitor.invalid_tool_call_streak = 0
|
|
3067
|
+
monitor.required_tool_miss_streak = 0
|
|
2313
3068
|
|
|
2314
3069
|
if _is_unexpected_end_turn(synthetic_openai_resp, anthropic_body):
|
|
2315
3070
|
monitor.unexpected_end_turn_count += 1
|
|
@@ -2717,6 +3472,8 @@ async def messages(request: Request):
|
|
|
2717
3472
|
monitor.malformed_tool_streak += 1
|
|
2718
3473
|
elif _openai_has_tool_calls(openai_resp):
|
|
2719
3474
|
monitor.malformed_tool_streak = 0
|
|
3475
|
+
monitor.invalid_tool_call_streak = 0
|
|
3476
|
+
monitor.required_tool_miss_streak = 0
|
|
2720
3477
|
|
|
2721
3478
|
anthropic_resp = openai_to_anthropic_response(openai_resp, model)
|
|
2722
3479
|
|
|
@@ -2807,6 +3564,13 @@ async def context_status(request: Request):
|
|
|
2807
3564
|
"loop_warnings_emitted": monitor.loop_warnings_emitted,
|
|
2808
3565
|
"unexpected_end_turn_count": monitor.unexpected_end_turn_count,
|
|
2809
3566
|
"malformed_tool_streak": monitor.malformed_tool_streak,
|
|
3567
|
+
"invalid_tool_call_streak": monitor.invalid_tool_call_streak,
|
|
3568
|
+
"required_tool_miss_streak": monitor.required_tool_miss_streak,
|
|
3569
|
+
"guardrail_streak": monitor.guardrail_streak(),
|
|
3570
|
+
"arg_preflight_rejections": monitor.arg_preflight_rejections,
|
|
3571
|
+
"arg_preflight_repairs": monitor.arg_preflight_repairs,
|
|
3572
|
+
"forced_auto_cooldown_turns": monitor.forced_auto_cooldown_turns,
|
|
3573
|
+
"forced_dampener_triggers": monitor.forced_dampener_triggers,
|
|
2810
3574
|
"contamination_resets": monitor.contamination_resets,
|
|
2811
3575
|
"tool_call_history_len": len(monitor.tool_call_history),
|
|
2812
3576
|
"is_looping": monitor.detect_tool_loop(window=PROXY_LOOP_WINDOW)[0],
|