@miller-tech/uap 1.15.2 → 1.15.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/templates/hooks/forgecode/session-start.sh +6 -22
- package/templates/hooks/session-start.sh +7 -31
- package/tools/agents/plugin/session-start.sh +6 -22
- package/tools/agents/scripts/anthropic_proxy.py +780 -41
- package/tools/agents/tests/test_anthropic_proxy_streaming.py +632 -0
|
@@ -178,6 +178,14 @@ PROXY_MALFORMED_TOOL_STREAM_STRICT = os.environ.get(
|
|
|
178
178
|
"off",
|
|
179
179
|
"no",
|
|
180
180
|
}
|
|
181
|
+
# Tool-call argument preflight (repair + validation of tool arguments) stays
# enabled unless the environment variable is set, case-insensitively, to one
# of the "off" spellings listed here.
PROXY_TOOL_ARGS_PREFLIGHT = (
    os.environ.get("PROXY_TOOL_ARGS_PREFLIGHT", "on").lower()
    not in {"0", "false", "off", "no"}
)
|
|
181
189
|
PROXY_FORCE_NON_STREAM = os.environ.get(
|
|
182
190
|
"PROXY_FORCE_NON_STREAM", "off"
|
|
183
191
|
).lower() not in {
|
|
@@ -186,6 +194,29 @@ PROXY_FORCE_NON_STREAM = os.environ.get(
|
|
|
186
194
|
"off",
|
|
187
195
|
"no",
|
|
188
196
|
}
|
|
197
|
+
# Forced-tool dampener: master switch. Enabled unless the environment variable
# is set (case-insensitively) to "0", "false", "off" or "no".
PROXY_FORCED_TOOL_DAMPENER = (
    os.environ.get("PROXY_FORCED_TOOL_DAMPENER", "on").lower()
    not in {"0", "false", "off", "no"}
)
# Minimum consecutive forced-tool turns before the dampener may activate.
PROXY_FORCED_TOOL_DAMPENER_MIN_FORCED = int(
    os.getenv("PROXY_FORCED_TOOL_DAMPENER_MIN_FORCED", "4")
)
# Malformed/invalid tool-output streak that triggers the dampener.
PROXY_FORCED_TOOL_DAMPENER_BAD_STREAK = int(
    os.getenv("PROXY_FORCED_TOOL_DAMPENER_BAD_STREAK", "1")
)
# Streak of required-tool turns without a tool call that triggers the dampener.
PROXY_FORCED_TOOL_DAMPENER_EMPTY_STREAK = int(
    os.getenv("PROXY_FORCED_TOOL_DAMPENER_EMPTY_STREAK", "2")
)
# Number of turns tool_choice is held at "auto" once the dampener activates.
PROXY_FORCED_TOOL_DAMPENER_AUTO_TURNS = int(
    os.getenv("PROXY_FORCED_TOOL_DAMPENER_AUTO_TURNS", "2")
)
# Count of argument-preflight rejections that triggers the dampener.
PROXY_FORCED_TOOL_DAMPENER_REJECTIONS = int(
    os.getenv("PROXY_FORCED_TOOL_DAMPENER_REJECTIONS", "2")
)
|
|
189
220
|
PROXY_SESSION_CONTAMINATION_BREAKER = os.environ.get(
|
|
190
221
|
"PROXY_SESSION_CONTAMINATION_BREAKER", "on"
|
|
191
222
|
).lower() not in {
|
|
@@ -200,6 +231,12 @@ PROXY_SESSION_CONTAMINATION_THRESHOLD = int(
|
|
|
200
231
|
PROXY_SESSION_CONTAMINATION_KEEP_LAST = int(
|
|
201
232
|
os.environ.get("PROXY_SESSION_CONTAMINATION_KEEP_LAST", "8")
|
|
202
233
|
)
|
|
234
|
+
# Additional contamination-breaker thresholds, read once at import time.
# NOTE(review): the code that compares against these values is outside this
# view; the names suggest a forced-turn count and a required-tool-miss count.
PROXY_SESSION_CONTAMINATION_FORCED_THRESHOLD = int(
    os.getenv("PROXY_SESSION_CONTAMINATION_FORCED_THRESHOLD", "8")
)
PROXY_SESSION_CONTAMINATION_REQUIRED_MISS_THRESHOLD = int(
    os.getenv("PROXY_SESSION_CONTAMINATION_REQUIRED_MISS_THRESHOLD", "2")
)
|
|
203
240
|
PROXY_AGENTIC_SUPPLEMENT_MODE = (
|
|
204
241
|
os.environ.get("PROXY_AGENTIC_SUPPLEMENT_MODE", "clean").strip().lower()
|
|
205
242
|
)
|
|
@@ -257,7 +294,13 @@ class SessionMonitor:
|
|
|
257
294
|
no_progress_streak: int = 0 # Forced tool turns without new tool_result
|
|
258
295
|
unexpected_end_turn_count: int = 0 # end_turn without tool_use in active loop
|
|
259
296
|
malformed_tool_streak: int = 0 # consecutive malformed pseudo tool payloads
|
|
297
|
+
invalid_tool_call_streak: int = 0 # consecutive invalid tool arg payloads
|
|
298
|
+
required_tool_miss_streak: int = 0 # required tool turns with no tool call
|
|
260
299
|
contamination_resets: int = 0 # how many contamination resets were applied
|
|
300
|
+
forced_auto_cooldown_turns: int = 0 # temporary auto override turns remaining
|
|
301
|
+
forced_dampener_triggers: int = 0 # number of dampener activations
|
|
302
|
+
arg_preflight_rejections: int = 0 # rejected tool calls from arg preflight
|
|
303
|
+
arg_preflight_repairs: int = 0 # sanitized tool call args accepted
|
|
261
304
|
last_seen_ts: float = 0.0
|
|
262
305
|
|
|
263
306
|
def record_request(self, estimated_tokens: int):
|
|
@@ -394,6 +437,55 @@ class SessionMonitor:
|
|
|
394
437
|
|
|
395
438
|
return False, 0
|
|
396
439
|
|
|
440
|
+
def guardrail_streak(self) -> int:
    """Return the larger of the malformed-payload and invalid-args streaks."""
    malformed = self.malformed_tool_streak
    invalid = self.invalid_tool_call_streak
    return malformed if malformed >= invalid else invalid
|
|
443
|
+
|
|
444
|
+
def consume_forced_auto_turn(self) -> bool:
    """Spend one remaining dampener turn, if any are left.

    Returns:
        True when a turn was consumed (the counter is decremented by one),
        False when no dampener turns remain.
    """
    remaining = self.forced_auto_cooldown_turns
    if remaining > 0:
        self.forced_auto_cooldown_turns = remaining - 1
        return True
    return False
|
|
450
|
+
|
|
451
|
+
def maybe_activate_forced_tool_dampener(self, reason: str) -> bool:
    """Open a temporary tool_choice=auto window when forced turns keep failing.

    Args:
        reason: Label for the triggering condition; only used in the log line.

    Returns:
        True when this call activated the dampener, False otherwise.
    """
    # Nothing to do when the feature is off or a window is already open.
    if not PROXY_FORCED_TOOL_DAMPENER or self.forced_auto_cooldown_turns > 0:
        return False

    # Require a minimum run of forced turns before intervening.
    if self.consecutive_forced_count < max(1, PROXY_FORCED_TOOL_DAMPENER_MIN_FORCED):
        return False

    bad_streak = self.guardrail_streak()
    rejection_pressure = self.arg_preflight_rejections >= max(
        1, PROXY_FORCED_TOOL_DAMPENER_REJECTIONS
    )
    # Any one of the three signals is enough; every threshold is clamped to >= 1.
    triggered = (
        bad_streak >= max(1, PROXY_FORCED_TOOL_DAMPENER_BAD_STREAK)
        or self.required_tool_miss_streak
        >= max(1, PROXY_FORCED_TOOL_DAMPENER_EMPTY_STREAK)
        or rejection_pressure
    )
    if not triggered:
        return False

    self.forced_auto_cooldown_turns = max(1, PROXY_FORCED_TOOL_DAMPENER_AUTO_TURNS)
    self.forced_dampener_triggers += 1
    if rejection_pressure:
        # The rejection counter is cumulative; restart it once it has fired.
        self.arg_preflight_rejections = 0
    logger.warning(
        "FORCED-TOOL DAMPENER: activated reason=%s forced=%d bad_streak=%d required_miss=%d rejection_pressure=%s auto_turns=%d",
        reason,
        self.consecutive_forced_count,
        bad_streak,
        self.required_tool_miss_streak,
        rejection_pressure,
        self.forced_auto_cooldown_turns,
    )
    return True
|
|
488
|
+
|
|
397
489
|
def should_release_tool_choice(self) -> bool:
|
|
398
490
|
"""Determine if tool_choice should be relaxed to 'auto' to break a loop.
|
|
399
491
|
|
|
@@ -784,14 +876,23 @@ async def lifespan(app: FastAPI):
|
|
|
784
876
|
_resolve_prune_target_fraction() * 100,
|
|
785
877
|
)
|
|
786
878
|
logger.info(
|
|
787
|
-
"Guardrails: malformed=%s stream_strict=%s force_non_stream=%s tool_narrowing=%s thinking_off_on_tools=%s contamination_breaker=%s(%d) analysis_only_route=%s(min_tools=%d,max_msgs=%d)",
|
|
879
|
+
"Guardrails: malformed=%s stream_strict=%s force_non_stream=%s args_preflight=%s tool_narrowing=%s thinking_off_on_tools=%s dampener=%s(%d/%d/%d/%d->%d) contamination_breaker=%s(%d forced=%d required_miss=%d) analysis_only_route=%s(min_tools=%d,max_msgs=%d)",
|
|
788
880
|
PROXY_MALFORMED_TOOL_GUARDRAIL,
|
|
789
881
|
PROXY_MALFORMED_TOOL_STREAM_STRICT,
|
|
790
882
|
PROXY_FORCE_NON_STREAM,
|
|
883
|
+
PROXY_TOOL_ARGS_PREFLIGHT,
|
|
791
884
|
PROXY_TOOL_NARROWING,
|
|
792
885
|
PROXY_DISABLE_THINKING_ON_TOOL_TURNS,
|
|
886
|
+
PROXY_FORCED_TOOL_DAMPENER,
|
|
887
|
+
PROXY_FORCED_TOOL_DAMPENER_MIN_FORCED,
|
|
888
|
+
PROXY_FORCED_TOOL_DAMPENER_BAD_STREAK,
|
|
889
|
+
PROXY_FORCED_TOOL_DAMPENER_EMPTY_STREAK,
|
|
890
|
+
PROXY_FORCED_TOOL_DAMPENER_REJECTIONS,
|
|
891
|
+
PROXY_FORCED_TOOL_DAMPENER_AUTO_TURNS,
|
|
793
892
|
PROXY_SESSION_CONTAMINATION_BREAKER,
|
|
794
893
|
PROXY_SESSION_CONTAMINATION_THRESHOLD,
|
|
894
|
+
PROXY_SESSION_CONTAMINATION_FORCED_THRESHOLD,
|
|
895
|
+
PROXY_SESSION_CONTAMINATION_REQUIRED_MISS_THRESHOLD,
|
|
795
896
|
PROXY_ANALYSIS_ONLY_ROUTE,
|
|
796
897
|
PROXY_ANALYSIS_ONLY_MIN_TOOLS,
|
|
797
898
|
PROXY_ANALYSIS_ONLY_MAX_MESSAGES,
|
|
@@ -1324,8 +1425,16 @@ def build_openai_request(anthropic_body: dict, monitor: SessionMonitor) -> dict:
|
|
|
1324
1425
|
_record_last_assistant_tool_calls(anthropic_body, monitor)
|
|
1325
1426
|
last_user_has_tool_result = _last_user_has_tool_result(anthropic_body)
|
|
1326
1427
|
|
|
1327
|
-
# Check if loop breaker should override tool_choice
|
|
1328
|
-
if monitor.
|
|
1428
|
+
# Check if forced-tool dampener or loop breaker should override tool_choice
|
|
1429
|
+
if monitor.consume_forced_auto_turn():
|
|
1430
|
+
openai_body["tool_choice"] = "auto"
|
|
1431
|
+
monitor.consecutive_forced_count = 0
|
|
1432
|
+
monitor.no_progress_streak = 0
|
|
1433
|
+
logger.warning(
|
|
1434
|
+
"tool_choice set to 'auto' by FORCED-TOOL DAMPENER (remaining=%d)",
|
|
1435
|
+
monitor.forced_auto_cooldown_turns,
|
|
1436
|
+
)
|
|
1437
|
+
elif monitor.should_release_tool_choice():
|
|
1329
1438
|
openai_body["tool_choice"] = "auto"
|
|
1330
1439
|
monitor.consecutive_forced_count = 0
|
|
1331
1440
|
monitor.no_progress_streak = 0
|
|
@@ -1664,6 +1773,509 @@ def _openai_has_valid_tool_calls(openai_resp: dict, anthropic_body: dict) -> boo
|
|
|
1664
1773
|
)
|
|
1665
1774
|
|
|
1666
1775
|
|
|
1776
|
+
@dataclass
class ToolResponseIssue:
    """Result of inspecting a model tool response; an empty ``kind`` means no issue."""

    # Issue category. Values produced in this file: "malformed_payload",
    # "invalid_tool_args" and "required_tool_miss".
    kind: str = ""
    # Human-readable description of what was wrong (used in log output).
    reason: str = ""
    # Corrective instruction handed to the retry request builder.
    retry_hint: str = ""

    def has_issue(self) -> bool:
        """Return True when ``kind`` is set to a non-empty value."""
        return True if self.kind else False
|
|
1784
|
+
|
|
1785
|
+
|
|
1786
|
+
_TOOL_ARG_MARKERS = (
|
|
1787
|
+
"</parameter",
|
|
1788
|
+
"<parameter",
|
|
1789
|
+
"<tool_call",
|
|
1790
|
+
"</tool_call",
|
|
1791
|
+
"<function=",
|
|
1792
|
+
"</think>",
|
|
1793
|
+
)
|
|
1794
|
+
|
|
1795
|
+
|
|
1796
|
+
def _iter_string_leaves(value):
|
|
1797
|
+
if isinstance(value, str):
|
|
1798
|
+
yield value
|
|
1799
|
+
elif isinstance(value, list):
|
|
1800
|
+
for item in value:
|
|
1801
|
+
yield from _iter_string_leaves(item)
|
|
1802
|
+
elif isinstance(value, dict):
|
|
1803
|
+
for item in value.values():
|
|
1804
|
+
yield from _iter_string_leaves(item)
|
|
1805
|
+
|
|
1806
|
+
|
|
1807
|
+
def _contains_tool_markup(value) -> bool:
    """Return True if any nested string in ``value`` contains a tool-markup marker.

    Matching is case-insensitive: each string is lower-cased and checked for
    every fragment in ``_TOOL_ARG_MARKERS`` as a substring.
    """
    lowered_leaves = (leaf.lower() for leaf in _iter_string_leaves(value))
    return any(
        any(marker in lowered for marker in _TOOL_ARG_MARKERS)
        for lowered in lowered_leaves
    )
|
|
1813
|
+
|
|
1814
|
+
|
|
1815
|
+
def _strip_tool_markup_artifacts(text: str) -> str:
|
|
1816
|
+
cleaned = re.sub(r"</?parameter[^>]*>", "", text, flags=re.IGNORECASE)
|
|
1817
|
+
cleaned = re.sub(r"</?tool_call[^>]*>", "", cleaned, flags=re.IGNORECASE)
|
|
1818
|
+
cleaned = re.sub(r"</?think>", "", cleaned, flags=re.IGNORECASE)
|
|
1819
|
+
cleaned = re.sub(r"<function=[^>]*>", "", cleaned, flags=re.IGNORECASE)
|
|
1820
|
+
return cleaned.strip()
|
|
1821
|
+
|
|
1822
|
+
|
|
1823
|
+
def _sanitize_markup_value(value):
|
|
1824
|
+
if isinstance(value, str):
|
|
1825
|
+
cleaned = _strip_tool_markup_artifacts(value)
|
|
1826
|
+
return cleaned, cleaned != value
|
|
1827
|
+
if isinstance(value, list):
|
|
1828
|
+
changed = False
|
|
1829
|
+
cleaned_items = []
|
|
1830
|
+
for item in value:
|
|
1831
|
+
cleaned_item, item_changed = _sanitize_markup_value(item)
|
|
1832
|
+
cleaned_items.append(cleaned_item)
|
|
1833
|
+
changed = changed or item_changed
|
|
1834
|
+
return cleaned_items, changed
|
|
1835
|
+
if isinstance(value, dict):
|
|
1836
|
+
changed = False
|
|
1837
|
+
cleaned_obj = {}
|
|
1838
|
+
for key, item in value.items():
|
|
1839
|
+
cleaned_item, item_changed = _sanitize_markup_value(item)
|
|
1840
|
+
cleaned_obj[key] = cleaned_item
|
|
1841
|
+
changed = changed or item_changed
|
|
1842
|
+
return cleaned_obj, changed
|
|
1843
|
+
return value, False
|
|
1844
|
+
|
|
1845
|
+
|
|
1846
|
+
def _repair_tool_call_markup(openai_resp: dict) -> tuple[dict, int]:
    """Strip protocol-tag debris from the arguments of each tool call.

    Arguments that fail to parse as JSON get one recovery attempt: tags are
    stripped and only the outermost ``{...}`` span is parsed. Calls that still
    cannot be parsed, or that needed no change, are passed through as-is.

    Args:
        openai_resp: OpenAI-style chat completion response.

    Returns:
        ``(response, repaired_count)``. The original response object is
        returned with a count of 0 when nothing was repaired; otherwise a
        shallow copy whose first choice carries the rewritten tool calls.
    """
    untouched = (openai_resp, 0)
    if not _openai_has_tool_calls(openai_resp):
        return untouched

    choice, message = _extract_openai_choice(openai_resp)
    original_calls = message.get("tool_calls") or []
    if not original_calls:
        return untouched

    rebuilt_calls = []
    fixed = 0
    for call in original_calls:
        fn = call.get("function") if isinstance(call, dict) else {}
        if not isinstance(fn, dict):
            fn = {}
        raw_args = fn.get("arguments", "{}")

        recovered = False
        if isinstance(raw_args, (dict, list)):
            args = raw_args
        else:
            raw_text = str(raw_args)
            try:
                args = json.loads(raw_text)
            except json.JSONDecodeError:
                # Second chance: drop tag debris, keep the outermost {...} span.
                candidate = _strip_tool_markup_artifacts(raw_text)
                if "{" in candidate and "}" in candidate:
                    start = candidate.find("{")
                    stop = candidate.rfind("}") + 1
                    candidate = candidate[start:stop]
                try:
                    args = json.loads(candidate)
                except json.JSONDecodeError:
                    # Unrecoverable here; leave the call for later validation.
                    rebuilt_calls.append(call)
                    continue
                recovered = True

        sanitized, touched = _sanitize_markup_value(args)
        if not (touched or recovered):
            rebuilt_calls.append(call)
            continue

        # Arguments are always re-serialized as a compact JSON string.
        serialized = json.dumps(sanitized, separators=(",", ":"))
        rebuilt_calls.append(dict(call, function=dict(fn, arguments=serialized)))
        fixed += 1

    if fixed == 0:
        return untouched

    choices = list(openai_resp.get("choices") or [])
    if not choices:
        return untouched

    choices[0] = dict(choice, message=dict(message, tool_calls=rebuilt_calls))
    return dict(openai_resp, choices=choices), fixed
|
|
1914
|
+
|
|
1915
|
+
|
|
1916
|
+
def _default_required_value(field_name: str, field_schema: dict):
|
|
1917
|
+
expected_type = field_schema.get("type") if isinstance(field_schema, dict) else None
|
|
1918
|
+
if isinstance(expected_type, list):
|
|
1919
|
+
expected_type = expected_type[0] if expected_type else "string"
|
|
1920
|
+
|
|
1921
|
+
if expected_type == "integer":
|
|
1922
|
+
return 0
|
|
1923
|
+
if expected_type == "number":
|
|
1924
|
+
return 0
|
|
1925
|
+
if expected_type == "boolean":
|
|
1926
|
+
return False
|
|
1927
|
+
if expected_type == "object":
|
|
1928
|
+
return {"value": "__uap_required__"}
|
|
1929
|
+
if expected_type == "array":
|
|
1930
|
+
return ["__uap_required__"]
|
|
1931
|
+
|
|
1932
|
+
key = (field_name or "").lower()
|
|
1933
|
+
if key in {"command", "cmd"}:
|
|
1934
|
+
return "pwd"
|
|
1935
|
+
if key == "cron":
|
|
1936
|
+
return "* * * * *"
|
|
1937
|
+
if key in {"pattern", "glob"}:
|
|
1938
|
+
return "*"
|
|
1939
|
+
if key == "subject":
|
|
1940
|
+
return "task"
|
|
1941
|
+
if key in {"path", "file", "filepath", "file_path"} or key.endswith("_path"):
|
|
1942
|
+
return "."
|
|
1943
|
+
return "__uap_required__"
|
|
1944
|
+
|
|
1945
|
+
|
|
1946
|
+
def _repair_required_tool_args(
    openai_resp: dict, anthropic_body: dict
) -> tuple[dict, int]:
    """Fill missing or empty required tool arguments with placeholder values.

    For each tool call whose tool declares a non-empty ``required`` list, the
    arguments are parsed (unparseable or non-object arguments are replaced by
    an empty object) and every required field that is absent or empty is set
    via ``_default_required_value``.

    Args:
        openai_resp: OpenAI-style chat completion response.
        anthropic_body: Original Anthropic request carrying the tool schemas.

    Returns:
        ``(response, repaired_count)``. The original response object is
        returned with a count of 0 when nothing was repaired; otherwise a
        shallow copy whose first choice carries the rewritten tool calls.
    """
    untouched = (openai_resp, 0)
    if not _openai_has_tool_calls(openai_resp):
        return untouched

    tools_by_name = _anthropic_tools_by_name(anthropic_body)
    if not tools_by_name:
        return untouched

    choice, message = _extract_openai_choice(openai_resp)
    original_calls = message.get("tool_calls") or []
    if not original_calls:
        return untouched

    rebuilt_calls = []
    fixed = 0
    for call in original_calls:
        fn = call.get("function") if isinstance(call, dict) else {}
        if not isinstance(fn, dict):
            fn = {}

        schema = tools_by_name.get(fn.get("name", ""), {})
        schema_is_dict = isinstance(schema, dict)
        required = schema.get("required", []) if schema_is_dict else []
        if not isinstance(required, list) or not required:
            # Nothing is required for this tool (or it is unknown): pass through.
            rebuilt_calls.append(call)
            continue

        properties = schema.get("properties", {}) if schema_is_dict else {}
        if not isinstance(properties, dict):
            properties = {}

        raw_args = fn.get("arguments", "{}")
        if isinstance(raw_args, dict):
            args = dict(raw_args)
        else:
            try:
                args = json.loads(str(raw_args))
            except json.JSONDecodeError:
                args = None

        # Unparseable or non-object arguments are rebuilt from scratch.
        dirty = not isinstance(args, dict)
        if dirty:
            args = {}

        for field in required:
            if not isinstance(field, str):
                continue
            if field in args and not _required_value_is_empty(args[field]):
                continue
            spec = properties.get(field)
            args[field] = _default_required_value(
                field, spec if isinstance(spec, dict) else {}
            )
            dirty = True

        if not dirty:
            rebuilt_calls.append(call)
            continue

        serialized = json.dumps(args, separators=(",", ":"))
        rebuilt_calls.append(dict(call, function=dict(fn, arguments=serialized)))
        fixed += 1

    if fixed == 0:
        return untouched

    choices = list(openai_resp.get("choices") or [])
    if not choices:
        return untouched

    choices[0] = dict(choice, message=dict(message, tool_calls=rebuilt_calls))
    return dict(openai_resp, choices=choices), fixed
|
|
2035
|
+
|
|
2036
|
+
|
|
2037
|
+
def _required_value_is_empty(value) -> bool:
|
|
2038
|
+
if value is None:
|
|
2039
|
+
return True
|
|
2040
|
+
if isinstance(value, str):
|
|
2041
|
+
return not value.strip()
|
|
2042
|
+
if isinstance(value, (list, dict)):
|
|
2043
|
+
return len(value) == 0
|
|
2044
|
+
return False
|
|
2045
|
+
|
|
2046
|
+
|
|
2047
|
+
def _matches_json_schema_type(value, expected_type) -> bool:
|
|
2048
|
+
if not expected_type:
|
|
2049
|
+
return True
|
|
2050
|
+
|
|
2051
|
+
if isinstance(expected_type, list):
|
|
2052
|
+
return any(
|
|
2053
|
+
_matches_json_schema_type(value, candidate) for candidate in expected_type
|
|
2054
|
+
)
|
|
2055
|
+
|
|
2056
|
+
if expected_type == "string":
|
|
2057
|
+
return isinstance(value, str)
|
|
2058
|
+
if expected_type == "integer":
|
|
2059
|
+
return isinstance(value, int) and not isinstance(value, bool)
|
|
2060
|
+
if expected_type == "number":
|
|
2061
|
+
return (isinstance(value, int) and not isinstance(value, bool)) or isinstance(
|
|
2062
|
+
value, float
|
|
2063
|
+
)
|
|
2064
|
+
if expected_type == "boolean":
|
|
2065
|
+
return isinstance(value, bool)
|
|
2066
|
+
if expected_type == "object":
|
|
2067
|
+
return isinstance(value, dict)
|
|
2068
|
+
if expected_type == "array":
|
|
2069
|
+
return isinstance(value, list)
|
|
2070
|
+
return True
|
|
2071
|
+
|
|
2072
|
+
|
|
2073
|
+
def _anthropic_tools_by_name(anthropic_body: dict) -> dict[str, dict]:
|
|
2074
|
+
tool_map: dict[str, dict] = {}
|
|
2075
|
+
for tool in anthropic_body.get("tools", []) or []:
|
|
2076
|
+
if not isinstance(tool, dict):
|
|
2077
|
+
continue
|
|
2078
|
+
name = tool.get("name", "")
|
|
2079
|
+
if not name:
|
|
2080
|
+
continue
|
|
2081
|
+
schema = tool.get("input_schema")
|
|
2082
|
+
if not isinstance(schema, dict):
|
|
2083
|
+
schema = (
|
|
2084
|
+
tool.get("parameters")
|
|
2085
|
+
if isinstance(tool.get("parameters"), dict)
|
|
2086
|
+
else {}
|
|
2087
|
+
)
|
|
2088
|
+
tool_map[name] = schema or {}
|
|
2089
|
+
return tool_map
|
|
2090
|
+
|
|
2091
|
+
|
|
2092
|
+
def _validate_tool_call_arguments(
    tool_name: str,
    raw_arguments,
    tool_schema: dict,
    allowed_tools: set[str],
) -> ToolResponseIssue:
    """Validate one tool call's name and arguments against its schema.

    Checks run in order and the first failure is returned: unknown tool name,
    arguments not parseable as JSON, arguments not a JSON object, markup
    fragments inside argument strings, then per-field checks on the schema's
    ``required`` list (presence, non-emptiness, ``type``, ``minLength``,
    ``minItems``). Only required fields are inspected.

    Args:
        tool_name: Name of the called tool.
        raw_arguments: Arguments as a JSON string, dict/list, or None.
        tool_schema: Schema dict for the tool; non-dict values mean no schema.
        allowed_tools: Known tool names; an empty set disables the name check.

    Returns:
        A ``ToolResponseIssue`` of kind ``"invalid_tool_args"`` describing the
        first problem found, or an empty issue when the call is acceptable.
    """

    def reject(reason: str, retry_hint: str) -> ToolResponseIssue:
        return ToolResponseIssue(
            kind="invalid_tool_args", reason=reason, retry_hint=retry_hint
        )

    if allowed_tools and tool_name not in allowed_tools:
        return reject(
            f"unknown tool '{tool_name}'",
            "Use exactly one tool from the provided tool list.",
        )

    # Normalize whatever was supplied into JSON text, then parse it once.
    if raw_arguments is None:
        arg_text = "{}"
    elif isinstance(raw_arguments, (dict, list)):
        arg_text = json.dumps(raw_arguments)
    else:
        arg_text = str(raw_arguments)

    try:
        parsed = json.loads(arg_text)
    except json.JSONDecodeError as exc:
        return reject(
            f"invalid JSON arguments for '{tool_name}': {exc.msg}",
            f"Emit exactly one `{tool_name}` tool call with `arguments` as a strict JSON object. "
            "Do not include prose before or after JSON.",
        )

    if not isinstance(parsed, dict):
        return reject(
            f"arguments for '{tool_name}' must be a JSON object",
            f"Emit exactly one `{tool_name}` tool call with `arguments` set to a JSON object (not a string or list).",
        )

    if _contains_tool_markup(parsed):
        return reject(
            f"arguments for '{tool_name}' contain malformed markup fragments",
            f"Remove tag fragments from `{tool_name}` arguments and emit only plain JSON key/value pairs.",
        )

    schema_root = tool_schema if isinstance(tool_schema, dict) else {}
    required = schema_root.get("required", [])
    if not isinstance(required, list):
        required = []
    properties = schema_root.get("properties", {})
    if not isinstance(properties, dict):
        properties = {}

    required_names = [name for name in required if isinstance(name, str)]
    missing: list[str] = []
    empty: list[str] = []
    wrong_type: list[str] = []

    for field in required_names:
        if field not in parsed:
            missing.append(field)
            continue

        value = parsed[field]
        spec = properties.get(field)
        if not isinstance(spec, dict):
            spec = {}
        declared_type = spec.get("type")
        min_length = spec.get("minLength")
        min_items = spec.get("minItems")

        # Each field lands in at most one bucket; the first failing check wins.
        if _required_value_is_empty(value):
            empty.append(field)
        elif declared_type and not _matches_json_schema_type(value, declared_type):
            wrong_type.append(field)
        elif (
            isinstance(min_length, int)
            and isinstance(value, str)
            and len(value.strip()) < min_length
        ):
            empty.append(field)
        elif (
            isinstance(min_items, int)
            and isinstance(value, list)
            and len(value) < min_items
        ):
            empty.append(field)

    if not (missing or empty or wrong_type):
        return ToolResponseIssue()

    details = []
    if missing:
        details.append(f"missing: {', '.join(missing)}")
    if empty:
        details.append(f"empty: {', '.join(empty)}")
    if wrong_type:
        details.append(f"type mismatch: {', '.join(wrong_type)}")

    required_fields = ", ".join(str(name) for name in required_names)
    required_hint = ""
    if required_fields:
        required_hint = f"Required fields must be non-empty: {required_fields}. "

    return reject(
        f"invalid arguments for '{tool_name}' ({'; '.join(details)})",
        (
            f"Emit exactly one `{tool_name}` tool call with strict JSON arguments. "
            f"{required_hint}Do not include protocol tags or commentary."
        ).strip(),
    )
|
|
2219
|
+
|
|
2220
|
+
|
|
2221
|
+
def _classify_tool_response_issue(
    openai_resp: dict,
    anthropic_body: dict,
    required_tool_choice: bool = False,
) -> ToolResponseIssue:
    """Classify what, if anything, is wrong with a tool-turn response.

    Args:
        openai_resp: OpenAI-style chat completion response to inspect.
        anthropic_body: Original Anthropic request (source of tool schemas).
        required_tool_choice: Whether this turn was sent with a mandatory
            tool call.

    Returns:
        An empty issue when the request declares no tools or nothing is
        wrong; otherwise an issue of kind ``"malformed_payload"``,
        ``"required_tool_miss"`` or ``"invalid_tool_args"``.
    """
    if "tools" not in anthropic_body:
        return ToolResponseIssue()

    if _is_malformed_tool_response(openai_resp, anthropic_body):
        return ToolResponseIssue(
            kind="malformed_payload",
            reason="malformed pseudo tool payload detected in assistant text",
            retry_hint=(
                "Return exactly one valid tool call with strict JSON arguments. "
                "Do not output raw protocol tags, schema fragments, or apologies about formatting."
            ),
        )

    if not _openai_has_tool_calls(openai_resp):
        if not required_tool_choice:
            return ToolResponseIssue()
        # Only empty or very short replies (<= 48 chars) count as a miss.
        reply_text = _openai_message_text(openai_resp).strip()
        if len(reply_text) > 48:
            return ToolResponseIssue()
        return ToolResponseIssue(
            kind="required_tool_miss",
            reason="required tool turn returned no tool calls",
            retry_hint=(
                "A tool call is mandatory for this turn. Emit exactly one valid tool call now "
                "with a strict JSON object in `arguments`."
            ),
        )

    # Argument validation is optional; with preflight off, tool calls pass.
    if not PROXY_TOOL_ARGS_PREFLIGHT:
        return ToolResponseIssue()

    _, message = _extract_openai_choice(openai_resp)
    tools_by_name = _anthropic_tools_by_name(anthropic_body)
    allowed_tools = set(tools_by_name.keys())

    # Report the first offending call; later calls are not inspected.
    for call in message.get("tool_calls") or []:
        fn = call.get("function") if isinstance(call, dict) else {}
        if not isinstance(fn, dict):
            fn = {}
        name = fn.get("name", "")
        verdict = _validate_tool_call_arguments(
            name,
            fn.get("arguments", "{}"),
            tools_by_name.get(name, {}),
            allowed_tools,
        )
        if verdict.has_issue():
            return verdict

    return ToolResponseIssue()
|
|
2277
|
+
|
|
2278
|
+
|
|
1667
2279
|
def _looks_malformed_tool_payload(text: str) -> bool:
|
|
1668
2280
|
if not text:
|
|
1669
2281
|
return False
|
|
@@ -1724,7 +2336,9 @@ def _is_malformed_tool_response(openai_resp: dict, anthropic_body: dict) -> bool
|
|
|
1724
2336
|
return _looks_malformed_tool_payload(_openai_message_text(openai_resp))
|
|
1725
2337
|
|
|
1726
2338
|
|
|
1727
|
-
def _build_malformed_retry_body(
|
|
2339
|
+
def _build_malformed_retry_body(
|
|
2340
|
+
openai_body: dict, anthropic_body: dict, retry_hint: str = ""
|
|
2341
|
+
) -> dict:
|
|
1728
2342
|
retry_body = dict(openai_body)
|
|
1729
2343
|
retry_body["stream"] = False
|
|
1730
2344
|
retry_body["tool_choice"] = "required"
|
|
@@ -1759,6 +2373,16 @@ def _build_malformed_retry_body(openai_body: dict, anthropic_body: dict) -> dict
|
|
|
1759
2373
|
if PROXY_DISABLE_THINKING_ON_TOOL_TURNS:
|
|
1760
2374
|
retry_body["enable_thinking"] = False
|
|
1761
2375
|
|
|
2376
|
+
if retry_hint:
|
|
2377
|
+
repair_prompt = (
|
|
2378
|
+
"[TOOL CALL REPAIR]\n"
|
|
2379
|
+
f"{retry_hint}\n"
|
|
2380
|
+
"Return exactly one valid tool call object and no explanatory prose."
|
|
2381
|
+
)
|
|
2382
|
+
retry_messages = list(retry_body.get("messages", []))
|
|
2383
|
+
retry_messages.append({"role": "system", "content": repair_prompt})
|
|
2384
|
+
retry_body["messages"] = retry_messages
|
|
2385
|
+
|
|
1762
2386
|
return retry_body
|
|
1763
2387
|
|
|
1764
2388
|
|
|
@@ -1847,27 +2471,68 @@ async def _apply_malformed_tool_guardrail(
|
|
|
1847
2471
|
if not PROXY_MALFORMED_TOOL_GUARDRAIL:
|
|
1848
2472
|
return openai_resp
|
|
1849
2473
|
|
|
1850
|
-
|
|
1851
|
-
|
|
2474
|
+
working_resp = openai_resp
|
|
2475
|
+
repair_count = 0
|
|
2476
|
+
if PROXY_TOOL_ARGS_PREFLIGHT and _openai_has_tool_calls(openai_resp):
|
|
2477
|
+
working_resp, markup_repairs = _repair_tool_call_markup(openai_resp)
|
|
2478
|
+
working_resp, required_repairs = _repair_required_tool_args(
|
|
2479
|
+
working_resp, anthropic_body
|
|
2480
|
+
)
|
|
2481
|
+
repair_count = markup_repairs + required_repairs
|
|
2482
|
+
|
|
2483
|
+
required_tool_choice = openai_body.get("tool_choice") == "required"
|
|
2484
|
+
has_tool_calls = _openai_has_tool_calls(working_resp)
|
|
2485
|
+
if required_tool_choice and not has_tool_calls:
|
|
2486
|
+
monitor.required_tool_miss_streak += 1
|
|
2487
|
+
|
|
2488
|
+
issue = _classify_tool_response_issue(
|
|
2489
|
+
working_resp,
|
|
2490
|
+
anthropic_body,
|
|
2491
|
+
required_tool_choice=required_tool_choice,
|
|
2492
|
+
)
|
|
2493
|
+
if not issue.has_issue():
|
|
2494
|
+
if required_tool_choice and not has_tool_calls:
|
|
2495
|
+
monitor.maybe_activate_forced_tool_dampener("required_tool_miss")
|
|
2496
|
+
if has_tool_calls:
|
|
1852
2497
|
monitor.malformed_tool_streak = 0
|
|
1853
|
-
|
|
2498
|
+
monitor.invalid_tool_call_streak = 0
|
|
2499
|
+
monitor.required_tool_miss_streak = 0
|
|
2500
|
+
if repair_count > 0:
|
|
2501
|
+
monitor.arg_preflight_repairs += repair_count
|
|
2502
|
+
logger.info(
|
|
2503
|
+
"TOOL ARG REPAIR: session=%s repaired=%d source=initial",
|
|
2504
|
+
session_id,
|
|
2505
|
+
repair_count,
|
|
2506
|
+
)
|
|
2507
|
+
return working_resp
|
|
1854
2508
|
|
|
1855
|
-
|
|
1856
|
-
|
|
1857
|
-
|
|
1858
|
-
|
|
1859
|
-
|
|
1860
|
-
|
|
2509
|
+
if issue.kind == "malformed_payload":
|
|
2510
|
+
monitor.malformed_tool_streak += 1
|
|
2511
|
+
elif issue.kind == "invalid_tool_args":
|
|
2512
|
+
monitor.invalid_tool_call_streak += 1
|
|
2513
|
+
monitor.arg_preflight_rejections += 1
|
|
2514
|
+
|
|
2515
|
+
monitor.maybe_activate_forced_tool_dampener(issue.kind)
|
|
2516
|
+
excerpt = _openai_message_text(working_resp)[:220].replace("\n", " ")
|
|
1861
2517
|
logger.warning(
|
|
1862
|
-
"
|
|
2518
|
+
"TOOL RESPONSE ISSUE: session=%s kind=%s reason=%s malformed=%d invalid=%d required_miss=%d excerpt=%.220s",
|
|
1863
2519
|
session_id,
|
|
2520
|
+
issue.kind,
|
|
2521
|
+
issue.reason,
|
|
1864
2522
|
monitor.malformed_tool_streak,
|
|
2523
|
+
monitor.invalid_tool_call_streak,
|
|
2524
|
+
monitor.required_tool_miss_streak,
|
|
1865
2525
|
excerpt,
|
|
1866
2526
|
)
|
|
1867
2527
|
|
|
1868
2528
|
attempts = max(0, PROXY_MALFORMED_TOOL_RETRY_MAX)
|
|
2529
|
+
current_issue = issue
|
|
1869
2530
|
for attempt in range(attempts):
|
|
1870
|
-
retry_body = _build_malformed_retry_body(
|
|
2531
|
+
retry_body = _build_malformed_retry_body(
|
|
2532
|
+
openai_body,
|
|
2533
|
+
anthropic_body,
|
|
2534
|
+
retry_hint=current_issue.retry_hint,
|
|
2535
|
+
)
|
|
1871
2536
|
retry_resp = await client.post(
|
|
1872
2537
|
f"{LLAMA_CPP_BASE}/chat/completions",
|
|
1873
2538
|
json=retry_body,
|
|
@@ -1883,40 +2548,71 @@ async def _apply_malformed_tool_guardrail(
|
|
|
1883
2548
|
continue
|
|
1884
2549
|
|
|
1885
2550
|
retry_json = retry_resp.json()
|
|
1886
|
-
|
|
1887
|
-
|
|
1888
|
-
|
|
1889
|
-
|
|
1890
|
-
|
|
1891
|
-
|
|
2551
|
+
retry_working = retry_json
|
|
2552
|
+
retry_repairs = 0
|
|
2553
|
+
if PROXY_TOOL_ARGS_PREFLIGHT and _openai_has_tool_calls(retry_json):
|
|
2554
|
+
retry_working, retry_markup_repairs = _repair_tool_call_markup(retry_json)
|
|
2555
|
+
retry_working, retry_required_repairs = _repair_required_tool_args(
|
|
2556
|
+
retry_working, anthropic_body
|
|
1892
2557
|
)
|
|
1893
|
-
|
|
2558
|
+
retry_repairs = retry_markup_repairs + retry_required_repairs
|
|
1894
2559
|
|
|
1895
|
-
|
|
1896
|
-
|
|
1897
|
-
|
|
1898
|
-
|
|
1899
|
-
attempt + 1,
|
|
1900
|
-
attempts,
|
|
1901
|
-
retry_invalid_reason,
|
|
1902
|
-
)
|
|
2560
|
+
retry_has_tool_calls = _openai_has_tool_calls(retry_working)
|
|
2561
|
+
retry_required = retry_body.get("tool_choice") == "required"
|
|
2562
|
+
if retry_required and not retry_has_tool_calls:
|
|
2563
|
+
monitor.required_tool_miss_streak += 1
|
|
1903
2564
|
|
|
1904
|
-
|
|
2565
|
+
retry_issue = _classify_tool_response_issue(
|
|
2566
|
+
retry_working,
|
|
2567
|
+
anthropic_body,
|
|
2568
|
+
required_tool_choice=retry_required,
|
|
2569
|
+
)
|
|
2570
|
+
|
|
2571
|
+
if not retry_issue.has_issue():
|
|
1905
2572
|
monitor.malformed_tool_streak = 0
|
|
2573
|
+
monitor.invalid_tool_call_streak = 0
|
|
2574
|
+
monitor.required_tool_miss_streak = 0
|
|
1906
2575
|
logger.info(
|
|
1907
|
-
"
|
|
2576
|
+
"TOOL RESPONSE RETRY success: kind=%s attempt=%d/%d",
|
|
2577
|
+
current_issue.kind,
|
|
1908
2578
|
attempt + 1,
|
|
1909
2579
|
attempts,
|
|
1910
2580
|
)
|
|
1911
|
-
|
|
2581
|
+
if retry_repairs > 0:
|
|
2582
|
+
monitor.arg_preflight_repairs += retry_repairs
|
|
2583
|
+
logger.info(
|
|
2584
|
+
"TOOL ARG REPAIR: session=%s repaired=%d source=retry",
|
|
2585
|
+
session_id,
|
|
2586
|
+
retry_repairs,
|
|
2587
|
+
)
|
|
2588
|
+
return retry_working
|
|
1912
2589
|
|
|
1913
|
-
|
|
2590
|
+
if retry_issue.kind == "malformed_payload":
|
|
2591
|
+
monitor.malformed_tool_streak += 1
|
|
2592
|
+
elif retry_issue.kind == "invalid_tool_args":
|
|
2593
|
+
monitor.invalid_tool_call_streak += 1
|
|
2594
|
+
monitor.arg_preflight_rejections += 1
|
|
2595
|
+
|
|
2596
|
+
monitor.maybe_activate_forced_tool_dampener(retry_issue.kind)
|
|
2597
|
+
logger.warning(
|
|
2598
|
+
"TOOL RESPONSE RETRY invalid: session=%s attempt=%d/%d kind=%s reason=%s",
|
|
2599
|
+
session_id,
|
|
2600
|
+
attempt + 1,
|
|
2601
|
+
attempts,
|
|
2602
|
+
retry_issue.kind,
|
|
2603
|
+
retry_issue.reason,
|
|
2604
|
+
)
|
|
2605
|
+
current_issue = retry_issue
|
|
1914
2606
|
|
|
1915
2607
|
logger.error(
|
|
1916
|
-
"
|
|
2608
|
+
"TOOL RESPONSE issue persisted after retries (session=%s kind=%s malformed=%d invalid=%d required_miss=%d); returning clean guardrail response",
|
|
1917
2609
|
session_id,
|
|
2610
|
+
current_issue.kind or issue.kind,
|
|
2611
|
+
monitor.malformed_tool_streak,
|
|
2612
|
+
monitor.invalid_tool_call_streak,
|
|
2613
|
+
monitor.required_tool_miss_streak,
|
|
1918
2614
|
)
|
|
1919
|
-
return _build_clean_guardrail_openai_response(
|
|
2615
|
+
return _build_clean_guardrail_openai_response(working_resp)
|
|
1920
2616
|
|
|
1921
2617
|
|
|
1922
2618
|
def _maybe_apply_session_contamination_breaker(
|
|
@@ -1926,13 +2622,28 @@ def _maybe_apply_session_contamination_breaker(
|
|
|
1926
2622
|
return anthropic_body
|
|
1927
2623
|
|
|
1928
2624
|
threshold = max(1, PROXY_SESSION_CONTAMINATION_THRESHOLD)
|
|
1929
|
-
|
|
2625
|
+
forced_threshold = max(1, PROXY_SESSION_CONTAMINATION_FORCED_THRESHOLD)
|
|
2626
|
+
required_miss_threshold = max(
|
|
2627
|
+
1, PROXY_SESSION_CONTAMINATION_REQUIRED_MISS_THRESHOLD
|
|
2628
|
+
)
|
|
2629
|
+
bad_streak = monitor.guardrail_streak()
|
|
2630
|
+
should_reset = (
|
|
2631
|
+
bad_streak >= threshold
|
|
2632
|
+
or (
|
|
2633
|
+
bad_streak >= max(1, threshold - 1)
|
|
2634
|
+
and monitor.consecutive_forced_count >= forced_threshold
|
|
2635
|
+
)
|
|
2636
|
+
or monitor.required_tool_miss_streak >= required_miss_threshold
|
|
2637
|
+
)
|
|
2638
|
+
if not should_reset:
|
|
1930
2639
|
return anthropic_body
|
|
1931
2640
|
|
|
1932
2641
|
messages = anthropic_body.get("messages", [])
|
|
1933
2642
|
keep_last = max(2, PROXY_SESSION_CONTAMINATION_KEEP_LAST)
|
|
1934
2643
|
if len(messages) <= keep_last + 1:
|
|
1935
2644
|
monitor.malformed_tool_streak = 0
|
|
2645
|
+
monitor.invalid_tool_call_streak = 0
|
|
2646
|
+
monitor.required_tool_miss_streak = 0
|
|
1936
2647
|
return anthropic_body
|
|
1937
2648
|
|
|
1938
2649
|
head = messages[:1]
|
|
@@ -1940,22 +2651,30 @@ def _maybe_apply_session_contamination_breaker(
|
|
|
1940
2651
|
reset_marker = {
|
|
1941
2652
|
"role": "user",
|
|
1942
2653
|
"content": (
|
|
1943
|
-
"[SESSION RESET:
|
|
1944
|
-
"
|
|
2654
|
+
"[SESSION RESET: tool-call quality degraded in earlier turns. "
|
|
2655
|
+
"Continue from the recent context and emit valid tool calls with strict JSON arguments only.]"
|
|
1945
2656
|
),
|
|
1946
2657
|
}
|
|
1947
2658
|
|
|
1948
2659
|
updated_body = dict(anthropic_body)
|
|
1949
2660
|
updated_body["messages"] = head + [reset_marker] + tail
|
|
1950
2661
|
|
|
2662
|
+
forced_before = monitor.consecutive_forced_count
|
|
2663
|
+
required_miss_before = monitor.required_tool_miss_streak
|
|
1951
2664
|
monitor.contamination_resets += 1
|
|
1952
2665
|
monitor.malformed_tool_streak = 0
|
|
2666
|
+
monitor.invalid_tool_call_streak = 0
|
|
2667
|
+
monitor.required_tool_miss_streak = 0
|
|
1953
2668
|
monitor.no_progress_streak = 0
|
|
1954
2669
|
monitor.consecutive_forced_count = 0
|
|
2670
|
+
monitor.forced_auto_cooldown_turns = 0
|
|
1955
2671
|
logger.warning(
|
|
1956
|
-
"SESSION CONTAMINATION BREAKER: session=%s reset applied, kept=%d messages",
|
|
2672
|
+
"SESSION CONTAMINATION BREAKER: session=%s reset applied, kept=%d messages (bad_streak=%d forced=%d required_miss=%d)",
|
|
1957
2673
|
session_id,
|
|
1958
2674
|
len(updated_body["messages"]),
|
|
2675
|
+
bad_streak,
|
|
2676
|
+
forced_before,
|
|
2677
|
+
required_miss_before,
|
|
1959
2678
|
)
|
|
1960
2679
|
|
|
1961
2680
|
return updated_body
|
|
@@ -2322,8 +3041,17 @@ async def stream_anthropic_response(
|
|
|
2322
3041
|
]
|
|
2323
3042
|
}
|
|
2324
3043
|
|
|
2325
|
-
|
|
3044
|
+
stream_issue = _classify_tool_response_issue(
|
|
3045
|
+
synthetic_openai_resp,
|
|
3046
|
+
anthropic_body,
|
|
3047
|
+
required_tool_choice=False,
|
|
3048
|
+
)
|
|
3049
|
+
|
|
3050
|
+
if stream_issue.kind == "malformed_payload":
|
|
2326
3051
|
monitor.malformed_tool_streak += 1
|
|
3052
|
+
elif stream_issue.kind == "invalid_tool_args":
|
|
3053
|
+
monitor.invalid_tool_call_streak += 1
|
|
3054
|
+
monitor.arg_preflight_rejections += 1
|
|
2327
3055
|
elif (
|
|
2328
3056
|
"tools" in anthropic_body
|
|
2329
3057
|
and not tool_calls_by_index
|
|
@@ -2335,6 +3063,8 @@ async def stream_anthropic_response(
|
|
|
2335
3063
|
monitor.malformed_tool_streak += 1
|
|
2336
3064
|
elif tool_calls_by_index:
|
|
2337
3065
|
monitor.malformed_tool_streak = 0
|
|
3066
|
+
monitor.invalid_tool_call_streak = 0
|
|
3067
|
+
monitor.required_tool_miss_streak = 0
|
|
2338
3068
|
|
|
2339
3069
|
if _is_unexpected_end_turn(synthetic_openai_resp, anthropic_body):
|
|
2340
3070
|
monitor.unexpected_end_turn_count += 1
|
|
@@ -2742,6 +3472,8 @@ async def messages(request: Request):
|
|
|
2742
3472
|
monitor.malformed_tool_streak += 1
|
|
2743
3473
|
elif _openai_has_tool_calls(openai_resp):
|
|
2744
3474
|
monitor.malformed_tool_streak = 0
|
|
3475
|
+
monitor.invalid_tool_call_streak = 0
|
|
3476
|
+
monitor.required_tool_miss_streak = 0
|
|
2745
3477
|
|
|
2746
3478
|
anthropic_resp = openai_to_anthropic_response(openai_resp, model)
|
|
2747
3479
|
|
|
@@ -2832,6 +3564,13 @@ async def context_status(request: Request):
|
|
|
2832
3564
|
"loop_warnings_emitted": monitor.loop_warnings_emitted,
|
|
2833
3565
|
"unexpected_end_turn_count": monitor.unexpected_end_turn_count,
|
|
2834
3566
|
"malformed_tool_streak": monitor.malformed_tool_streak,
|
|
3567
|
+
"invalid_tool_call_streak": monitor.invalid_tool_call_streak,
|
|
3568
|
+
"required_tool_miss_streak": monitor.required_tool_miss_streak,
|
|
3569
|
+
"guardrail_streak": monitor.guardrail_streak(),
|
|
3570
|
+
"arg_preflight_rejections": monitor.arg_preflight_rejections,
|
|
3571
|
+
"arg_preflight_repairs": monitor.arg_preflight_repairs,
|
|
3572
|
+
"forced_auto_cooldown_turns": monitor.forced_auto_cooldown_turns,
|
|
3573
|
+
"forced_dampener_triggers": monitor.forced_dampener_triggers,
|
|
2835
3574
|
"contamination_resets": monitor.contamination_resets,
|
|
2836
3575
|
"tool_call_history_len": len(monitor.tool_call_history),
|
|
2837
3576
|
"is_looping": monitor.detect_tool_loop(window=PROXY_LOOP_WINDOW)[0],
|