@miller-tech/uap 1.15.2 → 1.15.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -178,6 +178,14 @@ PROXY_MALFORMED_TOOL_STREAM_STRICT = os.environ.get(
178
178
  "off",
179
179
  "no",
180
180
  }
181
# Feature flag for the tool-argument preflight (repair + validation of tool
# call arguments). Defaults to "on"; it is disabled only when the environment
# variable is set to one of the spellings below, compared case-insensitively.
PROXY_TOOL_ARGS_PREFLIGHT = os.environ.get(
    "PROXY_TOOL_ARGS_PREFLIGHT", "on"
).lower() not in {
    "0",
    "false",
    "off",
    "no",
}
181
189
  PROXY_FORCE_NON_STREAM = os.environ.get(
182
190
  "PROXY_FORCE_NON_STREAM", "off"
183
191
  ).lower() not in {
@@ -186,6 +194,29 @@ PROXY_FORCE_NON_STREAM = os.environ.get(
186
194
  "off",
187
195
  "no",
188
196
  }
197
# Feature flag for the forced-tool dampener. Defaults to "on"; disabled only by
# one of the spellings below (case-insensitive).
PROXY_FORCED_TOOL_DAMPENER = os.environ.get(
    "PROXY_FORCED_TOOL_DAMPENER", "on"
).lower() not in {
    "0",
    "false",
    "off",
    "no",
}
# Dampener tuning knobs. Each is parsed with int(), so a non-integer value in
# the environment raises ValueError when this module is imported. The dampener
# clamps every one of them to at least 1 when it reads them.
# Minimum consecutive forced-tool turns before the dampener may activate.
PROXY_FORCED_TOOL_DAMPENER_MIN_FORCED = int(
    os.environ.get("PROXY_FORCED_TOOL_DAMPENER_MIN_FORCED", "4")
)
# Malformed/invalid tool-output streak that counts as a quality collapse.
PROXY_FORCED_TOOL_DAMPENER_BAD_STREAK = int(
    os.environ.get("PROXY_FORCED_TOOL_DAMPENER_BAD_STREAK", "1")
)
# Required-tool-miss streak that counts as a quality collapse.
PROXY_FORCED_TOOL_DAMPENER_EMPTY_STREAK = int(
    os.environ.get("PROXY_FORCED_TOOL_DAMPENER_EMPTY_STREAK", "2")
)
# Number of turns for which tool_choice is temporarily set to "auto".
PROXY_FORCED_TOOL_DAMPENER_AUTO_TURNS = int(
    os.environ.get("PROXY_FORCED_TOOL_DAMPENER_AUTO_TURNS", "2")
)
# Count of preflight argument rejections that counts as a quality collapse.
PROXY_FORCED_TOOL_DAMPENER_REJECTIONS = int(
    os.environ.get("PROXY_FORCED_TOOL_DAMPENER_REJECTIONS", "2")
)
189
220
  PROXY_SESSION_CONTAMINATION_BREAKER = os.environ.get(
190
221
  "PROXY_SESSION_CONTAMINATION_BREAKER", "on"
191
222
  ).lower() not in {
@@ -200,6 +231,12 @@ PROXY_SESSION_CONTAMINATION_THRESHOLD = int(
200
231
  PROXY_SESSION_CONTAMINATION_KEEP_LAST = int(
201
232
  os.environ.get("PROXY_SESSION_CONTAMINATION_KEEP_LAST", "8")
202
233
  )
234
# Extra triggers for the session contamination breaker. Both are parsed with
# int() (ValueError at import on a non-integer value) and clamped to >= 1 by
# the breaker when read.
# Consecutive forced-tool turns that, combined with a bad streak one below the
# main contamination threshold, trigger a reset.
PROXY_SESSION_CONTAMINATION_FORCED_THRESHOLD = int(
    os.environ.get("PROXY_SESSION_CONTAMINATION_FORCED_THRESHOLD", "8")
)
# Required-tool-miss streak that triggers a reset on its own.
PROXY_SESSION_CONTAMINATION_REQUIRED_MISS_THRESHOLD = int(
    os.environ.get("PROXY_SESSION_CONTAMINATION_REQUIRED_MISS_THRESHOLD", "2")
)
203
240
  PROXY_AGENTIC_SUPPLEMENT_MODE = (
204
241
  os.environ.get("PROXY_AGENTIC_SUPPLEMENT_MODE", "clean").strip().lower()
205
242
  )
@@ -257,7 +294,13 @@ class SessionMonitor:
257
294
  no_progress_streak: int = 0 # Forced tool turns without new tool_result
258
295
  unexpected_end_turn_count: int = 0 # end_turn without tool_use in active loop
259
296
  malformed_tool_streak: int = 0 # consecutive malformed pseudo tool payloads
297
+ invalid_tool_call_streak: int = 0 # consecutive invalid tool arg payloads
298
+ required_tool_miss_streak: int = 0 # required tool turns with no tool call
260
299
  contamination_resets: int = 0 # how many contamination resets were applied
300
+ forced_auto_cooldown_turns: int = 0 # temporary auto override turns remaining
301
+ forced_dampener_triggers: int = 0 # number of dampener activations
302
+ arg_preflight_rejections: int = 0 # rejected tool calls from arg preflight
303
+ arg_preflight_repairs: int = 0 # sanitized tool call args accepted
261
304
  last_seen_ts: float = 0.0
262
305
 
263
306
  def record_request(self, estimated_tokens: int):
@@ -394,6 +437,55 @@ class SessionMonitor:
394
437
 
395
438
  return False, 0
396
439
 
440
def guardrail_streak(self) -> int:
    """Return the larger of the malformed and invalid tool-output streaks."""
    malformed = self.malformed_tool_streak
    invalid = self.invalid_tool_call_streak
    return malformed if malformed >= invalid else invalid
443
+
444
def consume_forced_auto_turn(self) -> bool:
    """Spend one pending dampener turn, if any remain.

    Returns True (and decrements the remaining-turn counter) when a
    temporary tool_choice=auto turn was available, False otherwise.
    """
    if self.forced_auto_cooldown_turns > 0:
        self.forced_auto_cooldown_turns -= 1
        return True
    return False
450
+
451
def maybe_activate_forced_tool_dampener(self, reason: str) -> bool:
    """Start a temporary tool_choice=auto window when tool quality collapses.

    Args:
        reason: Label for what prompted the check; only used in the log line.

    Returns:
        True when the dampener was activated by this call, False when it is
        disabled, already counting down, or its thresholds are not met.
    """
    # Feature off, or a cooldown window is already running.
    if not PROXY_FORCED_TOOL_DAMPENER or self.forced_auto_cooldown_turns > 0:
        return False

    # Only consider sessions that have been forced for long enough.
    if self.consecutive_forced_count < max(1, PROXY_FORCED_TOOL_DAMPENER_MIN_FORCED):
        return False

    bad_streak = self.guardrail_streak()
    streak_tripped = bad_streak >= max(1, PROXY_FORCED_TOOL_DAMPENER_BAD_STREAK)
    miss_tripped = self.required_tool_miss_streak >= max(
        1, PROXY_FORCED_TOOL_DAMPENER_EMPTY_STREAK
    )
    rejection_pressure = self.arg_preflight_rejections >= max(
        1, PROXY_FORCED_TOOL_DAMPENER_REJECTIONS
    )
    if not (streak_tripped or miss_tripped or rejection_pressure):
        return False

    self.forced_auto_cooldown_turns = max(1, PROXY_FORCED_TOOL_DAMPENER_AUTO_TURNS)
    self.forced_dampener_triggers += 1
    if rejection_pressure:
        # The rejection counter is consumed by the activation it caused.
        self.arg_preflight_rejections = 0
    logger.warning(
        "FORCED-TOOL DAMPENER: activated reason=%s forced=%d bad_streak=%d required_miss=%d rejection_pressure=%s auto_turns=%d",
        reason,
        self.consecutive_forced_count,
        bad_streak,
        self.required_tool_miss_streak,
        rejection_pressure,
        self.forced_auto_cooldown_turns,
    )
    return True
488
+
397
489
  def should_release_tool_choice(self) -> bool:
398
490
  """Determine if tool_choice should be relaxed to 'auto' to break a loop.
399
491
 
@@ -784,14 +876,23 @@ async def lifespan(app: FastAPI):
784
876
  _resolve_prune_target_fraction() * 100,
785
877
  )
786
878
  logger.info(
787
- "Guardrails: malformed=%s stream_strict=%s force_non_stream=%s tool_narrowing=%s thinking_off_on_tools=%s contamination_breaker=%s(%d) analysis_only_route=%s(min_tools=%d,max_msgs=%d)",
879
+ "Guardrails: malformed=%s stream_strict=%s force_non_stream=%s args_preflight=%s tool_narrowing=%s thinking_off_on_tools=%s dampener=%s(%d/%d/%d/%d->%d) contamination_breaker=%s(%d forced=%d required_miss=%d) analysis_only_route=%s(min_tools=%d,max_msgs=%d)",
788
880
  PROXY_MALFORMED_TOOL_GUARDRAIL,
789
881
  PROXY_MALFORMED_TOOL_STREAM_STRICT,
790
882
  PROXY_FORCE_NON_STREAM,
883
+ PROXY_TOOL_ARGS_PREFLIGHT,
791
884
  PROXY_TOOL_NARROWING,
792
885
  PROXY_DISABLE_THINKING_ON_TOOL_TURNS,
886
+ PROXY_FORCED_TOOL_DAMPENER,
887
+ PROXY_FORCED_TOOL_DAMPENER_MIN_FORCED,
888
+ PROXY_FORCED_TOOL_DAMPENER_BAD_STREAK,
889
+ PROXY_FORCED_TOOL_DAMPENER_EMPTY_STREAK,
890
+ PROXY_FORCED_TOOL_DAMPENER_REJECTIONS,
891
+ PROXY_FORCED_TOOL_DAMPENER_AUTO_TURNS,
793
892
  PROXY_SESSION_CONTAMINATION_BREAKER,
794
893
  PROXY_SESSION_CONTAMINATION_THRESHOLD,
894
+ PROXY_SESSION_CONTAMINATION_FORCED_THRESHOLD,
895
+ PROXY_SESSION_CONTAMINATION_REQUIRED_MISS_THRESHOLD,
795
896
  PROXY_ANALYSIS_ONLY_ROUTE,
796
897
  PROXY_ANALYSIS_ONLY_MIN_TOOLS,
797
898
  PROXY_ANALYSIS_ONLY_MAX_MESSAGES,
@@ -1324,8 +1425,16 @@ def build_openai_request(anthropic_body: dict, monitor: SessionMonitor) -> dict:
1324
1425
  _record_last_assistant_tool_calls(anthropic_body, monitor)
1325
1426
  last_user_has_tool_result = _last_user_has_tool_result(anthropic_body)
1326
1427
 
1327
- # Check if loop breaker should override tool_choice
1328
- if monitor.should_release_tool_choice():
1428
+ # Check if forced-tool dampener or loop breaker should override tool_choice
1429
+ if monitor.consume_forced_auto_turn():
1430
+ openai_body["tool_choice"] = "auto"
1431
+ monitor.consecutive_forced_count = 0
1432
+ monitor.no_progress_streak = 0
1433
+ logger.warning(
1434
+ "tool_choice set to 'auto' by FORCED-TOOL DAMPENER (remaining=%d)",
1435
+ monitor.forced_auto_cooldown_turns,
1436
+ )
1437
+ elif monitor.should_release_tool_choice():
1329
1438
  openai_body["tool_choice"] = "auto"
1330
1439
  monitor.consecutive_forced_count = 0
1331
1440
  monitor.no_progress_streak = 0
@@ -1664,6 +1773,509 @@ def _openai_has_valid_tool_calls(openai_resp: dict, anthropic_body: dict) -> boo
1664
1773
  )
1665
1774
 
1666
1775
 
1776
@dataclass
class ToolResponseIssue:
    """Outcome of inspecting a model response for tool-call problems.

    A default-constructed instance (empty ``kind``) means "no issue".
    """

    # Issue category; empty string when the response is acceptable.
    kind: str = ""
    # Human-readable explanation of what was wrong.
    reason: str = ""
    # Instruction text to feed back to the model on a retry.
    retry_hint: str = ""

    def has_issue(self) -> bool:
        """Return True when ``kind`` is set, i.e. a problem was detected."""
        return True if self.kind else False
1784
+
1785
+
1786
# Substrings that indicate tool-call protocol markup leaked into an argument
# value. Entries are lower-case because _contains_tool_markup lower-cases the
# text before testing; tag prefixes are left unterminated so that both
# complete and truncated tags match.
_TOOL_ARG_MARKERS = (
    "</parameter",
    "<parameter",
    "<tool_call",
    "</tool_call",
    "<function=",
    "</think>",
)
1794
+
1795
+
1796
+ def _iter_string_leaves(value):
1797
+ if isinstance(value, str):
1798
+ yield value
1799
+ elif isinstance(value, list):
1800
+ for item in value:
1801
+ yield from _iter_string_leaves(item)
1802
+ elif isinstance(value, dict):
1803
+ for item in value.values():
1804
+ yield from _iter_string_leaves(item)
1805
+
1806
+
1807
def _contains_tool_markup(value) -> bool:
    """Return True if any string inside ``value`` contains a tool-markup marker.

    Matching is case-insensitive: each string leaf is lower-cased and tested
    against the entries of ``_TOOL_ARG_MARKERS``.
    """
    lowered_leaves = map(str.lower, _iter_string_leaves(value))
    return any(
        marker in lowered
        for lowered in lowered_leaves
        for marker in _TOOL_ARG_MARKERS
    )
1813
+
1814
+
1815
+ def _strip_tool_markup_artifacts(text: str) -> str:
1816
+ cleaned = re.sub(r"</?parameter[^>]*>", "", text, flags=re.IGNORECASE)
1817
+ cleaned = re.sub(r"</?tool_call[^>]*>", "", cleaned, flags=re.IGNORECASE)
1818
+ cleaned = re.sub(r"</?think>", "", cleaned, flags=re.IGNORECASE)
1819
+ cleaned = re.sub(r"<function=[^>]*>", "", cleaned, flags=re.IGNORECASE)
1820
+ return cleaned.strip()
1821
+
1822
+
1823
def _sanitize_markup_value(value):
    """Recursively strip tool markup from every string inside ``value``.

    Returns:
        A ``(cleaned, changed)`` pair. Lists and dicts are rebuilt as new
        containers; values of any other non-string type are returned as-is.
        ``changed`` is True when at least one string differs after cleaning.
    """
    if isinstance(value, str):
        stripped = _strip_tool_markup_artifacts(value)
        return stripped, stripped != value

    if isinstance(value, list):
        results = [_sanitize_markup_value(element) for element in value]
        return (
            [cleaned for cleaned, _ in results],
            any(flag for _, flag in results),
        )

    if isinstance(value, dict):
        results = {key: _sanitize_markup_value(element) for key, element in value.items()}
        return (
            {key: cleaned for key, (cleaned, _) in results.items()},
            any(flag for _, flag in results.values()),
        )

    return value, False
1844
+
1845
+
1846
def _repair_tool_call_markup(openai_resp: dict) -> tuple[dict, int]:
    """Strip leaked protocol markup from the arguments of each tool call.

    Args:
        openai_resp: OpenAI-style chat completion response.

    Returns:
        ``(response, repaired_count)``. When nothing was repaired the original
        ``openai_resp`` object is returned with a count of 0; otherwise a
        shallow copy whose first choice carries the repaired ``tool_calls``.
    """
    if not _openai_has_tool_calls(openai_resp):
        return openai_resp, 0

    # presumably returns the first choice and its message dict - TODO confirm
    choice, message = _extract_openai_choice(openai_resp)
    tool_calls = message.get("tool_calls") or []
    if not tool_calls:
        return openai_resp, 0

    repaired_tool_calls = []
    repaired_count = 0

    for tool_call in tool_calls:
        fn = tool_call.get("function") if isinstance(tool_call, dict) else {}
        if not isinstance(fn, dict):
            fn = {}
        raw_args = fn.get("arguments", "{}")

        if isinstance(raw_args, (dict, list)):
            # Already structured: nothing to parse.
            parsed_args = raw_args
            parse_recovered = False
        else:
            try:
                parsed_args = json.loads(str(raw_args))
                parse_recovered = False
            except json.JSONDecodeError:
                # Second chance: drop tags, then keep only the span from the
                # first "{" to the last "}" and try to parse that.
                cleaned_text = _strip_tool_markup_artifacts(str(raw_args))
                candidate = cleaned_text
                if "{" in candidate and "}" in candidate:
                    candidate = candidate[
                        candidate.find("{") : candidate.rfind("}") + 1
                    ]
                try:
                    parsed_args = json.loads(candidate)
                    parse_recovered = True
                except json.JSONDecodeError:
                    # Unrecoverable: pass the call through untouched.
                    repaired_tool_calls.append(tool_call)
                    continue

        cleaned_args, changed = _sanitize_markup_value(parsed_args)
        if parse_recovered:
            # A recovered parse always counts as a repair, even if no string
            # value inside it needed cleaning.
            changed = True
        if not changed:
            repaired_tool_calls.append(tool_call)
            continue

        # Copy before editing so the caller's response is not mutated.
        new_tool_call = dict(tool_call)
        new_fn = dict(fn)
        new_fn["arguments"] = json.dumps(cleaned_args, separators=(",", ":"))
        new_tool_call["function"] = new_fn
        repaired_tool_calls.append(new_tool_call)
        repaired_count += 1

    if repaired_count == 0:
        return openai_resp, 0

    repaired_response = dict(openai_resp)
    choices = list(openai_resp.get("choices") or [])
    if not choices:
        return openai_resp, 0

    updated_choice = dict(choice)
    updated_message = dict(message)
    updated_message["tool_calls"] = repaired_tool_calls
    updated_choice["message"] = updated_message
    # Only the first choice is rewritten; any others are carried over as-is.
    choices[0] = updated_choice
    repaired_response["choices"] = choices
    return repaired_response, repaired_count
1914
+
1915
+
1916
+ def _default_required_value(field_name: str, field_schema: dict):
1917
+ expected_type = field_schema.get("type") if isinstance(field_schema, dict) else None
1918
+ if isinstance(expected_type, list):
1919
+ expected_type = expected_type[0] if expected_type else "string"
1920
+
1921
+ if expected_type == "integer":
1922
+ return 0
1923
+ if expected_type == "number":
1924
+ return 0
1925
+ if expected_type == "boolean":
1926
+ return False
1927
+ if expected_type == "object":
1928
+ return {"value": "__uap_required__"}
1929
+ if expected_type == "array":
1930
+ return ["__uap_required__"]
1931
+
1932
+ key = (field_name or "").lower()
1933
+ if key in {"command", "cmd"}:
1934
+ return "pwd"
1935
+ if key == "cron":
1936
+ return "* * * * *"
1937
+ if key in {"pattern", "glob"}:
1938
+ return "*"
1939
+ if key == "subject":
1940
+ return "task"
1941
+ if key in {"path", "file", "filepath", "file_path"} or key.endswith("_path"):
1942
+ return "."
1943
+ return "__uap_required__"
1944
+
1945
+
1946
def _repair_required_tool_args(
    openai_resp: dict, anthropic_body: dict
) -> tuple[dict, int]:
    """Fill missing or empty required tool arguments with placeholder values.

    Args:
        openai_resp: OpenAI-style chat completion response.
        anthropic_body: Original request; its ``tools`` supply the schemas.

    Returns:
        ``(response, repaired_count)``. When nothing was repaired the original
        ``openai_resp`` object is returned with a count of 0; otherwise a
        shallow copy whose first choice carries the repaired ``tool_calls``.
    """
    if not _openai_has_tool_calls(openai_resp):
        return openai_resp, 0

    tools_by_name = _anthropic_tools_by_name(anthropic_body)
    if not tools_by_name:
        return openai_resp, 0

    # presumably returns the first choice and its message dict - TODO confirm
    choice, message = _extract_openai_choice(openai_resp)
    tool_calls = message.get("tool_calls") or []
    if not tool_calls:
        return openai_resp, 0

    repaired_tool_calls = []
    repaired_count = 0

    for tool_call in tool_calls:
        fn = tool_call.get("function") if isinstance(tool_call, dict) else {}
        if not isinstance(fn, dict):
            fn = {}
        tool_name = fn.get("name", "")
        schema = tools_by_name.get(tool_name, {})
        required = schema.get("required", []) if isinstance(schema, dict) else []
        if not isinstance(required, list) or not required:
            # Unknown tool or no required fields: nothing to fill in.
            repaired_tool_calls.append(tool_call)
            continue

        properties = schema.get("properties", {}) if isinstance(schema, dict) else {}
        if not isinstance(properties, dict):
            properties = {}

        raw_args = fn.get("arguments", "{}")
        if isinstance(raw_args, dict):
            # Copy so that filling defaults never mutates the caller's dict.
            parsed_args = dict(raw_args)
            parse_failed = False
        else:
            try:
                parsed_args = json.loads(str(raw_args))
                parse_failed = False
            except json.JSONDecodeError:
                parsed_args = {}
                parse_failed = True

        if not isinstance(parsed_args, dict):
            parsed_args = {}
            parse_failed = True

        # NOTE(review): when the arguments cannot be parsed as a JSON object
        # they are replaced wholesale by an empty dict plus placeholders, so
        # whatever the model produced is discarded - confirm this is intended.
        changed = parse_failed
        for field in required:
            if not isinstance(field, str):
                continue
            current = parsed_args.get(field)
            if field not in parsed_args or _required_value_is_empty(current):
                field_schema = (
                    properties.get(field, {})
                    if isinstance(properties.get(field), dict)
                    else {}
                )
                parsed_args[field] = _default_required_value(field, field_schema)
                changed = True

        if not changed:
            repaired_tool_calls.append(tool_call)
            continue

        new_tool_call = dict(tool_call)
        new_fn = dict(fn)
        new_fn["arguments"] = json.dumps(parsed_args, separators=(",", ":"))
        new_tool_call["function"] = new_fn
        repaired_tool_calls.append(new_tool_call)
        repaired_count += 1

    if repaired_count == 0:
        return openai_resp, 0

    repaired_response = dict(openai_resp)
    choices = list(openai_resp.get("choices") or [])
    if not choices:
        return openai_resp, 0

    updated_choice = dict(choice)
    updated_message = dict(message)
    updated_message["tool_calls"] = repaired_tool_calls
    updated_choice["message"] = updated_message
    # Only the first choice is rewritten; any others are carried over as-is.
    choices[0] = updated_choice
    repaired_response["choices"] = choices
    return repaired_response, repaired_count
2035
+
2036
+
2037
+ def _required_value_is_empty(value) -> bool:
2038
+ if value is None:
2039
+ return True
2040
+ if isinstance(value, str):
2041
+ return not value.strip()
2042
+ if isinstance(value, (list, dict)):
2043
+ return len(value) == 0
2044
+ return False
2045
+
2046
+
2047
+ def _matches_json_schema_type(value, expected_type) -> bool:
2048
+ if not expected_type:
2049
+ return True
2050
+
2051
+ if isinstance(expected_type, list):
2052
+ return any(
2053
+ _matches_json_schema_type(value, candidate) for candidate in expected_type
2054
+ )
2055
+
2056
+ if expected_type == "string":
2057
+ return isinstance(value, str)
2058
+ if expected_type == "integer":
2059
+ return isinstance(value, int) and not isinstance(value, bool)
2060
+ if expected_type == "number":
2061
+ return (isinstance(value, int) and not isinstance(value, bool)) or isinstance(
2062
+ value, float
2063
+ )
2064
+ if expected_type == "boolean":
2065
+ return isinstance(value, bool)
2066
+ if expected_type == "object":
2067
+ return isinstance(value, dict)
2068
+ if expected_type == "array":
2069
+ return isinstance(value, list)
2070
+ return True
2071
+
2072
+
2073
+ def _anthropic_tools_by_name(anthropic_body: dict) -> dict[str, dict]:
2074
+ tool_map: dict[str, dict] = {}
2075
+ for tool in anthropic_body.get("tools", []) or []:
2076
+ if not isinstance(tool, dict):
2077
+ continue
2078
+ name = tool.get("name", "")
2079
+ if not name:
2080
+ continue
2081
+ schema = tool.get("input_schema")
2082
+ if not isinstance(schema, dict):
2083
+ schema = (
2084
+ tool.get("parameters")
2085
+ if isinstance(tool.get("parameters"), dict)
2086
+ else {}
2087
+ )
2088
+ tool_map[name] = schema or {}
2089
+ return tool_map
2090
+
2091
+
2092
def _validate_tool_call_arguments(
    tool_name: str,
    raw_arguments,
    tool_schema: dict,
    allowed_tools: set[str],
) -> ToolResponseIssue:
    """Validate one tool call's name and arguments against its schema.

    Args:
        tool_name: Name the model used for the tool call.
        raw_arguments: The call's ``arguments``: a JSON string, an already
            parsed dict/list, or ``None`` (treated as ``{}``).
        tool_schema: Schema for ``tool_name``; non-dict values are treated as
            an empty schema.
        allowed_tools: Known tool names; an empty set disables the name check.

    Returns:
        A ``ToolResponseIssue`` with ``kind="invalid_tool_args"`` describing
        the first category of problem found, or an empty issue when the call
        passes. Only fields listed in the schema's ``required`` are checked.
    """
    if allowed_tools and tool_name not in allowed_tools:
        return ToolResponseIssue(
            kind="invalid_tool_args",
            reason=f"unknown tool '{tool_name}'",
            retry_hint="Use exactly one tool from the provided tool list.",
        )

    # Normalise every accepted input form to JSON text so that a single
    # json.loads call below covers all of them.
    if isinstance(raw_arguments, (dict, list)):
        arg_text = json.dumps(raw_arguments)
    elif raw_arguments is None:
        arg_text = "{}"
    else:
        arg_text = str(raw_arguments)

    try:
        parsed = json.loads(arg_text)
    except json.JSONDecodeError as exc:
        return ToolResponseIssue(
            kind="invalid_tool_args",
            reason=f"invalid JSON arguments for '{tool_name}': {exc.msg}",
            retry_hint=(
                f"Emit exactly one `{tool_name}` tool call with `arguments` as a strict JSON object. "
                "Do not include prose before or after JSON."
            ),
        )

    if not isinstance(parsed, dict):
        return ToolResponseIssue(
            kind="invalid_tool_args",
            reason=f"arguments for '{tool_name}' must be a JSON object",
            retry_hint=(
                f"Emit exactly one `{tool_name}` tool call with `arguments` set to a JSON object (not a string or list)."
            ),
        )

    if _contains_tool_markup(parsed):
        return ToolResponseIssue(
            kind="invalid_tool_args",
            reason=f"arguments for '{tool_name}' contain malformed markup fragments",
            retry_hint=(
                f"Remove tag fragments from `{tool_name}` arguments and emit only plain JSON key/value pairs."
            ),
        )

    if not isinstance(tool_schema, dict):
        tool_schema = {}

    required = tool_schema.get("required", [])
    if not isinstance(required, list):
        required = []

    properties = tool_schema.get("properties", {})
    if not isinstance(properties, dict):
        properties = {}

    # Problems are collected per category so that one issue can report all of
    # them at once.
    missing: list[str] = []
    empty: list[str] = []
    wrong_type: list[str] = []

    for field in required:
        if not isinstance(field, str):
            continue

        if field not in parsed:
            missing.append(field)
            continue

        value = parsed.get(field)
        if _required_value_is_empty(value):
            empty.append(field)
            continue

        schema = (
            properties.get(field, {}) if isinstance(properties.get(field), dict) else {}
        )
        expected_type = schema.get("type")
        if expected_type and not _matches_json_schema_type(value, expected_type):
            wrong_type.append(field)
            continue

        # Length is measured on the whitespace-trimmed string, and a too-short
        # value is reported under "empty".
        min_length = schema.get("minLength")
        if (
            isinstance(min_length, int)
            and isinstance(value, str)
            and len(value.strip()) < min_length
        ):
            empty.append(field)
            continue

        # A list shorter than minItems is likewise reported under "empty".
        min_items = schema.get("minItems")
        if (
            isinstance(min_items, int)
            and isinstance(value, list)
            and len(value) < min_items
        ):
            empty.append(field)

    if missing or empty or wrong_type:
        details = []
        if missing:
            details.append(f"missing: {', '.join(missing)}")
        if empty:
            details.append(f"empty: {', '.join(empty)}")
        if wrong_type:
            details.append(f"type mismatch: {', '.join(wrong_type)}")
        required_fields = ", ".join(str(f) for f in required if isinstance(f, str))
        required_hint = (
            f"Required fields must be non-empty: {required_fields}. "
            if required_fields
            else ""
        )
        return ToolResponseIssue(
            kind="invalid_tool_args",
            reason=f"invalid arguments for '{tool_name}' ({'; '.join(details)})",
            retry_hint=(
                f"Emit exactly one `{tool_name}` tool call with strict JSON arguments. "
                f"{required_hint}Do not include protocol tags or commentary."
            ).strip(),
        )

    return ToolResponseIssue()
2219
+
2220
+
2221
def _classify_tool_response_issue(
    openai_resp: dict,
    anthropic_body: dict,
    required_tool_choice: bool = False,
) -> ToolResponseIssue:
    """Classify what, if anything, is wrong with a tool-turn response.

    Checks run in a fixed order and the first hit wins: malformed pseudo tool
    payload, then a missing tool call on a required turn, then per-call
    argument validation.

    Args:
        openai_resp: OpenAI-style chat completion response.
        anthropic_body: Original request; no ``tools`` key means no checks.
        required_tool_choice: Whether this turn was sent with a mandatory
            tool call.

    Returns:
        A ``ToolResponseIssue`` whose ``kind`` is ``"malformed_payload"``,
        ``"required_tool_miss"`` or ``"invalid_tool_args"``, or an empty issue
        when the response is acceptable.
    """
    if "tools" not in anthropic_body:
        return ToolResponseIssue()

    if _is_malformed_tool_response(openai_resp, anthropic_body):
        return ToolResponseIssue(
            kind="malformed_payload",
            reason="malformed pseudo tool payload detected in assistant text",
            retry_hint=(
                "Return exactly one valid tool call with strict JSON arguments. "
                "Do not output raw protocol tags, schema fragments, or apologies about formatting."
            ),
        )

    has_tool_calls = _openai_has_tool_calls(openai_resp)
    if not has_tool_calls:
        if required_tool_choice:
            text = _openai_message_text(openai_resp).strip()
            # Only an empty or very short (<= 48 chars) reply counts as a
            # miss; a longer text reply is let through as not an issue.
            if not text or len(text) <= 48:
                return ToolResponseIssue(
                    kind="required_tool_miss",
                    reason="required tool turn returned no tool calls",
                    retry_hint=(
                        "A tool call is mandatory for this turn. Emit exactly one valid tool call now "
                        "with a strict JSON object in `arguments`."
                    ),
                )
        return ToolResponseIssue()

    # Argument validation is optional and controlled by the preflight flag.
    if not PROXY_TOOL_ARGS_PREFLIGHT:
        return ToolResponseIssue()

    _, message = _extract_openai_choice(openai_resp)
    tool_calls = message.get("tool_calls") or []
    tools_by_name = _anthropic_tools_by_name(anthropic_body)
    allowed_tools = set(tools_by_name.keys())

    for tc in tool_calls:
        fn = tc.get("function") if isinstance(tc, dict) else {}
        if not isinstance(fn, dict):
            fn = {}
        tool_name = fn.get("name", "")
        issue = _validate_tool_call_arguments(
            tool_name,
            fn.get("arguments", "{}"),
            tools_by_name.get(tool_name, {}),
            allowed_tools,
        )
        # Report the first invalid call; later calls are not inspected.
        if issue.has_issue():
            return issue

    return ToolResponseIssue()
2277
+
2278
+
1667
2279
  def _looks_malformed_tool_payload(text: str) -> bool:
1668
2280
  if not text:
1669
2281
  return False
@@ -1724,7 +2336,9 @@ def _is_malformed_tool_response(openai_resp: dict, anthropic_body: dict) -> bool
1724
2336
  return _looks_malformed_tool_payload(_openai_message_text(openai_resp))
1725
2337
 
1726
2338
 
1727
- def _build_malformed_retry_body(openai_body: dict, anthropic_body: dict) -> dict:
2339
+ def _build_malformed_retry_body(
2340
+ openai_body: dict, anthropic_body: dict, retry_hint: str = ""
2341
+ ) -> dict:
1728
2342
  retry_body = dict(openai_body)
1729
2343
  retry_body["stream"] = False
1730
2344
  retry_body["tool_choice"] = "required"
@@ -1759,6 +2373,16 @@ def _build_malformed_retry_body(openai_body: dict, anthropic_body: dict) -> dict
1759
2373
  if PROXY_DISABLE_THINKING_ON_TOOL_TURNS:
1760
2374
  retry_body["enable_thinking"] = False
1761
2375
 
2376
+ if retry_hint:
2377
+ repair_prompt = (
2378
+ "[TOOL CALL REPAIR]\n"
2379
+ f"{retry_hint}\n"
2380
+ "Return exactly one valid tool call object and no explanatory prose."
2381
+ )
2382
+ retry_messages = list(retry_body.get("messages", []))
2383
+ retry_messages.append({"role": "system", "content": repair_prompt})
2384
+ retry_body["messages"] = retry_messages
2385
+
1762
2386
  return retry_body
1763
2387
 
1764
2388
 
@@ -1847,27 +2471,68 @@ async def _apply_malformed_tool_guardrail(
1847
2471
  if not PROXY_MALFORMED_TOOL_GUARDRAIL:
1848
2472
  return openai_resp
1849
2473
 
1850
- if not _is_malformed_tool_response(openai_resp, anthropic_body):
1851
- if _openai_has_valid_tool_calls(openai_resp, anthropic_body):
2474
+ working_resp = openai_resp
2475
+ repair_count = 0
2476
+ if PROXY_TOOL_ARGS_PREFLIGHT and _openai_has_tool_calls(openai_resp):
2477
+ working_resp, markup_repairs = _repair_tool_call_markup(openai_resp)
2478
+ working_resp, required_repairs = _repair_required_tool_args(
2479
+ working_resp, anthropic_body
2480
+ )
2481
+ repair_count = markup_repairs + required_repairs
2482
+
2483
+ required_tool_choice = openai_body.get("tool_choice") == "required"
2484
+ has_tool_calls = _openai_has_tool_calls(working_resp)
2485
+ if required_tool_choice and not has_tool_calls:
2486
+ monitor.required_tool_miss_streak += 1
2487
+
2488
+ issue = _classify_tool_response_issue(
2489
+ working_resp,
2490
+ anthropic_body,
2491
+ required_tool_choice=required_tool_choice,
2492
+ )
2493
+ if not issue.has_issue():
2494
+ if required_tool_choice and not has_tool_calls:
2495
+ monitor.maybe_activate_forced_tool_dampener("required_tool_miss")
2496
+ if has_tool_calls:
1852
2497
  monitor.malformed_tool_streak = 0
1853
- return openai_resp
2498
+ monitor.invalid_tool_call_streak = 0
2499
+ monitor.required_tool_miss_streak = 0
2500
+ if repair_count > 0:
2501
+ monitor.arg_preflight_repairs += repair_count
2502
+ logger.info(
2503
+ "TOOL ARG REPAIR: session=%s repaired=%d source=initial",
2504
+ session_id,
2505
+ repair_count,
2506
+ )
2507
+ return working_resp
1854
2508
 
1855
- monitor.malformed_tool_streak += 1
1856
- invalid_reason = _invalid_tool_call_reason(openai_resp, anthropic_body)
1857
- if invalid_reason:
1858
- excerpt = invalid_reason[:220]
1859
- else:
1860
- excerpt = _openai_message_text(openai_resp)[:220].replace("\n", " ")
2509
+ if issue.kind == "malformed_payload":
2510
+ monitor.malformed_tool_streak += 1
2511
+ elif issue.kind == "invalid_tool_args":
2512
+ monitor.invalid_tool_call_streak += 1
2513
+ monitor.arg_preflight_rejections += 1
2514
+
2515
+ monitor.maybe_activate_forced_tool_dampener(issue.kind)
2516
+ excerpt = _openai_message_text(working_resp)[:220].replace("\n", " ")
1861
2517
  logger.warning(
1862
- "MALFORMED TOOL PAYLOAD: session=%s streak=%d excerpt=%.220s",
2518
+ "TOOL RESPONSE ISSUE: session=%s kind=%s reason=%s malformed=%d invalid=%d required_miss=%d excerpt=%.220s",
1863
2519
  session_id,
2520
+ issue.kind,
2521
+ issue.reason,
1864
2522
  monitor.malformed_tool_streak,
2523
+ monitor.invalid_tool_call_streak,
2524
+ monitor.required_tool_miss_streak,
1865
2525
  excerpt,
1866
2526
  )
1867
2527
 
1868
2528
  attempts = max(0, PROXY_MALFORMED_TOOL_RETRY_MAX)
2529
+ current_issue = issue
1869
2530
  for attempt in range(attempts):
1870
- retry_body = _build_malformed_retry_body(openai_body, anthropic_body)
2531
+ retry_body = _build_malformed_retry_body(
2532
+ openai_body,
2533
+ anthropic_body,
2534
+ retry_hint=current_issue.retry_hint,
2535
+ )
1871
2536
  retry_resp = await client.post(
1872
2537
  f"{LLAMA_CPP_BASE}/chat/completions",
1873
2538
  json=retry_body,
@@ -1883,40 +2548,71 @@ async def _apply_malformed_tool_guardrail(
1883
2548
  continue
1884
2549
 
1885
2550
  retry_json = retry_resp.json()
1886
- if _openai_has_valid_tool_calls(retry_json, anthropic_body):
1887
- monitor.malformed_tool_streak = 0
1888
- logger.info(
1889
- "MALFORMED RETRY success: produced tool_use (attempt %d/%d)",
1890
- attempt + 1,
1891
- attempts,
2551
+ retry_working = retry_json
2552
+ retry_repairs = 0
2553
+ if PROXY_TOOL_ARGS_PREFLIGHT and _openai_has_tool_calls(retry_json):
2554
+ retry_working, retry_markup_repairs = _repair_tool_call_markup(retry_json)
2555
+ retry_working, retry_required_repairs = _repair_required_tool_args(
2556
+ retry_working, anthropic_body
1892
2557
  )
1893
- return retry_json
2558
+ retry_repairs = retry_markup_repairs + retry_required_repairs
1894
2559
 
1895
- retry_invalid_reason = _invalid_tool_call_reason(retry_json, anthropic_body)
1896
- if retry_invalid_reason:
1897
- logger.warning(
1898
- "MALFORMED RETRY invalid tool_call payload (attempt %d/%d): %s",
1899
- attempt + 1,
1900
- attempts,
1901
- retry_invalid_reason,
1902
- )
2560
+ retry_has_tool_calls = _openai_has_tool_calls(retry_working)
2561
+ retry_required = retry_body.get("tool_choice") == "required"
2562
+ if retry_required and not retry_has_tool_calls:
2563
+ monitor.required_tool_miss_streak += 1
1903
2564
 
1904
- if not _is_malformed_tool_response(retry_json, anthropic_body):
2565
+ retry_issue = _classify_tool_response_issue(
2566
+ retry_working,
2567
+ anthropic_body,
2568
+ required_tool_choice=retry_required,
2569
+ )
2570
+
2571
+ if not retry_issue.has_issue():
1905
2572
  monitor.malformed_tool_streak = 0
2573
+ monitor.invalid_tool_call_streak = 0
2574
+ monitor.required_tool_miss_streak = 0
1906
2575
  logger.info(
1907
- "MALFORMED RETRY produced clean text response (attempt %d/%d)",
2576
+ "TOOL RESPONSE RETRY success: kind=%s attempt=%d/%d",
2577
+ current_issue.kind,
1908
2578
  attempt + 1,
1909
2579
  attempts,
1910
2580
  )
1911
- return retry_json
2581
+ if retry_repairs > 0:
2582
+ monitor.arg_preflight_repairs += retry_repairs
2583
+ logger.info(
2584
+ "TOOL ARG REPAIR: session=%s repaired=%d source=retry",
2585
+ session_id,
2586
+ retry_repairs,
2587
+ )
2588
+ return retry_working
1912
2589
 
1913
- monitor.malformed_tool_streak += 1
2590
+ if retry_issue.kind == "malformed_payload":
2591
+ monitor.malformed_tool_streak += 1
2592
+ elif retry_issue.kind == "invalid_tool_args":
2593
+ monitor.invalid_tool_call_streak += 1
2594
+ monitor.arg_preflight_rejections += 1
2595
+
2596
+ monitor.maybe_activate_forced_tool_dampener(retry_issue.kind)
2597
+ logger.warning(
2598
+ "TOOL RESPONSE RETRY invalid: session=%s attempt=%d/%d kind=%s reason=%s",
2599
+ session_id,
2600
+ attempt + 1,
2601
+ attempts,
2602
+ retry_issue.kind,
2603
+ retry_issue.reason,
2604
+ )
2605
+ current_issue = retry_issue
1914
2606
 
1915
2607
  logger.error(
1916
- "MALFORMED TOOL PAYLOAD persisted after retries (session=%s); returning clean guardrail response",
2608
+ "TOOL RESPONSE issue persisted after retries (session=%s kind=%s malformed=%d invalid=%d required_miss=%d); returning clean guardrail response",
1917
2609
  session_id,
2610
+ current_issue.kind or issue.kind,
2611
+ monitor.malformed_tool_streak,
2612
+ monitor.invalid_tool_call_streak,
2613
+ monitor.required_tool_miss_streak,
1918
2614
  )
1919
- return _build_clean_guardrail_openai_response(openai_resp)
2615
+ return _build_clean_guardrail_openai_response(working_resp)
1920
2616
 
1921
2617
 
1922
2618
  def _maybe_apply_session_contamination_breaker(
@@ -1926,13 +2622,28 @@ def _maybe_apply_session_contamination_breaker(
1926
2622
  return anthropic_body
1927
2623
 
1928
2624
  threshold = max(1, PROXY_SESSION_CONTAMINATION_THRESHOLD)
1929
- if monitor.malformed_tool_streak < threshold:
2625
+ forced_threshold = max(1, PROXY_SESSION_CONTAMINATION_FORCED_THRESHOLD)
2626
+ required_miss_threshold = max(
2627
+ 1, PROXY_SESSION_CONTAMINATION_REQUIRED_MISS_THRESHOLD
2628
+ )
2629
+ bad_streak = monitor.guardrail_streak()
2630
+ should_reset = (
2631
+ bad_streak >= threshold
2632
+ or (
2633
+ bad_streak >= max(1, threshold - 1)
2634
+ and monitor.consecutive_forced_count >= forced_threshold
2635
+ )
2636
+ or monitor.required_tool_miss_streak >= required_miss_threshold
2637
+ )
2638
+ if not should_reset:
1930
2639
  return anthropic_body
1931
2640
 
1932
2641
  messages = anthropic_body.get("messages", [])
1933
2642
  keep_last = max(2, PROXY_SESSION_CONTAMINATION_KEEP_LAST)
1934
2643
  if len(messages) <= keep_last + 1:
1935
2644
  monitor.malformed_tool_streak = 0
2645
+ monitor.invalid_tool_call_streak = 0
2646
+ monitor.required_tool_miss_streak = 0
1936
2647
  return anthropic_body
1937
2648
 
1938
2649
  head = messages[:1]
@@ -1940,22 +2651,30 @@ def _maybe_apply_session_contamination_breaker(
1940
2651
  reset_marker = {
1941
2652
  "role": "user",
1942
2653
  "content": (
1943
- "[SESSION RESET: previous turns contained malformed tool-call formatting "
1944
- "artifacts. Continue from the recent context below and emit valid tool calls only.]"
2654
+ "[SESSION RESET: tool-call quality degraded in earlier turns. "
2655
+ "Continue from the recent context and emit valid tool calls with strict JSON arguments only.]"
1945
2656
  ),
1946
2657
  }
1947
2658
 
1948
2659
  updated_body = dict(anthropic_body)
1949
2660
  updated_body["messages"] = head + [reset_marker] + tail
1950
2661
 
2662
+ forced_before = monitor.consecutive_forced_count
2663
+ required_miss_before = monitor.required_tool_miss_streak
1951
2664
  monitor.contamination_resets += 1
1952
2665
  monitor.malformed_tool_streak = 0
2666
+ monitor.invalid_tool_call_streak = 0
2667
+ monitor.required_tool_miss_streak = 0
1953
2668
  monitor.no_progress_streak = 0
1954
2669
  monitor.consecutive_forced_count = 0
2670
+ monitor.forced_auto_cooldown_turns = 0
1955
2671
  logger.warning(
1956
- "SESSION CONTAMINATION BREAKER: session=%s reset applied, kept=%d messages",
2672
+ "SESSION CONTAMINATION BREAKER: session=%s reset applied, kept=%d messages (bad_streak=%d forced=%d required_miss=%d)",
1957
2673
  session_id,
1958
2674
  len(updated_body["messages"]),
2675
+ bad_streak,
2676
+ forced_before,
2677
+ required_miss_before,
1959
2678
  )
1960
2679
 
1961
2680
  return updated_body
@@ -2322,8 +3041,17 @@ async def stream_anthropic_response(
2322
3041
  ]
2323
3042
  }
2324
3043
 
2325
- if _is_malformed_tool_response(synthetic_openai_resp, anthropic_body):
3044
+ stream_issue = _classify_tool_response_issue(
3045
+ synthetic_openai_resp,
3046
+ anthropic_body,
3047
+ required_tool_choice=False,
3048
+ )
3049
+
3050
+ if stream_issue.kind == "malformed_payload":
2326
3051
  monitor.malformed_tool_streak += 1
3052
+ elif stream_issue.kind == "invalid_tool_args":
3053
+ monitor.invalid_tool_call_streak += 1
3054
+ monitor.arg_preflight_rejections += 1
2327
3055
  elif (
2328
3056
  "tools" in anthropic_body
2329
3057
  and not tool_calls_by_index
@@ -2335,6 +3063,8 @@ async def stream_anthropic_response(
2335
3063
  monitor.malformed_tool_streak += 1
2336
3064
  elif tool_calls_by_index:
2337
3065
  monitor.malformed_tool_streak = 0
3066
+ monitor.invalid_tool_call_streak = 0
3067
+ monitor.required_tool_miss_streak = 0
2338
3068
 
2339
3069
  if _is_unexpected_end_turn(synthetic_openai_resp, anthropic_body):
2340
3070
  monitor.unexpected_end_turn_count += 1
@@ -2742,6 +3472,8 @@ async def messages(request: Request):
2742
3472
  monitor.malformed_tool_streak += 1
2743
3473
  elif _openai_has_tool_calls(openai_resp):
2744
3474
  monitor.malformed_tool_streak = 0
3475
+ monitor.invalid_tool_call_streak = 0
3476
+ monitor.required_tool_miss_streak = 0
2745
3477
 
2746
3478
  anthropic_resp = openai_to_anthropic_response(openai_resp, model)
2747
3479
 
@@ -2832,6 +3564,13 @@ async def context_status(request: Request):
2832
3564
  "loop_warnings_emitted": monitor.loop_warnings_emitted,
2833
3565
  "unexpected_end_turn_count": monitor.unexpected_end_turn_count,
2834
3566
  "malformed_tool_streak": monitor.malformed_tool_streak,
3567
+ "invalid_tool_call_streak": monitor.invalid_tool_call_streak,
3568
+ "required_tool_miss_streak": monitor.required_tool_miss_streak,
3569
+ "guardrail_streak": monitor.guardrail_streak(),
3570
+ "arg_preflight_rejections": monitor.arg_preflight_rejections,
3571
+ "arg_preflight_repairs": monitor.arg_preflight_repairs,
3572
+ "forced_auto_cooldown_turns": monitor.forced_auto_cooldown_turns,
3573
+ "forced_dampener_triggers": monitor.forced_dampener_triggers,
2835
3574
  "contamination_resets": monitor.contamination_resets,
2836
3575
  "tool_call_history_len": len(monitor.tool_call_history),
2837
3576
  "is_looping": monitor.detect_tool_loop(window=PROXY_LOOP_WINDOW)[0],