openhands-sdk 1.5.0__py3-none-any.whl → 1.7.2__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registries. It is provided for informational purposes only and reflects the changes between those published versions.
Files changed (56)
  1. openhands/sdk/__init__.py +9 -1
  2. openhands/sdk/agent/agent.py +35 -12
  3. openhands/sdk/agent/base.py +53 -7
  4. openhands/sdk/agent/prompts/model_specific/anthropic_claude.j2 +3 -0
  5. openhands/sdk/agent/prompts/model_specific/google_gemini.j2 +1 -0
  6. openhands/sdk/agent/prompts/model_specific/openai_gpt/gpt-5-codex.j2 +2 -0
  7. openhands/sdk/agent/prompts/model_specific/openai_gpt/gpt-5.j2 +3 -0
  8. openhands/sdk/agent/prompts/self_documentation.j2 +15 -0
  9. openhands/sdk/agent/prompts/system_prompt.j2 +29 -1
  10. openhands/sdk/agent/utils.py +18 -4
  11. openhands/sdk/context/__init__.py +2 -0
  12. openhands/sdk/context/agent_context.py +42 -10
  13. openhands/sdk/context/condenser/base.py +11 -6
  14. openhands/sdk/context/condenser/llm_summarizing_condenser.py +169 -20
  15. openhands/sdk/context/condenser/no_op_condenser.py +2 -1
  16. openhands/sdk/context/condenser/pipeline_condenser.py +10 -9
  17. openhands/sdk/context/condenser/utils.py +149 -0
  18. openhands/sdk/context/prompts/prompt.py +40 -2
  19. openhands/sdk/context/prompts/templates/system_message_suffix.j2 +3 -3
  20. openhands/sdk/context/skills/__init__.py +2 -0
  21. openhands/sdk/context/skills/skill.py +152 -1
  22. openhands/sdk/context/view.py +287 -27
  23. openhands/sdk/conversation/base.py +17 -0
  24. openhands/sdk/conversation/conversation.py +19 -0
  25. openhands/sdk/conversation/exceptions.py +29 -4
  26. openhands/sdk/conversation/impl/local_conversation.py +126 -9
  27. openhands/sdk/conversation/impl/remote_conversation.py +152 -3
  28. openhands/sdk/conversation/state.py +42 -1
  29. openhands/sdk/conversation/stuck_detector.py +81 -45
  30. openhands/sdk/conversation/types.py +30 -0
  31. openhands/sdk/event/llm_convertible/system.py +16 -20
  32. openhands/sdk/hooks/__init__.py +30 -0
  33. openhands/sdk/hooks/config.py +180 -0
  34. openhands/sdk/hooks/conversation_hooks.py +227 -0
  35. openhands/sdk/hooks/executor.py +155 -0
  36. openhands/sdk/hooks/manager.py +170 -0
  37. openhands/sdk/hooks/types.py +40 -0
  38. openhands/sdk/io/cache.py +85 -0
  39. openhands/sdk/io/local.py +39 -2
  40. openhands/sdk/llm/llm.py +3 -2
  41. openhands/sdk/llm/message.py +4 -3
  42. openhands/sdk/llm/mixins/fn_call_converter.py +61 -16
  43. openhands/sdk/llm/mixins/non_native_fc.py +5 -1
  44. openhands/sdk/llm/utils/model_features.py +64 -24
  45. openhands/sdk/llm/utils/model_prompt_spec.py +98 -0
  46. openhands/sdk/llm/utils/verified_models.py +6 -4
  47. openhands/sdk/logger/logger.py +1 -1
  48. openhands/sdk/tool/schema.py +10 -0
  49. openhands/sdk/tool/tool.py +2 -2
  50. openhands/sdk/utils/async_executor.py +76 -67
  51. openhands/sdk/utils/models.py +1 -1
  52. openhands/sdk/utils/paging.py +63 -0
  53. {openhands_sdk-1.5.0.dist-info → openhands_sdk-1.7.2.dist-info}/METADATA +3 -3
  54. {openhands_sdk-1.5.0.dist-info → openhands_sdk-1.7.2.dist-info}/RECORD +56 -41
  55. {openhands_sdk-1.5.0.dist-info → openhands_sdk-1.7.2.dist-info}/WHEEL +0 -0
  56. {openhands_sdk-1.5.0.dist-info → openhands_sdk-1.7.2.dist-info}/top_level.txt +0 -0
openhands/sdk/io/local.py CHANGED
@@ -1,6 +1,7 @@
 import os
 import shutil
 
+from openhands.sdk.io.cache import MemoryLRUCache
 from openhands.sdk.logger import get_logger
 from openhands.sdk.observability.laminar import observe
 
@@ -12,13 +13,31 @@ logger = get_logger(__name__)
 
 class LocalFileStore(FileStore):
     root: str
+    cache: MemoryLRUCache
 
-    def __init__(self, root: str):
+    def __init__(
+        self,
+        root: str,
+        cache_limit_size: int = 500,
+        cache_memory_size: int = 20 * 1024 * 1024,
+    ) -> None:
+        """Initialize a LocalFileStore with caching.
+
+        Args:
+            root: Root directory for file storage.
+            cache_limit_size: Maximum number of cached entries (default: 500).
+            cache_memory_size: Maximum cache memory in bytes (default: 20MB).
+
+        Note:
+            The cache assumes exclusive access to files. External modifications
+            to files will not be detected and may result in stale cache reads.
+        """
         if root.startswith("~"):
             root = os.path.expanduser(root)
         root = os.path.abspath(os.path.normpath(root))
         self.root = root
         os.makedirs(self.root, exist_ok=True)
+        self.cache = MemoryLRUCache(cache_memory_size, cache_limit_size)
 
     def get_full_path(self, path: str) -> str:
         # strip leading slash to keep relative under root
@@ -32,6 +51,7 @@ class LocalFileStore(FileStore):
         # ensure sandboxing
         if os.path.commonpath([self.root, full]) != self.root:
             raise ValueError(f"path escapes filestore root: {path}")
+
         return full
 
     @observe(name="LocalFileStore.write", span_type="TOOL")
@@ -41,14 +61,27 @@
         if isinstance(contents, str):
             with open(full_path, "w", encoding="utf-8") as f:
                 f.write(contents)
+            self.cache[full_path] = contents
         else:
             with open(full_path, "wb") as f:
                 f.write(contents)
+            # Don't cache binary content - LocalFileStore is meant for JSON data
+            # If binary data is written and then read, it will error on read
 
     def read(self, path: str) -> str:
         full_path = self.get_full_path(path)
+
+        if full_path in self.cache:
+            return self.cache[full_path]
+
+        if not os.path.exists(full_path):
+            raise FileNotFoundError(path)
+
         with open(full_path, encoding="utf-8") as f:
-            return f.read()
+            result = f.read()
+
+        self.cache[full_path] = result
+        return result
 
     @observe(name="LocalFileStore.list", span_type="TOOL")
     def list(self, path: str) -> list[str]:
@@ -72,11 +105,15 @@
             if not os.path.exists(full_path):
                 logger.debug(f"Local path does not exist: {full_path}")
                 return
+
             if os.path.isfile(full_path):
                 os.remove(full_path)
+                del self.cache[full_path]
                 logger.debug(f"Removed local file: {full_path}")
             elif os.path.isdir(full_path):
                 shutil.rmtree(full_path)
+                self.cache.clear()
                 logger.debug(f"Removed local directory: {full_path}")
+
         except Exception as e:
             logger.error(f"Error clearing local file store: {str(e)}")
openhands/sdk/llm/llm.py CHANGED
@@ -247,10 +247,11 @@ class LLM(BaseModel, RetryMixin, NonNativeToolCallingMixin):
             "like HuggingFace and Groq."
         ),
     )
-    reasoning_effort: Literal["low", "medium", "high", "none"] | None = Field(
+    reasoning_effort: Literal["low", "medium", "high", "xhigh", "none"] | None = Field(
         default="high",
         description="The effort to put into reasoning. "
-        "This is a string that can be one of 'low', 'medium', 'high', or 'none'. "
+        "This is a string that can be one of 'low', 'medium', 'high', 'xhigh', "
+        "or 'none'. "
         "Can apply to all reasoning models.",
     )
     reasoning_summary: Literal["auto", "concise", "detailed"] | None = Field(
openhands/sdk/llm/message.py CHANGED
@@ -120,8 +120,8 @@ class ThinkingBlock(BaseModel):
 
     type: Literal["thinking"] = "thinking"
     thinking: str = Field(..., description="The thinking content")
-    signature: str = Field(
-        ..., description="Cryptographic signature for the thinking block"
+    signature: str | None = Field(
+        default=None, description="Cryptographic signature for the thinking block"
     )
 
 
@@ -169,11 +169,12 @@ class TextContent(BaseContent):
     model_config: ClassVar[ConfigDict] = ConfigDict(
         extra="forbid", populate_by_name=True
     )
+    enable_truncation: bool = True
 
     def to_llm_dict(self) -> list[dict[str, str | dict[str, str]]]:
         """Convert to LLM API format."""
         text = self.text
-        if len(text) > DEFAULT_TEXT_CONTENT_LIMIT:
+        if self.enable_truncation and len(text) > DEFAULT_TEXT_CONTENT_LIMIT:
             logger.warning(
                 f"TextContent text length ({len(text)}) exceeds limit "
                 f"({DEFAULT_TEXT_CONTENT_LIMIT}), truncating"
openhands/sdk/llm/mixins/fn_call_converter.py CHANGED
@@ -450,7 +450,8 @@ PLEASE follow the format strictly! PLEASE EMIT ONE AND ONLY ONE FUNCTION CALL PE
 """ # noqa: E501
 
 # Regex patterns for function call parsing
-FN_REGEX_PATTERN = r"<function=([^>]+)>\n(.*?)</function>"
+# Note: newline after function name is optional for compatibility with various models
+FN_REGEX_PATTERN = r"<function=([^>]+)>\n?(.*?)</function>"
 FN_PARAM_REGEX_PATTERN = r"<parameter=([^>]+)>(.*?)</parameter>"
 
 # Add new regex pattern for tool execution results
@@ -702,7 +703,7 @@ def convert_fncall_messages_to_non_fncall_messages(
     first_user_message_encountered = False
     for message in messages:
         role = message["role"]
-        content: Content = message["content"]
+        content: Content = message.get("content") or ""
 
         # 1. SYSTEM MESSAGES
         # append system prompt suffix to content
@@ -880,6 +881,9 @@ def _extract_and_validate_params(
     for param_match in param_matches:
         param_name = param_match.group(1)
         param_value = param_match.group(2)
+        # Normalize whitespace: some models add extra newlines around values
+        if isinstance(param_value, str):
+            param_value = param_value.strip()
 
         # Validate parameter is allowed
         if allowed_params and param_name not in allowed_params:
@@ -927,7 +931,11 @@ def _extract_and_validate_params(
         found_params.add(param_name)
 
     # Check all required parameters are present
-    missing_params = required_params - found_params
+    # Note: security_risk is excluded here because its validation happens later
+    # in Agent._extract_security_risk(), which has context about whether a security
+    # analyzer is configured. This allows weaker models to omit it when no analyzer
+    # is active, while still enforcing it for stronger models with LLMSecurityAnalyzer.
+    missing_params = required_params - found_params - {"security_risk"}
     if missing_params:
         raise FunctionCallValidationError(
             f"Missing required parameters for function '{fn_name}': {missing_params}"
@@ -935,12 +943,31 @@
     return params
 
 
+def _preprocess_model_output(content: str) -> str:
+    """Clean up model-specific formatting before parsing function calls.
+
+    Removes wrapper tags that some models (like Nemotron) emit around function calls:
+    - </think> before the function call
+    - <tool_call>...</tool_call> around the function call
+
+    Only strips tags at boundaries, not inside parameter values.
+    """
+    # Strip </think> when it appears before <function= (Nemotron reasoning end)
+    content = re.sub(r"</think>\s*(?=<function=)", "", content)
+    # Strip <tool_call> when it appears right before <function=
+    content = re.sub(r"<tool_call>\s*(?=<function=)", "", content)
+    # Strip </tool_call> when it appears right after </function>
+    content = re.sub(r"(?<=</function>)\s*</tool_call>", "", content)
+    return content
+
+
 def _fix_stopword(content: str) -> str:
     """Fix the issue when some LLM would NOT return the stopword."""
+    content = _preprocess_model_output(content)
     if "<function=" in content and content.count("<function=") == 1:
         if content.endswith("</"):
             content = content.rstrip() + "function>"
-        else:
+        elif not content.rstrip().endswith("</function>"):
             content = content + "\n</function>"
     return content
 
@@ -981,8 +1008,8 @@ def convert_non_fncall_messages_to_fncall_messages(
 
     first_user_message_encountered = False
     for message in messages:
-        role, content = message["role"], message["content"]
-        content = content or ""  # handle cases where content is None
+        role = message["role"]
+        content = message.get("content") or ""
         # For system messages, remove the added suffix
         if role == "system":
             if isinstance(content, str):
@@ -1124,15 +1151,32 @@
             if fn_match:
                 fn_name = fn_match.group(1)
                 fn_body = _normalize_parameter_tags(fn_match.group(2))
-                matching_tool: ChatCompletionToolParamFunctionChunk | None = next(
-                    (
-                        tool["function"]
-                        for tool in tools
-                        if tool["type"] == "function"
-                        and tool["function"]["name"] == fn_name
-                    ),
-                    None,
-                )
+
+                def _find_tool(
+                    name: str,
+                ) -> ChatCompletionToolParamFunctionChunk | None:
+                    return next(
+                        (
+                            tool["function"]
+                            for tool in tools
+                            if tool["type"] == "function"
+                            and tool["function"]["name"] == name
+                        ),
+                        None,
+                    )
+
+                matching_tool = _find_tool(fn_name)
+                # Try aliases if tool not found (some models use legacy names)
+                if not matching_tool:
+                    TOOL_NAME_ALIASES = {
+                        "str_replace_editor": "file_editor",
+                        "bash": "terminal",
+                        "execute_bash": "terminal",
+                        "str_replace": "file_editor",
+                    }
+                    if fn_name in TOOL_NAME_ALIASES:
+                        fn_name = TOOL_NAME_ALIASES[fn_name]
+                        matching_tool = _find_tool(fn_name)
                 # Validate function exists in tools
                 if not matching_tool:
                     available_tools = [
@@ -1203,7 +1247,8 @@ def convert_from_multiple_tool_calls_to_single_tool_call_messages(
     for message in messages:
         role: str
         content: Content
-        role, content = message["role"], message["content"]
+        role = message["role"]
+        content = message.get("content") or ""
         if role == "assistant":
             if message.get("tool_calls") and len(message["tool_calls"]) > 1:
                 # handle multiple tool calls by breaking them into multiple messages
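A standalone sketch (outside the SDK's call path) of why the relaxed FN_REGEX_PATTERN and the new whitespace normalization matter; the terminal tool name and command are invented for illustration:

import re

FN_REGEX_PATTERN = r"<function=([^>]+)>\n?(.*?)</function>"
FN_PARAM_REGEX_PATTERN = r"<parameter=([^>]+)>(.*?)</parameter>"

# No newline after the function name, as some models emit it.
output = "<function=terminal><parameter=command>\nls -la\n</parameter></function>"
fn = re.search(FN_REGEX_PATTERN, output, re.DOTALL)
assert fn is not None and fn.group(1) == "terminal"
for name, value in re.findall(FN_PARAM_REGEX_PATTERN, fn.group(2), re.DOTALL):
    print(name, repr(value.strip()))  # .strip() mirrors the new parameter normalization

With the old pattern (mandatory "\n" after the function name) this output would not match at all, and the raw parameter value would keep its surrounding newlines.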
openhands/sdk/llm/mixins/non_native_fc.py CHANGED
@@ -41,7 +41,11 @@ class NonNativeToolCallingMixin:
         kwargs: dict,
     ) -> tuple[list[dict], dict]:
         """Convert to non-fncall prompting when native tool-calling is off."""
-        add_iclex = not any(s in self.model for s in ("openhands-lm", "devstral"))
+        # Skip in-context learning examples for models that understand the format
+        # or have limited context windows
+        add_iclex = not any(
+            s in self.model for s in ("openhands-lm", "devstral", "nemotron")
+        )
         messages = convert_fncall_messages_to_non_fncall_messages(
             messages, tools, add_in_context_learning_example=add_iclex
         )
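The check is a plain substring test over the configured model identifier, so provider-prefixed IDs are covered as well; a quick illustration with a hypothetical Nemotron identifier:

skip_for = ("openhands-lm", "devstral", "nemotron")
model = "openrouter/nvidia/nemotron-nano-9b-v2"  # hypothetical identifier
add_iclex = not any(s in model for s in skip_for)
print(add_iclex)  # False -- the in-context learning example is omitted for this model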
openhands/sdk/llm/utils/model_features.py CHANGED
@@ -15,6 +15,28 @@ def model_matches(model: str, patterns: list[str]) -> bool:
     return False
 
 
+def apply_ordered_model_rules(model: str, rules: list[str]) -> bool:
+    """Apply ordered include/exclude model rules to determine final support.
+
+    Rules semantics:
+    - Each entry is a substring token. '!' prefix marks an exclude rule.
+    - Case-insensitive substring matching against the raw model string.
+    - Evaluated in order; the last matching rule wins.
+    - If no rule matches, returns False.
+    """
+    raw = (model or "").strip().lower()
+    decided: bool | None = None
+    for rule in rules:
+        token = rule.strip().lower()
+        if not token:
+            continue
+        is_exclude = token.startswith("!")
+        core = token[1:] if is_exclude else token
+        if core and core in raw:
+            decided = not is_exclude
+    return bool(decided)
+
+
 @dataclass(frozen=True)
 class ModelFeatures:
     supports_reasoning_effort: bool
@@ -27,9 +49,9 @@ class ModelFeatures:
     supports_prompt_cache_retention: bool
 
 
-# Pattern tables capturing current behavior. Keep patterns lowercase.
+# Model lists capturing current behavior. Keep entries lowercase.
 
-REASONING_EFFORT_PATTERNS: list[str] = [
+REASONING_EFFORT_MODELS: list[str] = [
     # Mirror main behavior exactly (no unintended expansion)
     "o1-2024-12-17",
     "o1",
@@ -45,9 +67,11 @@ REASONING_EFFORT_PATTERNS: list[str] = [
     "gpt-5",
     # Anthropic Opus 4.5
     "claude-opus-4-5",
+    # Nova 2 Lite
+    "nova-2-lite",
 ]
 
-EXTENDED_THINKING_PATTERNS: list[str] = [
+EXTENDED_THINKING_MODELS: list[str] = [
     # Anthropic model family
     # We did not include sonnet 3.7 and 4 here as they don't brings
     # significant performance improvements for agents
@@ -55,7 +79,7 @@ EXTENDED_THINKING_PATTERNS: list[str] = [
     "claude-haiku-4-5",
 ]
 
-PROMPT_CACHE_PATTERNS: list[str] = [
+PROMPT_CACHE_MODELS: list[str] = [
     "claude-3-7-sonnet",
     "claude-sonnet-3-7-latest",
     "claude-3-5-sonnet",
@@ -70,14 +94,27 @@ PROMPT_CACHE_PATTERNS: list[str] = [
 ]
 
 # Models that support a top-level prompt_cache_retention parameter
-PROMPT_CACHE_RETENTION_PATTERNS: list[str] = [
-    # OpenAI GPT-5+ family
+# Source: OpenAI Prompt Caching docs (extended retention), which list:
+#   - gpt-5.2
+#   - gpt-5.1
+#   - gpt-5.1-codex
+#   - gpt-5.1-codex-mini
+#   - gpt-5.1-chat-latest
+#   - gpt-5
+#   - gpt-5-codex
+#   - gpt-4.1
+# Use ordered include/exclude rules (last wins) to naturally express exceptions.
+PROMPT_CACHE_RETENTION_MODELS: list[str] = [
+    # Broad allow for GPT-5 family and GPT-4.1 (covers gpt-5.2 and variants)
     "gpt-5",
-    # GPT-4.1 too
     "gpt-4.1",
+    # Exclude all mini variants by default
+    "!mini",
+    # Re-allow the explicitly documented supported mini variant
+    "gpt-5.1-codex-mini",
 ]
 
-SUPPORTS_STOP_WORDS_FALSE_PATTERNS: list[str] = [
+SUPPORTS_STOP_WORDS_FALSE_MODELS: list[str] = [
     # o-series families don't support stop words
     "o1",
     "o3",
@@ -89,7 +126,7 @@ SUPPORTS_STOP_WORDS_FALSE_PATTERNS: list[str] = [
 ]
 
 # Models that should use the OpenAI Responses API path by default
-RESPONSES_API_PATTERNS: list[str] = [
+RESPONSES_API_MODELS: list[str] = [
     # OpenAI GPT-5 family (includes mini variants)
     "gpt-5",
     # OpenAI Codex (uses Responses API)
@@ -101,41 +138,44 @@ RESPONSES_API_PATTERNS: list[str] = [
 # and need plain strings instead
 # NOTE: model_matches uses case-insensitive substring matching, not globbing.
 # Keep these entries as bare substrings without wildcards.
-FORCE_STRING_SERIALIZER_PATTERNS: list[str] = [
+FORCE_STRING_SERIALIZER_MODELS: list[str] = [
     "deepseek",  # e.g., DeepSeek-V3.2-Exp
     "glm",  # e.g., GLM-4.5 / GLM-4.6
     # Kimi K2-Instruct requires string serialization only on Groq
     "groq/kimi-k2-instruct",  # explicit provider-prefixed IDs
+    # MiniMax-M2 via OpenRouter rejects array content with
+    # "Input should be a valid string" for ChatCompletionToolMessage.content
+    "openrouter/minimax",
 ]
 
 # Models that we should send full reasoning content
 # in the message input
-SEND_REASONING_CONTENT_PATTERNS: list[str] = [
+SEND_REASONING_CONTENT_MODELS: list[str] = [
     "kimi-k2-thinking",
+    "deepseek/deepseek-reasoner",
 ]
 
 
 def get_features(model: str) -> ModelFeatures:
     """Get model features."""
     return ModelFeatures(
-        supports_reasoning_effort=model_matches(model, REASONING_EFFORT_PATTERNS),
-        supports_extended_thinking=model_matches(model, EXTENDED_THINKING_PATTERNS),
-        supports_prompt_cache=model_matches(model, PROMPT_CACHE_PATTERNS),
-        supports_stop_words=not model_matches(
-            model, SUPPORTS_STOP_WORDS_FALSE_PATTERNS
-        ),
-        supports_responses_api=model_matches(model, RESPONSES_API_PATTERNS),
-        force_string_serializer=model_matches(model, FORCE_STRING_SERIALIZER_PATTERNS),
-        send_reasoning_content=model_matches(model, SEND_REASONING_CONTENT_PATTERNS),
-        supports_prompt_cache_retention=model_matches(
-            model, PROMPT_CACHE_RETENTION_PATTERNS
+        supports_reasoning_effort=model_matches(model, REASONING_EFFORT_MODELS),
+        supports_extended_thinking=model_matches(model, EXTENDED_THINKING_MODELS),
+        supports_prompt_cache=model_matches(model, PROMPT_CACHE_MODELS),
+        supports_stop_words=not model_matches(model, SUPPORTS_STOP_WORDS_FALSE_MODELS),
+        supports_responses_api=model_matches(model, RESPONSES_API_MODELS),
+        force_string_serializer=model_matches(model, FORCE_STRING_SERIALIZER_MODELS),
+        send_reasoning_content=model_matches(model, SEND_REASONING_CONTENT_MODELS),
+        # Extended prompt_cache_retention support follows ordered include/exclude rules.
+        supports_prompt_cache_retention=apply_ordered_model_rules(
+            model, PROMPT_CACHE_RETENTION_MODELS
         ),
     )
 
 
 # Default temperature mapping.
 # Each entry: (pattern, default_temperature)
-DEFAULT_TEMPERATURE_PATTERNS: list[tuple[str, float]] = [
+DEFAULT_TEMPERATURE_MODELS: list[tuple[str, float]] = [
     ("kimi-k2-thinking", 1.0),
 ]
 
@@ -145,7 +185,7 @@ def get_default_temperature(model: str) -> float:
 
     Uses case-insensitive substring matching via model_matches.
     """
-    for pattern, value in DEFAULT_TEMPERATURE_PATTERNS:
+    for pattern, value in DEFAULT_TEMPERATURE_MODELS:
         if model_matches(model, [pattern]):
             return value
     return 0.0
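A quick check of the ordered "last match wins" semantics using the retention rules above; apply_ordered_model_rules is module-level, so it should be importable directly:

from openhands.sdk.llm.utils.model_features import apply_ordered_model_rules

rules = ["gpt-5", "gpt-4.1", "!mini", "gpt-5.1-codex-mini"]
print(apply_ordered_model_rules("gpt-5.2", rules))                # True  (matches "gpt-5")
print(apply_ordered_model_rules("gpt-5-mini-2025-08-07", rules))  # False ("!mini" is the last match)
print(apply_ordered_model_rules("gpt-5.1-codex-mini", rules))     # True  (re-allowed by the final rule)
print(apply_ordered_model_rules("claude-opus-4-5", rules))        # False (no rule matches)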
openhands/sdk/llm/utils/model_prompt_spec.py ADDED
@@ -0,0 +1,98 @@
+"""Utilities for detecting model families and variants.
+
+These helpers allow prompts and other systems to tailor behavior for specific
+LLM providers while keeping naming heuristics centralized.
+"""
+
+from __future__ import annotations
+
+from pydantic import BaseModel, ConfigDict
+
+
+class ModelPromptSpec(BaseModel):
+    """Detected prompt metadata for a given model configuration."""
+
+    model_config = ConfigDict(frozen=True)
+
+    family: str | None = None
+    variant: str | None = None
+
+
+_MODEL_FAMILY_PATTERNS: dict[str, tuple[str, ...]] = {
+    "openai_gpt": (
+        "gpt-",
+        "o1",
+        "o3",
+        "o4",
+    ),
+    "anthropic_claude": ("claude",),
+    "google_gemini": ("gemini",),
+    "meta_llama": ("llama",),
+    "mistral": ("mistral",),
+    "deepseek": ("deepseek",),
+    "alibaba_qwen": ("qwen",),
+}
+
+# Ordered heuristics to pick the most specific variant available for a family.
+_MODEL_VARIANT_PATTERNS: dict[str, tuple[tuple[str, tuple[str, ...]], ...]] = {
+    "openai_gpt": (
+        ("gpt-5-codex", ("gpt-5-codex", "gpt-5.1-codex")),
+        ("gpt-5", ("gpt-5", "gpt-5.1")),
+    ),
+}
+
+
+def _normalize(name: str | None) -> str:
+    return (name or "").strip().lower()
+
+
+def _match_family(model_name: str) -> str | None:
+    normalized = _normalize(model_name)
+    if not normalized:
+        return None
+
+    for family, patterns in _MODEL_FAMILY_PATTERNS.items():
+        if any(pattern in normalized for pattern in patterns):
+            return family
+    return None
+
+
+def _match_variant(
+    family: str,
+    model_name: str,
+    canonical_name: str | None = None,
+) -> str | None:
+    patterns = _MODEL_VARIANT_PATTERNS.get(family)
+    if not patterns:
+        return None
+
+    # Choose canonical_name if available, otherwise fall back to model_name
+    candidate = _normalize(canonical_name) or _normalize(model_name)
+    if not candidate:
+        return None
+
+    for variant, substrings in patterns:
+        if any(sub in candidate for sub in substrings):
+            return variant
+
+    return None
+
+
+def get_model_prompt_spec(
+    model_name: str,
+    canonical_name: str | None = None,
+) -> ModelPromptSpec:
+    """Return family and variant prompt metadata for the given identifiers."""
+
+    family = _match_family(model_name)
+    if family is None and canonical_name:
+        family = _match_family(canonical_name)
+
+    variant = None
+    if family is not None:
+        variant = _match_variant(family, model_name, canonical_name)
+
+    return ModelPromptSpec(family=family, variant=variant)
+
+
+__all__ = ["ModelPromptSpec", "get_model_prompt_spec"]
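Usage is a single lookup; family and variant fall back to None when nothing matches. The proxy-prefixed model name below is illustrative:

from openhands.sdk.llm.utils.model_prompt_spec import get_model_prompt_spec

spec = get_model_prompt_spec("litellm_proxy/gpt-5.1-codex")
print(spec.family, spec.variant)  # openai_gpt gpt-5-codex

spec = get_model_prompt_spec("claude-sonnet-4-5")
print(spec.family, spec.variant)  # anthropic_claude None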
openhands/sdk/llm/utils/verified_models.py CHANGED
@@ -1,11 +1,12 @@
 VERIFIED_OPENAI_MODELS = [
-    "gpt-5-codex",
-    "gpt-5-2025-08-07",
-    "gpt-5-mini-2025-08-07",
+    "gpt-5.2",
     "gpt-5.1",
-    "gpt-5.1-codex",
     "gpt-5.1-codex-max",
+    "gpt-5.1-codex",
     "gpt-5.1-codex-mini",
+    "gpt-5-codex",
+    "gpt-5-2025-08-07",
+    "gpt-5-mini-2025-08-07",
     "o4-mini",
     "gpt-4o",
     "gpt-4o-mini",
@@ -44,6 +45,7 @@ VERIFIED_MISTRAL_MODELS = [
 VERIFIED_OPENHANDS_MODELS = [
     "claude-opus-4-5-20251101",
     "claude-sonnet-4-5-20250929",
+    "gpt-5.2",
     "gpt-5.1-codex-max",
     "gpt-5.1-codex",
     "gpt-5.1",
openhands/sdk/logger/logger.py CHANGED
@@ -83,7 +83,7 @@ def disable_logger(name: str, level: int = logging.CRITICAL) -> None:
 
 # Quiet chatty third-party loggers
 for name in ["litellm", "LiteLLM", "openai"]:
-    disable_logger(name, logging.DEBUG if _ENABLE_LITELLM_DEBUG else logging.WARNING)
+    disable_logger(name, logging.DEBUG if _ENABLE_LITELLM_DEBUG else logging.ERROR)
 for name in ["httpcore", "httpx", "libtmux"]:
     disable_logger(name, logging.WARNING)
 
openhands/sdk/tool/schema.py CHANGED
@@ -22,6 +22,16 @@ S = TypeVar("S", bound="Schema")
 def py_type(spec: dict[str, Any]) -> Any:
     """Map JSON schema types to Python types."""
     t = spec.get("type")
+
+    # Normalize union types like ["string", "null"] to a single representative type.
+    # MCP schemas often mark optional fields this way; we keep the non-null type.
+    if isinstance(t, (list, tuple, set)):
+        types = list(t)
+        non_null = [tp for tp in types if tp != "null"]
+        if len(non_null) == 1:
+            t = non_null[0]
+        else:
+            return Any
     if t == "array":
         items = spec.get("items", {})
         inner = py_type(items) if isinstance(items, dict) else Any
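The normalization only collapses unions that are optional-by-null; anything more ambiguous falls back to Any. A standalone sketch of just that step (mirroring the hunk above rather than calling the SDK's py_type):

from typing import Any

def normalize_union(t: Any) -> Any:
    # Keep the single non-null member of a JSON-schema type union, else give up to Any.
    if isinstance(t, (list, tuple, set)):
        non_null = [tp for tp in t if tp != "null"]
        return non_null[0] if len(non_null) == 1 else Any
    return t

print(normalize_union(["string", "null"]))     # string
print(normalize_union(["string", "integer"]))  # typing.Any (ambiguous union)
print(normalize_union("boolean"))              # boolean (unchanged)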
openhands/sdk/tool/tool.py CHANGED
@@ -364,7 +364,7 @@ class ToolDefinition[ActionT, ObservationT](DiscriminatedUnionMixin, ABC):
         action_type: type[Schema] | None = None,
     ) -> dict[str, Any]:
         action_type = action_type or self.action_type
-        action_type_with_risk = _create_action_type_with_risk(action_type)
+        action_type_with_risk = create_action_type_with_risk(action_type)
 
         add_security_risk_prediction = add_security_risk_prediction and (
             self.annotations is None or (not self.annotations.readOnlyHint)
@@ -460,7 +460,7 @@ class ToolDefinition[ActionT, ObservationT](DiscriminatedUnionMixin, ABC):
         raise ValueError(error_msg)
 
 
-def _create_action_type_with_risk(action_type: type[Schema]) -> type[Schema]:
+def create_action_type_with_risk(action_type: type[Schema]) -> type[Schema]:
     action_type_with_risk = _action_types_with_risk.get(action_type)
     if action_type_with_risk:
         return action_type_with_risk