openhands-sdk 1.5.0__py3-none-any.whl → 1.7.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openhands/sdk/__init__.py +9 -1
- openhands/sdk/agent/agent.py +35 -12
- openhands/sdk/agent/base.py +53 -7
- openhands/sdk/agent/prompts/model_specific/anthropic_claude.j2 +3 -0
- openhands/sdk/agent/prompts/model_specific/google_gemini.j2 +1 -0
- openhands/sdk/agent/prompts/model_specific/openai_gpt/gpt-5-codex.j2 +2 -0
- openhands/sdk/agent/prompts/model_specific/openai_gpt/gpt-5.j2 +3 -0
- openhands/sdk/agent/prompts/self_documentation.j2 +15 -0
- openhands/sdk/agent/prompts/system_prompt.j2 +29 -1
- openhands/sdk/agent/utils.py +18 -4
- openhands/sdk/context/__init__.py +2 -0
- openhands/sdk/context/agent_context.py +42 -10
- openhands/sdk/context/condenser/base.py +11 -6
- openhands/sdk/context/condenser/llm_summarizing_condenser.py +169 -20
- openhands/sdk/context/condenser/no_op_condenser.py +2 -1
- openhands/sdk/context/condenser/pipeline_condenser.py +10 -9
- openhands/sdk/context/condenser/utils.py +149 -0
- openhands/sdk/context/prompts/prompt.py +40 -2
- openhands/sdk/context/prompts/templates/system_message_suffix.j2 +3 -3
- openhands/sdk/context/skills/__init__.py +2 -0
- openhands/sdk/context/skills/skill.py +152 -1
- openhands/sdk/context/view.py +287 -27
- openhands/sdk/conversation/base.py +17 -0
- openhands/sdk/conversation/conversation.py +19 -0
- openhands/sdk/conversation/exceptions.py +29 -4
- openhands/sdk/conversation/impl/local_conversation.py +126 -9
- openhands/sdk/conversation/impl/remote_conversation.py +152 -3
- openhands/sdk/conversation/state.py +42 -1
- openhands/sdk/conversation/stuck_detector.py +81 -45
- openhands/sdk/conversation/types.py +30 -0
- openhands/sdk/event/llm_convertible/system.py +16 -20
- openhands/sdk/hooks/__init__.py +30 -0
- openhands/sdk/hooks/config.py +180 -0
- openhands/sdk/hooks/conversation_hooks.py +227 -0
- openhands/sdk/hooks/executor.py +155 -0
- openhands/sdk/hooks/manager.py +170 -0
- openhands/sdk/hooks/types.py +40 -0
- openhands/sdk/io/cache.py +85 -0
- openhands/sdk/io/local.py +39 -2
- openhands/sdk/llm/llm.py +3 -2
- openhands/sdk/llm/message.py +4 -3
- openhands/sdk/llm/mixins/fn_call_converter.py +61 -16
- openhands/sdk/llm/mixins/non_native_fc.py +5 -1
- openhands/sdk/llm/utils/model_features.py +64 -24
- openhands/sdk/llm/utils/model_prompt_spec.py +98 -0
- openhands/sdk/llm/utils/verified_models.py +6 -4
- openhands/sdk/logger/logger.py +1 -1
- openhands/sdk/tool/schema.py +10 -0
- openhands/sdk/tool/tool.py +2 -2
- openhands/sdk/utils/async_executor.py +76 -67
- openhands/sdk/utils/models.py +1 -1
- openhands/sdk/utils/paging.py +63 -0
- {openhands_sdk-1.5.0.dist-info → openhands_sdk-1.7.2.dist-info}/METADATA +3 -3
- {openhands_sdk-1.5.0.dist-info → openhands_sdk-1.7.2.dist-info}/RECORD +56 -41
- {openhands_sdk-1.5.0.dist-info → openhands_sdk-1.7.2.dist-info}/WHEEL +0 -0
- {openhands_sdk-1.5.0.dist-info → openhands_sdk-1.7.2.dist-info}/top_level.txt +0 -0
openhands/sdk/io/local.py
CHANGED

@@ -1,6 +1,7 @@
 import os
 import shutil
 
+from openhands.sdk.io.cache import MemoryLRUCache
 from openhands.sdk.logger import get_logger
 from openhands.sdk.observability.laminar import observe
 
@@ -12,13 +13,31 @@ logger = get_logger(__name__)
 
 class LocalFileStore(FileStore):
     root: str
+    cache: MemoryLRUCache
 
-    def __init__(self, root: str) -> None:
+    def __init__(
+        self,
+        root: str,
+        cache_limit_size: int = 500,
+        cache_memory_size: int = 20 * 1024 * 1024,
+    ) -> None:
+        """Initialize a LocalFileStore with caching.
+
+        Args:
+            root: Root directory for file storage.
+            cache_limit_size: Maximum number of cached entries (default: 500).
+            cache_memory_size: Maximum cache memory in bytes (default: 20MB).
+
+        Note:
+            The cache assumes exclusive access to files. External modifications
+            to files will not be detected and may result in stale cache reads.
+        """
         if root.startswith("~"):
             root = os.path.expanduser(root)
         root = os.path.abspath(os.path.normpath(root))
         self.root = root
         os.makedirs(self.root, exist_ok=True)
+        self.cache = MemoryLRUCache(cache_memory_size, cache_limit_size)
 
     def get_full_path(self, path: str) -> str:
         # strip leading slash to keep relative under root
@@ -32,6 +51,7 @@ class LocalFileStore(FileStore):
         # ensure sandboxing
         if os.path.commonpath([self.root, full]) != self.root:
             raise ValueError(f"path escapes filestore root: {path}")
+
         return full
 
     @observe(name="LocalFileStore.write", span_type="TOOL")
@@ -41,14 +61,27 @@ class LocalFileStore(FileStore):
         if isinstance(contents, str):
             with open(full_path, "w", encoding="utf-8") as f:
                 f.write(contents)
+            self.cache[full_path] = contents
         else:
             with open(full_path, "wb") as f:
                 f.write(contents)
+            # Don't cache binary content - LocalFileStore is meant for JSON data
+            # If binary data is written and then read, it will error on read
 
     def read(self, path: str) -> str:
         full_path = self.get_full_path(path)
+
+        if full_path in self.cache:
+            return self.cache[full_path]
+
+        if not os.path.exists(full_path):
+            raise FileNotFoundError(path)
+
         with open(full_path, encoding="utf-8") as f:
-            return f.read()
+            result = f.read()
+
+        self.cache[full_path] = result
+        return result
 
     @observe(name="LocalFileStore.list", span_type="TOOL")
     def list(self, path: str) -> list[str]:
@@ -72,11 +105,15 @@ class LocalFileStore(FileStore):
             if not os.path.exists(full_path):
                 logger.debug(f"Local path does not exist: {full_path}")
                 return
+
             if os.path.isfile(full_path):
                 os.remove(full_path)
+                del self.cache[full_path]
                 logger.debug(f"Removed local file: {full_path}")
             elif os.path.isdir(full_path):
                 shutil.rmtree(full_path)
+                self.cache.clear()
                 logger.debug(f"Removed local directory: {full_path}")
+
         except Exception as e:
             logger.error(f"Error clearing local file store: {str(e)}")
openhands/sdk/llm/llm.py
CHANGED

@@ -247,10 +247,11 @@ class LLM(BaseModel, RetryMixin, NonNativeToolCallingMixin):
             "like HuggingFace and Groq."
         ),
     )
-    reasoning_effort: Literal["low", "medium", "high", "none"] | None = Field(
+    reasoning_effort: Literal["low", "medium", "high", "xhigh", "none"] | None = Field(
         default="high",
         description="The effort to put into reasoning. "
-        "This is a string that can be one of 'low', 'medium', 'high', or 'none'. "
+        "This is a string that can be one of 'low', 'medium', 'high', 'xhigh', "
+        "or 'none'. "
         "Can apply to all reasoning models.",
     )
     reasoning_summary: Literal["auto", "concise", "detailed"] | None = Field(
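
A minimal sketch of what the widened Literal accepts, mirrored in a standalone pydantic model rather than the SDK's full LLM class (which has other required fields not shown here):

    from typing import Literal

    from pydantic import BaseModel, Field

    class ReasoningConfig(BaseModel):
        # Mirrors the field in the diff above; "xhigh" now validates.
        reasoning_effort: Literal["low", "medium", "high", "xhigh", "none"] | None = (
            Field(default="high")
        )

    print(ReasoningConfig(reasoning_effort="xhigh").reasoning_effort)  # xhigh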
openhands/sdk/llm/message.py
CHANGED

@@ -120,8 +120,8 @@ class ThinkingBlock(BaseModel):
 
     type: Literal["thinking"] = "thinking"
     thinking: str = Field(..., description="The thinking content")
-    signature: str = Field(
-        ..., description="Cryptographic signature for the thinking block"
+    signature: str | None = Field(
+        default=None, description="Cryptographic signature for the thinking block"
     )
 
 
@@ -169,11 +169,12 @@ class TextContent(BaseContent):
     model_config: ClassVar[ConfigDict] = ConfigDict(
         extra="forbid", populate_by_name=True
     )
+    enable_truncation: bool = True
 
     def to_llm_dict(self) -> list[dict[str, str | dict[str, str]]]:
         """Convert to LLM API format."""
         text = self.text
-        if len(text) > DEFAULT_TEXT_CONTENT_LIMIT:
+        if self.enable_truncation and len(text) > DEFAULT_TEXT_CONTENT_LIMIT:
             logger.warning(
                 f"TextContent text length ({len(text)}) exceeds limit "
                 f"({DEFAULT_TEXT_CONTENT_LIMIT}), truncating"
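
Both changes loosen inputs: ThinkingBlock.signature becomes optional, and truncation can now be disabled per content block. A hedged sketch, assuming the import path from the diff and otherwise default constructor arguments:

    from openhands.sdk.llm.message import TextContent, ThinkingBlock

    block = ThinkingBlock(thinking="step 1...")  # no signature required anymore
    text = TextContent(text="x" * 10_000_000, enable_truncation=False)
    payload = text.to_llm_dict()  # emitted verbatim; the limit warning and truncation are skipped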
openhands/sdk/llm/mixins/fn_call_converter.py
CHANGED

@@ -450,7 +450,8 @@ PLEASE follow the format strictly! PLEASE EMIT ONE AND ONLY ONE FUNCTION CALL PER MESSAGE.
 """  # noqa: E501
 
 # Regex patterns for function call parsing
-FN_REGEX_PATTERN = r"<function=([^>]+)>\n(.*?)</function>"
+# Note: newline after function name is optional for compatibility with various models
+FN_REGEX_PATTERN = r"<function=([^>]+)>\n?(.*?)</function>"
 FN_PARAM_REGEX_PATTERN = r"<parameter=([^>]+)>(.*?)</parameter>"
 
 # Add new regex pattern for tool execution results
@@ -702,7 +703,7 @@ def convert_fncall_messages_to_non_fncall_messages(
     first_user_message_encountered = False
     for message in messages:
         role = message["role"]
-        content: Content = message["content"]
+        content: Content = message.get("content") or ""
 
         # 1. SYSTEM MESSAGES
         # append system prompt suffix to content
@@ -880,6 +881,9 @@ def _extract_and_validate_params(
     for param_match in param_matches:
         param_name = param_match.group(1)
         param_value = param_match.group(2)
+        # Normalize whitespace: some models add extra newlines around values
+        if isinstance(param_value, str):
+            param_value = param_value.strip()
 
         # Validate parameter is allowed
         if allowed_params and param_name not in allowed_params:
@@ -927,7 +931,11 @@ def _extract_and_validate_params(
         found_params.add(param_name)
 
     # Check all required parameters are present
-    missing_params = required_params - found_params
+    # Note: security_risk is excluded here because its validation happens later
+    # in Agent._extract_security_risk(), which has context about whether a security
+    # analyzer is configured. This allows weaker models to omit it when no analyzer
+    # is active, while still enforcing it for stronger models with LLMSecurityAnalyzer.
+    missing_params = required_params - found_params - {"security_risk"}
     if missing_params:
         raise FunctionCallValidationError(
             f"Missing required parameters for function '{fn_name}': {missing_params}"
@@ -935,12 +943,31 @@ def _extract_and_validate_params(
     return params
 
 
+def _preprocess_model_output(content: str) -> str:
+    """Clean up model-specific formatting before parsing function calls.
+
+    Removes wrapper tags that some models (like Nemotron) emit around function calls:
+    - </think> before the function call
+    - <tool_call>...</tool_call> around the function call
+
+    Only strips tags at boundaries, not inside parameter values.
+    """
+    # Strip </think> when it appears before <function= (Nemotron reasoning end)
+    content = re.sub(r"</think>\s*(?=<function=)", "", content)
+    # Strip <tool_call> when it appears right before <function=
+    content = re.sub(r"<tool_call>\s*(?=<function=)", "", content)
+    # Strip </tool_call> when it appears right after </function>
+    content = re.sub(r"(?<=</function>)\s*</tool_call>", "", content)
+    return content
+
+
 def _fix_stopword(content: str) -> str:
     """Fix the issue when some LLM would NOT return the stopword."""
+    content = _preprocess_model_output(content)
     if "<function=" in content and content.count("<function=") == 1:
         if content.endswith("</"):
             content = content.rstrip() + "function>"
-        else:
+        elif not content.rstrip().endswith("</function>"):
             content = content + "\n</function>"
     return content
 
@@ -981,8 +1008,8 @@ def convert_non_fncall_messages_to_fncall_messages(
 
     first_user_message_encountered = False
     for message in messages:
-        role, content = message["role"], message["content"]
-        content = content or ""
+        role = message["role"]
+        content = message.get("content") or ""
         # For system messages, remove the added suffix
        if role == "system":
            if isinstance(content, str):
@@ -1124,15 +1151,32 @@ def convert_non_fncall_messages_to_fncall_messages(
         if fn_match:
             fn_name = fn_match.group(1)
             fn_body = _normalize_parameter_tags(fn_match.group(2))
-            matching_tool = next(
-                (
-                    tool["function"]
-                    for tool in tools
-                    if tool["type"] == "function"
-                    and tool["function"]["name"] == fn_name
-                ),
-                None,
-            )
+
+            def _find_tool(
+                name: str,
+            ) -> ChatCompletionToolParamFunctionChunk | None:
+                return next(
+                    (
+                        tool["function"]
+                        for tool in tools
+                        if tool["type"] == "function"
+                        and tool["function"]["name"] == name
+                    ),
+                    None,
+                )
+
+            matching_tool = _find_tool(fn_name)
+            # Try aliases if tool not found (some models use legacy names)
+            if not matching_tool:
+                TOOL_NAME_ALIASES = {
+                    "str_replace_editor": "file_editor",
+                    "bash": "terminal",
+                    "execute_bash": "terminal",
+                    "str_replace": "file_editor",
+                }
+                if fn_name in TOOL_NAME_ALIASES:
+                    fn_name = TOOL_NAME_ALIASES[fn_name]
+                    matching_tool = _find_tool(fn_name)
             # Validate function exists in tools
             if not matching_tool:
                 available_tools = [
@@ -1203,7 +1247,8 @@ def convert_from_multiple_tool_calls_to_single_tool_call_messages(
     for message in messages:
         role: str
         content: Content
-        role, content = message["role"], message["content"]
+        role = message["role"]
+        content = message.get("content") or ""
         if role == "assistant":
             if message.get("tool_calls") and len(message["tool_calls"]) > 1:
                 # handle multiple tool calls by breaking them into multiple messages
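
The wrapper-tag cleanup is easy to verify standalone. The snippet below repeats the three substitutions from _preprocess_model_output and shows a Nemotron-style <tool_call> wrapper being stripped (the tool and parameter names are illustrative):

    import re

    def preprocess(content: str) -> str:
        # Same three substitutions as _preprocess_model_output above.
        content = re.sub(r"</think>\s*(?=<function=)", "", content)
        content = re.sub(r"<tool_call>\s*(?=<function=)", "", content)
        content = re.sub(r"(?<=</function>)\s*</tool_call>", "", content)
        return content

    raw = (
        "<tool_call><function=terminal>\n"
        "<parameter=command>ls</parameter>\n"
        "</function></tool_call>"
    )
    print(preprocess(raw))
    # <function=terminal>
    # <parameter=command>ls</parameter>
    # </function>

The substitutions anchor on the <function=...> and </function> boundaries via lookarounds, which is why, as the docstring says, the same tags inside parameter values are left alone.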
openhands/sdk/llm/mixins/non_native_fc.py
CHANGED

@@ -41,7 +41,11 @@ class NonNativeToolCallingMixin:
         kwargs: dict,
     ) -> tuple[list[dict], dict]:
         """Convert to non-fncall prompting when native tool-calling is off."""
-        add_iclex = not any(s in self.model for s in ("openhands-lm", "devstral"))
+        # Skip in-context learning examples for models that understand the format
+        # or have limited context windows
+        add_iclex = not any(
+            s in self.model for s in ("openhands-lm", "devstral", "nemotron")
+        )
         messages = convert_fncall_messages_to_non_fncall_messages(
             messages, tools, add_in_context_learning_example=add_iclex
         )
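
The gate is a plain substring check on the configured model id, so provider-prefixed ids still match:

    model = "openrouter/nvidia/nemotron-nano"  # hypothetical id for illustration
    add_iclex = not any(s in model for s in ("openhands-lm", "devstral", "nemotron"))
    print(add_iclex)  # False: the in-context learning example is omitted for this model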
openhands/sdk/llm/utils/model_features.py
CHANGED

@@ -15,6 +15,28 @@ def model_matches(model: str, patterns: list[str]) -> bool:
     return False
 
 
+def apply_ordered_model_rules(model: str, rules: list[str]) -> bool:
+    """Apply ordered include/exclude model rules to determine final support.
+
+    Rules semantics:
+    - Each entry is a substring token. '!' prefix marks an exclude rule.
+    - Case-insensitive substring matching against the raw model string.
+    - Evaluated in order; the last matching rule wins.
+    - If no rule matches, returns False.
+    """
+    raw = (model or "").strip().lower()
+    decided: bool | None = None
+    for rule in rules:
+        token = rule.strip().lower()
+        if not token:
+            continue
+        is_exclude = token.startswith("!")
+        core = token[1:] if is_exclude else token
+        if core and core in raw:
+            decided = not is_exclude
+    return bool(decided)
+
+
 @dataclass(frozen=True)
 class ModelFeatures:
     supports_reasoning_effort: bool
@@ -27,9 +49,9 @@ class ModelFeatures:
     supports_prompt_cache_retention: bool
 
 
-#
+# Model lists capturing current behavior. Keep entries lowercase.
 
-REASONING_EFFORT_PATTERNS: list[str] = [
+REASONING_EFFORT_MODELS: list[str] = [
     # Mirror main behavior exactly (no unintended expansion)
     "o1-2024-12-17",
     "o1",
@@ -45,9 +67,11 @@ REASONING_EFFORT_PATTERNS: list[str] = [
     "gpt-5",
     # Anthropic Opus 4.5
     "claude-opus-4-5",
+    # Nova 2 Lite
+    "nova-2-lite",
 ]
 
-EXTENDED_THINKING_PATTERNS: list[str] = [
+EXTENDED_THINKING_MODELS: list[str] = [
     # Anthropic model family
     # We did not include sonnet 3.7 and 4 here as they don't brings
     # significant performance improvements for agents
@@ -55,7 +79,7 @@ EXTENDED_THINKING_PATTERNS: list[str] = [
     "claude-haiku-4-5",
 ]
 
-PROMPT_CACHE_PATTERNS: list[str] = [
+PROMPT_CACHE_MODELS: list[str] = [
     "claude-3-7-sonnet",
     "claude-sonnet-3-7-latest",
     "claude-3-5-sonnet",
@@ -70,14 +94,27 @@ PROMPT_CACHE_PATTERNS: list[str] = [
 ]
 
 # Models that support a top-level prompt_cache_retention parameter
-
-PROMPT_CACHE_RETENTION_PATTERNS: list[str] = [
+# Source: OpenAI Prompt Caching docs (extended retention), which list:
+# - gpt-5.2
+# - gpt-5.1
+# - gpt-5.1-codex
+# - gpt-5.1-codex-mini
+# - gpt-5.1-chat-latest
+# - gpt-5
+# - gpt-5-codex
+# - gpt-4.1
+# Use ordered include/exclude rules (last wins) to naturally express exceptions.
+PROMPT_CACHE_RETENTION_MODELS: list[str] = [
+    # Broad allow for GPT-5 family and GPT-4.1 (covers gpt-5.2 and variants)
     "gpt-5",
-    # GPT-4.1 too
     "gpt-4.1",
+    # Exclude all mini variants by default
+    "!mini",
+    # Re-allow the explicitly documented supported mini variant
+    "gpt-5.1-codex-mini",
 ]
 
-SUPPORTS_STOP_WORDS_FALSE_PATTERNS: list[str] = [
+SUPPORTS_STOP_WORDS_FALSE_MODELS: list[str] = [
     # o-series families don't support stop words
     "o1",
     "o3",
@@ -89,7 +126,7 @@ SUPPORTS_STOP_WORDS_FALSE_PATTERNS: list[str] = [
 ]
 
 # Models that should use the OpenAI Responses API path by default
-RESPONSES_API_PATTERNS: list[str] = [
+RESPONSES_API_MODELS: list[str] = [
     # OpenAI GPT-5 family (includes mini variants)
     "gpt-5",
     # OpenAI Codex (uses Responses API)
@@ -101,41 +138,44 @@ RESPONSES_API_PATTERNS: list[str] = [
 # and need plain strings instead
 # NOTE: model_matches uses case-insensitive substring matching, not globbing.
 # Keep these entries as bare substrings without wildcards.
-FORCE_STRING_SERIALIZER_PATTERNS: list[str] = [
+FORCE_STRING_SERIALIZER_MODELS: list[str] = [
     "deepseek",  # e.g., DeepSeek-V3.2-Exp
     "glm",  # e.g., GLM-4.5 / GLM-4.6
     # Kimi K2-Instruct requires string serialization only on Groq
     "groq/kimi-k2-instruct",  # explicit provider-prefixed IDs
+    # MiniMax-M2 via OpenRouter rejects array content with
+    # "Input should be a valid string" for ChatCompletionToolMessage.content
+    "openrouter/minimax",
 ]
 
 # Models that we should send full reasoning content
 # in the message input
-SEND_REASONING_CONTENT_PATTERNS: list[str] = [
+SEND_REASONING_CONTENT_MODELS: list[str] = [
     "kimi-k2-thinking",
+    "deepseek/deepseek-reasoner",
 ]
 
 
 def get_features(model: str) -> ModelFeatures:
     """Get model features."""
     return ModelFeatures(
-        supports_reasoning_effort=model_matches(model, REASONING_EFFORT_PATTERNS),
-        supports_extended_thinking=model_matches(model, EXTENDED_THINKING_PATTERNS),
-        supports_prompt_cache=model_matches(model, PROMPT_CACHE_PATTERNS),
-        supports_stop_words=not model_matches(
-            model, SUPPORTS_STOP_WORDS_FALSE_PATTERNS
-        ),
-        supports_responses_api=model_matches(model, RESPONSES_API_PATTERNS),
-        force_string_serializer=model_matches(model, FORCE_STRING_SERIALIZER_PATTERNS),
-        send_reasoning_content=model_matches(model, SEND_REASONING_CONTENT_PATTERNS),
-        supports_prompt_cache_retention=model_matches(
-            model, PROMPT_CACHE_RETENTION_PATTERNS
+        supports_reasoning_effort=model_matches(model, REASONING_EFFORT_MODELS),
+        supports_extended_thinking=model_matches(model, EXTENDED_THINKING_MODELS),
+        supports_prompt_cache=model_matches(model, PROMPT_CACHE_MODELS),
+        supports_stop_words=not model_matches(model, SUPPORTS_STOP_WORDS_FALSE_MODELS),
+        supports_responses_api=model_matches(model, RESPONSES_API_MODELS),
+        force_string_serializer=model_matches(model, FORCE_STRING_SERIALIZER_MODELS),
+        send_reasoning_content=model_matches(model, SEND_REASONING_CONTENT_MODELS),
+        # Extended prompt_cache_retention support follows ordered include/exclude rules.
+        supports_prompt_cache_retention=apply_ordered_model_rules(
+            model, PROMPT_CACHE_RETENTION_MODELS
         ),
     )
 
 
 # Default temperature mapping.
 # Each entry: (pattern, default_temperature)
-DEFAULT_TEMPERATURE_PATTERNS: list[tuple[str, float]] = [
+DEFAULT_TEMPERATURE_MODELS: list[tuple[str, float]] = [
     ("kimi-k2-thinking", 1.0),
 ]
 
@@ -145,7 +185,7 @@ def get_default_temperature(model: str) -> float:
 
     Uses case-insensitive substring matching via model_matches.
     """
-    for pattern, value in DEFAULT_TEMPERATURE_PATTERNS:
+    for pattern, value in DEFAULT_TEMPERATURE_MODELS:
         if model_matches(model, [pattern]):
             return value
     return 0.0
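
Because the last matching rule wins, the mini exception reads naturally: exclude every "mini", then re-allow the one documented variant. The sketch below copies apply_ordered_model_rules and the retention rules list from the diff:

    def apply_ordered_model_rules(model: str, rules: list[str]) -> bool:
        raw = (model or "").strip().lower()
        decided: bool | None = None
        for rule in rules:
            token = rule.strip().lower()
            if not token:
                continue
            is_exclude = token.startswith("!")
            core = token[1:] if is_exclude else token
            if core and core in raw:
                decided = not is_exclude
        return bool(decided)

    rules = ["gpt-5", "gpt-4.1", "!mini", "gpt-5.1-codex-mini"]
    print(apply_ordered_model_rules("gpt-5.2", rules))             # True: "gpt-5" matches
    print(apply_ordered_model_rules("gpt-5-mini", rules))          # False: "!mini" matches last
    print(apply_ordered_model_rules("gpt-5.1-codex-mini", rules))  # True: re-allowed by the final rule
    print(apply_ordered_model_rules("claude-opus-4-5", rules))     # False: no rule matches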
openhands/sdk/llm/utils/model_prompt_spec.py
ADDED

@@ -0,0 +1,98 @@
+"""Utilities for detecting model families and variants.
+
+These helpers allow prompts and other systems to tailor behavior for specific
+LLM providers while keeping naming heuristics centralized.
+"""
+
+from __future__ import annotations
+
+from pydantic import BaseModel, ConfigDict
+
+
+class ModelPromptSpec(BaseModel):
+    """Detected prompt metadata for a given model configuration."""
+
+    model_config = ConfigDict(frozen=True)
+
+    family: str | None = None
+    variant: str | None = None
+
+
+_MODEL_FAMILY_PATTERNS: dict[str, tuple[str, ...]] = {
+    "openai_gpt": (
+        "gpt-",
+        "o1",
+        "o3",
+        "o4",
+    ),
+    "anthropic_claude": ("claude",),
+    "google_gemini": ("gemini",),
+    "meta_llama": ("llama",),
+    "mistral": ("mistral",),
+    "deepseek": ("deepseek",),
+    "alibaba_qwen": ("qwen",),
+}
+
+# Ordered heuristics to pick the most specific variant available for a family.
+_MODEL_VARIANT_PATTERNS: dict[str, tuple[tuple[str, tuple[str, ...]], ...]] = {
+    "openai_gpt": (
+        ("gpt-5-codex", ("gpt-5-codex", "gpt-5.1-codex")),
+        ("gpt-5", ("gpt-5", "gpt-5.1")),
+    ),
+}
+
+
+def _normalize(name: str | None) -> str:
+    return (name or "").strip().lower()
+
+
+def _match_family(model_name: str) -> str | None:
+    normalized = _normalize(model_name)
+    if not normalized:
+        return None
+
+    for family, patterns in _MODEL_FAMILY_PATTERNS.items():
+        if any(pattern in normalized for pattern in patterns):
+            return family
+    return None
+
+
+def _match_variant(
+    family: str,
+    model_name: str,
+    canonical_name: str | None = None,
+) -> str | None:
+    patterns = _MODEL_VARIANT_PATTERNS.get(family)
+    if not patterns:
+        return None
+
+    # Choose canonical_name if available, otherwise fall back to model_name
+    candidate = _normalize(canonical_name) or _normalize(model_name)
+    if not candidate:
+        return None
+
+    for variant, substrings in patterns:
+        if any(sub in candidate for sub in substrings):
+            return variant
+
+    return None
+
+
+def get_model_prompt_spec(
+    model_name: str,
+    canonical_name: str | None = None,
+) -> ModelPromptSpec:
+    """Return family and variant prompt metadata for the given identifiers."""
+
+    family = _match_family(model_name)
+    if family is None and canonical_name:
+        family = _match_family(canonical_name)
+
+    variant = None
+    if family is not None:
+        variant = _match_variant(family, model_name, canonical_name)
+
+    return ModelPromptSpec(family=family, variant=variant)
+
+
+__all__ = ["ModelPromptSpec", "get_model_prompt_spec"]
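
Detection is substring-based on either the raw or canonical name, so provider prefixes are tolerated. Expected behavior given the tables above (model ids are illustrative):

    from openhands.sdk.llm.utils.model_prompt_spec import get_model_prompt_spec

    spec = get_model_prompt_spec("litellm_proxy/gpt-5.1-codex-mini")
    print(spec.family, spec.variant)  # openai_gpt gpt-5-codex

    spec = get_model_prompt_spec("claude-sonnet-4-5-20250929")
    print(spec.family, spec.variant)  # anthropic_claude None (no variant table for Claude)

The variant table is ordered most-specific-first, which is why "gpt-5.1-codex-mini" resolves to the gpt-5-codex prompt variant rather than plain gpt-5.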
openhands/sdk/llm/utils/verified_models.py
CHANGED

@@ -1,11 +1,12 @@
 VERIFIED_OPENAI_MODELS = [
-    "gpt-5-codex",
-    "gpt-5-2025-08-07",
-    "gpt-5-mini-2025-08-07",
+    "gpt-5.2",
     "gpt-5.1",
-    "gpt-5.1-codex",
     "gpt-5.1-codex-max",
+    "gpt-5.1-codex",
     "gpt-5.1-codex-mini",
+    "gpt-5-codex",
+    "gpt-5-2025-08-07",
+    "gpt-5-mini-2025-08-07",
     "o4-mini",
     "gpt-4o",
     "gpt-4o-mini",
@@ -44,6 +45,7 @@ VERIFIED_MISTRAL_MODELS = [
 VERIFIED_OPENHANDS_MODELS = [
     "claude-opus-4-5-20251101",
     "claude-sonnet-4-5-20250929",
+    "gpt-5.2",
     "gpt-5.1-codex-max",
     "gpt-5.1-codex",
     "gpt-5.1",
openhands/sdk/logger/logger.py
CHANGED

@@ -83,7 +83,7 @@ def disable_logger(name: str, level: int = logging.CRITICAL) -> None:
 
 # Quiet chatty third-party loggers
 for name in ["litellm", "LiteLLM", "openai"]:
-    disable_logger(name, logging.DEBUG if _ENABLE_LITELLM_DEBUG else logging.
+    disable_logger(name, logging.DEBUG if _ENABLE_LITELLM_DEBUG else logging.ERROR)
 for name in ["httpcore", "httpx", "libtmux"]:
     disable_logger(name, logging.WARNING)
openhands/sdk/tool/schema.py
CHANGED

@@ -22,6 +22,16 @@ S = TypeVar("S", bound="Schema")
 def py_type(spec: dict[str, Any]) -> Any:
     """Map JSON schema types to Python types."""
     t = spec.get("type")
+
+    # Normalize union types like ["string", "null"] to a single representative type.
+    # MCP schemas often mark optional fields this way; we keep the non-null type.
+    if isinstance(t, (list, tuple, set)):
+        types = list(t)
+        non_null = [tp for tp in types if tp != "null"]
+        if len(non_null) == 1:
+            t = non_null[0]
+        else:
+            return Any
     if t == "array":
         items = spec.get("items", {})
         inner = py_type(items) if isinstance(items, dict) else Any
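
With the normalization in place, MCP-style optional fields resolve to their non-null type, while unions with more than one non-null member fall back to Any. Expected results, assuming py_type's usual scalar mapping (e.g. "string" to str) for the unchanged branches:

    from typing import Any

    from openhands.sdk.tool.schema import py_type

    print(py_type({"type": ["string", "null"]}))  # <class 'str'>: the "null" member is dropped
    print(py_type({"type": ["string", "integer"]}) is Any)  # True: ambiguous union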
openhands/sdk/tool/tool.py
CHANGED

@@ -364,7 +364,7 @@ class ToolDefinition[ActionT, ObservationT](DiscriminatedUnionMixin, ABC):
         action_type: type[Schema] | None = None,
     ) -> dict[str, Any]:
         action_type = action_type or self.action_type
-        action_type_with_risk = _create_action_type_with_risk(action_type)
+        action_type_with_risk = create_action_type_with_risk(action_type)
 
         add_security_risk_prediction = add_security_risk_prediction and (
             self.annotations is None or (not self.annotations.readOnlyHint)
@@ -460,7 +460,7 @@ class ToolDefinition[ActionT, ObservationT](DiscriminatedUnionMixin, ABC):
         raise ValueError(error_msg)
 
 
-def _create_action_type_with_risk(action_type: type[Schema]) -> type[Schema]:
+def create_action_type_with_risk(action_type: type[Schema]) -> type[Schema]:
     action_type_with_risk = _action_types_with_risk.get(action_type)
     if action_type_with_risk:
         return action_type_with_risk