openhands_sdk-1.7.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (180)
  1. openhands/sdk/__init__.py +111 -0
  2. openhands/sdk/agent/__init__.py +8 -0
  3. openhands/sdk/agent/agent.py +650 -0
  4. openhands/sdk/agent/base.py +457 -0
  5. openhands/sdk/agent/prompts/in_context_learning_example.j2 +169 -0
  6. openhands/sdk/agent/prompts/in_context_learning_example_suffix.j2 +3 -0
  7. openhands/sdk/agent/prompts/model_specific/anthropic_claude.j2 +3 -0
  8. openhands/sdk/agent/prompts/model_specific/google_gemini.j2 +1 -0
  9. openhands/sdk/agent/prompts/model_specific/openai_gpt/gpt-5-codex.j2 +2 -0
  10. openhands/sdk/agent/prompts/model_specific/openai_gpt/gpt-5.j2 +3 -0
  11. openhands/sdk/agent/prompts/security_policy.j2 +22 -0
  12. openhands/sdk/agent/prompts/security_risk_assessment.j2 +21 -0
  13. openhands/sdk/agent/prompts/self_documentation.j2 +15 -0
  14. openhands/sdk/agent/prompts/system_prompt.j2 +132 -0
  15. openhands/sdk/agent/prompts/system_prompt_interactive.j2 +14 -0
  16. openhands/sdk/agent/prompts/system_prompt_long_horizon.j2 +40 -0
  17. openhands/sdk/agent/prompts/system_prompt_planning.j2 +40 -0
  18. openhands/sdk/agent/prompts/system_prompt_tech_philosophy.j2 +122 -0
  19. openhands/sdk/agent/utils.py +228 -0
  20. openhands/sdk/context/__init__.py +28 -0
  21. openhands/sdk/context/agent_context.py +264 -0
  22. openhands/sdk/context/condenser/__init__.py +18 -0
  23. openhands/sdk/context/condenser/base.py +100 -0
  24. openhands/sdk/context/condenser/llm_summarizing_condenser.py +248 -0
  25. openhands/sdk/context/condenser/no_op_condenser.py +14 -0
  26. openhands/sdk/context/condenser/pipeline_condenser.py +56 -0
  27. openhands/sdk/context/condenser/prompts/summarizing_prompt.j2 +59 -0
  28. openhands/sdk/context/condenser/utils.py +149 -0
  29. openhands/sdk/context/prompts/__init__.py +6 -0
  30. openhands/sdk/context/prompts/prompt.py +114 -0
  31. openhands/sdk/context/prompts/templates/ask_agent_template.j2 +11 -0
  32. openhands/sdk/context/prompts/templates/skill_knowledge_info.j2 +8 -0
  33. openhands/sdk/context/prompts/templates/system_message_suffix.j2 +32 -0
  34. openhands/sdk/context/skills/__init__.py +28 -0
  35. openhands/sdk/context/skills/exceptions.py +11 -0
  36. openhands/sdk/context/skills/skill.py +720 -0
  37. openhands/sdk/context/skills/trigger.py +36 -0
  38. openhands/sdk/context/skills/types.py +48 -0
  39. openhands/sdk/context/view.py +503 -0
  40. openhands/sdk/conversation/__init__.py +40 -0
  41. openhands/sdk/conversation/base.py +281 -0
  42. openhands/sdk/conversation/conversation.py +152 -0
  43. openhands/sdk/conversation/conversation_stats.py +85 -0
  44. openhands/sdk/conversation/event_store.py +157 -0
  45. openhands/sdk/conversation/events_list_base.py +17 -0
  46. openhands/sdk/conversation/exceptions.py +50 -0
  47. openhands/sdk/conversation/fifo_lock.py +133 -0
  48. openhands/sdk/conversation/impl/__init__.py +5 -0
  49. openhands/sdk/conversation/impl/local_conversation.py +665 -0
  50. openhands/sdk/conversation/impl/remote_conversation.py +956 -0
  51. openhands/sdk/conversation/persistence_const.py +9 -0
  52. openhands/sdk/conversation/response_utils.py +41 -0
  53. openhands/sdk/conversation/secret_registry.py +126 -0
  54. openhands/sdk/conversation/serialization_diff.py +0 -0
  55. openhands/sdk/conversation/state.py +392 -0
  56. openhands/sdk/conversation/stuck_detector.py +311 -0
  57. openhands/sdk/conversation/title_utils.py +191 -0
  58. openhands/sdk/conversation/types.py +45 -0
  59. openhands/sdk/conversation/visualizer/__init__.py +12 -0
  60. openhands/sdk/conversation/visualizer/base.py +67 -0
  61. openhands/sdk/conversation/visualizer/default.py +373 -0
  62. openhands/sdk/critic/__init__.py +15 -0
  63. openhands/sdk/critic/base.py +38 -0
  64. openhands/sdk/critic/impl/__init__.py +12 -0
  65. openhands/sdk/critic/impl/agent_finished.py +83 -0
  66. openhands/sdk/critic/impl/empty_patch.py +49 -0
  67. openhands/sdk/critic/impl/pass_critic.py +42 -0
  68. openhands/sdk/event/__init__.py +42 -0
  69. openhands/sdk/event/base.py +149 -0
  70. openhands/sdk/event/condenser.py +82 -0
  71. openhands/sdk/event/conversation_error.py +25 -0
  72. openhands/sdk/event/conversation_state.py +104 -0
  73. openhands/sdk/event/llm_completion_log.py +39 -0
  74. openhands/sdk/event/llm_convertible/__init__.py +20 -0
  75. openhands/sdk/event/llm_convertible/action.py +139 -0
  76. openhands/sdk/event/llm_convertible/message.py +142 -0
  77. openhands/sdk/event/llm_convertible/observation.py +141 -0
  78. openhands/sdk/event/llm_convertible/system.py +61 -0
  79. openhands/sdk/event/token.py +16 -0
  80. openhands/sdk/event/types.py +11 -0
  81. openhands/sdk/event/user_action.py +21 -0
  82. openhands/sdk/git/exceptions.py +43 -0
  83. openhands/sdk/git/git_changes.py +249 -0
  84. openhands/sdk/git/git_diff.py +129 -0
  85. openhands/sdk/git/models.py +21 -0
  86. openhands/sdk/git/utils.py +189 -0
  87. openhands/sdk/hooks/__init__.py +30 -0
  88. openhands/sdk/hooks/config.py +180 -0
  89. openhands/sdk/hooks/conversation_hooks.py +227 -0
  90. openhands/sdk/hooks/executor.py +155 -0
  91. openhands/sdk/hooks/manager.py +170 -0
  92. openhands/sdk/hooks/types.py +40 -0
  93. openhands/sdk/io/__init__.py +6 -0
  94. openhands/sdk/io/base.py +48 -0
  95. openhands/sdk/io/cache.py +85 -0
  96. openhands/sdk/io/local.py +119 -0
  97. openhands/sdk/io/memory.py +54 -0
  98. openhands/sdk/llm/__init__.py +45 -0
  99. openhands/sdk/llm/exceptions/__init__.py +45 -0
  100. openhands/sdk/llm/exceptions/classifier.py +50 -0
  101. openhands/sdk/llm/exceptions/mapping.py +54 -0
  102. openhands/sdk/llm/exceptions/types.py +101 -0
  103. openhands/sdk/llm/llm.py +1140 -0
  104. openhands/sdk/llm/llm_registry.py +122 -0
  105. openhands/sdk/llm/llm_response.py +59 -0
  106. openhands/sdk/llm/message.py +656 -0
  107. openhands/sdk/llm/mixins/fn_call_converter.py +1288 -0
  108. openhands/sdk/llm/mixins/non_native_fc.py +97 -0
  109. openhands/sdk/llm/options/__init__.py +1 -0
  110. openhands/sdk/llm/options/chat_options.py +93 -0
  111. openhands/sdk/llm/options/common.py +19 -0
  112. openhands/sdk/llm/options/responses_options.py +67 -0
  113. openhands/sdk/llm/router/__init__.py +10 -0
  114. openhands/sdk/llm/router/base.py +117 -0
  115. openhands/sdk/llm/router/impl/multimodal.py +76 -0
  116. openhands/sdk/llm/router/impl/random.py +22 -0
  117. openhands/sdk/llm/streaming.py +9 -0
  118. openhands/sdk/llm/utils/metrics.py +312 -0
  119. openhands/sdk/llm/utils/model_features.py +192 -0
  120. openhands/sdk/llm/utils/model_info.py +90 -0
  121. openhands/sdk/llm/utils/model_prompt_spec.py +98 -0
  122. openhands/sdk/llm/utils/retry_mixin.py +128 -0
  123. openhands/sdk/llm/utils/telemetry.py +362 -0
  124. openhands/sdk/llm/utils/unverified_models.py +156 -0
  125. openhands/sdk/llm/utils/verified_models.py +65 -0
  126. openhands/sdk/logger/__init__.py +22 -0
  127. openhands/sdk/logger/logger.py +195 -0
  128. openhands/sdk/logger/rolling.py +113 -0
  129. openhands/sdk/mcp/__init__.py +24 -0
  130. openhands/sdk/mcp/client.py +76 -0
  131. openhands/sdk/mcp/definition.py +106 -0
  132. openhands/sdk/mcp/exceptions.py +19 -0
  133. openhands/sdk/mcp/tool.py +270 -0
  134. openhands/sdk/mcp/utils.py +83 -0
  135. openhands/sdk/observability/__init__.py +4 -0
  136. openhands/sdk/observability/laminar.py +166 -0
  137. openhands/sdk/observability/utils.py +20 -0
  138. openhands/sdk/py.typed +0 -0
  139. openhands/sdk/secret/__init__.py +19 -0
  140. openhands/sdk/secret/secrets.py +92 -0
  141. openhands/sdk/security/__init__.py +6 -0
  142. openhands/sdk/security/analyzer.py +111 -0
  143. openhands/sdk/security/confirmation_policy.py +61 -0
  144. openhands/sdk/security/llm_analyzer.py +29 -0
  145. openhands/sdk/security/risk.py +100 -0
  146. openhands/sdk/tool/__init__.py +34 -0
  147. openhands/sdk/tool/builtins/__init__.py +34 -0
  148. openhands/sdk/tool/builtins/finish.py +106 -0
  149. openhands/sdk/tool/builtins/think.py +117 -0
  150. openhands/sdk/tool/registry.py +184 -0
  151. openhands/sdk/tool/schema.py +286 -0
  152. openhands/sdk/tool/spec.py +39 -0
  153. openhands/sdk/tool/tool.py +481 -0
  154. openhands/sdk/utils/__init__.py +22 -0
  155. openhands/sdk/utils/async_executor.py +115 -0
  156. openhands/sdk/utils/async_utils.py +39 -0
  157. openhands/sdk/utils/cipher.py +68 -0
  158. openhands/sdk/utils/command.py +90 -0
  159. openhands/sdk/utils/deprecation.py +166 -0
  160. openhands/sdk/utils/github.py +44 -0
  161. openhands/sdk/utils/json.py +48 -0
  162. openhands/sdk/utils/models.py +570 -0
  163. openhands/sdk/utils/paging.py +63 -0
  164. openhands/sdk/utils/pydantic_diff.py +85 -0
  165. openhands/sdk/utils/pydantic_secrets.py +64 -0
  166. openhands/sdk/utils/truncate.py +117 -0
  167. openhands/sdk/utils/visualize.py +58 -0
  168. openhands/sdk/workspace/__init__.py +17 -0
  169. openhands/sdk/workspace/base.py +158 -0
  170. openhands/sdk/workspace/local.py +189 -0
  171. openhands/sdk/workspace/models.py +35 -0
  172. openhands/sdk/workspace/remote/__init__.py +8 -0
  173. openhands/sdk/workspace/remote/async_remote_workspace.py +149 -0
  174. openhands/sdk/workspace/remote/base.py +164 -0
  175. openhands/sdk/workspace/remote/remote_workspace_mixin.py +323 -0
  176. openhands/sdk/workspace/workspace.py +49 -0
  177. openhands_sdk-1.7.3.dist-info/METADATA +17 -0
  178. openhands_sdk-1.7.3.dist-info/RECORD +180 -0
  179. openhands_sdk-1.7.3.dist-info/WHEEL +5 -0
  180. openhands_sdk-1.7.3.dist-info/top_level.txt +1 -0
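The single hunk that follows appears to be openhands/sdk/llm/llm.py, the only file in the list that adds 1,140 lines. As orientation before reading it, here is a minimal usage sketch assembled from the docstrings inside that file (the LLM class docstring and the completion/metrics examples); the model name, API key, and message text are placeholders, and the snippet is illustrative rather than something exercised against this exact release.

from pydantic import SecretStr

from openhands.sdk import LLM
from openhands.sdk.llm import Message, TextContent

# Configure the model client; usage_id is used for registry lookups,
# telemetry, and spend tracking (see the usage_id field description).
llm = LLM(
    model="claude-sonnet-4-20250514",   # placeholder model name
    api_key=SecretStr("your-api-key"),  # placeholder credential
    usage_id="my-agent",
)

# Completion API path: Message objects in, LLMResponse out.
messages = [Message(role="user", content=[TextContent(text="Hello")])]
response = llm.completion(messages)
print(response.message)

# Accumulated usage metrics are tracked on the instance.
print(f"Total cost: ${llm.metrics.accumulated_cost}")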
@@ -0,0 +1,1140 @@
1
+ from __future__ import annotations
2
+
3
+ import copy
4
+ import json
5
+ import os
6
+ import warnings
7
+ from collections.abc import Callable, Sequence
8
+ from contextlib import contextmanager
9
+ from typing import TYPE_CHECKING, Any, ClassVar, Literal, get_args, get_origin
10
+
11
+ import httpx # noqa: F401
12
+ from pydantic import (
13
+ BaseModel,
14
+ ConfigDict,
15
+ Field,
16
+ PrivateAttr,
17
+ SecretStr,
18
+ field_serializer,
19
+ field_validator,
20
+ model_validator,
21
+ )
22
+ from pydantic.json_schema import SkipJsonSchema
23
+
24
+ from openhands.sdk.llm.utils.model_info import get_litellm_model_info
25
+ from openhands.sdk.utils.pydantic_secrets import serialize_secret, validate_secret
26
+
27
+
28
+ if TYPE_CHECKING: # type hints only, avoid runtime import cycle
29
+ from openhands.sdk.tool.tool import ToolDefinition
30
+
31
+ from openhands.sdk.utils.pydantic_diff import pretty_pydantic_diff
32
+
33
+
34
+ with warnings.catch_warnings():
35
+ warnings.simplefilter("ignore")
36
+ import litellm
37
+
38
+ from typing import cast
39
+
40
+ from litellm import (
41
+ ChatCompletionToolParam,
42
+ CustomStreamWrapper,
43
+ ResponseInputParam,
44
+ completion as litellm_completion,
45
+ )
46
+ from litellm.exceptions import (
47
+ APIConnectionError,
48
+ InternalServerError,
49
+ RateLimitError,
50
+ ServiceUnavailableError,
51
+ Timeout as LiteLLMTimeout,
52
+ )
53
+ from litellm.responses.main import responses as litellm_responses
54
+ from litellm.types.llms.openai import ResponsesAPIResponse
55
+ from litellm.types.utils import ModelResponse
56
+ from litellm.utils import (
57
+ create_pretrained_tokenizer,
58
+ supports_vision,
59
+ token_counter,
60
+ )
61
+
62
+ from openhands.sdk.llm.exceptions import (
63
+ LLMNoResponseError,
64
+ map_provider_exception,
65
+ )
66
+
67
+ # OpenHands utilities
68
+ from openhands.sdk.llm.llm_response import LLMResponse
69
+ from openhands.sdk.llm.message import (
70
+ Message,
71
+ )
72
+ from openhands.sdk.llm.mixins.non_native_fc import NonNativeToolCallingMixin
73
+ from openhands.sdk.llm.options.chat_options import select_chat_options
74
+ from openhands.sdk.llm.options.responses_options import select_responses_options
75
+ from openhands.sdk.llm.streaming import (
76
+ TokenCallbackType,
77
+ )
78
+ from openhands.sdk.llm.utils.metrics import Metrics, MetricsSnapshot
79
+ from openhands.sdk.llm.utils.model_features import get_default_temperature, get_features
80
+ from openhands.sdk.llm.utils.retry_mixin import RetryMixin
81
+ from openhands.sdk.llm.utils.telemetry import Telemetry
82
+ from openhands.sdk.logger import ENV_LOG_DIR, get_logger
83
+
84
+
85
+ logger = get_logger(__name__)
86
+
87
+ __all__ = ["LLM"]
88
+
89
+
90
+ # Exceptions we retry on
91
+ LLM_RETRY_EXCEPTIONS: tuple[type[Exception], ...] = (
92
+ APIConnectionError,
93
+ RateLimitError,
94
+ ServiceUnavailableError,
95
+ LiteLLMTimeout,
96
+ InternalServerError,
97
+ LLMNoResponseError,
98
+ )
99
+
100
+
101
+ class LLM(BaseModel, RetryMixin, NonNativeToolCallingMixin):
102
+ """Language model interface for OpenHands agents.
103
+
104
+ The LLM class provides a unified interface for interacting with various
105
+ language models through the litellm library. It handles model configuration,
106
+ API authentication,
107
+ retry logic, and tool calling capabilities.
108
+
109
+ Example:
110
+ >>> from openhands.sdk import LLM
111
+ >>> from pydantic import SecretStr
112
+ >>> llm = LLM(
113
+ ... model="claude-sonnet-4-20250514",
114
+ ... api_key=SecretStr("your-api-key"),
115
+ ... usage_id="my-agent"
116
+ ... )
117
+ >>> # Use with agent or conversation
118
+ """
119
+
120
+ # =========================================================================
121
+ # Config fields
122
+ # =========================================================================
123
+ model: str = Field(default="claude-sonnet-4-20250514", description="Model name.")
124
+ api_key: str | SecretStr | None = Field(default=None, description="API key.")
125
+ base_url: str | None = Field(default=None, description="Custom base URL.")
126
+ api_version: str | None = Field(
127
+ default=None, description="API version (e.g., Azure)."
128
+ )
129
+
130
+ aws_access_key_id: str | SecretStr | None = Field(default=None)
131
+ aws_secret_access_key: str | SecretStr | None = Field(default=None)
132
+ aws_region_name: str | None = Field(default=None)
133
+
134
+ openrouter_site_url: str = Field(default="https://docs.all-hands.dev/")
135
+ openrouter_app_name: str = Field(default="OpenHands")
136
+
137
+ num_retries: int = Field(default=5, ge=0)
138
+ retry_multiplier: float = Field(default=8.0, ge=0)
139
+ retry_min_wait: int = Field(default=8, ge=0)
140
+ retry_max_wait: int = Field(default=64, ge=0)
141
+
142
+ timeout: int | None = Field(default=None, ge=0, description="HTTP timeout (s).")
143
+
144
+ max_message_chars: int = Field(
145
+ default=30_000,
146
+ ge=1,
147
+ description="Approx max chars in each event/content sent to the LLM.",
148
+ )
149
+
150
+ temperature: float | None = Field(
151
+ default=None,
152
+ ge=0,
153
+ description=(
154
+ "Sampling temperature for response generation. "
155
+ "Defaults to 0 for most models and provider default for reasoning models."
156
+ ),
157
+ )
158
+ top_p: float | None = Field(default=1.0, ge=0, le=1)
159
+ top_k: float | None = Field(default=None, ge=0)
160
+
161
+ custom_llm_provider: str | None = Field(default=None)
162
+ max_input_tokens: int | None = Field(
163
+ default=None,
164
+ ge=1,
165
+ description="The maximum number of input tokens. "
166
+ "Note that this is currently unused, and the value at runtime is actually"
167
+ " the total tokens in OpenAI (e.g. 128,000 tokens for GPT-4).",
168
+ )
169
+ max_output_tokens: int | None = Field(
170
+ default=None,
171
+ ge=1,
172
+ description="The maximum number of output tokens. This is sent to the LLM.",
173
+ )
174
+ model_canonical_name: str | None = Field(
175
+ default=None,
176
+ description=(
177
+ "Optional canonical model name for feature registry lookups. "
178
+ "The OpenHands SDK maintains a model feature registry that "
179
+ "maps model names to capabilities (e.g., vision support, "
180
+ "prompt caching, responses API support). When using proxied or "
181
+ "aliased model identifiers, set this field to the canonical "
182
+ "model name (e.g., 'openai/gpt-4o') to ensure correct "
183
+ "capability detection. If not provided, the 'model' field "
184
+ "will be used for capability lookups."
185
+ ),
186
+ )
187
+ extra_headers: dict[str, str] | None = Field(
188
+ default=None,
189
+ description="Optional HTTP headers to forward to LiteLLM requests.",
190
+ )
191
+ input_cost_per_token: float | None = Field(
192
+ default=None,
193
+ ge=0,
194
+ description="The cost per input token. This will available in logs for user.",
195
+ )
196
+ output_cost_per_token: float | None = Field(
197
+ default=None,
198
+ ge=0,
199
+ description="The cost per output token. This will available in logs for user.",
200
+ )
201
+ ollama_base_url: str | None = Field(default=None)
202
+
203
+ stream: bool = Field(
204
+ default=False,
205
+ description=(
206
+ "Enable streaming responses from the LLM. "
207
+ "When enabled, the provided `on_token` callback in .completions "
208
+ "and .responses will be invoked for each chunk of tokens."
209
+ ),
210
+ )
211
+ drop_params: bool = Field(default=True)
212
+ modify_params: bool = Field(
213
+ default=True,
214
+ description="Modify params allows litellm to do transformations like adding"
215
+ " a default message, when a message is empty.",
216
+ )
217
+ disable_vision: bool | None = Field(
218
+ default=None,
219
+ description="If model is vision capable, this option allows to disable image "
220
+ "processing (useful for cost reduction).",
221
+ )
222
+ disable_stop_word: bool | None = Field(
223
+ default=False, description="Disable using of stop word."
224
+ )
225
+ caching_prompt: bool = Field(default=True, description="Enable caching of prompts.")
226
+ log_completions: bool = Field(
227
+ default=False, description="Enable logging of completions."
228
+ )
229
+ log_completions_folder: str = Field(
230
+ default=os.path.join(ENV_LOG_DIR, "completions"),
231
+ description="The folder to log LLM completions to. "
232
+ "Required if log_completions is True.",
233
+ )
234
+ custom_tokenizer: str | None = Field(
235
+ default=None, description="A custom tokenizer to use for token counting."
236
+ )
237
+ native_tool_calling: bool = Field(
238
+ default=True,
239
+ description="Whether to use native tool calling.",
240
+ )
241
+ force_string_serializer: bool | None = Field(
242
+ default=None,
243
+ description=(
244
+ "Force using string content serializer when sending to LLM API. "
245
+ "If None (default), auto-detect based on model. "
246
+ "Useful for providers that do not support list content, "
247
+ "like HuggingFace and Groq."
248
+ ),
249
+ )
250
+ reasoning_effort: Literal["low", "medium", "high", "xhigh", "none"] | None = Field(
251
+ default="high",
252
+ description="The effort to put into reasoning. "
253
+ "This is a string that can be one of 'low', 'medium', 'high', 'xhigh', "
254
+ "or 'none'. "
255
+ "Can apply to all reasoning models.",
256
+ )
257
+ reasoning_summary: Literal["auto", "concise", "detailed"] | None = Field(
258
+ default=None,
259
+ description="The level of detail for reasoning summaries. "
260
+ "This is a string that can be one of 'auto', 'concise', or 'detailed'. "
261
+ "Requires verified OpenAI organization. Only sent when explicitly set.",
262
+ )
263
+ enable_encrypted_reasoning: bool = Field(
264
+ default=True,
265
+ description="If True, ask for ['reasoning.encrypted_content'] "
266
+ "in Responses API include.",
267
+ )
268
+ # Prompt cache retention only applies to GPT-5+ models; filtered in chat options
269
+ prompt_cache_retention: str | None = Field(
270
+ default="24h",
271
+ description=(
272
+ "Retention policy for prompt cache. Only sent for GPT-5+ models; "
273
+ "explicitly stripped for all other models."
274
+ ),
275
+ )
276
+ extended_thinking_budget: int | None = Field(
277
+ default=200_000,
278
+ description="The budget tokens for extended thinking, "
279
+ "supported by Anthropic models.",
280
+ )
281
+ seed: int | None = Field(
282
+ default=None, description="The seed to use for random number generation."
283
+ )
284
+ safety_settings: list[dict[str, str]] | None = Field(
285
+ default=None,
286
+ description=(
287
+ "Safety settings for models that support them (like Mistral AI and Gemini)"
288
+ ),
289
+ )
290
+ usage_id: str = Field(
291
+ default="default",
292
+ serialization_alias="usage_id",
293
+ description=(
294
+ "Unique usage identifier for the LLM. Used for registry lookups, "
295
+ "telemetry, and spend tracking."
296
+ ),
297
+ )
298
+ litellm_extra_body: dict[str, Any] = Field(
299
+ default_factory=dict,
300
+ description=(
301
+ "Additional key-value pairs to pass to litellm's extra_body parameter. "
302
+ "This is useful for custom inference endpoints that need additional "
303
+ "parameters for configuration, routing, or advanced features. "
304
+ "NOTE: Not all LLM providers support extra_body parameters. Some providers "
305
+ "(e.g., OpenAI) may reject requests with unrecognized options. "
306
+ "This is commonly supported by: "
307
+ "- LiteLLM proxy servers (routing metadata, tracing) "
308
+ "- vLLM endpoints (return_token_ids, etc.) "
309
+ "- Custom inference clusters "
310
+ "Examples: "
311
+ "- Proxy routing: {'trace_version': '1.0.0', 'tags': ['agent:my-agent']} "
312
+ "- vLLM features: {'return_token_ids': True}"
313
+ ),
314
+ )
315
+
316
+ # =========================================================================
317
+ # Internal fields (excluded from dumps)
318
+ # =========================================================================
319
+ retry_listener: SkipJsonSchema[
320
+ Callable[[int, int, BaseException | None], None] | None
321
+ ] = Field(
322
+ default=None,
323
+ exclude=True,
324
+ )
325
+ _metrics: Metrics | None = PrivateAttr(default=None)
326
+ # ===== Plain class vars (NOT Fields) =====
327
+ # When serializing, these fields (SecretStr) will be dump to "****"
328
+ # When deserializing, these fields will be ignored and we will override
329
+ # them from the LLM instance provided at runtime.
330
+ OVERRIDE_ON_SERIALIZE: tuple[str, ...] = (
331
+ "api_key",
332
+ "aws_access_key_id",
333
+ "aws_secret_access_key",
334
+ # Dynamic runtime metadata for telemetry/routing that can differ across sessions
335
+ # and should not cause resume-time diffs. Always prefer the runtime value.
336
+ "litellm_extra_body",
337
+ )
338
+
339
+ # Runtime-only private attrs
340
+ _model_info: Any = PrivateAttr(default=None)
341
+ _tokenizer: Any = PrivateAttr(default=None)
342
+ _telemetry: Telemetry | None = PrivateAttr(default=None)
343
+
344
+ model_config: ClassVar[ConfigDict] = ConfigDict(
345
+ extra="forbid", arbitrary_types_allowed=True
346
+ )
347
+
348
+ # =========================================================================
349
+ # Validators
350
+ # =========================================================================
351
+ @field_validator("api_key", "aws_access_key_id", "aws_secret_access_key")
352
+ @classmethod
353
+ def _validate_secrets(cls, v: str | SecretStr | None, info) -> SecretStr | None:
354
+ return validate_secret(v, info)
355
+
356
+ @model_validator(mode="before")
357
+ @classmethod
358
+ def _coerce_inputs(cls, data):
359
+ if not isinstance(data, dict):
360
+ return data
361
+ d = dict(data)
362
+
363
+ model_val = d.get("model")
364
+ if not model_val:
365
+ raise ValueError("model must be specified in LLM")
366
+
367
+ # Azure default version
368
+ if model_val.startswith("azure") and not d.get("api_version"):
369
+ d["api_version"] = "2024-12-01-preview"
370
+
371
+ # Provider rewrite: openhands/* -> litellm_proxy/*
372
+ if model_val.startswith("openhands/"):
373
+ model_name = model_val.removeprefix("openhands/")
374
+ d["model"] = f"litellm_proxy/{model_name}"
375
+ # Set base_url (default to the app proxy when base_url is unset or None)
376
+ # Use `or` instead of dict.get() to handle explicit None values
377
+ d["base_url"] = d.get("base_url") or "https://llm-proxy.app.all-hands.dev/"
378
+
379
+ # HF doesn't support the OpenAI default value for top_p (1)
380
+ if model_val.startswith("huggingface"):
381
+ if d.get("top_p", 1.0) == 1.0:
382
+ d["top_p"] = 0.9
383
+
384
+ return d
385
+
386
+ @model_validator(mode="after")
387
+ def _set_env_side_effects(self):
388
+ if self.openrouter_site_url:
389
+ os.environ["OR_SITE_URL"] = self.openrouter_site_url
390
+ if self.openrouter_app_name:
391
+ os.environ["OR_APP_NAME"] = self.openrouter_app_name
392
+ if self.aws_access_key_id:
393
+ assert isinstance(self.aws_access_key_id, SecretStr)
394
+ os.environ["AWS_ACCESS_KEY_ID"] = self.aws_access_key_id.get_secret_value()
395
+ if self.aws_secret_access_key:
396
+ assert isinstance(self.aws_secret_access_key, SecretStr)
397
+ os.environ["AWS_SECRET_ACCESS_KEY"] = (
398
+ self.aws_secret_access_key.get_secret_value()
399
+ )
400
+ if self.aws_region_name:
401
+ os.environ["AWS_REGION_NAME"] = self.aws_region_name
402
+
403
+ # Metrics + Telemetry wiring
404
+ if self._metrics is None:
405
+ self._metrics = Metrics(model_name=self.model)
406
+
407
+ self._telemetry = Telemetry(
408
+ model_name=self.model,
409
+ log_enabled=self.log_completions,
410
+ log_dir=self.log_completions_folder if self.log_completions else None,
411
+ input_cost_per_token=self.input_cost_per_token,
412
+ output_cost_per_token=self.output_cost_per_token,
413
+ metrics=self._metrics,
414
+ )
415
+
416
+ # Tokenizer
417
+ if self.custom_tokenizer:
418
+ self._tokenizer = create_pretrained_tokenizer(self.custom_tokenizer)
419
+
420
+ # Capabilities + model info
421
+ self._init_model_info_and_caps()
422
+
423
+ if self.temperature is None:
424
+ self.temperature = get_default_temperature(self.model)
425
+
426
+ logger.debug(
427
+ f"LLM ready: model={self.model} base_url={self.base_url} "
428
+ f"reasoning_effort={self.reasoning_effort} "
429
+ f"temperature={self.temperature}"
430
+ )
431
+ return self
432
+
433
+ def _retry_listener_fn(
434
+ self, attempt_number: int, num_retries: int, _err: BaseException | None
435
+ ) -> None:
436
+ if self.retry_listener is not None:
437
+ self.retry_listener(attempt_number, num_retries, _err)
438
+ if self._telemetry is not None and _err is not None:
439
+ self._telemetry.on_error(_err)
440
+
441
+ # =========================================================================
442
+ # Serializers
443
+ # =========================================================================
444
+ @field_serializer(
445
+ "api_key", "aws_access_key_id", "aws_secret_access_key", when_used="always"
446
+ )
447
+ def _serialize_secrets(self, v: SecretStr | None, info):
448
+ return serialize_secret(v, info)
449
+
450
+ # =========================================================================
451
+ # Public API
452
+ # =========================================================================
453
+ @property
454
+ def metrics(self) -> Metrics:
455
+ """Get usage metrics for this LLM instance.
456
+
457
+ Returns:
458
+ Metrics object containing token usage, costs, and other statistics.
459
+
460
+ Example:
461
+ >>> cost = llm.metrics.accumulated_cost
462
+ >>> print(f"Total cost: ${cost}")
463
+ """
464
+ assert self._metrics is not None, (
465
+ "Metrics should be initialized after model validation"
466
+ )
467
+ return self._metrics
468
+
469
+ @property
470
+ def telemetry(self) -> Telemetry:
471
+ """Get telemetry handler for this LLM instance.
472
+
473
+ Returns:
474
+ Telemetry object for managing logging and metrics callbacks.
475
+
476
+ Example:
477
+ >>> llm.telemetry.set_log_completions_callback(my_callback)
478
+ """
479
+ assert self._telemetry is not None, (
480
+ "Telemetry should be initialized after model validation"
481
+ )
482
+ return self._telemetry
483
+
484
+ def restore_metrics(self, metrics: Metrics) -> None:
485
+ # Only used by ConversationStats to seed metrics
486
+ self._metrics = metrics
487
+
488
+ def completion(
489
+ self,
490
+ messages: list[Message],
491
+ tools: Sequence[ToolDefinition] | None = None,
492
+ _return_metrics: bool = False,
493
+ add_security_risk_prediction: bool = False,
494
+ on_token: TokenCallbackType | None = None,
495
+ **kwargs,
496
+ ) -> LLMResponse:
497
+ """Generate a completion from the language model.
498
+
499
+ This is the method for getting responses from the model via Completion API.
500
+ It handles message formatting, tool calling, and response processing.
501
+
502
+ Returns:
503
+ LLMResponse containing the model's response and metadata.
504
+
505
+ Raises:
506
+ ValueError: If streaming is requested (not supported).
507
+
508
+ Example:
509
+ >>> from openhands.sdk.llm import Message, TextContent
510
+ >>> messages = [Message(role="user", content=[TextContent(text="Hello")])]
511
+ >>> response = llm.completion(messages)
512
+ >>> print(response.content)
513
+ """
514
+ enable_streaming = bool(kwargs.get("stream", False)) or self.stream
515
+ if enable_streaming:
516
+ if on_token is None:
517
+ raise ValueError("Streaming requires an on_token callback")
518
+ kwargs["stream"] = True
519
+
520
+ # 1) serialize messages
521
+ formatted_messages = self.format_messages_for_llm(messages)
522
+
523
+ # 2) choose function-calling strategy
524
+ use_native_fc = self.native_tool_calling
525
+ original_fncall_msgs = copy.deepcopy(formatted_messages)
526
+
527
+ # Convert Tool objects to ChatCompletionToolParam once here
528
+ cc_tools: list[ChatCompletionToolParam] = []
529
+ if tools:
530
+ cc_tools = [
531
+ t.to_openai_tool(
532
+ add_security_risk_prediction=add_security_risk_prediction
533
+ )
534
+ for t in tools
535
+ ]
536
+
537
+ use_mock_tools = self.should_mock_tool_calls(cc_tools)
538
+ if use_mock_tools:
539
+ logger.debug(
540
+ "LLM.completion: mocking function-calling via prompt "
541
+ f"for model {self.model}"
542
+ )
543
+ formatted_messages, kwargs = self.pre_request_prompt_mock(
544
+ formatted_messages, cc_tools or [], kwargs
545
+ )
546
+
547
+ # 3) normalize provider params
548
+ # Only pass tools when native FC is active
549
+ kwargs["tools"] = cc_tools if (bool(cc_tools) and use_native_fc) else None
550
+ has_tools_flag = bool(cc_tools) and use_native_fc
551
+ # Behavior-preserving: delegate to select_chat_options
552
+ call_kwargs = select_chat_options(self, kwargs, has_tools=has_tools_flag)
553
+
554
+ # 4) optional request logging context (kept small)
555
+ assert self._telemetry is not None
556
+ log_ctx = None
557
+ if self._telemetry.log_enabled:
558
+ log_ctx = {
559
+ "messages": formatted_messages[:], # already simple dicts
560
+ "tools": tools,
561
+ "kwargs": {k: v for k, v in call_kwargs.items()},
562
+ "context_window": self.max_input_tokens or 0,
563
+ }
564
+ if tools and not use_native_fc:
565
+ log_ctx["raw_messages"] = original_fncall_msgs
566
+
567
+ # 5) do the call with retries
568
+ @self.retry_decorator(
569
+ num_retries=self.num_retries,
570
+ retry_exceptions=LLM_RETRY_EXCEPTIONS,
571
+ retry_min_wait=self.retry_min_wait,
572
+ retry_max_wait=self.retry_max_wait,
573
+ retry_multiplier=self.retry_multiplier,
574
+ retry_listener=self._retry_listener_fn,
575
+ )
576
+ def _one_attempt(**retry_kwargs) -> ModelResponse:
577
+ assert self._telemetry is not None
578
+ self._telemetry.on_request(log_ctx=log_ctx)
579
+ # Merge retry-modified kwargs (like temperature) with call_kwargs
580
+ final_kwargs = {**call_kwargs, **retry_kwargs}
581
+ resp = self._transport_call(
582
+ messages=formatted_messages,
583
+ **final_kwargs,
584
+ enable_streaming=enable_streaming,
585
+ on_token=on_token,
586
+ )
587
+ raw_resp: ModelResponse | None = None
588
+ if use_mock_tools:
589
+ raw_resp = copy.deepcopy(resp)
590
+ resp = self.post_response_prompt_mock(
591
+ resp, nonfncall_msgs=formatted_messages, tools=cc_tools
592
+ )
593
+ # 6) telemetry
594
+ self._telemetry.on_response(resp, raw_resp=raw_resp)
595
+
596
+ # Ensure at least one choice.
597
+ # Gemini sometimes returns empty choices; we raise LLMNoResponseError here
598
+ # inside the retry boundary so it is retried.
599
+ if not resp.get("choices") or len(resp["choices"]) < 1:
600
+ raise LLMNoResponseError(
601
+ "Response choices is less than 1. Response: " + str(resp)
602
+ )
603
+
604
+ return resp
605
+
606
+ try:
607
+ resp = _one_attempt()
608
+
609
+ # Convert the first choice to an OpenHands Message
610
+ first_choice = resp["choices"][0]
611
+ message = Message.from_llm_chat_message(first_choice["message"])
612
+
613
+ # Get current metrics snapshot
614
+ metrics_snapshot = MetricsSnapshot(
615
+ model_name=self.metrics.model_name,
616
+ accumulated_cost=self.metrics.accumulated_cost,
617
+ max_budget_per_task=self.metrics.max_budget_per_task,
618
+ accumulated_token_usage=self.metrics.accumulated_token_usage,
619
+ )
620
+
621
+ # Create and return LLMResponse
622
+ return LLMResponse(
623
+ message=message, metrics=metrics_snapshot, raw_response=resp
624
+ )
625
+ except Exception as e:
626
+ self._telemetry.on_error(e)
627
+ mapped = map_provider_exception(e)
628
+ if mapped is not e:
629
+ raise mapped from e
630
+ raise
631
+
632
+ # =========================================================================
633
+ # Responses API (non-stream, v1)
634
+ # =========================================================================
635
+ def responses(
636
+ self,
637
+ messages: list[Message],
638
+ tools: Sequence[ToolDefinition] | None = None,
639
+ include: list[str] | None = None,
640
+ store: bool | None = None,
641
+ _return_metrics: bool = False,
642
+ add_security_risk_prediction: bool = False,
643
+ on_token: TokenCallbackType | None = None,
644
+ **kwargs,
645
+ ) -> LLMResponse:
646
+ """Alternative invocation path using OpenAI Responses API via LiteLLM.
647
+
648
+ Maps Message[] -> (instructions, input[]) and returns LLMResponse.
649
+ """
650
+ # Streaming not yet supported
651
+ if kwargs.get("stream", False) or self.stream or on_token is not None:
652
+ raise ValueError("Streaming is not supported for Responses API yet")
653
+
654
+ # Build instructions + input list using dedicated Responses formatter
655
+ instructions, input_items = self.format_messages_for_responses(messages)
656
+
657
+ # Convert Tool objects to Responses ToolParam
658
+ # (Responses path always supports function tools)
659
+ resp_tools = (
660
+ [
661
+ t.to_responses_tool(
662
+ add_security_risk_prediction=add_security_risk_prediction
663
+ )
664
+ for t in tools
665
+ ]
666
+ if tools
667
+ else None
668
+ )
669
+
670
+ # Normalize/override Responses kwargs consistently
671
+ call_kwargs = select_responses_options(
672
+ self, kwargs, include=include, store=store
673
+ )
674
+
675
+ # Optional request logging
676
+ assert self._telemetry is not None
677
+ log_ctx = None
678
+ if self._telemetry.log_enabled:
679
+ log_ctx = {
680
+ "llm_path": "responses",
681
+ "input": input_items[:],
682
+ "tools": tools,
683
+ "kwargs": {k: v for k, v in call_kwargs.items()},
684
+ "context_window": self.max_input_tokens or 0,
685
+ }
686
+
687
+ # Perform call with retries
688
+ @self.retry_decorator(
689
+ num_retries=self.num_retries,
690
+ retry_exceptions=LLM_RETRY_EXCEPTIONS,
691
+ retry_min_wait=self.retry_min_wait,
692
+ retry_max_wait=self.retry_max_wait,
693
+ retry_multiplier=self.retry_multiplier,
694
+ retry_listener=self._retry_listener_fn,
695
+ )
696
+ def _one_attempt(**retry_kwargs) -> ResponsesAPIResponse:
697
+ assert self._telemetry is not None
698
+ self._telemetry.on_request(log_ctx=log_ctx)
699
+ final_kwargs = {**call_kwargs, **retry_kwargs}
700
+ with self._litellm_modify_params_ctx(self.modify_params):
701
+ with warnings.catch_warnings():
702
+ warnings.filterwarnings("ignore", category=DeprecationWarning)
703
+ typed_input: ResponseInputParam | str = (
704
+ cast(ResponseInputParam, input_items) if input_items else ""
705
+ )
706
+ # Extract api_key value with type assertion for type checker
707
+ api_key_value: str | None = None
708
+ if self.api_key:
709
+ assert isinstance(self.api_key, SecretStr)
710
+ api_key_value = self.api_key.get_secret_value()
711
+
712
+ ret = litellm_responses(
713
+ model=self.model,
714
+ input=typed_input,
715
+ instructions=instructions,
716
+ tools=resp_tools,
717
+ api_key=api_key_value,
718
+ api_base=self.base_url,
719
+ api_version=self.api_version,
720
+ timeout=self.timeout,
721
+ drop_params=self.drop_params,
722
+ seed=self.seed,
723
+ **final_kwargs,
724
+ )
725
+ assert isinstance(ret, ResponsesAPIResponse), (
726
+ f"Expected ResponsesAPIResponse, got {type(ret)}"
727
+ )
728
+ # telemetry (latency, cost). Token usage mapping we handle after.
729
+ self._telemetry.on_response(ret)
730
+ return ret
731
+
732
+ try:
733
+ resp: ResponsesAPIResponse = _one_attempt()
734
+
735
+ # Parse output -> Message (typed)
736
+ # Cast to a typed sequence
737
+ # accepted by from_llm_responses_output
738
+ output_seq = cast(Sequence[Any], resp.output or [])
739
+ message = Message.from_llm_responses_output(output_seq)
740
+
741
+ metrics_snapshot = MetricsSnapshot(
742
+ model_name=self.metrics.model_name,
743
+ accumulated_cost=self.metrics.accumulated_cost,
744
+ max_budget_per_task=self.metrics.max_budget_per_task,
745
+ accumulated_token_usage=self.metrics.accumulated_token_usage,
746
+ )
747
+
748
+ return LLMResponse(
749
+ message=message, metrics=metrics_snapshot, raw_response=resp
750
+ )
751
+ except Exception as e:
752
+ self._telemetry.on_error(e)
753
+ mapped = map_provider_exception(e)
754
+ if mapped is not e:
755
+ raise mapped from e
756
+ raise
757
+
758
+ # =========================================================================
759
+ # Transport + helpers
760
+ # =========================================================================
761
+ def _transport_call(
762
+ self,
763
+ *,
764
+ messages: list[dict[str, Any]],
765
+ enable_streaming: bool = False,
766
+ on_token: TokenCallbackType | None = None,
767
+ **kwargs,
768
+ ) -> ModelResponse:
769
+ # litellm.modify_params is GLOBAL; guard it for thread-safety
770
+ with self._litellm_modify_params_ctx(self.modify_params):
771
+ with warnings.catch_warnings():
772
+ warnings.filterwarnings(
773
+ "ignore", category=DeprecationWarning, module="httpx.*"
774
+ )
775
+ warnings.filterwarnings(
776
+ "ignore",
777
+ message=r".*content=.*upload.*",
778
+ category=DeprecationWarning,
779
+ )
780
+ warnings.filterwarnings(
781
+ "ignore",
782
+ message=r"There is no current event loop",
783
+ category=DeprecationWarning,
784
+ )
785
+ warnings.filterwarnings(
786
+ "ignore",
787
+ category=UserWarning,
788
+ )
789
+ warnings.filterwarnings(
790
+ "ignore",
791
+ category=DeprecationWarning,
792
+ message="Accessing the 'model_fields' attribute.*",
793
+ )
794
+ # Extract api_key value with type assertion for type checker
795
+ api_key_value: str | None = None
796
+ if self.api_key:
797
+ assert isinstance(self.api_key, SecretStr)
798
+ api_key_value = self.api_key.get_secret_value()
799
+
800
+ # Some providers need renames handled in _normalize_call_kwargs.
801
+ ret = litellm_completion(
802
+ model=self.model,
803
+ api_key=api_key_value,
804
+ api_base=self.base_url,
805
+ api_version=self.api_version,
806
+ timeout=self.timeout,
807
+ drop_params=self.drop_params,
808
+ seed=self.seed,
809
+ messages=messages,
810
+ **kwargs,
811
+ )
812
+ if enable_streaming and on_token is not None:
813
+ assert isinstance(ret, CustomStreamWrapper)
814
+ chunks = []
815
+ for chunk in ret:
816
+ on_token(chunk)
817
+ chunks.append(chunk)
818
+ ret = litellm.stream_chunk_builder(chunks, messages=messages)
819
+
820
+ assert isinstance(ret, ModelResponse), (
821
+ f"Expected ModelResponse, got {type(ret)}"
822
+ )
823
+ return ret
824
+
825
+ @contextmanager
826
+ def _litellm_modify_params_ctx(self, flag: bool):
827
+ old = getattr(litellm, "modify_params", None)
828
+ try:
829
+ litellm.modify_params = flag
830
+ yield
831
+ finally:
832
+ litellm.modify_params = old
833
+
834
+ # =========================================================================
835
+ # Capabilities, formatting, and info
836
+ # =========================================================================
837
+ def _model_name_for_capabilities(self) -> str:
838
+ """Return canonical name for capability lookups (e.g., vision support)."""
839
+ return self.model_canonical_name or self.model
840
+
841
+ def _init_model_info_and_caps(self) -> None:
842
+ self._model_info = get_litellm_model_info(
843
+ secret_api_key=self.api_key,
844
+ base_url=self.base_url,
845
+ model=self._model_name_for_capabilities(),
846
+ )
847
+
848
+ # Context window and max_output_tokens
849
+ if (
850
+ self.max_input_tokens is None
851
+ and self._model_info is not None
852
+ and isinstance(self._model_info.get("max_input_tokens"), int)
853
+ ):
854
+ self.max_input_tokens = self._model_info.get("max_input_tokens")
855
+
856
+ if self.max_output_tokens is None:
857
+ if any(
858
+ m in self.model
859
+ for m in [
860
+ "claude-3-7-sonnet",
861
+ "claude-sonnet-4",
862
+ "kimi-k2-thinking",
863
+ ]
864
+ ):
865
+ self.max_output_tokens = (
866
+ 64000 # practical cap (litellm may allow 128k with header)
867
+ )
868
+ logger.debug(
869
+ f"Setting max_output_tokens to {self.max_output_tokens} "
870
+ f"for {self.model}"
871
+ )
872
+ elif self._model_info is not None:
873
+ if isinstance(self._model_info.get("max_output_tokens"), int):
874
+ self.max_output_tokens = self._model_info.get("max_output_tokens")
875
+ elif isinstance(self._model_info.get("max_tokens"), int):
876
+ self.max_output_tokens = self._model_info.get("max_tokens")
877
+
878
+ if "o3" in self.model:
879
+ o3_limit = 100000
880
+ if self.max_output_tokens is None or self.max_output_tokens > o3_limit:
881
+ self.max_output_tokens = o3_limit
882
+ logger.debug(
883
+ "Clamping max_output_tokens to %s for %s",
884
+ self.max_output_tokens,
885
+ self.model,
886
+ )
887
+
888
+ def vision_is_active(self) -> bool:
889
+ with warnings.catch_warnings():
890
+ warnings.simplefilter("ignore")
891
+ return not self.disable_vision and self._supports_vision()
892
+
893
+ def _supports_vision(self) -> bool:
894
+ """Acquire from litellm if model is vision capable.
895
+
896
+ Returns:
897
+ bool: True if model is vision capable. Return False if model not
898
+ supported by litellm.
899
+ """
900
+ # litellm.supports_vision currently returns False for 'openai/gpt-...' or 'anthropic/claude-...' (with prefixes) # noqa: E501
901
+ # but model_info will have the correct value for some reason.
902
+ # we can go with it, but we will need to keep an eye if model_info is correct for Vertex or other providers # noqa: E501
903
+ # remove when litellm is updated to fix https://github.com/BerriAI/litellm/issues/5608 # noqa: E501
904
+ # Check both the full model name and the name after proxy prefix for vision support # noqa: E501
905
+ model_for_caps = self._model_name_for_capabilities()
906
+ return (
907
+ supports_vision(model_for_caps)
908
+ or supports_vision(model_for_caps.split("/")[-1])
909
+ or (
910
+ self._model_info is not None
911
+ and self._model_info.get("supports_vision", False)
912
+ )
913
+ or False # fallback to False if model_info is None
914
+ )
915
+
916
+ def is_caching_prompt_active(self) -> bool:
917
+ """Check if prompt caching is supported and enabled for current model.
918
+
919
+ Returns:
920
+ boolean: True if prompt caching is supported and enabled for the given
921
+ model.
922
+ """
923
+ if not self.caching_prompt:
924
+ return False
925
+ # We don't need to look up model_info, because
926
+ # only Anthropic models need explicit caching breakpoints
927
+ return (
928
+ self.caching_prompt
929
+ and get_features(self._model_name_for_capabilities()).supports_prompt_cache
930
+ )
931
+
932
+ def uses_responses_api(self) -> bool:
933
+ """Whether this model uses the OpenAI Responses API path."""
934
+
935
+ # by default, uses = supports
936
+ return get_features(self._model_name_for_capabilities()).supports_responses_api
937
+
938
+ @property
939
+ def model_info(self) -> dict | None:
940
+ """Returns the model info dictionary."""
941
+ return self._model_info
942
+
943
+ # =========================================================================
944
+ # Utilities preserved from previous class
945
+ # =========================================================================
946
+ def _apply_prompt_caching(self, messages: list[Message]) -> None:
947
+ """Applies caching breakpoints to the messages.
948
+
949
+ For new Anthropic API, we only need to mark the last user or
950
+ tool message as cacheable.
951
+ """
952
+ if len(messages) > 0 and messages[0].role == "system":
953
+ messages[0].content[-1].cache_prompt = True
954
+ # NOTE: this is only needed for anthropic
955
+ for message in reversed(messages):
956
+ if message.role in ("user", "tool"):
957
+ message.content[
958
+ -1
959
+ ].cache_prompt = True # Last item inside the message content
960
+ break
961
+
962
+ def format_messages_for_llm(self, messages: list[Message]) -> list[dict]:
963
+ """Formats Message objects for LLM consumption."""
964
+
965
+ messages = copy.deepcopy(messages)
966
+ if self.is_caching_prompt_active():
967
+ self._apply_prompt_caching(messages)
968
+
969
+ for message in messages:
970
+ message.cache_enabled = self.is_caching_prompt_active()
971
+ message.vision_enabled = self.vision_is_active()
972
+ message.function_calling_enabled = self.native_tool_calling
973
+ model_features = get_features(self._model_name_for_capabilities())
974
+ message.force_string_serializer = (
975
+ self.force_string_serializer
976
+ if self.force_string_serializer is not None
977
+ else model_features.force_string_serializer
978
+ )
979
+ message.send_reasoning_content = model_features.send_reasoning_content
980
+
981
+ formatted_messages = [message.to_chat_dict() for message in messages]
982
+
983
+ return formatted_messages
984
+
985
+ def format_messages_for_responses(
986
+ self, messages: list[Message]
987
+ ) -> tuple[str | None, list[dict[str, Any]]]:
988
+ """Prepare (instructions, input[]) for the OpenAI Responses API.
989
+
990
+ - Skips prompt caching flags and string serializer concerns
991
+ - Uses Message.to_responses_value to get either instructions (system)
992
+ or input items (others)
993
+ - Concatenates system instructions into a single instructions string
994
+ """
995
+ msgs = copy.deepcopy(messages)
996
+
997
+ # Determine vision based on model detection
998
+ vision_active = self.vision_is_active()
999
+
1000
+ # Assign system instructions as a string, collect input items
1001
+ instructions: str | None = None
1002
+ input_items: list[dict[str, Any]] = []
1003
+ for m in msgs:
1004
+ val = m.to_responses_value(vision_enabled=vision_active)
1005
+ if isinstance(val, str):
1006
+ s = val.strip()
1007
+ if not s:
1008
+ continue
1009
+ instructions = (
1010
+ s if instructions is None else f"{instructions}\n\n---\n\n{s}"
1011
+ )
1012
+ else:
1013
+ if val:
1014
+ input_items.extend(val)
1015
+ return instructions, input_items
1016
+
1017
+ def get_token_count(self, messages: list[Message]) -> int:
1018
+ logger.debug(
1019
+ "Message objects now include serialized tool calls in token counting"
1020
+ )
1021
+ formatted_messages = self.format_messages_for_llm(messages)
1022
+ try:
1023
+ return int(
1024
+ token_counter(
1025
+ model=self.model,
1026
+ messages=formatted_messages,
1027
+ custom_tokenizer=self._tokenizer,
1028
+ )
1029
+ )
1030
+ except Exception as e:
1031
+ logger.error(
1032
+ f"Error getting token count for model {self.model}\n{e}"
1033
+ + (
1034
+ f"\ncustom_tokenizer: {self.custom_tokenizer}"
1035
+ if self.custom_tokenizer
1036
+ else ""
1037
+ ),
1038
+ exc_info=True,
1039
+ )
1040
+ return 0
1041
+
1042
+ # =========================================================================
1043
+ # Serialization helpers
1044
+ # =========================================================================
1045
+ @classmethod
1046
+ def load_from_json(cls, json_path: str) -> LLM:
1047
+ with open(json_path) as f:
1048
+ data = json.load(f)
1049
+ return cls(**data)
1050
+
1051
+ @classmethod
1052
+ def load_from_env(cls, prefix: str = "LLM_") -> LLM:
1053
+ TRUTHY = {"true", "1", "yes", "on"}
1054
+
1055
+ def _unwrap_type(t: Any) -> Any:
1056
+ origin = get_origin(t)
1057
+ if origin is None:
1058
+ return t
1059
+ args = [a for a in get_args(t) if a is not type(None)]
1060
+ return args[0] if args else t
1061
+
1062
+ def _cast_value(raw: str, t: Any) -> Any:
1063
+ t = _unwrap_type(t)
1064
+ if t is SecretStr:
1065
+ return SecretStr(raw)
1066
+ if t is bool:
1067
+ return raw.lower() in TRUTHY
1068
+ if t is int:
1069
+ try:
1070
+ return int(raw)
1071
+ except ValueError:
1072
+ return None
1073
+ if t is float:
1074
+ try:
1075
+ return float(raw)
1076
+ except ValueError:
1077
+ return None
1078
+ origin = get_origin(t)
1079
+ if (origin in (list, dict, tuple)) or (
1080
+ isinstance(t, type) and issubclass(t, BaseModel)
1081
+ ):
1082
+ try:
1083
+ return json.loads(raw)
1084
+ except Exception:
1085
+ pass
1086
+ return raw
1087
+
1088
+ data: dict[str, Any] = {}
1089
+ fields: dict[str, Any] = {
1090
+ name: f.annotation
1091
+ for name, f in cls.model_fields.items()
1092
+ if not getattr(f, "exclude", False)
1093
+ }
1094
+
1095
+ for key, value in os.environ.items():
1096
+ if not key.startswith(prefix):
1097
+ continue
1098
+ field_name = key[len(prefix) :].lower()
1099
+ if field_name not in fields:
1100
+ continue
1101
+ v = _cast_value(value, fields[field_name])
1102
+ if v is not None:
1103
+ data[field_name] = v
1104
+ return cls(**data)
1105
+
1106
+ def resolve_diff_from_deserialized(self, persisted: LLM) -> LLM:
1107
+ """Resolve differences between a deserialized LLM and the current instance.
1108
+
1109
+ This is due to fields like api_key being serialized to "****" in dumps,
1110
+ and we want to ensure that when loading from a file, we still use the
1111
+ runtime-provided api_key in the self instance.
1112
+
1113
+ Return a new LLM instance equivalent to `persisted` but with
1114
+ explicitly whitelisted fields (e.g. api_key) taken from `self`.
1115
+ """
1116
+ if persisted.__class__ is not self.__class__:
1117
+ raise ValueError(
1118
+ f"Cannot resolve_diff_from_deserialized between {self.__class__} "
1119
+ f"and {persisted.__class__}"
1120
+ )
1121
+
1122
+ # Copy allowed fields from runtime llm into the persisted llm
1123
+ llm_updates = {}
1124
+ persisted_dump = persisted.model_dump(context={"expose_secrets": True})
1125
+ for field in self.OVERRIDE_ON_SERIALIZE:
1126
+ if field in persisted_dump.keys():
1127
+ llm_updates[field] = getattr(self, field)
1128
+ if llm_updates:
1129
+ reconciled = persisted.model_copy(update=llm_updates)
1130
+ else:
1131
+ reconciled = persisted
1132
+
1133
+ dump = self.model_dump(context={"expose_secrets": True})
1134
+ reconciled_dump = reconciled.model_dump(context={"expose_secrets": True})
1135
+ if dump != reconciled_dump:
1136
+ raise ValueError(
1137
+ "The LLM provided is different from the one in persisted state.\n"
1138
+ f"Diff: {pretty_pydantic_diff(self, reconciled)}"
1139
+ )
1140
+ return reconciled
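Beyond direct construction, the module also provides two configuration loaders, load_from_json and load_from_env. The latter reads every environment variable with a given prefix (default "LLM_"), lowercases the remainder of the name to find the matching field, and casts the string value based on the field's annotation (SecretStr, booleans from truthy strings, int/float parsing, JSON for containers and nested models). A hedged sketch of how that is expected to behave, with placeholder values:

import os

from openhands.sdk import LLM

# Placeholder values; LLM_<FIELD> maps to the lowercased field name.
os.environ["LLM_MODEL"] = "claude-sonnet-4-20250514"
os.environ["LLM_API_KEY"] = "your-api-key"   # secrets pass through the api_key field validator
os.environ["LLM_NUM_RETRIES"] = "3"          # cast to int
os.environ["LLM_LOG_COMPLETIONS"] = "true"   # truthy string -> bool

llm = LLM.load_from_env()  # default prefix is "LLM_"
assert llm.num_retries == 3
assert llm.log_completions is True

Note that int and float values that fail to parse are returned as None and silently skipped, so malformed numeric variables fall back to the field defaults rather than raising.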