openhands_sdk-1.7.3-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openhands/sdk/__init__.py +111 -0
- openhands/sdk/agent/__init__.py +8 -0
- openhands/sdk/agent/agent.py +650 -0
- openhands/sdk/agent/base.py +457 -0
- openhands/sdk/agent/prompts/in_context_learning_example.j2 +169 -0
- openhands/sdk/agent/prompts/in_context_learning_example_suffix.j2 +3 -0
- openhands/sdk/agent/prompts/model_specific/anthropic_claude.j2 +3 -0
- openhands/sdk/agent/prompts/model_specific/google_gemini.j2 +1 -0
- openhands/sdk/agent/prompts/model_specific/openai_gpt/gpt-5-codex.j2 +2 -0
- openhands/sdk/agent/prompts/model_specific/openai_gpt/gpt-5.j2 +3 -0
- openhands/sdk/agent/prompts/security_policy.j2 +22 -0
- openhands/sdk/agent/prompts/security_risk_assessment.j2 +21 -0
- openhands/sdk/agent/prompts/self_documentation.j2 +15 -0
- openhands/sdk/agent/prompts/system_prompt.j2 +132 -0
- openhands/sdk/agent/prompts/system_prompt_interactive.j2 +14 -0
- openhands/sdk/agent/prompts/system_prompt_long_horizon.j2 +40 -0
- openhands/sdk/agent/prompts/system_prompt_planning.j2 +40 -0
- openhands/sdk/agent/prompts/system_prompt_tech_philosophy.j2 +122 -0
- openhands/sdk/agent/utils.py +228 -0
- openhands/sdk/context/__init__.py +28 -0
- openhands/sdk/context/agent_context.py +264 -0
- openhands/sdk/context/condenser/__init__.py +18 -0
- openhands/sdk/context/condenser/base.py +100 -0
- openhands/sdk/context/condenser/llm_summarizing_condenser.py +248 -0
- openhands/sdk/context/condenser/no_op_condenser.py +14 -0
- openhands/sdk/context/condenser/pipeline_condenser.py +56 -0
- openhands/sdk/context/condenser/prompts/summarizing_prompt.j2 +59 -0
- openhands/sdk/context/condenser/utils.py +149 -0
- openhands/sdk/context/prompts/__init__.py +6 -0
- openhands/sdk/context/prompts/prompt.py +114 -0
- openhands/sdk/context/prompts/templates/ask_agent_template.j2 +11 -0
- openhands/sdk/context/prompts/templates/skill_knowledge_info.j2 +8 -0
- openhands/sdk/context/prompts/templates/system_message_suffix.j2 +32 -0
- openhands/sdk/context/skills/__init__.py +28 -0
- openhands/sdk/context/skills/exceptions.py +11 -0
- openhands/sdk/context/skills/skill.py +720 -0
- openhands/sdk/context/skills/trigger.py +36 -0
- openhands/sdk/context/skills/types.py +48 -0
- openhands/sdk/context/view.py +503 -0
- openhands/sdk/conversation/__init__.py +40 -0
- openhands/sdk/conversation/base.py +281 -0
- openhands/sdk/conversation/conversation.py +152 -0
- openhands/sdk/conversation/conversation_stats.py +85 -0
- openhands/sdk/conversation/event_store.py +157 -0
- openhands/sdk/conversation/events_list_base.py +17 -0
- openhands/sdk/conversation/exceptions.py +50 -0
- openhands/sdk/conversation/fifo_lock.py +133 -0
- openhands/sdk/conversation/impl/__init__.py +5 -0
- openhands/sdk/conversation/impl/local_conversation.py +665 -0
- openhands/sdk/conversation/impl/remote_conversation.py +956 -0
- openhands/sdk/conversation/persistence_const.py +9 -0
- openhands/sdk/conversation/response_utils.py +41 -0
- openhands/sdk/conversation/secret_registry.py +126 -0
- openhands/sdk/conversation/serialization_diff.py +0 -0
- openhands/sdk/conversation/state.py +392 -0
- openhands/sdk/conversation/stuck_detector.py +311 -0
- openhands/sdk/conversation/title_utils.py +191 -0
- openhands/sdk/conversation/types.py +45 -0
- openhands/sdk/conversation/visualizer/__init__.py +12 -0
- openhands/sdk/conversation/visualizer/base.py +67 -0
- openhands/sdk/conversation/visualizer/default.py +373 -0
- openhands/sdk/critic/__init__.py +15 -0
- openhands/sdk/critic/base.py +38 -0
- openhands/sdk/critic/impl/__init__.py +12 -0
- openhands/sdk/critic/impl/agent_finished.py +83 -0
- openhands/sdk/critic/impl/empty_patch.py +49 -0
- openhands/sdk/critic/impl/pass_critic.py +42 -0
- openhands/sdk/event/__init__.py +42 -0
- openhands/sdk/event/base.py +149 -0
- openhands/sdk/event/condenser.py +82 -0
- openhands/sdk/event/conversation_error.py +25 -0
- openhands/sdk/event/conversation_state.py +104 -0
- openhands/sdk/event/llm_completion_log.py +39 -0
- openhands/sdk/event/llm_convertible/__init__.py +20 -0
- openhands/sdk/event/llm_convertible/action.py +139 -0
- openhands/sdk/event/llm_convertible/message.py +142 -0
- openhands/sdk/event/llm_convertible/observation.py +141 -0
- openhands/sdk/event/llm_convertible/system.py +61 -0
- openhands/sdk/event/token.py +16 -0
- openhands/sdk/event/types.py +11 -0
- openhands/sdk/event/user_action.py +21 -0
- openhands/sdk/git/exceptions.py +43 -0
- openhands/sdk/git/git_changes.py +249 -0
- openhands/sdk/git/git_diff.py +129 -0
- openhands/sdk/git/models.py +21 -0
- openhands/sdk/git/utils.py +189 -0
- openhands/sdk/hooks/__init__.py +30 -0
- openhands/sdk/hooks/config.py +180 -0
- openhands/sdk/hooks/conversation_hooks.py +227 -0
- openhands/sdk/hooks/executor.py +155 -0
- openhands/sdk/hooks/manager.py +170 -0
- openhands/sdk/hooks/types.py +40 -0
- openhands/sdk/io/__init__.py +6 -0
- openhands/sdk/io/base.py +48 -0
- openhands/sdk/io/cache.py +85 -0
- openhands/sdk/io/local.py +119 -0
- openhands/sdk/io/memory.py +54 -0
- openhands/sdk/llm/__init__.py +45 -0
- openhands/sdk/llm/exceptions/__init__.py +45 -0
- openhands/sdk/llm/exceptions/classifier.py +50 -0
- openhands/sdk/llm/exceptions/mapping.py +54 -0
- openhands/sdk/llm/exceptions/types.py +101 -0
- openhands/sdk/llm/llm.py +1140 -0
- openhands/sdk/llm/llm_registry.py +122 -0
- openhands/sdk/llm/llm_response.py +59 -0
- openhands/sdk/llm/message.py +656 -0
- openhands/sdk/llm/mixins/fn_call_converter.py +1288 -0
- openhands/sdk/llm/mixins/non_native_fc.py +97 -0
- openhands/sdk/llm/options/__init__.py +1 -0
- openhands/sdk/llm/options/chat_options.py +93 -0
- openhands/sdk/llm/options/common.py +19 -0
- openhands/sdk/llm/options/responses_options.py +67 -0
- openhands/sdk/llm/router/__init__.py +10 -0
- openhands/sdk/llm/router/base.py +117 -0
- openhands/sdk/llm/router/impl/multimodal.py +76 -0
- openhands/sdk/llm/router/impl/random.py +22 -0
- openhands/sdk/llm/streaming.py +9 -0
- openhands/sdk/llm/utils/metrics.py +312 -0
- openhands/sdk/llm/utils/model_features.py +192 -0
- openhands/sdk/llm/utils/model_info.py +90 -0
- openhands/sdk/llm/utils/model_prompt_spec.py +98 -0
- openhands/sdk/llm/utils/retry_mixin.py +128 -0
- openhands/sdk/llm/utils/telemetry.py +362 -0
- openhands/sdk/llm/utils/unverified_models.py +156 -0
- openhands/sdk/llm/utils/verified_models.py +65 -0
- openhands/sdk/logger/__init__.py +22 -0
- openhands/sdk/logger/logger.py +195 -0
- openhands/sdk/logger/rolling.py +113 -0
- openhands/sdk/mcp/__init__.py +24 -0
- openhands/sdk/mcp/client.py +76 -0
- openhands/sdk/mcp/definition.py +106 -0
- openhands/sdk/mcp/exceptions.py +19 -0
- openhands/sdk/mcp/tool.py +270 -0
- openhands/sdk/mcp/utils.py +83 -0
- openhands/sdk/observability/__init__.py +4 -0
- openhands/sdk/observability/laminar.py +166 -0
- openhands/sdk/observability/utils.py +20 -0
- openhands/sdk/py.typed +0 -0
- openhands/sdk/secret/__init__.py +19 -0
- openhands/sdk/secret/secrets.py +92 -0
- openhands/sdk/security/__init__.py +6 -0
- openhands/sdk/security/analyzer.py +111 -0
- openhands/sdk/security/confirmation_policy.py +61 -0
- openhands/sdk/security/llm_analyzer.py +29 -0
- openhands/sdk/security/risk.py +100 -0
- openhands/sdk/tool/__init__.py +34 -0
- openhands/sdk/tool/builtins/__init__.py +34 -0
- openhands/sdk/tool/builtins/finish.py +106 -0
- openhands/sdk/tool/builtins/think.py +117 -0
- openhands/sdk/tool/registry.py +184 -0
- openhands/sdk/tool/schema.py +286 -0
- openhands/sdk/tool/spec.py +39 -0
- openhands/sdk/tool/tool.py +481 -0
- openhands/sdk/utils/__init__.py +22 -0
- openhands/sdk/utils/async_executor.py +115 -0
- openhands/sdk/utils/async_utils.py +39 -0
- openhands/sdk/utils/cipher.py +68 -0
- openhands/sdk/utils/command.py +90 -0
- openhands/sdk/utils/deprecation.py +166 -0
- openhands/sdk/utils/github.py +44 -0
- openhands/sdk/utils/json.py +48 -0
- openhands/sdk/utils/models.py +570 -0
- openhands/sdk/utils/paging.py +63 -0
- openhands/sdk/utils/pydantic_diff.py +85 -0
- openhands/sdk/utils/pydantic_secrets.py +64 -0
- openhands/sdk/utils/truncate.py +117 -0
- openhands/sdk/utils/visualize.py +58 -0
- openhands/sdk/workspace/__init__.py +17 -0
- openhands/sdk/workspace/base.py +158 -0
- openhands/sdk/workspace/local.py +189 -0
- openhands/sdk/workspace/models.py +35 -0
- openhands/sdk/workspace/remote/__init__.py +8 -0
- openhands/sdk/workspace/remote/async_remote_workspace.py +149 -0
- openhands/sdk/workspace/remote/base.py +164 -0
- openhands/sdk/workspace/remote/remote_workspace_mixin.py +323 -0
- openhands/sdk/workspace/workspace.py +49 -0
- openhands_sdk-1.7.3.dist-info/METADATA +17 -0
- openhands_sdk-1.7.3.dist-info/RECORD +180 -0
- openhands_sdk-1.7.3.dist-info/WHEEL +5 -0
- openhands_sdk-1.7.3.dist-info/top_level.txt +1 -0
openhands/sdk/llm/llm.py
ADDED
@@ -0,0 +1,1140 @@
from __future__ import annotations

import copy
import json
import os
import warnings
from collections.abc import Callable, Sequence
from contextlib import contextmanager
from typing import TYPE_CHECKING, Any, ClassVar, Literal, get_args, get_origin

import httpx  # noqa: F401
from pydantic import (
    BaseModel,
    ConfigDict,
    Field,
    PrivateAttr,
    SecretStr,
    field_serializer,
    field_validator,
    model_validator,
)
from pydantic.json_schema import SkipJsonSchema

from openhands.sdk.llm.utils.model_info import get_litellm_model_info
from openhands.sdk.utils.pydantic_secrets import serialize_secret, validate_secret


if TYPE_CHECKING:  # type hints only, avoid runtime import cycle
    from openhands.sdk.tool.tool import ToolDefinition

from openhands.sdk.utils.pydantic_diff import pretty_pydantic_diff


with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    import litellm

from typing import cast

from litellm import (
    ChatCompletionToolParam,
    CustomStreamWrapper,
    ResponseInputParam,
    completion as litellm_completion,
)
from litellm.exceptions import (
    APIConnectionError,
    InternalServerError,
    RateLimitError,
    ServiceUnavailableError,
    Timeout as LiteLLMTimeout,
)
from litellm.responses.main import responses as litellm_responses
from litellm.types.llms.openai import ResponsesAPIResponse
from litellm.types.utils import ModelResponse
from litellm.utils import (
    create_pretrained_tokenizer,
    supports_vision,
    token_counter,
)

from openhands.sdk.llm.exceptions import (
    LLMNoResponseError,
    map_provider_exception,
)

# OpenHands utilities
from openhands.sdk.llm.llm_response import LLMResponse
from openhands.sdk.llm.message import (
    Message,
)
from openhands.sdk.llm.mixins.non_native_fc import NonNativeToolCallingMixin
from openhands.sdk.llm.options.chat_options import select_chat_options
from openhands.sdk.llm.options.responses_options import select_responses_options
from openhands.sdk.llm.streaming import (
    TokenCallbackType,
)
from openhands.sdk.llm.utils.metrics import Metrics, MetricsSnapshot
from openhands.sdk.llm.utils.model_features import get_default_temperature, get_features
from openhands.sdk.llm.utils.retry_mixin import RetryMixin
from openhands.sdk.llm.utils.telemetry import Telemetry
from openhands.sdk.logger import ENV_LOG_DIR, get_logger


logger = get_logger(__name__)

__all__ = ["LLM"]


# Exceptions we retry on
LLM_RETRY_EXCEPTIONS: tuple[type[Exception], ...] = (
    APIConnectionError,
    RateLimitError,
    ServiceUnavailableError,
    LiteLLMTimeout,
    InternalServerError,
    LLMNoResponseError,
)


class LLM(BaseModel, RetryMixin, NonNativeToolCallingMixin):
    """Language model interface for OpenHands agents.

    The LLM class provides a unified interface for interacting with various
    language models through the litellm library. It handles model configuration,
    API authentication,
    retry logic, and tool calling capabilities.

    Example:
        >>> from openhands.sdk import LLM
        >>> from pydantic import SecretStr
        >>> llm = LLM(
        ...     model="claude-sonnet-4-20250514",
        ...     api_key=SecretStr("your-api-key"),
        ...     usage_id="my-agent"
        ... )
        >>> # Use with agent or conversation
    """

    # =========================================================================
    # Config fields
    # =========================================================================
    model: str = Field(default="claude-sonnet-4-20250514", description="Model name.")
    api_key: str | SecretStr | None = Field(default=None, description="API key.")
    base_url: str | None = Field(default=None, description="Custom base URL.")
    api_version: str | None = Field(
        default=None, description="API version (e.g., Azure)."
    )

    aws_access_key_id: str | SecretStr | None = Field(default=None)
    aws_secret_access_key: str | SecretStr | None = Field(default=None)
    aws_region_name: str | None = Field(default=None)

    openrouter_site_url: str = Field(default="https://docs.all-hands.dev/")
    openrouter_app_name: str = Field(default="OpenHands")

    num_retries: int = Field(default=5, ge=0)
    retry_multiplier: float = Field(default=8.0, ge=0)
    retry_min_wait: int = Field(default=8, ge=0)
    retry_max_wait: int = Field(default=64, ge=0)

    timeout: int | None = Field(default=None, ge=0, description="HTTP timeout (s).")

    max_message_chars: int = Field(
        default=30_000,
        ge=1,
        description="Approx max chars in each event/content sent to the LLM.",
    )

    temperature: float | None = Field(
        default=None,
        ge=0,
        description=(
            "Sampling temperature for response generation. "
            "Defaults to 0 for most models and provider default for reasoning models."
        ),
    )
    top_p: float | None = Field(default=1.0, ge=0, le=1)
    top_k: float | None = Field(default=None, ge=0)

    custom_llm_provider: str | None = Field(default=None)
    max_input_tokens: int | None = Field(
        default=None,
        ge=1,
        description="The maximum number of input tokens. "
        "Note that this is currently unused, and the value at runtime is actually"
        " the total tokens in OpenAI (e.g. 128,000 tokens for GPT-4).",
    )
    max_output_tokens: int | None = Field(
        default=None,
        ge=1,
        description="The maximum number of output tokens. This is sent to the LLM.",
    )
    model_canonical_name: str | None = Field(
        default=None,
        description=(
            "Optional canonical model name for feature registry lookups. "
            "The OpenHands SDK maintains a model feature registry that "
            "maps model names to capabilities (e.g., vision support, "
            "prompt caching, responses API support). When using proxied or "
            "aliased model identifiers, set this field to the canonical "
            "model name (e.g., 'openai/gpt-4o') to ensure correct "
            "capability detection. If not provided, the 'model' field "
            "will be used for capability lookups."
        ),
    )
    extra_headers: dict[str, str] | None = Field(
        default=None,
        description="Optional HTTP headers to forward to LiteLLM requests.",
    )
    input_cost_per_token: float | None = Field(
        default=None,
        ge=0,
        description="The cost per input token. This will available in logs for user.",
    )
    output_cost_per_token: float | None = Field(
        default=None,
        ge=0,
        description="The cost per output token. This will available in logs for user.",
    )
    ollama_base_url: str | None = Field(default=None)

    stream: bool = Field(
        default=False,
        description=(
            "Enable streaming responses from the LLM. "
            "When enabled, the provided `on_token` callback in .completions "
            "and .responses will be invoked for each chunk of tokens."
        ),
    )
    drop_params: bool = Field(default=True)
    modify_params: bool = Field(
        default=True,
        description="Modify params allows litellm to do transformations like adding"
        " a default message, when a message is empty.",
    )
    disable_vision: bool | None = Field(
        default=None,
        description="If model is vision capable, this option allows to disable image "
        "processing (useful for cost reduction).",
    )
    disable_stop_word: bool | None = Field(
        default=False, description="Disable using of stop word."
    )
    caching_prompt: bool = Field(default=True, description="Enable caching of prompts.")
    log_completions: bool = Field(
        default=False, description="Enable logging of completions."
    )
    log_completions_folder: str = Field(
        default=os.path.join(ENV_LOG_DIR, "completions"),
        description="The folder to log LLM completions to. "
        "Required if log_completions is True.",
    )
    custom_tokenizer: str | None = Field(
        default=None, description="A custom tokenizer to use for token counting."
    )
    native_tool_calling: bool = Field(
        default=True,
        description="Whether to use native tool calling.",
    )
    force_string_serializer: bool | None = Field(
        default=None,
        description=(
            "Force using string content serializer when sending to LLM API. "
            "If None (default), auto-detect based on model. "
            "Useful for providers that do not support list content, "
            "like HuggingFace and Groq."
        ),
    )
    reasoning_effort: Literal["low", "medium", "high", "xhigh", "none"] | None = Field(
        default="high",
        description="The effort to put into reasoning. "
        "This is a string that can be one of 'low', 'medium', 'high', 'xhigh', "
        "or 'none'. "
        "Can apply to all reasoning models.",
    )
    reasoning_summary: Literal["auto", "concise", "detailed"] | None = Field(
        default=None,
        description="The level of detail for reasoning summaries. "
        "This is a string that can be one of 'auto', 'concise', or 'detailed'. "
        "Requires verified OpenAI organization. Only sent when explicitly set.",
    )
    enable_encrypted_reasoning: bool = Field(
        default=True,
        description="If True, ask for ['reasoning.encrypted_content'] "
        "in Responses API include.",
    )
    # Prompt cache retention only applies to GPT-5+ models; filtered in chat options
    prompt_cache_retention: str | None = Field(
        default="24h",
        description=(
            "Retention policy for prompt cache. Only sent for GPT-5+ models; "
            "explicitly stripped for all other models."
        ),
    )
    extended_thinking_budget: int | None = Field(
        default=200_000,
        description="The budget tokens for extended thinking, "
        "supported by Anthropic models.",
    )
    seed: int | None = Field(
        default=None, description="The seed to use for random number generation."
    )
    safety_settings: list[dict[str, str]] | None = Field(
        default=None,
        description=(
            "Safety settings for models that support them (like Mistral AI and Gemini)"
        ),
    )
    usage_id: str = Field(
        default="default",
        serialization_alias="usage_id",
        description=(
            "Unique usage identifier for the LLM. Used for registry lookups, "
            "telemetry, and spend tracking."
        ),
    )
    litellm_extra_body: dict[str, Any] = Field(
        default_factory=dict,
        description=(
            "Additional key-value pairs to pass to litellm's extra_body parameter. "
            "This is useful for custom inference endpoints that need additional "
            "parameters for configuration, routing, or advanced features. "
            "NOTE: Not all LLM providers support extra_body parameters. Some providers "
            "(e.g., OpenAI) may reject requests with unrecognized options. "
            "This is commonly supported by: "
            "- LiteLLM proxy servers (routing metadata, tracing) "
            "- vLLM endpoints (return_token_ids, etc.) "
            "- Custom inference clusters "
            "Examples: "
            "- Proxy routing: {'trace_version': '1.0.0', 'tags': ['agent:my-agent']} "
            "- vLLM features: {'return_token_ids': True}"
        ),
    )

    # =========================================================================
    # Internal fields (excluded from dumps)
    # =========================================================================
    retry_listener: SkipJsonSchema[
        Callable[[int, int, BaseException | None], None] | None
    ] = Field(
        default=None,
        exclude=True,
    )
    _metrics: Metrics | None = PrivateAttr(default=None)
    # ===== Plain class vars (NOT Fields) =====
    # When serializing, these fields (SecretStr) will be dump to "****"
    # When deserializing, these fields will be ignored and we will override
    # them from the LLM instance provided at runtime.
    OVERRIDE_ON_SERIALIZE: tuple[str, ...] = (
        "api_key",
        "aws_access_key_id",
        "aws_secret_access_key",
        # Dynamic runtime metadata for telemetry/routing that can differ across sessions
        # and should not cause resume-time diffs. Always prefer the runtime value.
        "litellm_extra_body",
    )

    # Runtime-only private attrs
    _model_info: Any = PrivateAttr(default=None)
    _tokenizer: Any = PrivateAttr(default=None)
    _telemetry: Telemetry | None = PrivateAttr(default=None)

    model_config: ClassVar[ConfigDict] = ConfigDict(
        extra="forbid", arbitrary_types_allowed=True
    )

    # =========================================================================
    # Validators
    # =========================================================================
    @field_validator("api_key", "aws_access_key_id", "aws_secret_access_key")
    @classmethod
    def _validate_secrets(cls, v: str | SecretStr | None, info) -> SecretStr | None:
        return validate_secret(v, info)

    @model_validator(mode="before")
    @classmethod
    def _coerce_inputs(cls, data):
        if not isinstance(data, dict):
            return data
        d = dict(data)

        model_val = d.get("model")
        if not model_val:
            raise ValueError("model must be specified in LLM")

        # Azure default version
        if model_val.startswith("azure") and not d.get("api_version"):
            d["api_version"] = "2024-12-01-preview"

        # Provider rewrite: openhands/* -> litellm_proxy/*
        if model_val.startswith("openhands/"):
            model_name = model_val.removeprefix("openhands/")
            d["model"] = f"litellm_proxy/{model_name}"
            # Set base_url (default to the app proxy when base_url is unset or None)
            # Use `or` instead of dict.get() to handle explicit None values
            d["base_url"] = d.get("base_url") or "https://llm-proxy.app.all-hands.dev/"

        # HF doesn't support the OpenAI default value for top_p (1)
        if model_val.startswith("huggingface"):
            if d.get("top_p", 1.0) == 1.0:
                d["top_p"] = 0.9

        return d

    @model_validator(mode="after")
    def _set_env_side_effects(self):
        if self.openrouter_site_url:
            os.environ["OR_SITE_URL"] = self.openrouter_site_url
        if self.openrouter_app_name:
            os.environ["OR_APP_NAME"] = self.openrouter_app_name
        if self.aws_access_key_id:
            assert isinstance(self.aws_access_key_id, SecretStr)
            os.environ["AWS_ACCESS_KEY_ID"] = self.aws_access_key_id.get_secret_value()
        if self.aws_secret_access_key:
            assert isinstance(self.aws_secret_access_key, SecretStr)
            os.environ["AWS_SECRET_ACCESS_KEY"] = (
                self.aws_secret_access_key.get_secret_value()
            )
        if self.aws_region_name:
            os.environ["AWS_REGION_NAME"] = self.aws_region_name

        # Metrics + Telemetry wiring
        if self._metrics is None:
            self._metrics = Metrics(model_name=self.model)

        self._telemetry = Telemetry(
            model_name=self.model,
            log_enabled=self.log_completions,
            log_dir=self.log_completions_folder if self.log_completions else None,
            input_cost_per_token=self.input_cost_per_token,
            output_cost_per_token=self.output_cost_per_token,
            metrics=self._metrics,
        )

        # Tokenizer
        if self.custom_tokenizer:
            self._tokenizer = create_pretrained_tokenizer(self.custom_tokenizer)

        # Capabilities + model info
        self._init_model_info_and_caps()

        if self.temperature is None:
            self.temperature = get_default_temperature(self.model)

        logger.debug(
            f"LLM ready: model={self.model} base_url={self.base_url} "
            f"reasoning_effort={self.reasoning_effort} "
            f"temperature={self.temperature}"
        )
        return self

    def _retry_listener_fn(
        self, attempt_number: int, num_retries: int, _err: BaseException | None
    ) -> None:
        if self.retry_listener is not None:
            self.retry_listener(attempt_number, num_retries, _err)
        if self._telemetry is not None and _err is not None:
            self._telemetry.on_error(_err)

    # =========================================================================
    # Serializers
    # =========================================================================
    @field_serializer(
        "api_key", "aws_access_key_id", "aws_secret_access_key", when_used="always"
    )
    def _serialize_secrets(self, v: SecretStr | None, info):
        return serialize_secret(v, info)

    # =========================================================================
    # Public API
    # =========================================================================
    @property
    def metrics(self) -> Metrics:
        """Get usage metrics for this LLM instance.

        Returns:
            Metrics object containing token usage, costs, and other statistics.

        Example:
            >>> cost = llm.metrics.accumulated_cost
            >>> print(f"Total cost: ${cost}")
        """
        assert self._metrics is not None, (
            "Metrics should be initialized after model validation"
        )
        return self._metrics

    @property
    def telemetry(self) -> Telemetry:
        """Get telemetry handler for this LLM instance.

        Returns:
            Telemetry object for managing logging and metrics callbacks.

        Example:
            >>> llm.telemetry.set_log_completions_callback(my_callback)
        """
        assert self._telemetry is not None, (
            "Telemetry should be initialized after model validation"
        )
        return self._telemetry

    def restore_metrics(self, metrics: Metrics) -> None:
        # Only used by ConversationStats to seed metrics
        self._metrics = metrics

    def completion(
        self,
        messages: list[Message],
        tools: Sequence[ToolDefinition] | None = None,
        _return_metrics: bool = False,
        add_security_risk_prediction: bool = False,
        on_token: TokenCallbackType | None = None,
        **kwargs,
    ) -> LLMResponse:
        """Generate a completion from the language model.

        This is the method for getting responses from the model via Completion API.
        It handles message formatting, tool calling, and response processing.

        Returns:
            LLMResponse containing the model's response and metadata.

        Raises:
            ValueError: If streaming is requested (not supported).

        Example:
            >>> from openhands.sdk.llm import Message, TextContent
            >>> messages = [Message(role="user", content=[TextContent(text="Hello")])]
            >>> response = llm.completion(messages)
            >>> print(response.content)
        """
        enable_streaming = bool(kwargs.get("stream", False)) or self.stream
        if enable_streaming:
            if on_token is None:
                raise ValueError("Streaming requires an on_token callback")
            kwargs["stream"] = True

        # 1) serialize messages
        formatted_messages = self.format_messages_for_llm(messages)

        # 2) choose function-calling strategy
        use_native_fc = self.native_tool_calling
        original_fncall_msgs = copy.deepcopy(formatted_messages)

        # Convert Tool objects to ChatCompletionToolParam once here
        cc_tools: list[ChatCompletionToolParam] = []
        if tools:
            cc_tools = [
                t.to_openai_tool(
                    add_security_risk_prediction=add_security_risk_prediction
                )
                for t in tools
            ]

        use_mock_tools = self.should_mock_tool_calls(cc_tools)
        if use_mock_tools:
            logger.debug(
                "LLM.completion: mocking function-calling via prompt "
                f"for model {self.model}"
            )
            formatted_messages, kwargs = self.pre_request_prompt_mock(
                formatted_messages, cc_tools or [], kwargs
            )

        # 3) normalize provider params
        # Only pass tools when native FC is active
        kwargs["tools"] = cc_tools if (bool(cc_tools) and use_native_fc) else None
        has_tools_flag = bool(cc_tools) and use_native_fc
        # Behavior-preserving: delegate to select_chat_options
        call_kwargs = select_chat_options(self, kwargs, has_tools=has_tools_flag)

        # 4) optional request logging context (kept small)
        assert self._telemetry is not None
        log_ctx = None
        if self._telemetry.log_enabled:
            log_ctx = {
                "messages": formatted_messages[:],  # already simple dicts
                "tools": tools,
                "kwargs": {k: v for k, v in call_kwargs.items()},
                "context_window": self.max_input_tokens or 0,
            }
            if tools and not use_native_fc:
                log_ctx["raw_messages"] = original_fncall_msgs

        # 5) do the call with retries
        @self.retry_decorator(
            num_retries=self.num_retries,
            retry_exceptions=LLM_RETRY_EXCEPTIONS,
            retry_min_wait=self.retry_min_wait,
            retry_max_wait=self.retry_max_wait,
            retry_multiplier=self.retry_multiplier,
            retry_listener=self._retry_listener_fn,
        )
        def _one_attempt(**retry_kwargs) -> ModelResponse:
            assert self._telemetry is not None
            self._telemetry.on_request(log_ctx=log_ctx)
            # Merge retry-modified kwargs (like temperature) with call_kwargs
            final_kwargs = {**call_kwargs, **retry_kwargs}
            resp = self._transport_call(
                messages=formatted_messages,
                **final_kwargs,
                enable_streaming=enable_streaming,
                on_token=on_token,
            )
            raw_resp: ModelResponse | None = None
            if use_mock_tools:
                raw_resp = copy.deepcopy(resp)
                resp = self.post_response_prompt_mock(
                    resp, nonfncall_msgs=formatted_messages, tools=cc_tools
                )
            # 6) telemetry
            self._telemetry.on_response(resp, raw_resp=raw_resp)

            # Ensure at least one choice.
            # Gemini sometimes returns empty choices; we raise LLMNoResponseError here
            # inside the retry boundary so it is retried.
            if not resp.get("choices") or len(resp["choices"]) < 1:
                raise LLMNoResponseError(
                    "Response choices is less than 1. Response: " + str(resp)
                )

            return resp

        try:
            resp = _one_attempt()

            # Convert the first choice to an OpenHands Message
            first_choice = resp["choices"][0]
            message = Message.from_llm_chat_message(first_choice["message"])

            # Get current metrics snapshot
            metrics_snapshot = MetricsSnapshot(
                model_name=self.metrics.model_name,
                accumulated_cost=self.metrics.accumulated_cost,
                max_budget_per_task=self.metrics.max_budget_per_task,
                accumulated_token_usage=self.metrics.accumulated_token_usage,
            )

            # Create and return LLMResponse
            return LLMResponse(
                message=message, metrics=metrics_snapshot, raw_response=resp
            )
        except Exception as e:
            self._telemetry.on_error(e)
            mapped = map_provider_exception(e)
            if mapped is not e:
                raise mapped from e
            raise

    # =========================================================================
    # Responses API (non-stream, v1)
    # =========================================================================
    def responses(
        self,
        messages: list[Message],
        tools: Sequence[ToolDefinition] | None = None,
        include: list[str] | None = None,
        store: bool | None = None,
        _return_metrics: bool = False,
        add_security_risk_prediction: bool = False,
        on_token: TokenCallbackType | None = None,
        **kwargs,
    ) -> LLMResponse:
        """Alternative invocation path using OpenAI Responses API via LiteLLM.

        Maps Message[] -> (instructions, input[]) and returns LLMResponse.
        """
        # Streaming not yet supported
        if kwargs.get("stream", False) or self.stream or on_token is not None:
            raise ValueError("Streaming is not supported for Responses API yet")

        # Build instructions + input list using dedicated Responses formatter
        instructions, input_items = self.format_messages_for_responses(messages)

        # Convert Tool objects to Responses ToolParam
        # (Responses path always supports function tools)
        resp_tools = (
            [
                t.to_responses_tool(
                    add_security_risk_prediction=add_security_risk_prediction
                )
                for t in tools
            ]
            if tools
            else None
        )

        # Normalize/override Responses kwargs consistently
        call_kwargs = select_responses_options(
            self, kwargs, include=include, store=store
        )

        # Optional request logging
        assert self._telemetry is not None
        log_ctx = None
        if self._telemetry.log_enabled:
            log_ctx = {
                "llm_path": "responses",
                "input": input_items[:],
                "tools": tools,
                "kwargs": {k: v for k, v in call_kwargs.items()},
                "context_window": self.max_input_tokens or 0,
            }

        # Perform call with retries
        @self.retry_decorator(
            num_retries=self.num_retries,
            retry_exceptions=LLM_RETRY_EXCEPTIONS,
            retry_min_wait=self.retry_min_wait,
            retry_max_wait=self.retry_max_wait,
            retry_multiplier=self.retry_multiplier,
            retry_listener=self._retry_listener_fn,
        )
        def _one_attempt(**retry_kwargs) -> ResponsesAPIResponse:
            assert self._telemetry is not None
            self._telemetry.on_request(log_ctx=log_ctx)
            final_kwargs = {**call_kwargs, **retry_kwargs}
            with self._litellm_modify_params_ctx(self.modify_params):
                with warnings.catch_warnings():
                    warnings.filterwarnings("ignore", category=DeprecationWarning)
                    typed_input: ResponseInputParam | str = (
                        cast(ResponseInputParam, input_items) if input_items else ""
                    )
                    # Extract api_key value with type assertion for type checker
                    api_key_value: str | None = None
                    if self.api_key:
                        assert isinstance(self.api_key, SecretStr)
                        api_key_value = self.api_key.get_secret_value()

                    ret = litellm_responses(
                        model=self.model,
                        input=typed_input,
                        instructions=instructions,
                        tools=resp_tools,
                        api_key=api_key_value,
                        api_base=self.base_url,
                        api_version=self.api_version,
                        timeout=self.timeout,
                        drop_params=self.drop_params,
                        seed=self.seed,
                        **final_kwargs,
                    )
            assert isinstance(ret, ResponsesAPIResponse), (
                f"Expected ResponsesAPIResponse, got {type(ret)}"
            )
            # telemetry (latency, cost). Token usage mapping we handle after.
            self._telemetry.on_response(ret)
            return ret

        try:
            resp: ResponsesAPIResponse = _one_attempt()

            # Parse output -> Message (typed)
            # Cast to a typed sequence
            # accepted by from_llm_responses_output
            output_seq = cast(Sequence[Any], resp.output or [])
            message = Message.from_llm_responses_output(output_seq)

            metrics_snapshot = MetricsSnapshot(
                model_name=self.metrics.model_name,
                accumulated_cost=self.metrics.accumulated_cost,
                max_budget_per_task=self.metrics.max_budget_per_task,
                accumulated_token_usage=self.metrics.accumulated_token_usage,
            )

            return LLMResponse(
                message=message, metrics=metrics_snapshot, raw_response=resp
            )
        except Exception as e:
            self._telemetry.on_error(e)
            mapped = map_provider_exception(e)
            if mapped is not e:
                raise mapped from e
            raise

    # =========================================================================
    # Transport + helpers
    # =========================================================================
    def _transport_call(
        self,
        *,
        messages: list[dict[str, Any]],
        enable_streaming: bool = False,
        on_token: TokenCallbackType | None = None,
        **kwargs,
    ) -> ModelResponse:
        # litellm.modify_params is GLOBAL; guard it for thread-safety
        with self._litellm_modify_params_ctx(self.modify_params):
            with warnings.catch_warnings():
                warnings.filterwarnings(
                    "ignore", category=DeprecationWarning, module="httpx.*"
                )
                warnings.filterwarnings(
                    "ignore",
                    message=r".*content=.*upload.*",
                    category=DeprecationWarning,
                )
                warnings.filterwarnings(
                    "ignore",
                    message=r"There is no current event loop",
                    category=DeprecationWarning,
                )
                warnings.filterwarnings(
                    "ignore",
                    category=UserWarning,
                )
                warnings.filterwarnings(
                    "ignore",
                    category=DeprecationWarning,
                    message="Accessing the 'model_fields' attribute.*",
                )
                # Extract api_key value with type assertion for type checker
                api_key_value: str | None = None
                if self.api_key:
                    assert isinstance(self.api_key, SecretStr)
                    api_key_value = self.api_key.get_secret_value()

                # Some providers need renames handled in _normalize_call_kwargs.
                ret = litellm_completion(
                    model=self.model,
                    api_key=api_key_value,
                    api_base=self.base_url,
                    api_version=self.api_version,
                    timeout=self.timeout,
                    drop_params=self.drop_params,
                    seed=self.seed,
                    messages=messages,
                    **kwargs,
                )
                if enable_streaming and on_token is not None:
                    assert isinstance(ret, CustomStreamWrapper)
                    chunks = []
                    for chunk in ret:
                        on_token(chunk)
                        chunks.append(chunk)
                    ret = litellm.stream_chunk_builder(chunks, messages=messages)

        assert isinstance(ret, ModelResponse), (
            f"Expected ModelResponse, got {type(ret)}"
        )
        return ret

    @contextmanager
    def _litellm_modify_params_ctx(self, flag: bool):
        old = getattr(litellm, "modify_params", None)
        try:
            litellm.modify_params = flag
            yield
        finally:
            litellm.modify_params = old

    # =========================================================================
    # Capabilities, formatting, and info
    # =========================================================================
    def _model_name_for_capabilities(self) -> str:
        """Return canonical name for capability lookups (e.g., vision support)."""
        return self.model_canonical_name or self.model

    def _init_model_info_and_caps(self) -> None:
        self._model_info = get_litellm_model_info(
            secret_api_key=self.api_key,
            base_url=self.base_url,
            model=self._model_name_for_capabilities(),
        )

        # Context window and max_output_tokens
        if (
            self.max_input_tokens is None
            and self._model_info is not None
            and isinstance(self._model_info.get("max_input_tokens"), int)
        ):
            self.max_input_tokens = self._model_info.get("max_input_tokens")

        if self.max_output_tokens is None:
            if any(
                m in self.model
                for m in [
                    "claude-3-7-sonnet",
                    "claude-sonnet-4",
                    "kimi-k2-thinking",
                ]
            ):
                self.max_output_tokens = (
                    64000  # practical cap (litellm may allow 128k with header)
                )
                logger.debug(
                    f"Setting max_output_tokens to {self.max_output_tokens} "
                    f"for {self.model}"
                )
            elif self._model_info is not None:
                if isinstance(self._model_info.get("max_output_tokens"), int):
                    self.max_output_tokens = self._model_info.get("max_output_tokens")
                elif isinstance(self._model_info.get("max_tokens"), int):
                    self.max_output_tokens = self._model_info.get("max_tokens")

        if "o3" in self.model:
            o3_limit = 100000
            if self.max_output_tokens is None or self.max_output_tokens > o3_limit:
                self.max_output_tokens = o3_limit
                logger.debug(
                    "Clamping max_output_tokens to %s for %s",
                    self.max_output_tokens,
                    self.model,
                )

    def vision_is_active(self) -> bool:
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            return not self.disable_vision and self._supports_vision()

    def _supports_vision(self) -> bool:
        """Acquire from litellm if model is vision capable.

        Returns:
            bool: True if model is vision capable. Return False if model not
                supported by litellm.
        """
        # litellm.supports_vision currently returns False for 'openai/gpt-...' or 'anthropic/claude-...' (with prefixes) # noqa: E501
        # but model_info will have the correct value for some reason.
        # we can go with it, but we will need to keep an eye if model_info is correct for Vertex or other providers # noqa: E501
        # remove when litellm is updated to fix https://github.com/BerriAI/litellm/issues/5608 # noqa: E501
        # Check both the full model name and the name after proxy prefix for vision support # noqa: E501
        model_for_caps = self._model_name_for_capabilities()
        return (
            supports_vision(model_for_caps)
            or supports_vision(model_for_caps.split("/")[-1])
            or (
                self._model_info is not None
                and self._model_info.get("supports_vision", False)
            )
            or False  # fallback to False if model_info is None
        )

    def is_caching_prompt_active(self) -> bool:
        """Check if prompt caching is supported and enabled for current model.

        Returns:
            boolean: True if prompt caching is supported and enabled for the given
                model.
        """
        if not self.caching_prompt:
            return False
        # We don't need to look-up model_info, because
        # only Anthropic models need explicit caching breakpoints
        return (
            self.caching_prompt
            and get_features(self._model_name_for_capabilities()).supports_prompt_cache
        )

    def uses_responses_api(self) -> bool:
        """Whether this model uses the OpenAI Responses API path."""

        # by default, uses = supports
        return get_features(self._model_name_for_capabilities()).supports_responses_api

    @property
    def model_info(self) -> dict | None:
        """Returns the model info dictionary."""
        return self._model_info

    # =========================================================================
    # Utilities preserved from previous class
    # =========================================================================
    def _apply_prompt_caching(self, messages: list[Message]) -> None:
        """Applies caching breakpoints to the messages.

        For new Anthropic API, we only need to mark the last user or
        tool message as cacheable.
        """
        if len(messages) > 0 and messages[0].role == "system":
            messages[0].content[-1].cache_prompt = True
        # NOTE: this is only needed for anthropic
        for message in reversed(messages):
            if message.role in ("user", "tool"):
                message.content[
                    -1
                ].cache_prompt = True  # Last item inside the message content
                break

    def format_messages_for_llm(self, messages: list[Message]) -> list[dict]:
        """Formats Message objects for LLM consumption."""

        messages = copy.deepcopy(messages)
        if self.is_caching_prompt_active():
            self._apply_prompt_caching(messages)

        for message in messages:
            message.cache_enabled = self.is_caching_prompt_active()
            message.vision_enabled = self.vision_is_active()
            message.function_calling_enabled = self.native_tool_calling
            model_features = get_features(self._model_name_for_capabilities())
            message.force_string_serializer = (
                self.force_string_serializer
                if self.force_string_serializer is not None
                else model_features.force_string_serializer
            )
            message.send_reasoning_content = model_features.send_reasoning_content

        formatted_messages = [message.to_chat_dict() for message in messages]

        return formatted_messages

    def format_messages_for_responses(
        self, messages: list[Message]
    ) -> tuple[str | None, list[dict[str, Any]]]:
        """Prepare (instructions, input[]) for the OpenAI Responses API.

        - Skips prompt caching flags and string serializer concerns
        - Uses Message.to_responses_value to get either instructions (system)
          or input items (others)
        - Concatenates system instructions into a single instructions string
        """
        msgs = copy.deepcopy(messages)

        # Determine vision based on model detection
        vision_active = self.vision_is_active()

        # Assign system instructions as a string, collect input items
        instructions: str | None = None
        input_items: list[dict[str, Any]] = []
        for m in msgs:
            val = m.to_responses_value(vision_enabled=vision_active)
            if isinstance(val, str):
                s = val.strip()
                if not s:
                    continue
                instructions = (
                    s if instructions is None else f"{instructions}\n\n---\n\n{s}"
                )
            else:
                if val:
                    input_items.extend(val)
        return instructions, input_items

    def get_token_count(self, messages: list[Message]) -> int:
        logger.debug(
            "Message objects now include serialized tool calls in token counting"
        )
        formatted_messages = self.format_messages_for_llm(messages)
        try:
            return int(
                token_counter(
                    model=self.model,
                    messages=formatted_messages,
                    custom_tokenizer=self._tokenizer,
                )
            )
        except Exception as e:
            logger.error(
                f"Error getting token count for model {self.model}\n{e}"
                + (
                    f"\ncustom_tokenizer: {self.custom_tokenizer}"
                    if self.custom_tokenizer
                    else ""
                ),
                exc_info=True,
            )
            return 0

    # =========================================================================
    # Serialization helpers
    # =========================================================================
    @classmethod
    def load_from_json(cls, json_path: str) -> LLM:
        with open(json_path) as f:
            data = json.load(f)
        return cls(**data)

    @classmethod
    def load_from_env(cls, prefix: str = "LLM_") -> LLM:
        TRUTHY = {"true", "1", "yes", "on"}

        def _unwrap_type(t: Any) -> Any:
            origin = get_origin(t)
            if origin is None:
                return t
            args = [a for a in get_args(t) if a is not type(None)]
            return args[0] if args else t

        def _cast_value(raw: str, t: Any) -> Any:
            t = _unwrap_type(t)
            if t is SecretStr:
                return SecretStr(raw)
            if t is bool:
                return raw.lower() in TRUTHY
            if t is int:
                try:
                    return int(raw)
                except ValueError:
                    return None
            if t is float:
                try:
                    return float(raw)
                except ValueError:
                    return None
            origin = get_origin(t)
            if (origin in (list, dict, tuple)) or (
                isinstance(t, type) and issubclass(t, BaseModel)
            ):
                try:
                    return json.loads(raw)
                except Exception:
                    pass
            return raw

        data: dict[str, Any] = {}
        fields: dict[str, Any] = {
            name: f.annotation
            for name, f in cls.model_fields.items()
            if not getattr(f, "exclude", False)
        }

        for key, value in os.environ.items():
            if not key.startswith(prefix):
                continue
            field_name = key[len(prefix) :].lower()
            if field_name not in fields:
                continue
            v = _cast_value(value, fields[field_name])
            if v is not None:
                data[field_name] = v
        return cls(**data)

    def resolve_diff_from_deserialized(self, persisted: LLM) -> LLM:
        """Resolve differences between a deserialized LLM and the current instance.

        This is due to fields like api_key being serialized to "****" in dumps,
        and we want to ensure that when loading from a file, we still use the
        runtime-provided api_key in the self instance.

        Return a new LLM instance equivalent to `persisted` but with
        explicitly whitelisted fields (e.g. api_key) taken from `self`.
        """
        if persisted.__class__ is not self.__class__:
            raise ValueError(
                f"Cannot resolve_diff_from_deserialized between {self.__class__} "
                f"and {persisted.__class__}"
            )

        # Copy allowed fields from runtime llm into the persisted llm
        llm_updates = {}
        persisted_dump = persisted.model_dump(context={"expose_secrets": True})
        for field in self.OVERRIDE_ON_SERIALIZE:
            if field in persisted_dump.keys():
                llm_updates[field] = getattr(self, field)
        if llm_updates:
            reconciled = persisted.model_copy(update=llm_updates)
        else:
            reconciled = persisted

        dump = self.model_dump(context={"expose_secrets": True})
        reconciled_dump = reconciled.model_dump(context={"expose_secrets": True})
        if dump != reconciled_dump:
            raise ValueError(
                "The LLM provided is different from the one in persisted state.\n"
                f"Diff: {pretty_pydantic_diff(self, reconciled)}"
            )
        return reconciled