openhands-sdk 1.3.0__py3-none-any.whl → 1.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openhands/sdk/__init__.py +4 -0
- openhands/sdk/agent/agent.py +55 -22
- openhands/sdk/agent/base.py +8 -1
- openhands/sdk/agent/prompts/system_prompt.j2 +1 -11
- openhands/sdk/agent/utils.py +5 -0
- openhands/sdk/context/agent_context.py +30 -0
- openhands/sdk/context/skills/__init__.py +2 -0
- openhands/sdk/context/skills/skill.py +202 -1
- openhands/sdk/conversation/__init__.py +5 -1
- openhands/sdk/conversation/base.py +15 -6
- openhands/sdk/conversation/conversation.py +10 -1
- openhands/sdk/conversation/conversation_stats.py +38 -1
- openhands/sdk/conversation/fifo_lock.py +14 -8
- openhands/sdk/conversation/impl/local_conversation.py +21 -5
- openhands/sdk/conversation/secret_source.py +1 -1
- openhands/sdk/conversation/state.py +8 -0
- openhands/sdk/conversation/types.py +5 -0
- openhands/sdk/event/conversation_state.py +8 -0
- openhands/sdk/llm/__init__.py +3 -0
- openhands/sdk/llm/llm.py +82 -16
- openhands/sdk/llm/llm_registry.py +1 -1
- openhands/sdk/llm/options/chat_options.py +12 -24
- openhands/sdk/llm/options/responses_options.py +9 -1
- openhands/sdk/llm/router/base.py +3 -0
- openhands/sdk/llm/streaming.py +9 -0
- openhands/sdk/llm/utils/model_features.py +12 -0
- openhands/sdk/logger/logger.py +7 -0
- openhands/sdk/tool/tool.py +18 -1
- openhands/sdk/utils/models.py +90 -9
- openhands/sdk/utils/truncate.py +81 -8
- openhands/sdk/workspace/__init__.py +3 -1
- openhands/sdk/workspace/models.py +7 -1
- openhands/sdk/workspace/remote/async_remote_workspace.py +22 -1
- openhands/sdk/workspace/remote/base.py +13 -0
- {openhands_sdk-1.3.0.dist-info → openhands_sdk-1.4.1.dist-info}/METADATA +2 -2
- {openhands_sdk-1.3.0.dist-info → openhands_sdk-1.4.1.dist-info}/RECORD +38 -37
- {openhands_sdk-1.3.0.dist-info → openhands_sdk-1.4.1.dist-info}/WHEEL +0 -0
- {openhands_sdk-1.3.0.dist-info → openhands_sdk-1.4.1.dist-info}/top_level.txt +0 -0
openhands/sdk/conversation/conversation_stats.py
CHANGED

@@ -1,4 +1,6 @@
-from pydantic import BaseModel, Field, PrivateAttr
+from typing import Any
+
+from pydantic import BaseModel, Field, PrivateAttr, model_serializer
 
 from openhands.sdk.llm.llm_registry import RegistryEvent
 from openhands.sdk.llm.utils.metrics import Metrics
@@ -18,6 +20,41 @@ class ConversationStats(BaseModel):
 
     _restored_usage_ids: set[str] = PrivateAttr(default_factory=set)
 
+    @model_serializer(mode="wrap")
+    def _serialize_with_context(self, serializer: Any, info: Any) -> dict[str, Any]:
+        """Serialize metrics based on context.
+
+        By default, preserves full metrics history including costs,
+        response_latencies, and token_usages lists for persistence.
+
+        When context={'use_snapshot': True} is passed, converts Metrics to
+        MetricsSnapshot format to minimize payload size for network transmission.
+
+        Args:
+            serializer: Pydantic's default serializer
+            info: Serialization info containing context
+
+        Returns:
+            Dictionary with metrics serialized based on context
+        """
+        # Get the default serialization
+        data = serializer(self)
+
+        # Check if we should use snapshot serialization
+        context = info.context if info else None
+        use_snapshot = context.get("use_snapshot", False) if context else False
+
+        if use_snapshot and "usage_to_metrics" in data:
+            # Replace each Metrics with its snapshot
+            usage_to_snapshots = {}
+            for usage_id, metrics in self.usage_to_metrics.items():
+                snapshot = metrics.get_snapshot()
+                usage_to_snapshots[usage_id] = snapshot.model_dump()
+
+            data["usage_to_metrics"] = usage_to_snapshots
+
+        return data
+
     def get_combined_metrics(self) -> Metrics:
         total_metrics = Metrics()
         for metrics in self.usage_to_metrics.values():
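Note: with this serializer in place, callers choose the wire format via Pydantic's serialization context. A minimal sketch (assuming a populated ConversationStats instance named stats):

    # Default: full metrics history, suitable for persistence
    full = stats.model_dump(mode="json")

    # Compact form: each Metrics becomes a MetricsSnapshot dict, trimming
    # the response_latencies/token_usages lists for network transport
    compact = stats.model_dump(mode="json", context={"use_snapshot": True})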
openhands/sdk/conversation/fifo_lock.py
CHANGED

@@ -50,7 +50,6 @@ class FIFOLock:
         Returns:
             True if lock was acquired, False otherwise.
         """
-        me = threading.Condition(self._mutex)
         ident = threading.get_ident()
         start = time.monotonic()
 
@@ -60,21 +59,27 @@
                 self._count += 1
                 return True
 
+            if self._owner is None and not self._waiters:
+                self._owner = ident
+                self._count = 1
+                return True
+
+            if not blocking:
+                # Give up immediately
+                return False
+
             # Add to wait queue
+            me = threading.Condition(self._mutex)
             self._waiters.append(me)
 
             while True:
                 # If I'm at the front of the queue and nobody owns it → acquire
                 if self._waiters[0] is me and self._owner is None:
+                    self._waiters.popleft()
                     self._owner = ident
                     self._count = 1
                     return True
 
-                if not blocking:
-                    # Give up immediately
-                    self._waiters.remove(me)
-                    return False
-
                 if timeout >= 0:
                     remaining = timeout - (time.monotonic() - start)
                     if remaining <= 0:
@@ -95,11 +100,12 @@
         with self._mutex:
             if self._owner != ident:
                 raise RuntimeError("Cannot release lock not owned by current thread")
-
+            assert self._count >= 1, (
+                "When releasing the resource, the count must be >= 1"
+            )
             self._count -= 1
             if self._count == 0:
                 self._owner = None
-                self._waiters.popleft()
                 if self._waiters:
                     self._waiters[0].notify()
 
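Note: a minimal usage sketch for FIFOLock (an internal helper; the zero-argument constructor is an assumption). Waiters are queued as condition variables, so acquisition order matches arrival order:

    import threading

    from openhands.sdk.conversation.fifo_lock import FIFOLock

    lock = FIFOLock()

    def worker(n: int) -> None:
        # Blocks up to 5 seconds; waiters are woken strictly in FIFO order
        if lock.acquire(timeout=5.0):
            try:
                print(f"worker {n} holds the lock")
            finally:
                lock.release()

    threads = [threading.Thread(target=worker, args=(i,)) for i in range(3)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()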
openhands/sdk/conversation/impl/local_conversation.py
CHANGED

@@ -4,7 +4,6 @@ from collections.abc import Mapping
 from pathlib import Path
 
 from openhands.sdk.agent.base import AgentBase
-from openhands.sdk.agent.utils import make_llm_completion, prepare_llm_messages
 from openhands.sdk.context.prompts.prompt import render_template
 from openhands.sdk.conversation.base import BaseConversation
 from openhands.sdk.conversation.exceptions import ConversationRunError
@@ -15,7 +14,11 @@ from openhands.sdk.conversation.state import (
 )
 from openhands.sdk.conversation.stuck_detector import StuckDetector
 from openhands.sdk.conversation.title_utils import generate_conversation_title
-from openhands.sdk.conversation.types import ConversationCallbackType, ConversationID
+from openhands.sdk.conversation.types import (
+    ConversationCallbackType,
+    ConversationID,
+    ConversationTokenCallbackType,
+)
 from openhands.sdk.conversation.visualizer import (
     ConversationVisualizerBase,
     DefaultConversationVisualizer,
@@ -46,6 +49,7 @@ class LocalConversation(BaseConversation):
     _state: ConversationState
     _visualizer: ConversationVisualizerBase | None
     _on_event: ConversationCallbackType
+    _on_token: ConversationTokenCallbackType | None
     max_iteration_per_run: int
     _stuck_detector: StuckDetector | None
     llm_registry: LLMRegistry
@@ -58,6 +62,7 @@ class LocalConversation(BaseConversation):
         persistence_dir: str | Path | None = None,
         conversation_id: ConversationID | None = None,
         callbacks: list[ConversationCallbackType] | None = None,
+        token_callbacks: list[ConversationTokenCallbackType] | None = None,
         max_iteration_per_run: int = 500,
         stuck_detection: bool = True,
         visualizer: (
@@ -78,6 +83,7 @@ class LocalConversation(BaseConversation):
                 be used to identify the conversation. The user might want to
                 suffix their persistent filestore with this ID.
             callbacks: Optional list of callback functions to handle events
+            token_callbacks: Optional list of callbacks invoked for streaming deltas
             max_iteration_per_run: Maximum number of iterations per run
             visualizer: Visualization configuration. Can be:
                 - ConversationVisualizerBase subclass: Class to instantiate
@@ -143,6 +149,12 @@ class LocalConversation(BaseConversation):
             self._visualizer = None
 
         self._on_event = BaseConversation.compose_callbacks(composed_list)
+        self._on_token = (
+            BaseConversation.compose_callbacks(token_callbacks)
+            if token_callbacks
+            else None
+        )
+
         self.max_iteration_per_run = max_iteration_per_run
 
         # Initialize stuck detector
@@ -305,8 +317,9 @@ class LocalConversation(BaseConversation):
                     ConversationExecutionStatus.RUNNING
                 )
 
-                self.agent.step(self, on_event=self._on_event)
-
+                self.agent.step(
+                    self, on_event=self._on_event, on_token=self._on_token
+                )
                 iteration += 1
 
                 # Check for non-finished terminal conditions
@@ -436,7 +449,7 @@ class LocalConversation(BaseConversation):
                 executable_tool = tool.as_executable()
                 executable_tool.executor.close()
             except NotImplementedError:
-                # Tool has no executor, skip it
+                # Tool has no executor, skip it without erroring
                 continue
             except Exception as e:
                 logger.warning(f"Error closing executor for tool '{tool.name}': {e}")
@@ -456,6 +469,9 @@ class LocalConversation(BaseConversation):
         Returns:
             A string response from the agent
         """
+        # Import here to avoid circular imports
+        from openhands.sdk.agent.utils import make_llm_completion, prepare_llm_messages
+
         template_dir = (
             Path(__file__).parent.parent.parent / "context" / "prompts" / "templates"
         )
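Note: a sketch of wiring up the new token_callbacks parameter (the agent setup is elided, and streaming deltas only flow when the agent's LLM is configured with stream=True):

    from openhands.sdk.conversation.impl.local_conversation import LocalConversation

    def on_delta(chunk) -> None:
        # Invoked once per streamed LLM chunk while the agent is generating
        print(chunk, end="", flush=True)

    conversation = LocalConversation(
        agent=agent,  # an AgentBase configured elsewhere, with LLM(stream=True)
        token_callbacks=[on_delta],
    )
    conversation.run()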
openhands/sdk/conversation/secret_source.py
CHANGED

@@ -45,7 +45,7 @@ class LookupSecret(SecretSource):
     headers: dict[str, str] = Field(default_factory=dict)
 
     def get_value(self):
-        response = httpx.get(self.url, headers=self.headers)
+        response = httpx.get(self.url, headers=self.headers, timeout=30.0)
        response.raise_for_status()
         return response.text
 
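Note: httpx's default timeout is 5 seconds, so the explicit timeout=30.0 gives slow secret-lookup endpoints more headroom while still bounding the call. On expiry the request raises httpx.TimeoutException; a defensive-handling sketch (the wrapper function is hypothetical):

    import httpx

    def fetch_secret_value(source) -> str | None:
        try:
            return source.get_value()
        except httpx.TimeoutException:
            # The lookup endpoint did not respond within 30 seconds
            return None
        except httpx.HTTPStatusError:
            # raise_for_status() rejected a non-2xx response
            return None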
openhands/sdk/conversation/state.py
CHANGED

@@ -2,6 +2,7 @@
 import json
 from collections.abc import Sequence
 from enum import Enum
+from pathlib import Path
 from typing import Any, Self
 
 from pydantic import AliasChoices, Field, PrivateAttr
@@ -124,6 +125,13 @@ class ConversationState(OpenHandsModel):
     def events(self) -> EventLog:
         return self._events
 
+    @property
+    def env_observation_persistence_dir(self) -> str | None:
+        """Directory for persisting environment observation files."""
+        if self.persistence_dir is None:
+            return None
+        return str(Path(self.persistence_dir) / "observations")
+
     def set_on_state_change(self, callback: ConversationCallbackType | None) -> None:
         """Set a callback to be called when state changes.
 
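Note: the new property just derives a fixed subdirectory from the existing persistence root. For a state persisted under /tmp/conv (illustrative path):

    # state.persistence_dir == "/tmp/conv"
    assert state.env_observation_persistence_dir == "/tmp/conv/observations"

    # With persistence disabled, the property mirrors that:
    # state.persistence_dir is None  ->  env_observation_persistence_dir is None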
openhands/sdk/conversation/types.py
CHANGED

@@ -2,9 +2,14 @@ import uuid
 from collections.abc import Callable
 
 from openhands.sdk.event.base import Event
+from openhands.sdk.llm.streaming import TokenCallbackType
 
 
 ConversationCallbackType = Callable[[Event], None]
+"""Type alias for event callback functions."""
+
+ConversationTokenCallbackType = TokenCallbackType
+"""Callback type invoked for streaming LLM deltas."""
 
 ConversationID = uuid.UUID
 """Type alias for conversation IDs."""
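Note: a conforming token callback is an ordinary callable; a sketch (assuming TokenCallbackType is a callable alias taking a single streamed chunk):

    from openhands.sdk.conversation.types import ConversationTokenCallbackType

    def log_delta(chunk) -> None:
        # One streamed LLM delta per invocation
        print(repr(chunk))

    callback: ConversationTokenCallbackType = log_delta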
openhands/sdk/event/conversation_state.py
CHANGED

@@ -49,6 +49,14 @@ class ConversationStateUpdateEvent(Event):
 
     @field_validator("value")
     def validate_value(cls, value, info):
+        # Prevent circular import
+        from openhands.sdk.conversation.conversation_stats import ConversationStats
+
+        # For ConversationStats, use snapshot serialization to avoid
+        # sending lengthy lists over WebSocket
+        if isinstance(value, ConversationStats):
+            return value.model_dump(mode="json", context={"use_snapshot": True})
+
         key = info.data.get("key")
         if key is None:
             # Allow value without key for flexibility
openhands/sdk/llm/__init__.py
CHANGED

@@ -12,6 +12,7 @@ from openhands.sdk.llm.message import (
     content_to_str,
 )
 from openhands.sdk.llm.router import RouterLLM
+from openhands.sdk.llm.streaming import LLMStreamChunk, TokenCallbackType
 from openhands.sdk.llm.utils.metrics import Metrics, MetricsSnapshot
 from openhands.sdk.llm.utils.unverified_models import (
     UNVERIFIED_MODELS_EXCLUDING_BEDROCK,
@@ -34,6 +35,8 @@ __all__ = [
     "RedactedThinkingBlock",
     "ReasoningItemModel",
     "content_to_str",
+    "LLMStreamChunk",
+    "TokenCallbackType",
     "Metrics",
     "MetricsSnapshot",
     "VERIFIED_MODELS",
openhands/sdk/llm/llm.py
CHANGED

@@ -10,7 +10,6 @@ from typing import TYPE_CHECKING, Any, ClassVar, Literal, get_args, get_origin
 
 import httpx  # noqa: F401
 from pydantic import (
-    AliasChoices,
     BaseModel,
     ConfigDict,
     Field,
@@ -40,6 +39,7 @@ from typing import cast
 
 from litellm import (
     ChatCompletionToolParam,
+    CustomStreamWrapper,
     ResponseInputParam,
     completion as litellm_completion,
 )
@@ -72,6 +72,9 @@ from openhands.sdk.llm.message import (
 from openhands.sdk.llm.mixins.non_native_fc import NonNativeToolCallingMixin
 from openhands.sdk.llm.options.chat_options import select_chat_options
 from openhands.sdk.llm.options.responses_options import select_responses_options
+from openhands.sdk.llm.streaming import (
+    TokenCallbackType,
+)
 from openhands.sdk.llm.utils.metrics import Metrics, MetricsSnapshot
 from openhands.sdk.llm.utils.model_features import get_default_temperature, get_features
 from openhands.sdk.llm.utils.retry_mixin import RetryMixin
@@ -168,6 +171,19 @@ class LLM(BaseModel, RetryMixin, NonNativeToolCallingMixin):
         ge=1,
         description="The maximum number of output tokens. This is sent to the LLM.",
     )
+    model_canonical_name: str | None = Field(
+        default=None,
+        description=(
+            "Optional canonical model name for feature registry lookups. "
+            "The OpenHands SDK maintains a model feature registry that "
+            "maps model names to capabilities (e.g., vision support, "
+            "prompt caching, responses API support). When using proxied or "
+            "aliased model identifiers, set this field to the canonical "
+            "model name (e.g., 'openai/gpt-4o') to ensure correct "
+            "capability detection. If not provided, the 'model' field "
+            "will be used for capability lookups."
+        ),
+    )
     extra_headers: dict[str, str] | None = Field(
         default=None,
         description="Optional HTTP headers to forward to LiteLLM requests.",
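Note: a configuration sketch for model_canonical_name (model names are illustrative). The alias keeps requests routed through the proxy while capability lookups resolve against the registry entry:

    from openhands.sdk.llm.llm import LLM

    llm = LLM(
        model="litellm_proxy/my-team-alias",   # identifier the proxy expects
        model_canonical_name="openai/gpt-4o",  # name the feature registry knows
        usage_id="demo",
    )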
@@ -184,6 +200,14 @@
     )
     ollama_base_url: str | None = Field(default=None)
 
+    stream: bool = Field(
+        default=False,
+        description=(
+            "Enable streaming responses from the LLM. "
+            "When enabled, the provided `on_token` callback in .completions "
+            "and .responses will be invoked for each chunk of tokens."
+        ),
+    )
     drop_params: bool = Field(default=True)
     modify_params: bool = Field(
         default=True,
@@ -240,6 +264,14 @@
         description="If True, ask for ['reasoning.encrypted_content'] "
         "in Responses API include.",
     )
+    # Prompt cache retention only applies to GPT-5+ models; filtered in chat options
+    prompt_cache_retention: str | None = Field(
+        default="24h",
+        description=(
+            "Retention policy for prompt cache. Only sent for GPT-5+ models; "
+            "explicitly stripped for all other models."
+        ),
+    )
     extended_thinking_budget: int | None = Field(
         default=200_000,
         description="The budget tokens for extended thinking, "
@@ -256,7 +288,6 @@
     )
     usage_id: str = Field(
         default="default",
-        validation_alias=AliasChoices("usage_id", "service_id"),
         serialization_alias="usage_id",
         description=(
             "Unique usage identifier for the LLM. Used for registry lookups, "
@@ -338,7 +369,8 @@
         if model_val.startswith("openhands/"):
             model_name = model_val.removeprefix("openhands/")
             d["model"] = f"litellm_proxy/{model_name}"
-            d["base_url"] = "https://llm-proxy.app.all-hands.dev/"
+            # Set base_url (default to the app proxy when base_url is unset)
+            d["base_url"] = d.get("base_url", "https://llm-proxy.app.all-hands.dev/")
 
         # HF doesn't support the OpenAI default value for top_p (1)
         if model_val.startswith("huggingface"):
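Note: the practical effect of the openhands/ prefix handling (values follow the logic above; the model name is illustrative):

    llm = LLM(model="openhands/claude-sonnet-4")
    # After validation:
    #   llm.model    == "litellm_proxy/claude-sonnet-4"
    #   llm.base_url == "https://llm-proxy.app.all-hands.dev/"  (kept if already set)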
@@ -447,6 +479,7 @@
         tools: Sequence[ToolDefinition] | None = None,
         _return_metrics: bool = False,
         add_security_risk_prediction: bool = False,
+        on_token: TokenCallbackType | None = None,
         **kwargs,
     ) -> LLMResponse:
         """Generate a completion from the language model.
@@ -466,9 +499,11 @@
         >>> response = llm.completion(messages)
         >>> print(response.content)
         """
-
-        if
-
+        enable_streaming = bool(kwargs.get("stream", False)) or self.stream
+        if enable_streaming:
+            if on_token is None:
+                raise ValueError("Streaming requires an on_token callback")
+            kwargs["stream"] = True
 
         # 1) serialize messages
         formatted_messages = self.format_messages_for_llm(messages)
@@ -531,7 +566,12 @@
             self._telemetry.on_request(log_ctx=log_ctx)
             # Merge retry-modified kwargs (like temperature) with call_kwargs
             final_kwargs = {**call_kwargs, **retry_kwargs}
-            resp = self._transport_call(messages=formatted_messages, **final_kwargs)
+            resp = self._transport_call(
+                messages=formatted_messages,
+                **final_kwargs,
+                enable_streaming=enable_streaming,
+                on_token=on_token,
+            )
             raw_resp: ModelResponse | None = None
             if use_mock_tools:
                 raw_resp = copy.deepcopy(resp)
@@ -588,15 +628,15 @@
         store: bool | None = None,
         _return_metrics: bool = False,
         add_security_risk_prediction: bool = False,
+        on_token: TokenCallbackType | None = None,
         **kwargs,
     ) -> LLMResponse:
         """Alternative invocation path using OpenAI Responses API via LiteLLM.
 
         Maps Message[] -> (instructions, input[]) and returns LLMResponse.
-        Non-stream only for v1.
         """
         # Streaming not yet supported
-        if kwargs.get("stream", False):
+        if kwargs.get("stream", False) or self.stream or on_token is not None:
             raise ValueError("Streaming is not supported for Responses API yet")
 
         # Build instructions + input list using dedicated Responses formatter
@@ -707,7 +747,12 @@
     # Transport + helpers
     # =========================================================================
     def _transport_call(
-        self,
+        self,
+        *,
+        messages: list[dict[str, Any]],
+        enable_streaming: bool = False,
+        on_token: TokenCallbackType | None = None,
+        **kwargs,
     ) -> ModelResponse:
         # litellm.modify_params is GLOBAL; guard it for thread-safety
         with self._litellm_modify_params_ctx(self.modify_params):
@@ -729,6 +774,11 @@
                     "ignore",
                     category=UserWarning,
                 )
+                warnings.filterwarnings(
+                    "ignore",
+                    category=DeprecationWarning,
+                    message="Accessing the 'model_fields' attribute.*",
+                )
                 # Extract api_key value with type assertion for type checker
                 api_key_value: str | None = None
                 if self.api_key:
@@ -747,6 +797,14 @@
                     messages=messages,
                     **kwargs,
                 )
+                if enable_streaming and on_token is not None:
+                    assert isinstance(ret, CustomStreamWrapper)
+                    chunks = []
+                    for chunk in ret:
+                        on_token(chunk)
+                        chunks.append(chunk)
+                    ret = litellm.stream_chunk_builder(chunks, messages=messages)
+
                 assert isinstance(ret, ModelResponse), (
                     f"Expected ModelResponse, got {type(ret)}"
                 )
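Note: taken together, the streaming path can be exercised as below (a minimal sketch: the model name is illustrative, the Message/TextContent construction is assumed from the SDK's message types, and the delta access assumes OpenAI-style chunks as emitted by LiteLLM):

    from openhands.sdk.llm.llm import LLM
    from openhands.sdk.llm.message import Message, TextContent

    def on_token(chunk) -> None:
        # Receives each raw LiteLLM stream chunk as it arrives
        delta = chunk.choices[0].delta.content or ""
        print(delta, end="", flush=True)

    llm = LLM(model="anthropic/claude-sonnet-4", stream=True, usage_id="demo")
    response = llm.completion(
        messages=[Message(role="user", content=[TextContent(text="Hello!")])],
        on_token=on_token,
    )
    # The final LLMResponse is still assembled from the collected chunks via
    # litellm.stream_chunk_builder, so callers also get a complete response.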
@@ -764,11 +822,15 @@
     # =========================================================================
     # Capabilities, formatting, and info
     # =========================================================================
+    def _model_name_for_capabilities(self) -> str:
+        """Return canonical name for capability lookups (e.g., vision support)."""
+        return self.model_canonical_name or self.model
+
     def _init_model_info_and_caps(self) -> None:
         self._model_info = get_litellm_model_info(
             secret_api_key=self.api_key,
             base_url=self.base_url,
-            model=self.model,
+            model=self._model_name_for_capabilities(),
         )
 
         # Context window and max_output_tokens
@@ -828,9 +890,10 @@
         # we can go with it, but we will need to keep an eye if model_info is correct for Vertex or other providers  # noqa: E501
         # remove when litellm is updated to fix https://github.com/BerriAI/litellm/issues/5608  # noqa: E501
         # Check both the full model name and the name after proxy prefix for vision support  # noqa: E501
+        model_for_caps = self._model_name_for_capabilities()
         return (
-            supports_vision(self.model)
-            or supports_vision(self.model.split("/")[-1])
+            supports_vision(model_for_caps)
+            or supports_vision(model_for_caps.split("/")[-1])
             or (
                 self._model_info is not None
                 and self._model_info.get("supports_vision", False)
@@ -849,13 +912,16 @@
             return False
         # We don't need to look-up model_info, because
         # only Anthropic models need explicit caching breakpoints
-        return self.caching_prompt and get_features(self.model).supports_prompt_cache
+        return (
+            self.caching_prompt
+            and get_features(self._model_name_for_capabilities()).supports_prompt_cache
+        )
 
     def uses_responses_api(self) -> bool:
         """Whether this model uses the OpenAI Responses API path."""
 
         # by default, uses = supports
-        return get_features(self.model).supports_responses_api
+        return get_features(self._model_name_for_capabilities()).supports_responses_api
 
     @property
     def model_info(self) -> dict | None:
@@ -892,7 +958,7 @@
         message.cache_enabled = self.is_caching_prompt_active()
         message.vision_enabled = self.vision_is_active()
         message.function_calling_enabled = self.native_tool_calling
-        model_features = get_features(self.model)
+        model_features = get_features(self._model_name_for_capabilities())
         message.force_string_serializer = (
             self.force_string_serializer
             if self.force_string_serializer is not None
openhands/sdk/llm/llm_registry.py
CHANGED

@@ -82,7 +82,7 @@ class LLMRegistry:
         if usage_id in self._usage_to_llm:
             message = (
                 f"Usage ID '{usage_id}' already exists in registry. "
-                "Use a different usage_id on the LLM "
+                "Use a different usage_id on the LLM or "
                 "call get() to retrieve the existing LLM."
             )
             raise ValueError(message)
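Note: the behavior behind this message, sketched (the add() method name is an assumption; get() is named by the message itself):

    from openhands.sdk.llm.llm import LLM
    from openhands.sdk.llm.llm_registry import LLMRegistry

    registry = LLMRegistry()
    registry.add(LLM(model="openai/gpt-4o", usage_id="agent"))

    # Re-registering the same usage_id raises the ValueError above:
    # registry.add(LLM(model="openai/gpt-4o", usage_id="agent"))

    existing = registry.get("agent")  # retrieve the existing LLM instead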
openhands/sdk/llm/options/chat_options.py
CHANGED

@@ -4,7 +4,6 @@ from typing import Any
 
 from openhands.sdk.llm.options.common import apply_defaults_if_absent
 from openhands.sdk.llm.utils.model_features import get_features
-from openhands.sdk.utils.deprecation import warn_cleanup
 
 
 def select_chat_options(
@@ -35,28 +34,10 @@ def select_chat_options(
 
     # Reasoning-model quirks
     if get_features(llm.model).supports_reasoning_effort:
-        #
-
-
-
-            cleanup_by="1.4.0",
-            details=(
-                "LiteLLM does not yet redirect reasoning_effort to "
-                "output_config.effort for Claude Opus 4.5. Remove this workaround "
-                "once LiteLLM adds native support."
-            ),
-        )
-        # Claude uses output_config.effort instead of reasoning_effort
-        if llm.reasoning_effort is not None:
-            out["output_config"] = {"effort": llm.reasoning_effort}
-            # Claude requires beta header for effort parameter
-            if "extra_headers" not in out:
-                out["extra_headers"] = {}
-            out["extra_headers"]["anthropic-beta"] = "effort-2025-11-24"
-        else:
-            # OpenAI/other models use reasoning_effort parameter
-            if llm.reasoning_effort is not None:
-                out["reasoning_effort"] = llm.reasoning_effort
+        # LiteLLM automatically handles reasoning_effort for all models, including
+        # Claude Opus 4.5 (maps to output_config and adds beta header automatically)
+        if llm.reasoning_effort is not None:
+            out["reasoning_effort"] = llm.reasoning_effort
 
     # All reasoning models ignore temp/top_p
     out.pop("temperature", None)
@@ -98,7 +79,14 @@
     out.pop("tools", None)
     out.pop("tool_choice", None)
 
-    #
+    # Send prompt_cache_retention only if model supports it
+    if (
+        get_features(llm.model).supports_prompt_cache_retention
+        and llm.prompt_cache_retention
+    ):
+        out["prompt_cache_retention"] = llm.prompt_cache_retention
+
+    # Pass through user-provided extra_body unchanged
     if llm.litellm_extra_body:
         out["extra_body"] = llm.litellm_extra_body
 
openhands/sdk/llm/options/responses_options.py
CHANGED

@@ -3,6 +3,7 @@ from __future__ import annotations
 from typing import Any
 
 from openhands.sdk.llm.options.common import apply_defaults_if_absent
+from openhands.sdk.llm.utils.model_features import get_features
 
 
 def select_responses_options(
@@ -50,7 +51,14 @@ def select_responses_options(
     if llm.reasoning_summary:
         out["reasoning"]["summary"] = llm.reasoning_summary
 
-    #
+    # Send prompt_cache_retention only if model supports it
+    if (
+        get_features(llm.model).supports_prompt_cache_retention
+        and llm.prompt_cache_retention
+    ):
+        out["prompt_cache_retention"] = llm.prompt_cache_retention
+
+    # Pass through user-provided extra_body unchanged
     if llm.litellm_extra_body:
         out["extra_body"] = llm.litellm_extra_body
 
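Note: both option selectors gate the new parameter on the feature registry, so the request shape stays valid per provider. A sketch of the gating (the model name is illustrative; supports_prompt_cache_retention is the flag consulted above):

    from openhands.sdk.llm.utils.model_features import get_features

    features = get_features("openai/gpt-5")
    if features.supports_prompt_cache_retention:
        # chat/responses options will include prompt_cache_retention="24h"
        # (the LLM field's default) unless the user cleared it
        ...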
openhands/sdk/llm/router/base.py
CHANGED
@@ -10,6 +10,7 @@ from pydantic import (
 from openhands.sdk.llm.llm import LLM
 from openhands.sdk.llm.llm_response import LLMResponse
 from openhands.sdk.llm.message import Message
+from openhands.sdk.llm.streaming import TokenCallbackType
 from openhands.sdk.logger import get_logger
 from openhands.sdk.tool.tool import ToolDefinition
 
@@ -52,6 +53,7 @@ class RouterLLM(LLM):
         tools: Sequence[ToolDefinition] | None = None,
         return_metrics: bool = False,
         add_security_risk_prediction: bool = False,
+        on_token: TokenCallbackType | None = None,
         **kwargs,
     ) -> LLMResponse:
         """
@@ -70,6 +72,7 @@
             tools=tools,
             _return_metrics=return_metrics,
             add_security_risk_prediction=add_security_risk_prediction,
+            on_token=on_token,
             **kwargs,
         )
 