openhands-sdk 1.3.0__py3-none-any.whl → 1.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. openhands/sdk/__init__.py +4 -0
  2. openhands/sdk/agent/agent.py +55 -22
  3. openhands/sdk/agent/base.py +8 -1
  4. openhands/sdk/agent/prompts/system_prompt.j2 +1 -11
  5. openhands/sdk/agent/utils.py +5 -0
  6. openhands/sdk/context/agent_context.py +30 -0
  7. openhands/sdk/context/skills/__init__.py +2 -0
  8. openhands/sdk/context/skills/skill.py +202 -1
  9. openhands/sdk/conversation/__init__.py +5 -1
  10. openhands/sdk/conversation/base.py +15 -6
  11. openhands/sdk/conversation/conversation.py +10 -1
  12. openhands/sdk/conversation/conversation_stats.py +38 -1
  13. openhands/sdk/conversation/fifo_lock.py +14 -8
  14. openhands/sdk/conversation/impl/local_conversation.py +21 -5
  15. openhands/sdk/conversation/secret_source.py +1 -1
  16. openhands/sdk/conversation/state.py +8 -0
  17. openhands/sdk/conversation/types.py +5 -0
  18. openhands/sdk/event/conversation_state.py +8 -0
  19. openhands/sdk/llm/__init__.py +3 -0
  20. openhands/sdk/llm/llm.py +82 -16
  21. openhands/sdk/llm/llm_registry.py +1 -1
  22. openhands/sdk/llm/options/chat_options.py +12 -24
  23. openhands/sdk/llm/options/responses_options.py +9 -1
  24. openhands/sdk/llm/router/base.py +3 -0
  25. openhands/sdk/llm/streaming.py +9 -0
  26. openhands/sdk/llm/utils/model_features.py +12 -0
  27. openhands/sdk/logger/logger.py +7 -0
  28. openhands/sdk/tool/tool.py +18 -1
  29. openhands/sdk/utils/models.py +90 -9
  30. openhands/sdk/utils/truncate.py +81 -8
  31. openhands/sdk/workspace/__init__.py +3 -1
  32. openhands/sdk/workspace/models.py +7 -1
  33. openhands/sdk/workspace/remote/async_remote_workspace.py +22 -1
  34. openhands/sdk/workspace/remote/base.py +13 -0
  35. {openhands_sdk-1.3.0.dist-info → openhands_sdk-1.4.1.dist-info}/METADATA +2 -2
  36. {openhands_sdk-1.3.0.dist-info → openhands_sdk-1.4.1.dist-info}/RECORD +38 -37
  37. {openhands_sdk-1.3.0.dist-info → openhands_sdk-1.4.1.dist-info}/WHEEL +0 -0
  38. {openhands_sdk-1.3.0.dist-info → openhands_sdk-1.4.1.dist-info}/top_level.txt +0 -0
openhands/sdk/conversation/conversation_stats.py CHANGED
@@ -1,4 +1,6 @@
-from pydantic import BaseModel, Field, PrivateAttr
+from typing import Any
+
+from pydantic import BaseModel, Field, PrivateAttr, model_serializer
 
 from openhands.sdk.llm.llm_registry import RegistryEvent
 from openhands.sdk.llm.utils.metrics import Metrics
@@ -18,6 +20,41 @@ class ConversationStats(BaseModel):
 
     _restored_usage_ids: set[str] = PrivateAttr(default_factory=set)
 
+    @model_serializer(mode="wrap")
+    def _serialize_with_context(self, serializer: Any, info: Any) -> dict[str, Any]:
+        """Serialize metrics based on context.
+
+        By default, preserves full metrics history including costs,
+        response_latencies, and token_usages lists for persistence.
+
+        When context={'use_snapshot': True} is passed, converts Metrics to
+        MetricsSnapshot format to minimize payload size for network transmission.
+
+        Args:
+            serializer: Pydantic's default serializer
+            info: Serialization info containing context
+
+        Returns:
+            Dictionary with metrics serialized based on context
+        """
+        # Get the default serialization
+        data = serializer(self)
+
+        # Check if we should use snapshot serialization
+        context = info.context if info else None
+        use_snapshot = context.get("use_snapshot", False) if context else False
+
+        if use_snapshot and "usage_to_metrics" in data:
+            # Replace each Metrics with its snapshot
+            usage_to_snapshots = {}
+            for usage_id, metrics in self.usage_to_metrics.items():
+                snapshot = metrics.get_snapshot()
+                usage_to_snapshots[usage_id] = snapshot.model_dump()
+
+            data["usage_to_metrics"] = usage_to_snapshots
+
+        return data
+
     def get_combined_metrics(self) -> Metrics:
         total_metrics = Metrics()
         for metrics in self.usage_to_metrics.values():
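Note: the wrap-mode serializer above is what lets the same model emit either a full payload (for persistence) or a compact one (for the wire). For readers unfamiliar with the pattern, here is a small standalone Pydantic sketch of the same idea; the `Stats` model and its `values` field are illustrative, not part of the SDK, and `model_dump(context=...)` assumes Pydantic ≥ 2.7.

```python
from typing import Any

from pydantic import BaseModel, model_serializer


class Stats(BaseModel):
    # Full history kept by default; collapsed when a snapshot is requested.
    values: list[int] = []

    @model_serializer(mode="wrap")
    def _serialize(self, serializer: Any, info: Any) -> dict[str, Any]:
        data = serializer(self)  # default serialization first
        context = info.context if info else None
        if context and context.get("use_snapshot", False):
            # Replace the full list with a compact summary
            data["values"] = {"count": len(self.values), "total": sum(self.values)}
        return data


stats = Stats(values=[1, 2, 3])
print(stats.model_dump())                                # full history
print(stats.model_dump(context={"use_snapshot": True}))  # compact snapshot
```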
openhands/sdk/conversation/fifo_lock.py CHANGED
@@ -50,7 +50,6 @@ class FIFOLock:
         Returns:
             True if lock was acquired, False otherwise.
         """
-        me = threading.Condition(self._mutex)
         ident = threading.get_ident()
         start = time.monotonic()
 
@@ -60,21 +59,27 @@ class FIFOLock:
                 self._count += 1
                 return True
 
+            if self._owner is None and not self._waiters:
+                self._owner = ident
+                self._count = 1
+                return True
+
+            if not blocking:
+                # Give up immediately
+                return False
+
             # Add to wait queue
+            me = threading.Condition(self._mutex)
             self._waiters.append(me)
 
             while True:
                 # If I'm at the front of the queue and nobody owns it → acquire
                 if self._waiters[0] is me and self._owner is None:
+                    self._waiters.popleft()
                     self._owner = ident
                     self._count = 1
                     return True
 
-                if not blocking:
-                    # Give up immediately
-                    self._waiters.remove(me)
-                    return False
-
                 if timeout >= 0:
                     remaining = timeout - (time.monotonic() - start)
                     if remaining <= 0:
@@ -95,11 +100,12 @@ class FIFOLock:
         with self._mutex:
             if self._owner != ident:
                 raise RuntimeError("Cannot release lock not owned by current thread")
-
+            assert self._count >= 1, (
+                "When releasing the resource, the count must be >= 1"
+            )
             self._count -= 1
             if self._count == 0:
                 self._owner = None
-                self._waiters.popleft()
                 if self._waiters:
                     self._waiters[0].notify()
 
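Taken together, these hunks move queue bookkeeping to the acquiring side: the winning waiter pops itself from the queue, an uncontended acquire takes a fast path, and a non-blocking attempt returns before ever enqueueing. A minimal usage sketch follows; the no-argument constructor and the exact `acquire()` signature are assumptions not shown in this diff.

```python
import threading

from openhands.sdk.conversation.fifo_lock import FIFOLock

lock = FIFOLock()  # assumed no-arg constructor


def worker(name: str) -> None:
    # Waiters are served in arrival order; a failed non-blocking attempt
    # no longer briefly joins the wait queue.
    if lock.acquire(blocking=True, timeout=5.0):
        try:
            print(f"{name} acquired the lock")
        finally:
            lock.release()


threads = [threading.Thread(target=worker, args=(f"t{i}",)) for i in range(3)]
for t in threads:
    t.start()
for t in threads:
    t.join()
```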
openhands/sdk/conversation/impl/local_conversation.py CHANGED
@@ -4,7 +4,6 @@ from collections.abc import Mapping
 from pathlib import Path
 
 from openhands.sdk.agent.base import AgentBase
-from openhands.sdk.agent.utils import make_llm_completion, prepare_llm_messages
 from openhands.sdk.context.prompts.prompt import render_template
 from openhands.sdk.conversation.base import BaseConversation
 from openhands.sdk.conversation.exceptions import ConversationRunError
@@ -15,7 +14,11 @@ from openhands.sdk.conversation.state import (
 )
 from openhands.sdk.conversation.stuck_detector import StuckDetector
 from openhands.sdk.conversation.title_utils import generate_conversation_title
-from openhands.sdk.conversation.types import ConversationCallbackType, ConversationID
+from openhands.sdk.conversation.types import (
+    ConversationCallbackType,
+    ConversationID,
+    ConversationTokenCallbackType,
+)
 from openhands.sdk.conversation.visualizer import (
     ConversationVisualizerBase,
     DefaultConversationVisualizer,
@@ -46,6 +49,7 @@ class LocalConversation(BaseConversation):
    _state: ConversationState
    _visualizer: ConversationVisualizerBase | None
    _on_event: ConversationCallbackType
+   _on_token: ConversationTokenCallbackType | None
    max_iteration_per_run: int
    _stuck_detector: StuckDetector | None
    llm_registry: LLMRegistry
@@ -58,6 +62,7 @@ class LocalConversation(BaseConversation):
         persistence_dir: str | Path | None = None,
         conversation_id: ConversationID | None = None,
         callbacks: list[ConversationCallbackType] | None = None,
+        token_callbacks: list[ConversationTokenCallbackType] | None = None,
         max_iteration_per_run: int = 500,
         stuck_detection: bool = True,
         visualizer: (
@@ -78,6 +83,7 @@ class LocalConversation(BaseConversation):
                 be used to identify the conversation. The user might want to
                 suffix their persistent filestore with this ID.
             callbacks: Optional list of callback functions to handle events
+            token_callbacks: Optional list of callbacks invoked for streaming deltas
             max_iteration_per_run: Maximum number of iterations per run
             visualizer: Visualization configuration. Can be:
                 - ConversationVisualizerBase subclass: Class to instantiate
@@ -143,6 +149,12 @@ class LocalConversation(BaseConversation):
             self._visualizer = None
 
         self._on_event = BaseConversation.compose_callbacks(composed_list)
+        self._on_token = (
+            BaseConversation.compose_callbacks(token_callbacks)
+            if token_callbacks
+            else None
+        )
+
         self.max_iteration_per_run = max_iteration_per_run
 
         # Initialize stuck detector
@@ -305,8 +317,9 @@ class LocalConversation(BaseConversation):
                     ConversationExecutionStatus.RUNNING
                 )
 
-                # step must mutate the SAME state object
-                self.agent.step(self, on_event=self._on_event)
+                self.agent.step(
+                    self, on_event=self._on_event, on_token=self._on_token
+                )
                 iteration += 1
 
                 # Check for non-finished terminal conditions
@@ -436,7 +449,7 @@ class LocalConversation(BaseConversation):
                 executable_tool = tool.as_executable()
                 executable_tool.executor.close()
             except NotImplementedError:
-                # Tool has no executor, skip it
+                # Tool has no executor, skip it without erroring
                 continue
             except Exception as e:
                 logger.warning(f"Error closing executor for tool '{tool.name}': {e}")
@@ -456,6 +469,9 @@ class LocalConversation(BaseConversation):
         Returns:
             A string response from the agent
         """
+        # Import here to avoid circular imports
+        from openhands.sdk.agent.utils import make_llm_completion, prepare_llm_messages
+
         template_dir = (
             Path(__file__).parent.parent.parent / "context" / "prompts" / "templates"
         )
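The new `token_callbacks` parameter takes the same callable shape as the LLM-level streaming hook. A hedged sketch of a callback that prints streamed text deltas; the chunk attribute access assumes LiteLLM's `ModelResponseStream` layout, and the remaining `LocalConversation` constructor arguments are omitted.

```python
from openhands.sdk.conversation.types import ConversationTokenCallbackType
from openhands.sdk.llm import LLMStreamChunk


def print_deltas(chunk: LLMStreamChunk) -> None:
    # LLMStreamChunk is litellm's ModelResponseStream; print any text delta.
    if chunk.choices and chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="", flush=True)


on_token: ConversationTokenCallbackType = print_deltas
# Wired up as LocalConversation(..., token_callbacks=[print_deltas]) per this diff;
# agent, workspace, and the other constructor arguments are omitted here.
```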
openhands/sdk/conversation/secret_source.py CHANGED
@@ -45,7 +45,7 @@ class LookupSecret(SecretSource):
     headers: dict[str, str] = Field(default_factory=dict)
 
     def get_value(self):
-        response = httpx.get(self.url, headers=self.headers)
+        response = httpx.get(self.url, headers=self.headers, timeout=30.0)
         response.raise_for_status()
         return response.text
 
openhands/sdk/conversation/state.py CHANGED
@@ -2,6 +2,7 @@
 import json
 from collections.abc import Sequence
 from enum import Enum
+from pathlib import Path
 from typing import Any, Self
 
 from pydantic import AliasChoices, Field, PrivateAttr
@@ -124,6 +125,13 @@ class ConversationState(OpenHandsModel):
     def events(self) -> EventLog:
         return self._events
 
+    @property
+    def env_observation_persistence_dir(self) -> str | None:
+        """Directory for persisting environment observation files."""
+        if self.persistence_dir is None:
+            return None
+        return str(Path(self.persistence_dir) / "observations")
+
     def set_on_state_change(self, callback: ConversationCallbackType | None) -> None:
         """Set a callback to be called when state changes.
 
openhands/sdk/conversation/types.py CHANGED
@@ -2,9 +2,14 @@ import uuid
 from collections.abc import Callable
 
 from openhands.sdk.event.base import Event
+from openhands.sdk.llm.streaming import TokenCallbackType
 
 
 ConversationCallbackType = Callable[[Event], None]
+"""Type alias for event callback functions."""
+
+ConversationTokenCallbackType = TokenCallbackType
+"""Callback type invoked for streaming LLM deltas."""
 
 ConversationID = uuid.UUID
 """Type alias for conversation IDs."""
openhands/sdk/event/conversation_state.py CHANGED
@@ -49,6 +49,14 @@ class ConversationStateUpdateEvent(Event):
 
     @field_validator("value")
     def validate_value(cls, value, info):
+        # Prevent circular import
+        from openhands.sdk.conversation.conversation_stats import ConversationStats
+
+        # For ConversationStats, use snapshot serialization to avoid
+        # sending lengthy lists over WebSocket
+        if isinstance(value, ConversationStats):
+            return value.model_dump(mode="json", context={"use_snapshot": True})
+
         key = info.data.get("key")
         if key is None:
             # Allow value without key for flexibility
openhands/sdk/llm/__init__.py CHANGED
@@ -12,6 +12,7 @@ from openhands.sdk.llm.message import (
     content_to_str,
 )
 from openhands.sdk.llm.router import RouterLLM
+from openhands.sdk.llm.streaming import LLMStreamChunk, TokenCallbackType
 from openhands.sdk.llm.utils.metrics import Metrics, MetricsSnapshot
 from openhands.sdk.llm.utils.unverified_models import (
     UNVERIFIED_MODELS_EXCLUDING_BEDROCK,
@@ -34,6 +35,8 @@ __all__ = [
    "RedactedThinkingBlock",
    "ReasoningItemModel",
    "content_to_str",
+   "LLMStreamChunk",
+   "TokenCallbackType",
    "Metrics",
    "MetricsSnapshot",
    "VERIFIED_MODELS",
openhands/sdk/llm/llm.py CHANGED
@@ -10,7 +10,6 @@ from typing import TYPE_CHECKING, Any, ClassVar, Literal, get_args, get_origin
 
 import httpx  # noqa: F401
 from pydantic import (
-    AliasChoices,
     BaseModel,
     ConfigDict,
     Field,
@@ -40,6 +39,7 @@ from typing import cast
 
 from litellm import (
     ChatCompletionToolParam,
+    CustomStreamWrapper,
    ResponseInputParam,
    completion as litellm_completion,
 )
@@ -72,6 +72,9 @@ from openhands.sdk.llm.message import (
 from openhands.sdk.llm.mixins.non_native_fc import NonNativeToolCallingMixin
 from openhands.sdk.llm.options.chat_options import select_chat_options
 from openhands.sdk.llm.options.responses_options import select_responses_options
+from openhands.sdk.llm.streaming import (
+    TokenCallbackType,
+)
 from openhands.sdk.llm.utils.metrics import Metrics, MetricsSnapshot
 from openhands.sdk.llm.utils.model_features import get_default_temperature, get_features
 from openhands.sdk.llm.utils.retry_mixin import RetryMixin
@@ -168,6 +171,19 @@ class LLM(BaseModel, RetryMixin, NonNativeToolCallingMixin):
         ge=1,
         description="The maximum number of output tokens. This is sent to the LLM.",
     )
+    model_canonical_name: str | None = Field(
+        default=None,
+        description=(
+            "Optional canonical model name for feature registry lookups. "
+            "The OpenHands SDK maintains a model feature registry that "
+            "maps model names to capabilities (e.g., vision support, "
+            "prompt caching, responses API support). When using proxied or "
+            "aliased model identifiers, set this field to the canonical "
+            "model name (e.g., 'openai/gpt-4o') to ensure correct "
+            "capability detection. If not provided, the 'model' field "
+            "will be used for capability lookups."
+        ),
+    )
     extra_headers: dict[str, str] | None = Field(
         default=None,
         description="Optional HTTP headers to forward to LiteLLM requests.",
@@ -184,6 +200,14 @@ class LLM(BaseModel, RetryMixin, NonNativeToolCallingMixin):
     )
     ollama_base_url: str | None = Field(default=None)
 
+    stream: bool = Field(
+        default=False,
+        description=(
+            "Enable streaming responses from the LLM. "
+            "When enabled, the provided `on_token` callback in .completions "
+            "and .responses will be invoked for each chunk of tokens."
+        ),
+    )
     drop_params: bool = Field(default=True)
     modify_params: bool = Field(
         default=True,
@@ -240,6 +264,14 @@ class LLM(BaseModel, RetryMixin, NonNativeToolCallingMixin):
         description="If True, ask for ['reasoning.encrypted_content'] "
         "in Responses API include.",
     )
+    # Prompt cache retention only applies to GPT-5+ models; filtered in chat options
+    prompt_cache_retention: str | None = Field(
+        default="24h",
+        description=(
+            "Retention policy for prompt cache. Only sent for GPT-5+ models; "
+            "explicitly stripped for all other models."
+        ),
+    )
     extended_thinking_budget: int | None = Field(
         default=200_000,
         description="The budget tokens for extended thinking, "
@@ -256,7 +288,6 @@ class LLM(BaseModel, RetryMixin, NonNativeToolCallingMixin):
     )
     usage_id: str = Field(
         default="default",
-        validation_alias=AliasChoices("usage_id", "service_id"),
         serialization_alias="usage_id",
         description=(
             "Unique usage identifier for the LLM. Used for registry lookups, "
@@ -338,7 +369,8 @@ class LLM(BaseModel, RetryMixin, NonNativeToolCallingMixin):
         if model_val.startswith("openhands/"):
             model_name = model_val.removeprefix("openhands/")
             d["model"] = f"litellm_proxy/{model_name}"
-            d["base_url"] = "https://llm-proxy.app.all-hands.dev/"
+            # Set base_url (default to the app proxy when base_url is unset)
+            d["base_url"] = d.get("base_url", "https://llm-proxy.app.all-hands.dev/")
 
         # HF doesn't support the OpenAI default value for top_p (1)
         if model_val.startswith("huggingface"):
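One behavioral consequence of the `base_url` hunk above: an explicitly configured `base_url` now survives the `openhands/` prefix rewrite instead of being overwritten with the public proxy URL. A hedged sketch; the model name and proxy URL are illustrative, not taken from this diff.

```python
from openhands.sdk.llm import LLM

# The "openhands/claude-sonnet-4" alias is still rewritten to
# "litellm_proxy/claude-sonnet-4", but the user-supplied base_url is kept
# rather than being replaced by https://llm-proxy.app.all-hands.dev/ as in 1.3.0.
llm = LLM(
    model="openhands/claude-sonnet-4",
    base_url="https://my-litellm-proxy.example.com/",  # hypothetical internal proxy
)
print(llm.model, llm.base_url)
```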
@@ -447,6 +479,7 @@
         tools: Sequence[ToolDefinition] | None = None,
         _return_metrics: bool = False,
         add_security_risk_prediction: bool = False,
+        on_token: TokenCallbackType | None = None,
         **kwargs,
     ) -> LLMResponse:
         """Generate a completion from the language model.
@@ -466,9 +499,11 @@
             >>> response = llm.completion(messages)
             >>> print(response.content)
         """
-        # Check if streaming is requested
-        if kwargs.get("stream", False):
-            raise ValueError("Streaming is not supported")
+        enable_streaming = bool(kwargs.get("stream", False)) or self.stream
+        if enable_streaming:
+            if on_token is None:
+                raise ValueError("Streaming requires an on_token callback")
+            kwargs["stream"] = True
 
         # 1) serialize messages
         formatted_messages = self.format_messages_for_llm(messages)
@@ -531,7 +566,12 @@
             self._telemetry.on_request(log_ctx=log_ctx)
             # Merge retry-modified kwargs (like temperature) with call_kwargs
             final_kwargs = {**call_kwargs, **retry_kwargs}
-            resp = self._transport_call(messages=formatted_messages, **final_kwargs)
+            resp = self._transport_call(
+                messages=formatted_messages,
+                **final_kwargs,
+                enable_streaming=enable_streaming,
+                on_token=on_token,
+            )
             raw_resp: ModelResponse | None = None
             if use_mock_tools:
                 raw_resp = copy.deepcopy(resp)
@@ -588,15 +628,15 @@
         store: bool | None = None,
         _return_metrics: bool = False,
         add_security_risk_prediction: bool = False,
+        on_token: TokenCallbackType | None = None,
         **kwargs,
     ) -> LLMResponse:
         """Alternative invocation path using OpenAI Responses API via LiteLLM.
 
         Maps Message[] -> (instructions, input[]) and returns LLMResponse.
-        Non-stream only for v1.
         """
         # Streaming not yet supported
-        if kwargs.get("stream", False):
+        if kwargs.get("stream", False) or self.stream or on_token is not None:
             raise ValueError("Streaming is not supported for Responses API yet")
 
         # Build instructions + input list using dedicated Responses formatter
@@ -707,7 +747,12 @@
     # Transport + helpers
     # =========================================================================
     def _transport_call(
-        self, *, messages: list[dict[str, Any]], **kwargs
+        self,
+        *,
+        messages: list[dict[str, Any]],
+        enable_streaming: bool = False,
+        on_token: TokenCallbackType | None = None,
+        **kwargs,
     ) -> ModelResponse:
         # litellm.modify_params is GLOBAL; guard it for thread-safety
         with self._litellm_modify_params_ctx(self.modify_params):
@@ -729,6 +774,11 @@
                     "ignore",
                     category=UserWarning,
                 )
+                warnings.filterwarnings(
+                    "ignore",
+                    category=DeprecationWarning,
+                    message="Accessing the 'model_fields' attribute.*",
+                )
                 # Extract api_key value with type assertion for type checker
                 api_key_value: str | None = None
                 if self.api_key:
@@ -747,6 +797,14 @@
                     messages=messages,
                     **kwargs,
                 )
+                if enable_streaming and on_token is not None:
+                    assert isinstance(ret, CustomStreamWrapper)
+                    chunks = []
+                    for chunk in ret:
+                        on_token(chunk)
+                        chunks.append(chunk)
+                    ret = litellm.stream_chunk_builder(chunks, messages=messages)
+
                 assert isinstance(ret, ModelResponse), (
                     f"Expected ModelResponse, got {type(ret)}"
                 )
@@ -764,11 +822,15 @@
     # =========================================================================
     # Capabilities, formatting, and info
     # =========================================================================
+    def _model_name_for_capabilities(self) -> str:
+        """Return canonical name for capability lookups (e.g., vision support)."""
+        return self.model_canonical_name or self.model
+
     def _init_model_info_and_caps(self) -> None:
         self._model_info = get_litellm_model_info(
             secret_api_key=self.api_key,
             base_url=self.base_url,
-            model=self.model,
+            model=self._model_name_for_capabilities(),
         )
 
         # Context window and max_output_tokens
@@ -828,9 +890,10 @@
         # we can go with it, but we will need to keep an eye if model_info is correct for Vertex or other providers  # noqa: E501
         # remove when litellm is updated to fix https://github.com/BerriAI/litellm/issues/5608  # noqa: E501
         # Check both the full model name and the name after proxy prefix for vision support  # noqa: E501
+        model_for_caps = self._model_name_for_capabilities()
         return (
-            supports_vision(self.model)
-            or supports_vision(self.model.split("/")[-1])
+            supports_vision(model_for_caps)
+            or supports_vision(model_for_caps.split("/")[-1])
             or (
                 self._model_info is not None
                 and self._model_info.get("supports_vision", False)
@@ -849,13 +912,16 @@
             return False
         # We don't need to look-up model_info, because
         # only Anthropic models need explicit caching breakpoints
-        return self.caching_prompt and get_features(self.model).supports_prompt_cache
+        return (
+            self.caching_prompt
+            and get_features(self._model_name_for_capabilities()).supports_prompt_cache
+        )
 
     def uses_responses_api(self) -> bool:
         """Whether this model uses the OpenAI Responses API path."""
 
         # by default, uses = supports
-        return get_features(self.model).supports_responses_api
+        return get_features(self._model_name_for_capabilities()).supports_responses_api
 
     @property
     def model_info(self) -> dict | None:
@@ -892,7 +958,7 @@
         message.cache_enabled = self.is_caching_prompt_active()
         message.vision_enabled = self.vision_is_active()
         message.function_calling_enabled = self.native_tool_calling
-        model_features = get_features(self.model)
+        model_features = get_features(self._model_name_for_capabilities())
         message.force_string_serializer = (
             self.force_string_serializer
             if self.force_string_serializer is not None
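Putting the streaming pieces together: enabling `stream` (either on the `LLM` config or per call) now requires an `on_token` callback, and `_transport_call` replays the chunks to the callback before reassembling them into a regular `ModelResponse` via `litellm.stream_chunk_builder`. A hedged usage sketch; the model name is illustrative, and `Message`/`TextContent` are assumed to still be exported from `openhands.sdk.llm` as in 1.3.x.

```python
from openhands.sdk.llm import LLM, LLMStreamChunk, Message, TextContent


def on_token(chunk: LLMStreamChunk) -> None:
    # Called once per streamed chunk while the full response is still being built.
    if chunk.choices and chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="", flush=True)


llm = LLM(model="litellm_proxy/anthropic/claude-sonnet-4", stream=True)  # illustrative
response = llm.completion(
    [Message(role="user", content=[TextContent(text="Say hello in one sentence.")])],
    on_token=on_token,
)
print()
print(response)
```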
openhands/sdk/llm/llm_registry.py CHANGED
@@ -82,7 +82,7 @@ class LLMRegistry:
         if usage_id in self._usage_to_llm:
             message = (
                 f"Usage ID '{usage_id}' already exists in registry. "
-                "Use a different usage_id on the LLM (previously service_id) or "
+                "Use a different usage_id on the LLM or "
                 "call get() to retrieve the existing LLM."
             )
             raise ValueError(message)
openhands/sdk/llm/options/chat_options.py CHANGED
@@ -4,7 +4,6 @@ from typing import Any
 
 from openhands.sdk.llm.options.common import apply_defaults_if_absent
 from openhands.sdk.llm.utils.model_features import get_features
-from openhands.sdk.utils.deprecation import warn_cleanup
 
 
 def select_chat_options(
@@ -35,28 +34,10 @@ def select_chat_options(
 
     # Reasoning-model quirks
     if get_features(llm.model).supports_reasoning_effort:
-        # Claude models use different parameter format
-        if "claude-opus-4-5" in llm.model.lower():
-            warn_cleanup(
-                "Claude Opus 4.5 effort parameter workaround",
-                cleanup_by="1.4.0",
-                details=(
-                    "LiteLLM does not yet redirect reasoning_effort to "
-                    "output_config.effort for Claude Opus 4.5. Remove this workaround "
-                    "once LiteLLM adds native support."
-                ),
-            )
-            # Claude uses output_config.effort instead of reasoning_effort
-            if llm.reasoning_effort is not None:
-                out["output_config"] = {"effort": llm.reasoning_effort}
-            # Claude requires beta header for effort parameter
-            if "extra_headers" not in out:
-                out["extra_headers"] = {}
-            out["extra_headers"]["anthropic-beta"] = "effort-2025-11-24"
-        else:
-            # OpenAI/other models use reasoning_effort parameter
-            if llm.reasoning_effort is not None:
-                out["reasoning_effort"] = llm.reasoning_effort
+        # LiteLLM automatically handles reasoning_effort for all models, including
+        # Claude Opus 4.5 (maps to output_config and adds beta header automatically)
+        if llm.reasoning_effort is not None:
+            out["reasoning_effort"] = llm.reasoning_effort
 
         # All reasoning models ignore temp/top_p
         out.pop("temperature", None)
@@ -98,7 +79,14 @@
     out.pop("tools", None)
     out.pop("tool_choice", None)
 
-    # Always forward extra_body if provided; let the LLM provider validate
+    # Send prompt_cache_retention only if model supports it
+    if (
+        get_features(llm.model).supports_prompt_cache_retention
+        and llm.prompt_cache_retention
+    ):
+        out["prompt_cache_retention"] = llm.prompt_cache_retention
+
+    # Pass through user-provided extra_body unchanged
     if llm.litellm_extra_body:
         out["extra_body"] = llm.litellm_extra_body
 
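Because `prompt_cache_retention` now defaults to `"24h"` on the `LLM` config and is only forwarded when the feature registry reports `supports_prompt_cache_retention`, opting out is just a matter of clearing the field. A hedged sketch with illustrative model names:

```python
from openhands.sdk.llm import LLM

# Keep the default 24h retention for a supporting model...
cached = LLM(model="openai/gpt-5")
# ...or clear it so select_chat_options() never sends the parameter at all.
uncached = LLM(model="openai/gpt-5", prompt_cache_retention=None)
```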
openhands/sdk/llm/options/responses_options.py CHANGED
@@ -3,6 +3,7 @@ from __future__ import annotations
 from typing import Any
 
 from openhands.sdk.llm.options.common import apply_defaults_if_absent
+from openhands.sdk.llm.utils.model_features import get_features
 
 
 def select_responses_options(
@@ -50,7 +51,14 @@
     if llm.reasoning_summary:
         out["reasoning"]["summary"] = llm.reasoning_summary
 
-    # Always forward extra_body if provided; let the LLM provider validate
+    # Send prompt_cache_retention only if model supports it
+    if (
+        get_features(llm.model).supports_prompt_cache_retention
+        and llm.prompt_cache_retention
+    ):
+        out["prompt_cache_retention"] = llm.prompt_cache_retention
+
+    # Pass through user-provided extra_body unchanged
     if llm.litellm_extra_body:
         out["extra_body"] = llm.litellm_extra_body
 
openhands/sdk/llm/router/base.py CHANGED
@@ -10,6 +10,7 @@ from pydantic import (
 from openhands.sdk.llm.llm import LLM
 from openhands.sdk.llm.llm_response import LLMResponse
 from openhands.sdk.llm.message import Message
+from openhands.sdk.llm.streaming import TokenCallbackType
 from openhands.sdk.logger import get_logger
 from openhands.sdk.tool.tool import ToolDefinition
 
@@ -52,6 +53,7 @@ class RouterLLM(LLM):
         tools: Sequence[ToolDefinition] | None = None,
         return_metrics: bool = False,
         add_security_risk_prediction: bool = False,
+        on_token: TokenCallbackType | None = None,
         **kwargs,
     ) -> LLMResponse:
         """
@@ -70,6 +72,7 @@
             tools=tools,
             _return_metrics=return_metrics,
             add_security_risk_prediction=add_security_risk_prediction,
+            on_token=on_token,
             **kwargs,
         )
 
openhands/sdk/llm/streaming.py ADDED
@@ -0,0 +1,9 @@
+from collections.abc import Callable
+
+from litellm.types.utils import ModelResponseStream
+
+
+# Type alias for stream chunks
+LLMStreamChunk = ModelResponseStream
+
+TokenCallbackType = Callable[[LLMStreamChunk], None]