openhands-sdk 1.10.0__py3-none-any.whl → 1.11.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. openhands/sdk/agent/agent.py +60 -27
  2. openhands/sdk/agent/base.py +1 -1
  3. openhands/sdk/context/condenser/base.py +36 -3
  4. openhands/sdk/context/condenser/llm_summarizing_condenser.py +65 -1
  5. openhands/sdk/context/prompts/templates/system_message_suffix.j2 +2 -1
  6. openhands/sdk/context/skills/skill.py +15 -30
  7. openhands/sdk/conversation/base.py +31 -0
  8. openhands/sdk/conversation/conversation.py +5 -0
  9. openhands/sdk/conversation/impl/local_conversation.py +63 -13
  10. openhands/sdk/conversation/impl/remote_conversation.py +128 -13
  11. openhands/sdk/conversation/state.py +19 -0
  12. openhands/sdk/conversation/stuck_detector.py +18 -9
  13. openhands/sdk/llm/__init__.py +16 -0
  14. openhands/sdk/llm/auth/__init__.py +28 -0
  15. openhands/sdk/llm/auth/credentials.py +157 -0
  16. openhands/sdk/llm/auth/openai.py +762 -0
  17. openhands/sdk/llm/llm.py +175 -20
  18. openhands/sdk/llm/message.py +21 -11
  19. openhands/sdk/llm/options/responses_options.py +8 -7
  20. openhands/sdk/llm/utils/model_features.py +2 -0
  21. openhands/sdk/llm/utils/verified_models.py +3 -0
  22. openhands/sdk/mcp/tool.py +27 -4
  23. openhands/sdk/secret/secrets.py +13 -1
  24. openhands/sdk/workspace/remote/base.py +8 -3
  25. openhands/sdk/workspace/remote/remote_workspace_mixin.py +40 -7
  26. {openhands_sdk-1.10.0.dist-info → openhands_sdk-1.11.1.dist-info}/METADATA +1 -1
  27. {openhands_sdk-1.10.0.dist-info → openhands_sdk-1.11.1.dist-info}/RECORD +29 -26
  28. {openhands_sdk-1.10.0.dist-info → openhands_sdk-1.11.1.dist-info}/WHEEL +0 -0
  29. {openhands_sdk-1.10.0.dist-info → openhands_sdk-1.11.1.dist-info}/top_level.txt +0 -0
openhands/sdk/llm/llm.py CHANGED
@@ -27,8 +27,11 @@ from openhands.sdk.utils.pydantic_secrets import serialize_secret, validate_secr
 
 
 if TYPE_CHECKING:  # type hints only, avoid runtime import cycle
+    from openhands.sdk.llm.auth import SupportedVendor
     from openhands.sdk.tool.tool import ToolDefinition
 
+from openhands.sdk.llm.auth.openai import transform_for_subscription
+
 
 with warnings.catch_warnings():
     warnings.simplefilter("ignore")
@@ -50,8 +53,20 @@ from litellm.exceptions import (
     Timeout as LiteLLMTimeout,
 )
 from litellm.responses.main import responses as litellm_responses
-from litellm.types.llms.openai import ResponsesAPIResponse
-from litellm.types.utils import ModelResponse
+from litellm.responses.streaming_iterator import SyncResponsesAPIStreamingIterator
+from litellm.types.llms.openai import (
+    OutputTextDeltaEvent,
+    ReasoningSummaryTextDeltaEvent,
+    RefusalDeltaEvent,
+    ResponseCompletedEvent,
+    ResponsesAPIResponse,
+)
+from litellm.types.utils import (
+    Delta,
+    ModelResponse,
+    ModelResponseStream,
+    StreamingChoices,
+)
 from litellm.utils import (
     create_pretrained_tokenizer,
     supports_vision,
@@ -335,6 +350,7 @@ class LLM(BaseModel, RetryMixin, NonNativeToolCallingMixin):
     _model_info: Any = PrivateAttr(default=None)
     _tokenizer: Any = PrivateAttr(default=None)
     _telemetry: Telemetry | None = PrivateAttr(default=None)
+    _is_subscription: bool = PrivateAttr(default=False)
 
     model_config: ClassVar[ConfigDict] = ConfigDict(
         extra="ignore", arbitrary_types_allowed=True
@@ -499,6 +515,19 @@ class LLM(BaseModel, RetryMixin, NonNativeToolCallingMixin):
         )
         return self._telemetry
 
+    @property
+    def is_subscription(self) -> bool:
+        """Check if this LLM uses subscription-based authentication.
+
+        Returns True when the LLM was created via `LLM.subscription_login()`,
+        which uses the ChatGPT subscription Codex backend rather than the
+        standard OpenAI API.
+
+        Returns:
+            bool: True if using subscription-based transport, False otherwise.
+        """
+        return self._is_subscription
+
     def restore_metrics(self, metrics: Metrics) -> None:
         # Only used by ConversationStats to seed metrics
         self._metrics = metrics
@@ -662,7 +691,7 @@ class LLM(BaseModel, RetryMixin, NonNativeToolCallingMixin):
             raise
 
     # =========================================================================
-    # Responses API (non-stream, v1)
+    # Responses API (v1)
     # =========================================================================
     def responses(
         self,
@@ -686,16 +715,19 @@ class LLM(BaseModel, RetryMixin, NonNativeToolCallingMixin):
             store: Whether to store the conversation
             _return_metrics: Whether to return usage metrics
             add_security_risk_prediction: Add security_risk field to tool schemas
-            on_token: Optional callback for streaming tokens (not yet supported)
+            on_token: Optional callback for streaming deltas
             **kwargs: Additional arguments passed to the API
 
         Note:
             Summary field is always added to tool schemas for transparency and
             explainability of agent actions.
         """
-        # Streaming not yet supported
-        if kwargs.get("stream", False) or self.stream or on_token is not None:
-            raise ValueError("Streaming is not supported for Responses API yet")
+        user_enable_streaming = bool(kwargs.get("stream", False)) or self.stream
+        if user_enable_streaming:
+            if on_token is None and not self.is_subscription:
+                # We allow on_token to be None for subscription mode
+                raise ValueError("Streaming requires an on_token callback")
+            kwargs["stream"] = True
 
         # Build instructions + input list using dedicated Responses formatter
         instructions, input_items = self.format_messages_for_responses(messages)
@@ -771,12 +803,67 @@ class LLM(BaseModel, RetryMixin, NonNativeToolCallingMixin):
                 seed=self.seed,
                 **final_kwargs,
             )
-            assert isinstance(ret, ResponsesAPIResponse), (
+            if isinstance(ret, ResponsesAPIResponse):
+                if user_enable_streaming:
+                    logger.warning(
+                        "Responses streaming was requested, but the provider "
+                        "returned a non-streaming response; no on_token deltas "
+                        "will be emitted."
+                    )
+                self._telemetry.on_response(ret)
+                return ret
+
+            # When stream=True, LiteLLM returns a streaming iterator rather than
+            # a single ResponsesAPIResponse. Drain the iterator and use the
+            # completed response.
+            if final_kwargs.get("stream", False):
+                if not isinstance(ret, SyncResponsesAPIStreamingIterator):
+                    raise AssertionError(
+                        f"Expected Responses stream iterator, got {type(ret)}"
+                    )
+
+                stream_callback = on_token if user_enable_streaming else None
+                for event in ret:
+                    if stream_callback is None:
+                        continue
+                    if isinstance(
+                        event,
+                        (
+                            OutputTextDeltaEvent,
+                            RefusalDeltaEvent,
+                            ReasoningSummaryTextDeltaEvent,
+                        ),
+                    ):
+                        delta = event.delta
+                        if delta:
+                            stream_callback(
+                                ModelResponseStream(
+                                    choices=[
+                                        StreamingChoices(
+                                            delta=Delta(content=delta)
+                                        )
+                                    ]
+                                )
+                            )
+
+                completed_event = ret.completed_response
+                if completed_event is None:
+                    raise LLMNoResponseError(
+                        "Responses stream finished without a completed response"
+                    )
+                if not isinstance(completed_event, ResponseCompletedEvent):
+                    raise LLMNoResponseError(
+                        f"Unexpected completed event: {type(completed_event)}"
+                    )
+
+                completed_resp = completed_event.response
+
+                self._telemetry.on_response(completed_resp)
+                return completed_resp
+
+            raise AssertionError(
                 f"Expected ResponsesAPIResponse, got {type(ret)}"
             )
-            # telemetry (latency, cost). Token usage mapping we handle after.
-            self._telemetry.on_response(ret)
-            return ret
 
         try:
             resp: ResponsesAPIResponse = _one_attempt()
@@ -1046,8 +1133,9 @@ class LLM(BaseModel, RetryMixin, NonNativeToolCallingMixin):
 
         - Skips prompt caching flags and string serializer concerns
         - Uses Message.to_responses_value to get either instructions (system)
-        or input items (others)
+          or input items (others)
         - Concatenates system instructions into a single instructions string
+        - For subscription mode, system prompts are prepended to user content
         """
         msgs = copy.deepcopy(messages)
 
@@ -1057,18 +1145,26 @@ class LLM(BaseModel, RetryMixin, NonNativeToolCallingMixin):
         # Assign system instructions as a string, collect input items
         instructions: str | None = None
         input_items: list[dict[str, Any]] = []
+        system_chunks: list[str] = []
+
         for m in msgs:
             val = m.to_responses_value(vision_enabled=vision_active)
             if isinstance(val, str):
                 s = val.strip()
-                if not s:
-                    continue
-                instructions = (
-                    s if instructions is None else f"{instructions}\n\n---\n\n{s}"
-                )
-            else:
-                if val:
-                    input_items.extend(val)
+                if s:
+                    if self.is_subscription:
+                        system_chunks.append(s)
+                    else:
+                        instructions = (
+                            s
+                            if instructions is None
+                            else f"{instructions}\n\n---\n\n{s}"
+                        )
+            elif val:
+                input_items.extend(val)
+
+        if self.is_subscription:
+            return transform_for_subscription(system_chunks, input_items)
         return instructions, input_items
 
     def get_token_count(self, messages: list[Message]) -> int:
@@ -1159,3 +1255,62 @@ class LLM(BaseModel, RetryMixin, NonNativeToolCallingMixin):
             if v is not None:
                 data[field_name] = v
         return cls(**data)
+
+    @classmethod
+    def subscription_login(
+        cls,
+        vendor: SupportedVendor,
+        model: str,
+        force_login: bool = False,
+        open_browser: bool = True,
+        **llm_kwargs,
+    ) -> LLM:
+        """Authenticate with a subscription service and return an LLM instance.
+
+        This method provides subscription-based access to LLM models that are
+        available through chat subscriptions (e.g., ChatGPT Plus/Pro) rather
+        than API credits. It handles credential caching, token refresh, and
+        the OAuth login flow.
+
+        Currently supported vendors:
+        - "openai": ChatGPT Plus/Pro subscription for Codex models
+
+        Supported OpenAI models:
+        - gpt-5.1-codex-max
+        - gpt-5.1-codex-mini
+        - gpt-5.2
+        - gpt-5.2-codex
+
+        Args:
+            vendor: The vendor/provider. Currently only "openai" is supported.
+            model: The model to use. Must be supported by the vendor's
+                subscription service.
+            force_login: If True, always perform a fresh login even if valid
+                credentials exist.
+            open_browser: Whether to automatically open the browser for the
+                OAuth login flow.
+            **llm_kwargs: Additional arguments to pass to the LLM constructor.
+
+        Returns:
+            An LLM instance configured for subscription-based access.
+
+        Raises:
+            ValueError: If the vendor or model is not supported.
+            RuntimeError: If authentication fails.
+
+        Example:
+            >>> from openhands.sdk import LLM
+            >>> # First time: opens browser for OAuth login
+            >>> llm = LLM.subscription_login(vendor="openai", model="gpt-5.2-codex")
+            >>> # Subsequent calls: reuses cached credentials
+            >>> llm = LLM.subscription_login(vendor="openai", model="gpt-5.2-codex")
+        """
+        from openhands.sdk.llm.auth.openai import subscription_login
+
+        return subscription_login(
+            vendor=vendor,
+            model=model,
+            force_login=force_login,
+            open_browser=open_browser,
+            **llm_kwargs,
+        )
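
Taken together, the llm.py changes let a subscription-backed `LLM` drive the Responses API with optional streaming. Below is a minimal usage sketch, not a verbatim SDK snippet: it assumes `Message` and `TextContent` are exported from `openhands.sdk.llm`, that `stream=True` reaches `responses()` via `**kwargs` as shown above, and that the callback receives the `ModelResponseStream(choices=[StreamingChoices(delta=Delta(...))])` chunks emitted by the streaming loop.

```python
from openhands.sdk import LLM
from openhands.sdk.llm import Message, TextContent  # assumed export locations


def on_token(chunk) -> None:
    # chunk is a litellm ModelResponseStream carrying one text delta.
    delta = chunk.choices[0].delta.content
    if delta:
        print(delta, end="", flush=True)


# Opens the browser for OAuth the first time; reuses cached credentials after.
llm = LLM.subscription_login(vendor="openai", model="gpt-5.2-codex")
assert llm.is_subscription

# stream=True without on_token is only allowed in subscription mode;
# here a callback is passed so deltas are printed as they arrive.
result = llm.responses(
    messages=[Message(role="user", content=[TextContent(text="Say hello")])],
    stream=True,
    on_token=on_token,
)
```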
openhands/sdk/llm/message.py CHANGED
@@ -170,21 +170,12 @@ class TextContent(BaseContent):
     model_config: ClassVar[ConfigDict] = ConfigDict(
         extra="forbid", populate_by_name=True
     )
-    enable_truncation: bool = True
 
     def to_llm_dict(self) -> list[dict[str, str | dict[str, str]]]:
         """Convert to LLM API format."""
-        text = self.text
-        if self.enable_truncation and len(text) > DEFAULT_TEXT_CONTENT_LIMIT:
-            logger.warning(
-                f"TextContent text length ({len(text)}) exceeds limit "
-                f"({DEFAULT_TEXT_CONTENT_LIMIT}), truncating"
-            )
-            text = maybe_truncate(text, DEFAULT_TEXT_CONTENT_LIMIT)
-
         data: dict[str, str | dict[str, str]] = {
             "type": self.type,
-            "text": text,
+            "text": self.text,
         }
         if self.cache_prompt:
             data["cache_control"] = {"type": "ephemeral"}
@@ -342,6 +333,8 @@ class Message(BaseModel):
         content = "\n".join(
             item.text for item in self.content if isinstance(item, TextContent)
         )
+        if self.role == "tool":
+            content = self._maybe_truncate_tool_text(content)
         message_dict: dict[str, Any] = {"content": content, "role": self.role}
 
         # tool call keys are added in to_chat_dict to centralize behavior
@@ -366,6 +359,12 @@ class Message(BaseModel):
             # All content types now return list[dict[str, Any]]
             item_dicts = item.to_llm_dict()
 
+            if self.role == "tool" and item_dicts:
+                for d in item_dicts:
+                    text_val = d.get("text")
+                    if d.get("type") == "text" and isinstance(text_val, str):
+                        d["text"] = self._maybe_truncate_tool_text(text_val)
+
             # We have to remove cache_prompt for tool content and move it up to the
             # message level
             # See discussion here for details: https://github.com/BerriAI/litellm/issues/6422#issuecomment-2438765472
@@ -551,17 +550,28 @@ class Message(BaseModel):
             )
             for c in self.content:
                 if isinstance(c, TextContent):
+                    output_text = self._maybe_truncate_tool_text(c.text)
                     items.append(
                         {
                             "type": "function_call_output",
                             "call_id": resp_call_id,
-                            "output": c.text,
+                            "output": output_text,
                         }
                     )
             return items
 
         return items
 
+    def _maybe_truncate_tool_text(self, text: str) -> str:
+        if not text or len(text) <= DEFAULT_TEXT_CONTENT_LIMIT:
+            return text
+        logger.warning(
+            "Tool TextContent text length (%s) exceeds limit (%s), truncating",
+            len(text),
+            DEFAULT_TEXT_CONTENT_LIMIT,
+        )
+        return maybe_truncate(text, DEFAULT_TEXT_CONTENT_LIMIT)
+
     @classmethod
     def from_llm_chat_message(cls, message: LiteLLMMessage) -> "Message":
         """Convert a LiteLLMMessage (Chat Completions) to our Message class.
openhands/sdk/llm/options/responses_options.py CHANGED
@@ -15,15 +15,16 @@ def select_responses_options(
 ) -> dict[str, Any]:
     """Behavior-preserving extraction of _normalize_responses_kwargs."""
     # Apply defaults for keys that are not forced by policy
-    out = apply_defaults_if_absent(
-        user_kwargs,
-        {
-            "max_output_tokens": llm.max_output_tokens,
-        },
-    )
+    # Note: max_output_tokens is not supported in subscription mode
+    defaults = {}
+    if not llm.is_subscription:
+        defaults["max_output_tokens"] = llm.max_output_tokens
+    out = apply_defaults_if_absent(user_kwargs, defaults)
 
     # Enforce sampling/tool behavior for Responses path
-    out["temperature"] = 1.0
+    # Note: temperature is not supported in subscription mode
+    if not llm.is_subscription:
+        out["temperature"] = 1.0
     out["tool_choice"] = "auto"
 
     # If user didn't set extra_headers, propagate from llm config
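
In other words, subscription-backed models no longer receive `max_output_tokens` or a forced `temperature`, while API-key models keep the previous behavior and `tool_choice` stays forced to `"auto"` for both. A small illustrative function reproducing that policy (the real implementation reads these values off the `llm` object and goes through `apply_defaults_if_absent`):

```python
from typing import Any


def responses_kwargs_policy(
    user_kwargs: dict[str, Any],
    *,
    is_subscription: bool,
    max_output_tokens: int | None,
) -> dict[str, Any]:
    out = dict(user_kwargs)
    if not is_subscription:
        # API-key mode: default max_output_tokens if absent, force sampling.
        out.setdefault("max_output_tokens", max_output_tokens)
        out["temperature"] = 1.0
    # Subscription (Codex) mode sends neither parameter.
    out["tool_choice"] = "auto"
    return out


assert "temperature" not in responses_kwargs_policy(
    {}, is_subscription=True, max_output_tokens=4096
)
```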
openhands/sdk/llm/utils/model_features.py CHANGED
@@ -155,6 +155,7 @@ FORCE_STRING_SERIALIZER_MODELS: list[str] = [
 # in the message input
 SEND_REASONING_CONTENT_MODELS: list[str] = [
     "kimi-k2-thinking",
+    "kimi-k2.5",
     "openrouter/minimax-m2",  # MiniMax-M2 via OpenRouter (interleaved thinking)
     "deepseek/deepseek-reasoner",
 ]
@@ -181,6 +182,7 @@ def get_features(model: str) -> ModelFeatures:
 # Each entry: (pattern, default_temperature)
 DEFAULT_TEMPERATURE_MODELS: list[tuple[str, float]] = [
     ("kimi-k2-thinking", 1.0),
+    ("kimi-k2.5", 1.0),
 ]
 
 
openhands/sdk/llm/utils/verified_models.py CHANGED
@@ -1,5 +1,6 @@
 VERIFIED_OPENAI_MODELS = [
     "gpt-5.2",
+    "gpt-5.2-codex",
     "gpt-5.1",
     "gpt-5.1-codex-max",
     "gpt-5.1-codex",
@@ -46,12 +47,14 @@ VERIFIED_OPENHANDS_MODELS = [
     "claude-opus-4-5-20251101",
     "claude-sonnet-4-5-20250929",
     "gpt-5.2",
+    "gpt-5.2-codex",
     "gpt-5.1-codex-max",
     "gpt-5.1-codex",
     "gpt-5.1",
     "gemini-3-pro-preview",
     "deepseek-chat",
     "kimi-k2-thinking",
+    "kimi-k2.5",
     "devstral-medium-2512",
     "devstral-2512",
 ]
openhands/sdk/mcp/tool.py CHANGED
@@ -29,6 +29,9 @@ from openhands.sdk.utils.models import DiscriminatedUnionMixin
 
 logger = get_logger(__name__)
 
+# Default timeout for MCP tool execution in seconds
+MCP_TOOL_TIMEOUT_SECONDS = 300
+
 
 # NOTE: We don't define MCPToolAction because it
 # will be a pydantic BaseModel dynamically created from the MCP tool schema.
@@ -45,10 +48,17 @@ class MCPToolExecutor(ToolExecutor):
 
     tool_name: str
     client: MCPClient
+    timeout: float
 
-    def __init__(self, tool_name: str, client: MCPClient):
+    def __init__(
+        self,
+        tool_name: str,
+        client: MCPClient,
+        timeout: float = MCP_TOOL_TIMEOUT_SECONDS,
+    ):
         self.tool_name = tool_name
         self.client = client
+        self.timeout = timeout
 
     @observe(name="MCPToolExecutor.call_tool", span_type="TOOL")
     async def call_tool(self, action: MCPToolAction) -> MCPToolObservation:
@@ -83,9 +93,22 @@ class MCPToolExecutor(ToolExecutor):
         conversation: "LocalConversation | None" = None,  # noqa: ARG002
     ) -> MCPToolObservation:
         """Execute an MCP tool call."""
-        return self.client.call_async_from_sync(
-            self.call_tool, action=action, timeout=300
-        )
+        try:
+            return self.client.call_async_from_sync(
+                self.call_tool, action=action, timeout=self.timeout
+            )
+        except TimeoutError:
+            error_msg = (
+                f"MCP tool '{self.tool_name}' timed out after {self.timeout} seconds. "
+                "The tool server may be unresponsive or the operation is taking "
+                "too long. Consider retrying or using an alternative approach."
+            )
+            logger.error(error_msg)
+            return MCPToolObservation.from_text(
+                text=error_msg,
+                is_error=True,
+                tool_name=self.tool_name,
+            )
 
 
 _mcp_dynamic_action_type: dict[str, type[Schema]] = {}
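
Because the timeout is now an executor attribute with a module-level default, callers wrapping slow MCP servers can tune it per tool. A hedged sketch of that usage; it assumes an already-connected `MCPClient`, and the `openhands.sdk.mcp.client` import path is an assumption for illustration.

```python
from openhands.sdk.mcp.client import MCPClient  # assumed import path
from openhands.sdk.mcp.tool import MCP_TOOL_TIMEOUT_SECONDS, MCPToolExecutor


def build_executors(client: MCPClient) -> tuple[MCPToolExecutor, MCPToolExecutor]:
    # Default budget: MCP_TOOL_TIMEOUT_SECONDS (300 seconds per call).
    fetcher = MCPToolExecutor(tool_name="fetch_page", client=client)

    # A slow crawler gets a larger budget. If it still times out, __call__
    # now returns an is_error MCPToolObservation instead of raising.
    crawler = MCPToolExecutor(tool_name="crawl_site", client=client, timeout=900.0)
    return fetcher, crawler
```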
openhands/sdk/secret/secrets.py CHANGED
@@ -92,7 +92,19 @@ class LookupSecret(SecretSource):
         return result
 
 
-_SECRET_HEADERS = ["AUTHORIZATION", "KEY", "SECRET"]
+# Patterns used for substring matching against header names (case-insensitive).
+# Headers containing any of these patterns will be redacted during serialization.
+# Examples: X-Access-Token, Cookie, Authorization, X-API-Key, X-API-Secret
+_SECRET_HEADERS = [
+    "AUTHORIZATION",
+    "COOKIE",
+    "CREDENTIAL",
+    "KEY",
+    "PASSWORD",
+    "SECRET",
+    "SESSION",
+    "TOKEN",
+]
 
 
 def _is_secret_header(key: str):
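
The broadened list changes which headers get masked when secrets are serialized: matching is a case-insensitive substring test of the header name against these patterns. A small standalone sketch of that check, for illustration only (the real `_is_secret_header` sits right below the list):

```python
SECRET_HEADER_PATTERNS = [
    "AUTHORIZATION", "COOKIE", "CREDENTIAL", "KEY",
    "PASSWORD", "SECRET", "SESSION", "TOKEN",
]


def is_secret_header(name: str) -> bool:
    # Case-insensitive substring match, mirroring the SDK's behavior.
    upper = name.upper()
    return any(pattern in upper for pattern in SECRET_HEADER_PATTERNS)


# Newly redacted in 1.11.1 (only AUTHORIZATION/KEY/SECRET matched before):
assert is_secret_header("X-Access-Token")
assert is_secret_header("Cookie")
assert is_secret_header("X-Session-Id")
assert not is_secret_header("Content-Type")
```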
openhands/sdk/workspace/remote/base.py CHANGED
@@ -50,12 +50,17 @@ class RemoteWorkspace(RemoteWorkspaceMixin, BaseWorkspace):
         if client is None:
             # Configure reasonable timeouts for HTTP requests
             # - connect: 10 seconds to establish connection
-            # - read: 60 seconds to read response (for LLM operations)
+            # - read: 600 seconds (10 minutes) to read response (for LLM operations)
             # - write: 10 seconds to send request
            # - pool: 10 seconds to get connection from pool
-            timeout = httpx.Timeout(connect=10.0, read=60.0, write=10.0, pool=10.0)
+            timeout = httpx.Timeout(
+                connect=10.0, read=self.read_timeout, write=10.0, pool=10.0
+            )
             client = httpx.Client(
-                base_url=self.host, timeout=timeout, headers=self._headers
+                base_url=self.host,
+                timeout=timeout,
+                headers=self._headers,
+                limits=httpx.Limits(max_connections=self.max_connections),
             )
         self._client = client
         return client
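
Both values feeding the httpx client come from new mixin fields shown in the next file, so they can be set when constructing the workspace. A hedged configuration sketch: the `RemoteWorkspace` import path and the `host` keyword are assumptions, while `working_dir`, `read_timeout`, and `max_connections` are the fields visible in this diff.

```python
from openhands.sdk.workspace.remote.base import RemoteWorkspace  # assumed path

# Read timeout now defaults to 600 s (was a hard-coded 60 s) and is tunable,
# as is the httpx connection pool size.
workspace = RemoteWorkspace(
    host="https://sandbox.example.com",  # assumed field name for the remote host
    working_dir="/workspace",
    read_timeout=1200.0,     # allow very long LLM-backed requests
    max_connections=50,      # cap the pool when running many conversations
)
```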
openhands/sdk/workspace/remote/remote_workspace_mixin.py CHANGED
@@ -25,6 +25,15 @@ class RemoteWorkspaceMixin(BaseModel):
     working_dir: str = Field(
         description="The working directory for agent operations and tool execution."
     )
+    read_timeout: float = Field(
+        default=600.0,
+        description="Timeout in seconds for reading operations of httpx.Client.",
+    )
+    max_connections: int | None = Field(
+        default=None,
+        description="Maximum number of connections for httpx.Client. "
+        "None means no limit, useful for running many conversations in parallel.",
+    )
 
     def model_post_init(self, context: Any) -> None:
         # Set up remote host
@@ -87,26 +96,50 @@ class RemoteWorkspaceMixin(BaseModel):
         stdout_parts = []
         stderr_parts = []
         exit_code = None
+        last_order = -1  # Track highest order seen to fetch only new events
+        seen_event_ids: set[str] = set()  # Track seen IDs to detect duplicates
 
         while time.time() - start_time < timeout:
-            # Search for all events
+            # Search for new events (order > last_order)
+            params: dict[str, str | int] = {
+                "command_id__eq": command_id,
+                "sort_order": "TIMESTAMP",
+                "limit": 100,
+                "kind__eq": "BashOutput",
+            }
+            if last_order >= 0:
+                params["order__gt"] = last_order
+
             response = yield {
                 "method": "GET",
                 "url": f"{self.host}/api/bash/bash_events/search",
-                "params": {
-                    "command_id__eq": command_id,
-                    "sort_order": "TIMESTAMP",
-                    "limit": 100,
-                },
+                "params": params,
                 "headers": self._headers,
                 "timeout": timeout,
             }
             response.raise_for_status()
             search_result = response.json()
 
-            # Filter for BashOutput events for this command
+            # Process BashOutput events
             for event in search_result.get("items", []):
                 if event.get("kind") == "BashOutput":
+                    # Check for duplicates - safety check in case caller
+                    # forgets to add kind__eq filter or API has a bug
+                    event_id = event.get("id")
+                    if event_id is not None:
+                        if event_id in seen_event_ids:
+                            raise RuntimeError(
+                                f"Duplicate event received: {event_id}. "
+                                "This should not happen with order__gt "
+                                "filtering and kind filtering."
+                            )
+                        seen_event_ids.add(event_id)
+
+                    # Track the highest order we've seen
+                    event_order = event.get("order")
+                    if event_order is not None and event_order > last_order:
+                        last_order = event_order
+
                     if event.get("stdout"):
                         stdout_parts.append(event["stdout"])
                     if event.get("stderr"):
{openhands_sdk-1.10.0.dist-info → openhands_sdk-1.11.1.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: openhands-sdk
-Version: 1.10.0
+Version: 1.11.1
 Summary: OpenHands SDK - Core functionality for building AI agents
 Project-URL: Source, https://github.com/OpenHands/software-agent-sdk
 Project-URL: Homepage, https://github.com/OpenHands/software-agent-sdk