PyPI - openhands-sdk - Versions diffs - 1.10.0__py3-none-any.whl → 1.11.1__py3-none-any.whl - Mend

openhands-sdk 1.10.0__py3-none-any.whl → 1.11.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29) hide show

openhands/sdk/agent/agent.py +60 -27
openhands/sdk/agent/base.py +1 -1
openhands/sdk/context/condenser/base.py +36 -3
openhands/sdk/context/condenser/llm_summarizing_condenser.py +65 -1
openhands/sdk/context/prompts/templates/system_message_suffix.j2 +2 -1
openhands/sdk/context/skills/skill.py +15 -30
openhands/sdk/conversation/base.py +31 -0
openhands/sdk/conversation/conversation.py +5 -0
openhands/sdk/conversation/impl/local_conversation.py +63 -13
openhands/sdk/conversation/impl/remote_conversation.py +128 -13
openhands/sdk/conversation/state.py +19 -0
openhands/sdk/conversation/stuck_detector.py +18 -9
openhands/sdk/llm/__init__.py +16 -0
openhands/sdk/llm/auth/__init__.py +28 -0
openhands/sdk/llm/auth/credentials.py +157 -0
openhands/sdk/llm/auth/openai.py +762 -0
openhands/sdk/llm/llm.py +175 -20
openhands/sdk/llm/message.py +21 -11
openhands/sdk/llm/options/responses_options.py +8 -7
openhands/sdk/llm/utils/model_features.py +2 -0
openhands/sdk/llm/utils/verified_models.py +3 -0
openhands/sdk/mcp/tool.py +27 -4
openhands/sdk/secret/secrets.py +13 -1
openhands/sdk/workspace/remote/base.py +8 -3
openhands/sdk/workspace/remote/remote_workspace_mixin.py +40 -7
{openhands_sdk-1.10.0.dist-info → openhands_sdk-1.11.1.dist-info}/METADATA +1 -1
{openhands_sdk-1.10.0.dist-info → openhands_sdk-1.11.1.dist-info}/RECORD +29 -26
{openhands_sdk-1.10.0.dist-info → openhands_sdk-1.11.1.dist-info}/WHEEL +0 -0
{openhands_sdk-1.10.0.dist-info → openhands_sdk-1.11.1.dist-info}/top_level.txt +0 -0

openhands/sdk/conversation/impl/remote_conversation.py CHANGED Viewed

@@ -6,7 +6,8 @@ import threading
 import time
 import uuid
 from collections.abc import Mapping
-from typing import SupportsIndex, overload
+from queue import Empty, Queue
+from typing import TYPE_CHECKING, SupportsIndex, overload
 from urllib.parse import urlparse
 import httpx
@@ -14,6 +15,10 @@ import websockets
 from openhands.sdk.agent.base import AgentBase
 from openhands.sdk.conversation.base import BaseConversation, ConversationStateProtocol
+if TYPE_CHECKING:
+    from openhands.sdk.tool.schema import Action, Observation
 from openhands.sdk.conversation.conversation_stats import ConversationStats
 from openhands.sdk.conversation.events_list_base import EventsListBase
 from openhands.sdk.conversation.exceptions import (
@@ -555,6 +560,8 @@ class RemoteConversation(BaseConversation):
     _client: httpx.Client
     _hook_processor: HookEventProcessor | None
     _cleanup_initiated: bool
+    _terminal_status_queue: Queue[str]  # Thread-safe queue for terminal status from WS
+    delete_on_close: bool = False
     def __init__(
         self,
@@ -573,6 +580,7 @@ class RemoteConversation(BaseConversation):
             type[ConversationVisualizerBase] | ConversationVisualizerBase | None
         ) = DefaultConversationVisualizer,
         secrets: Mapping[str, SecretValue] | None = None,
+        delete_on_close: bool = False,
         **_: object,
     ) -> None:
         """Remote conversation proxy that talks to an agent server.
@@ -607,6 +615,7 @@ class RemoteConversation(BaseConversation):
         self._client = workspace.client
         self._hook_processor = None
         self._cleanup_initiated = False
+        self._terminal_status_queue: Queue[str] = Queue()
         should_create = conversation_id is None
         if conversation_id is not None:
@@ -706,8 +715,21 @@ class RemoteConversation(BaseConversation):
             # No visualization (visualizer is None)
             self._visualizer = None
+        # Add a callback that signals when run completes via WebSocket
+        # This ensures we wait for all events to be delivered before run() returns
+        def run_complete_callback(event: Event) -> None:
+            if isinstance(event, ConversationStateUpdateEvent):
+                if event.key == "execution_status":
+                    try:
+                        status = ConversationExecutionStatus(event.value)
+                        if status.is_terminal():
+                            self._terminal_status_queue.put(event.value)
+                    except ValueError:
+                        pass  # Unknown status value, ignore
         # Compose all callbacks into a single callback
-        composed_callback = BaseConversation.compose_callbacks(self._callbacks)
+        all_callbacks = self._callbacks + [run_complete_callback]
+        composed_callback = BaseConversation.compose_callbacks(all_callbacks)
         # Initialize WebSocket client for callbacks
         self._ws_client = WebSocketCallbackClient(
@@ -765,6 +787,7 @@ class RemoteConversation(BaseConversation):
             )
             self._hook_processor = HookEventProcessor(hook_manager=hook_manager)
             self._hook_processor.run_session_start()
+        self.delete_on_close = delete_on_close
     def _create_llm_completion_log_callback(self) -> ConversationCallbackType:
         """Create a callback that writes LLM completion logs to client filesystem."""
@@ -859,6 +882,14 @@ class RemoteConversation(BaseConversation):
         Raises:
             ConversationRunError: If the run fails or times out.
         """
+        # Drain any stale terminal status events from previous runs.
+        # This prevents stale events from causing early returns.
+        while True:
+            try:
+                self._terminal_status_queue.get_nowait()
+            except Empty:
+                break
         # Trigger a run on the server using the dedicated run endpoint.
         # Let the server tell us if it's already running (409), avoiding an extra GET.
         try:
@@ -886,10 +917,20 @@ class RemoteConversation(BaseConversation):
         poll_interval: float = 1.0,
         timeout: float = 1800.0,
     ) -> None:
-        """Poll the server until the conversation is no longer running.
+        """Wait for the conversation run to complete.
+        This method waits for the run to complete by listening for the terminal
+        status event via WebSocket. This ensures all events are delivered before
+        returning, avoiding the race condition where polling sees "finished"
+        status before WebSocket delivers the final events.
+        As a fallback, it also polls the server periodically. If the WebSocket
+        is delayed or disconnected, we return after multiple consecutive polls
+        show a terminal status, and reconcile events to catch any that were
+        missed via WebSocket.
         Args:
-            poll_interval: Time in seconds between status polls.
+            poll_interval: Time in seconds between status polls (fallback).
             timeout: Maximum time in seconds to wait.
         Raises:
@@ -898,6 +939,14 @@ class RemoteConversation(BaseConversation):
                 responses are retried until timeout.
         """
         start_time = time.monotonic()
+        consecutive_terminal_polls = 0
+        # Return after this many consecutive terminal polls (fallback for WS issues).
+        # We use 3 polls to balance latency vs reliability:
+        # - 1 poll could be a transient state during shutdown
+        # - 2 polls might still catch a race condition
+        # - 3 polls (with default 1s interval = 3s total) provides high confidence
+        #   that the run is truly complete while keeping fallback latency reasonable
+        TERMINAL_POLL_THRESHOLD = 3
         while True:
             elapsed = time.monotonic() - start_time
@@ -910,20 +959,57 @@ class RemoteConversation(BaseConversation):
                     ),
                 )
+            # Wait for either:
+            # 1. WebSocket delivers terminal status event (preferred)
+            # 2. Poll interval expires (fallback - check status via REST)
+            try:
+                ws_status = self._terminal_status_queue.get(timeout=poll_interval)
+                # Handle ERROR/STUCK states - raises ConversationRunError
+                self._handle_conversation_status(ws_status)
+                logger.info(
+                    "Run completed via WebSocket notification "
+                    "(status: %s, elapsed: %.1fs)",
+                    ws_status,
+                    elapsed,
+                )
+                return
+            except Empty:
+                pass  # Queue.get() timed out, fall through to REST polling
+            # Poll the server for status as a health check and fallback.
+            # This catches ERROR/STUCK states that need immediate attention,
+            # and provides a fallback if WebSocket is delayed/disconnected.
             try:
                 status = self._poll_status_once()
             except Exception as exc:
                 self._handle_poll_exception(exc)
+                consecutive_terminal_polls = 0  # Reset on error
             else:
-                if self._handle_conversation_status(status):
-                    logger.info(
-                        "Run completed with status: %s (elapsed: %.1fs)",
-                        status,
-                        elapsed,
-                    )
-                    return
-            time.sleep(poll_interval)
+                # Raises ConversationRunError for ERROR/STUCK states
+                self._handle_conversation_status(status)
+                # Track consecutive terminal polls as a fallback for WS issues.
+                # If WebSocket is delayed/disconnected, we return after multiple
+                # consecutive polls confirm the terminal status.
+                if status and ConversationExecutionStatus(status).is_terminal():
+                    consecutive_terminal_polls += 1
+                    if consecutive_terminal_polls >= TERMINAL_POLL_THRESHOLD:
+                        logger.info(
+                            "Run completed via REST fallback after %d consecutive "
+                            "terminal polls (status: %s, elapsed: %.1fs). "
+                            "Reconciling events...",
+                            consecutive_terminal_polls,
+                            status,
+                            elapsed,
+                        )
+                        # Reconcile events to catch any that were missed via WS.
+                        # This is only called in the fallback path, so it doesn't
+                        # add overhead in the common case where WS works.
+                        self._state.events.reconcile()
+                        return
+                else:
+                    consecutive_terminal_polls = 0
     def _poll_status_once(self) -> str | None:
         """Fetch the current execution status from the remote conversation."""
@@ -1113,6 +1199,28 @@ class RemoteConversation(BaseConversation):
         """
         _send_request(self._client, "POST", f"/api/conversations/{self._id}/condense")
+    def execute_tool(self, tool_name: str, action: "Action") -> "Observation":
+        """Execute a tool directly without going through the agent loop.
+        Note: This method is not yet supported for RemoteConversation.
+        Tool execution for remote conversations happens on the server side
+        during the normal agent loop.
+        Args:
+            tool_name: The name of the tool to execute
+            action: The action to pass to the tool executor
+        Raises:
+            NotImplementedError: Always, as this feature is not yet supported
+                for remote conversations.
+        """
+        raise NotImplementedError(
+            "execute_tool is not yet supported for RemoteConversation. "
+            "Tool execution for remote conversations happens on the server side "
+            "during the normal agent loop. Use LocalConversation for direct "
+            "tool execution."
+        )
     def close(self) -> None:
         """Close the conversation and clean up resources.
@@ -1134,6 +1242,13 @@ class RemoteConversation(BaseConversation):
             pass
         self._end_observability_span()
+        if self.delete_on_close:
+            try:
+                # trigger server-side delete_conversation to release resources
+                # like tmux sessions
+                _send_request(self._client, "DELETE", f"/api/conversations/{self.id}")
+            except Exception:
+                pass
     def __del__(self) -> None:
         try:

openhands/sdk/conversation/state.py CHANGED Viewed

@@ -45,6 +45,25 @@ class ConversationExecutionStatus(str, Enum):
     STUCK = "stuck"  # Conversation is stuck in a loop or unable to proceed
     DELETING = "deleting"  # Conversation is in the process of being deleted
+    def is_terminal(self) -> bool:
+        """Check if this status represents a terminal state.
+        Terminal states indicate the run has completed and the agent is no longer
+        actively processing. These are: FINISHED, ERROR, STUCK.
+        Note: IDLE is NOT a terminal state - it's the initial state of a conversation
+        before any run has started. Including IDLE would cause false positives when
+        the WebSocket delivers the initial state update during connection.
+        Returns:
+            True if this is a terminal status, False otherwise.
+        """
+        return self in (
+            ConversationExecutionStatus.FINISHED,
+            ConversationExecutionStatus.ERROR,
+            ConversationExecutionStatus.STUCK,
+        )
 class ConversationState(OpenHandsModel):
     # ===== Public, validated fields =====

openhands/sdk/conversation/stuck_detector.py CHANGED Viewed

@@ -15,6 +15,12 @@ from openhands.sdk.logger import get_logger
 logger = get_logger(__name__)
+# Maximum recent events to scan for stuck detection.
+# This window should be large enough to capture repetitive patterns
+# (4 repeats × 2 events per cycle = 8 events minimum, plus buffer for user messages)
+MAX_EVENTS_TO_SCAN_FOR_STUCK_DETECTION: int = 20
 class StuckDetector:
     """Detects when an agent is stuck in repetitive or unproductive patterns.
@@ -54,8 +60,14 @@ class StuckDetector:
         return self.thresholds.alternating_pattern
     def is_stuck(self) -> bool:
-        """Check if the agent is currently stuck."""
-        events = list(self.state.events)
+        """Check if the agent is currently stuck.
+        Note: To avoid materializing potentially large file-backed event histories,
+        only the last MAX_EVENTS_TO_SCAN_FOR_STUCK_DETECTION events are analyzed.
+        If a user message exists within this window, only events after it are checked.
+        Otherwise, all events in the window are analyzed.
+        """
+        events = list(self.state.events[-MAX_EVENTS_TO_SCAN_FOR_STUCK_DETECTION:])
         # Only look at history after the last user message
         last_user_msg_index = next(
@@ -66,11 +78,8 @@ class StuckDetector:
             ),
             -1,  # Default to -1 if no user message found
         )
-        if last_user_msg_index == -1:
-            logger.warning("No user message found in history, skipping stuck detection")
-            return False
-        events = events[last_user_msg_index + 1 :]
+        if last_user_msg_index != -1:
+            events = events[last_user_msg_index + 1 :]
         # Determine minimum events needed
         min_threshold = min(
@@ -253,10 +262,10 @@ class StuckDetector:
         return False
     def _is_stuck_context_window_error(self, _events: list[Event]) -> bool:
-        """Detects if we're stuck in a loop of context window errors.
+        """Detects if we are stuck in a loop of context window errors.
         This happens when we repeatedly get context window errors and try to trim,
-        but the trimming doesn't work, causing us to get more context window errors.
+        but the trimming does not work, causing us to get more context window errors.
         The pattern is repeated AgentCondensationObservation events without any other
         events between them.
         """

openhands/sdk/llm/__init__.py CHANGED Viewed

@@ -1,3 +1,9 @@
+from openhands.sdk.llm.auth import (
+    OPENAI_CODEX_MODELS,
+    CredentialStore,
+    OAuthCredentials,
+    OpenAISubscriptionAuth,
+)
 from openhands.sdk.llm.llm import LLM
 from openhands.sdk.llm.llm_registry import LLMRegistry, RegistryEvent
 from openhands.sdk.llm.llm_response import LLMResponse
@@ -22,11 +28,18 @@ from openhands.sdk.llm.utils.verified_models import VERIFIED_MODELS
 __all__ = [
+    # Auth
+    "CredentialStore",
+    "OAuthCredentials",
+    "OpenAISubscriptionAuth",
+    "OPENAI_CODEX_MODELS",
+    # Core
     "LLMResponse",
     "LLM",
     "LLMRegistry",
     "RouterLLM",
     "RegistryEvent",
+    # Messages
     "Message",
     "MessageToolCall",
     "TextContent",
@@ -35,10 +48,13 @@ __all__ = [
     "RedactedThinkingBlock",
     "ReasoningItemModel",
     "content_to_str",
+    # Streaming
     "LLMStreamChunk",
     "TokenCallbackType",
+    # Metrics
     "Metrics",
     "MetricsSnapshot",
+    # Models
     "VERIFIED_MODELS",
     "UNVERIFIED_MODELS_EXCLUDING_BEDROCK",
     "get_unverified_models",

openhands/sdk/llm/auth/__init__.py ADDED Viewed

@@ -0,0 +1,28 @@
+"""Authentication module for LLM subscription-based access.
+This module provides OAuth-based authentication for LLM providers that support
+subscription-based access (e.g., ChatGPT Plus/Pro for OpenAI Codex models).
+"""
+from openhands.sdk.llm.auth.credentials import (
+    CredentialStore,
+    OAuthCredentials,
+)
+from openhands.sdk.llm.auth.openai import (
+    OPENAI_CODEX_MODELS,
+    OpenAISubscriptionAuth,
+    SupportedVendor,
+    inject_system_prefix,
+    transform_for_subscription,
+)
+__all__ = [
+    "CredentialStore",
+    "OAuthCredentials",
+    "OpenAISubscriptionAuth",
+    "OPENAI_CODEX_MODELS",
+    "SupportedVendor",
+    "inject_system_prefix",
+    "transform_for_subscription",
+]

openhands/sdk/llm/auth/credentials.py ADDED Viewed

@@ -0,0 +1,157 @@
+"""Credential storage and retrieval for OAuth-based LLM authentication."""
+from __future__ import annotations
+import json
+import os
+import time
+import warnings
+from pathlib import Path
+from typing import Literal
+from pydantic import BaseModel, Field
+from openhands.sdk.logger import get_logger
+logger = get_logger(__name__)
+def get_credentials_dir() -> Path:
+    """Get the directory for storing credentials.
+    Always returns ~/.openhands/auth; XDG_DATA_HOME is not consulted.
+    """
+    return Path.home() / ".openhands" / "auth"
+class OAuthCredentials(BaseModel):
+    """OAuth credentials for subscription-based LLM access."""
+    type: Literal["oauth"] = "oauth"
+    vendor: str = Field(description="The vendor/provider (e.g., 'openai')")
+    access_token: str = Field(description="The OAuth access token")
+    refresh_token: str = Field(description="The OAuth refresh token")
+    expires_at: int = Field(
+        description="Unix timestamp (ms) when the access token expires"
+    )
+    def is_expired(self) -> bool:
+        """Check if the access token is expired."""
+        # Treat the token as expired 60 seconds early so it cannot lapse in the
+        # middle of a request (expires_at and the comparison are in milliseconds).
+        return self.expires_at < (int(time.time() * 1000) + 60_000)
+class CredentialStore:
+    """Store and retrieve OAuth credentials for LLM providers."""
+    def __init__(self, credentials_dir: Path | None = None):
+        """Initialize the credential store.
+        Args:
+            credentials_dir: Optional custom directory for storing credentials.
+                           Defaults to ~/.openhands/auth/
+        """
+        self._credentials_dir = credentials_dir or get_credentials_dir()
+        logger.info(f"Using credentials directory: {self._credentials_dir}")
+    @property
+    def credentials_dir(self) -> Path:
+        """Get the credentials directory, creating it if necessary."""
+        self._credentials_dir.mkdir(parents=True, exist_ok=True)
+        # Set directory permissions to owner-only (rwx------)
+        if os.name != "nt":
+            self._credentials_dir.chmod(0o700)
+        return self._credentials_dir
+    def _get_credentials_file(self, vendor: str) -> Path:
+        """Get the path to the credentials file for a vendor."""
+        return self.credentials_dir / f"{vendor}_oauth.json"
+    def get(self, vendor: str) -> OAuthCredentials | None:
+        """Get stored credentials for a vendor.
+        Args:
+            vendor: The vendor/provider name (e.g., 'openai')
+        Returns:
+            OAuthCredentials if found and valid, None otherwise
+        """
+        creds_file = self._get_credentials_file(vendor)
+        if not creds_file.exists():
+            return None
+        try:
+            with open(creds_file) as f:
+                data = json.load(f)
+            return OAuthCredentials.model_validate(data)
+        except (json.JSONDecodeError, ValueError):
+            # Invalid credentials file, remove it
+            creds_file.unlink(missing_ok=True)
+            return None
+    def save(self, credentials: OAuthCredentials) -> None:
+        """Save credentials for a vendor.
+        Args:
+            credentials: The OAuth credentials to save
+        """
+        creds_file = self._get_credentials_file(credentials.vendor)
+        with open(creds_file, "w") as f:
+            json.dump(credentials.model_dump(), f, indent=2)
+        # Set restrictive permissions (owner read/write only)
+        # Note: On Windows, NTFS ACLs should be used instead
+        if os.name != "nt":  # Not Windows
+            creds_file.chmod(0o600)
+        else:
+            warnings.warn(
+                "File permissions on Windows should be manually restricted",
+                stacklevel=2,
+            )
+    def delete(self, vendor: str) -> bool:
+        """Delete stored credentials for a vendor.
+        Args:
+            vendor: The vendor/provider name
+        Returns:
+            True if credentials were deleted, False if they didn't exist
+        """
+        creds_file = self._get_credentials_file(vendor)
+        if creds_file.exists():
+            creds_file.unlink()
+            return True
+        return False
+    def update_tokens(
+        self,
+        vendor: str,
+        access_token: str,
+        refresh_token: str | None,
+        expires_in: int,
+    ) -> OAuthCredentials | None:
+        """Update tokens for an existing credential.
+        Args:
+            vendor: The vendor/provider name
+            access_token: New access token
+            refresh_token: New refresh token (if provided)
+            expires_in: Token expiry in seconds
+        Returns:
+            Updated credentials, or None if no existing credentials found
+        """
+        existing = self.get(vendor)
+        if existing is None:
+            return None
+        updated = OAuthCredentials(
+            vendor=vendor,
+            access_token=access_token,
+            refresh_token=refresh_token or existing.refresh_token,
+            expires_at=int(time.time() * 1000) + (expires_in * 1000),
+        )
+        self.save(updated)
+        return updated

openhands-sdk 1.10.0__py3-none-any.whl → 1.11.1__py3-none-any.whl

openhands-sdk 1.10.0__py3-none-any.whl → 1.11.1__py3-none-any.whl