PyPI - openhands-agent-server - Versions diffs - 1.24.0__tar.gz → 1.26.0__tar.gz - Mend

openhands-agent-server 1.24.0.tar.gz → 1.26.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (61) hide show

{openhands_agent_server-1.24.0 → openhands_agent_server-1.26.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: openhands-agent-server
-Version: 1.24.0
+Version: 1.26.0
 Summary: OpenHands Agent Server - REST/WebSocket interface for OpenHands AI Agent
 Project-URL: Source, https://github.com/OpenHands/software-agent-sdk
 Project-URL: Homepage, https://github.com/OpenHands/software-agent-sdk

{openhands_agent_server-1.24.0 → openhands_agent_server-1.26.0}/openhands/agent_server/bash_service.py RENAMED Viewed

@@ -41,11 +41,13 @@ class BashEventService:
         self.bash_events_dir.mkdir(parents=True, exist_ok=True)
     def _timestamp_to_str(self, timestamp: datetime) -> str:
-        result = timestamp.strftime("%Y%m%d%H%M%S")
-        return result
+        # Include microseconds so filename-based ordering reflects emission
+        # order for sub-second bursts (e.g. fast `yes`-style floods that
+        # emit several BashOutput chunks in the same wall-clock second).
+        return timestamp.strftime("%Y%m%d%H%M%S%f")
     def _get_event_filename(self, event: BashEventBase) -> str:
-        """Generate filename using YYYYMMDDHHMMSS_eventId_actionId format."""
+        """Generate filename using YYYYMMDDHHMMSSffffff_eventId_actionId format."""
         result = [self._timestamp_to_str(event.timestamp), event.kind]
         command_id = getattr(event, "command_id", None)
         if command_id:

{openhands_agent_server-1.24.0 → openhands_agent_server-1.26.0}/openhands/agent_server/docker/Dockerfile RENAMED Viewed

@@ -172,7 +172,7 @@ RUN set -ux; \
         PATH="$ACP_NODE_DIR/bin:$PATH"; \
         if "$ACP_NODE_DIR/bin/npm" install -g \
             @agentclientprotocol/claude-agent-acp@0.30.0 \
-            @zed-industries/codex-acp@0.11.1 \
+            @zed-industries/codex-acp@0.15.0 \
             @google/gemini-cli@0.38.0; then \
           # Create wrappers in /usr/local/bin that prepend ACP's Node 22 to PATH.
           # This ensures the ACP binary's #!/usr/bin/env node shebang resolves

{openhands_agent_server-1.24.0 → openhands_agent_server-1.26.0}/openhands/agent_server/event_service.py RENAMED Viewed

@@ -18,8 +18,13 @@ from openhands.agent_server.models import (
 )
 from openhands.agent_server.pub_sub import PubSub, Subscriber
 from openhands.sdk import LLM, AgentBase, Event, Message, get_logger
+from openhands.sdk.agent import ACPAgent
 from openhands.sdk.conversation.base import BaseConversation
-from openhands.sdk.conversation.impl.local_conversation import LocalConversation
+from openhands.sdk.conversation.impl.local_conversation import (
+    ACP_INFLIGHT_PROMPT_USER_MESSAGE_ID,
+    ACP_SUPERSEDE_INFLIGHT_PROMPT,
+    LocalConversation,
+)
 from openhands.sdk.conversation.response_utils import get_agent_final_response
 from openhands.sdk.conversation.secret_registry import SecretValue
 from openhands.sdk.conversation.state import (
@@ -71,6 +76,15 @@ class EventService:
     # Set when a send_message(run=True) is rejected because a run is still
     # wrapping up; consumed by _run_and_publish to re-run the stranded message.
     _rerun_requested: bool = field(default=False, init=False)
+    # Set only for the internal ACP interrupt/restart path triggered by a new
+    # send_message(run=True). Explicit user pause/interrupt clears it so user
+    # stop intent wins over an earlier automatic restart request.
+    _acp_internal_rerun_requested: bool = field(default=False, init=False)
+    # Incremented for explicit user pause/interrupt requests. Internal ACP
+    # supersede restarts compare this generation after their interrupt drains
+    # so a later Stop/Pause cannot be overwritten by an automatic restart.
+    _explicit_interrupt_generation: int = field(default=0, init=False)
+    _closing: bool = field(default=False, init=False)
     _run_lock: asyncio.Lock = field(default_factory=asyncio.Lock, init=False)
     _callback_wrapper: AsyncCallbackWrapper | None = field(default=None, init=False)
     _lease: ConversationLease | None = field(default=None, init=False)
@@ -372,6 +386,23 @@ class EventService:
         loop = asyncio.get_running_loop()
         return await loop.run_in_executor(None, self._get_execution_status_sync)
+    def _mark_error_status_sync(self) -> None:
+        """Force the conversation into ERROR status (idempotent backstop).
+        Called when a run task raised before the conversation could set its own
+        ERROR status — e.g. an exception in ``init_state``, which executes
+        outside ``run()``/``arun()``'s try-block (via ``_ensure_agent_ready()``).
+        Without this, the run's finally would publish a stale non-error status
+        (IDLE/RUNNING) and the failure would look like a clean stop. No-op once
+        the status is already ERROR. Best-effort: never raises (the caller is an
+        error handler).
+        """
+        if not self._conversation:
+            return
+        with self._conversation._state as state:
+            if state.execution_status != ConversationExecutionStatus.ERROR:
+                state.execution_status = ConversationExecutionStatus.ERROR
     def _create_state_update_event_sync(self) -> ConversationStateUpdateEvent:
         if not self._conversation:
             raise ValueError("inactive_service")
@@ -419,11 +450,28 @@ class EventService:
     async def send_message(self, message: Message, run: bool = False):
         if not self._conversation:
             raise ValueError("inactive_service")
+        explicit_interrupt_generation = self._explicit_interrupt_generation
         loop = asyncio.get_running_loop()
         await loop.run_in_executor(None, self._conversation.send_message, message)
         if run:
+            if self._explicit_interrupt_generation != explicit_interrupt_generation:
+                return
+            (
+                did_mark_acp_prompt_superseded,
+                active_acp_prompt_has_latest_message,
+            ) = await self._mark_running_acp_prompt_superseded()
+            interrupted_acp = False
+            if did_mark_acp_prompt_superseded:
+                self._acp_internal_rerun_requested = True
+                interrupted_acp = True
+                await self.interrupt(internal_acp_rerun=True)
+                if self._explicit_interrupt_generation != explicit_interrupt_generation:
+                    return
             try:
-                await self.run()
+                await self.run(
+                    acp_internal_rerun_generation=explicit_interrupt_generation
+                )
+                self._acp_internal_rerun_requested = False
             except ValueError as e:
                 # run() refused. If a run is still wrapping up (its
                 # wait_for_pending tail), the message we just appended won't be
@@ -433,8 +481,53 @@ class EventService:
                 # is what keeps a deliberate run=False append, or an IDLE reached
                 # via another path, from triggering an unwanted run.
                 # "inactive_service" is terminal and must not re-arm.
-                if str(e) == "conversation_already_running":
+                if (
+                    str(e) == "conversation_already_running"
+                    and not active_acp_prompt_has_latest_message
+                ):
                     self._rerun_requested = True
+                    if interrupted_acp:
+                        self._acp_internal_rerun_requested = True
+    def _mark_running_acp_prompt_superseded_sync(self) -> tuple[bool, bool]:
+        """Mark the currently running ACP prompt superseded if needed.
+        The tuple is ``(did_mark_superseded, active_prompt_has_latest_message)``.
+        If the running ACP prompt has already advanced to the newly appended
+        user message, interrupting it would cancel the replacement prompt and
+        strand that message behind the persisted cursor.
+        """
+        if not self._conversation:
+            return (False, False)
+        if self._run_task is None or self._run_task.done():
+            return (False, False)
+        if not isinstance(self._conversation.agent, ACPAgent):
+            return (False, False)
+        with self._conversation._state as state:
+            if state.execution_status != ConversationExecutionStatus.RUNNING:
+                return (False, False)
+            inflight_prompt_user_message_id = state.agent_state.get(
+                ACP_INFLIGHT_PROMPT_USER_MESSAGE_ID
+            )
+            last_user_message_id = state.last_user_message_id
+            if inflight_prompt_user_message_id is None or last_user_message_id is None:
+                return (False, False)
+            active_prompt_has_latest_message = (
+                inflight_prompt_user_message_id == last_user_message_id
+            )
+            if active_prompt_has_latest_message:
+                return (False, True)
+            state.agent_state = {
+                **state.agent_state,
+                ACP_SUPERSEDE_INFLIGHT_PROMPT: True,
+            }
+            return (True, False)
+    async def _mark_running_acp_prompt_superseded(self) -> tuple[bool, bool]:
+        loop = asyncio.get_running_loop()
+        return await loop.run_in_executor(
+            None, self._mark_running_acp_prompt_superseded_sync
+        )
     async def subscribe_to_events(self, subscriber: Subscriber[Event]) -> UUID:
         subscriber_id = self._pub_sub.subscribe(subscriber)
@@ -624,41 +717,53 @@ class EventService:
             self._pub_sub, loop=asyncio.get_running_loop()
         )
-        # Only wire token streaming if at least one LLM has stream=True.
-        # The LLM silently ignores on_token when stream is off, but skipping
-        # the wiring lets us log the decision so operators can tell from a
-        # log line whether deltas will flow.
-        streaming_enabled = any(llm.stream for llm in agent.get_all_llms())
+        # Only wire token streaming for agents that can actually emit token
+        # callbacks. SDK LLM agents need stream=True, while ACP agents emit
+        # AgentMessageChunk text through their bridge without exposing an LLM.
+        streaming_enabled = isinstance(agent, ACPAgent) or any(
+            llm.stream for llm in agent.get_all_llms()
+        )
         logger.debug(
             "Token streaming: %s",
             "enabled" if streaming_enabled else "disabled (no LLM has stream=True)",
         )
-        def _token_streaming_callback(chunk: LLMStreamChunk) -> None:
+        def _publish_stream_delta(
+            content: str | None = None,
+            reasoning_content: str | None = None,
+        ) -> None:
             # Published directly to _pub_sub (not via _callback_wrapper) so
             # deltas reach subscribers but are NOT persisted to
             # ConversationState.events. See StreamingDeltaEvent docstring.
             if not self._main_loop or not self._main_loop.is_running():
                 return
+            # Use `is not None` rather than truthiness: some providers
+            # emit legitimate empty-string chunks at stream boundaries
+            # (e.g. after a tool call) that we still want to forward.
+            if content is None and reasoning_content is None:
+                return
+            event = StreamingDeltaEvent(
+                content=content,
+                reasoning_content=reasoning_content,
+            )
+            with suppress(RuntimeError):  # main loop already closed during teardown
+                asyncio.run_coroutine_threadsafe(self._pub_sub(event), self._main_loop)
+        def _token_streaming_callback(chunk: LLMStreamChunk | str) -> None:
+            if isinstance(chunk, str):
+                _publish_stream_delta(content=chunk)
+                return
             for choice in chunk.choices or ():
                 delta = choice.delta
                 if delta is None:
                     continue
                 content = getattr(delta, "content", None)
                 reasoning = getattr(delta, "reasoning_content", None)
-                # Use `is not None` rather than truthiness: some providers
-                # emit legitimate empty-string chunks at stream boundaries
-                # (e.g. after a tool call) that we still want to forward.
-                if content is None and reasoning is None:
-                    continue
-                event = StreamingDeltaEvent(
+                _publish_stream_delta(
                     content=content if isinstance(content, str) else None,
                     reasoning_content=reasoning if isinstance(reasoning, str) else None,
                 )
-                with suppress(RuntimeError):
-                    asyncio.run_coroutine_threadsafe(
-                        self._pub_sub(event), self._main_loop
-                    )
         conversation = LocalConversation(
             agent=agent,
@@ -733,7 +838,7 @@ class EventService:
         # Publish initial state update
         await self._publish_state_update()
-    async def run(self):
+    async def run(self, acp_internal_rerun_generation: int | None = None):
         """Run the conversation asynchronously in the background.
         This method starts the conversation run in a background task and returns
@@ -747,7 +852,7 @@ class EventService:
         Raises:
             ValueError: If the service is inactive or conversation is already running.
         """
-        if not self._conversation:
+        if not self._conversation or self._closing:
             raise ValueError("inactive_service")
         # Use lock to make check-and-set atomic, preventing race conditions
@@ -757,6 +862,13 @@ class EventService:
                 == ConversationExecutionStatus.RUNNING
             ):
                 raise ValueError("conversation_already_running")
+            if self._closing:
+                raise ValueError("inactive_service")
+            if (
+                acp_internal_rerun_generation is not None
+                and self._explicit_interrupt_generation != acp_internal_rerun_generation
+            ):
+                return
             # Check if there's already a running task
             if self._run_task is not None and not self._run_task.done():
@@ -798,6 +910,13 @@ class EventService:
                         await loop.run_in_executor(self._run_executor, conversation.run)
                 except Exception:
                     logger.exception("Error during conversation run")
+                    # Backstop: a run that raised before reaching its own error
+                    # handling (e.g. an ACP cold-start failure in init_state,
+                    # which runs outside run()/arun()'s try-block) can leave the
+                    # status at IDLE/RUNNING. Force ERROR so the finally's
+                    # _publish_state_update() surfaces the failure instead of a
+                    # misleading non-error state.
+                    await loop.run_in_executor(None, self._mark_error_status_sync)
                 finally:
                     # Wait for all pending events to be published via
                     # AsyncCallbackWrapper before publishing the final state update.
@@ -817,21 +936,53 @@ class EventService:
                     # wrapping up. A send_message(run=True) that arrived during
                     # the wait_for_pending() tail above had its run() rejected as
                     # "conversation_already_running" and suppressed, setting
-                    # _rerun_requested. Honor it only while the conversation is
-                    # still IDLE — i.e. that message is genuinely pending. If the
-                    # run loop was still alive it already absorbed the message
-                    # (LocalConversation.run() keeps looping on FINISHED) and we
-                    # are FINISHED here, so the IDLE guard avoids a redundant run.
-                    # A deliberate run=False append, or an IDLE reached via
-                    # another path, never sets the flag.
-                    if self._rerun_requested:
-                        self._rerun_requested = False
-                        if (
-                            await self._get_execution_status()
-                            == ConversationExecutionStatus.IDLE
-                        ):
-                            with suppress(ValueError):
-                                await self.run()
+                    # _rerun_requested. Honor it while the conversation is IDLE
+                    # (pending input) or internally ACP-interrupted PAUSED (the
+                    # old task finished its interrupt before the replacement run
+                    # could start). Explicit user pause/interrupt clears the
+                    # internal ACP flag, so user stop intent wins over an older
+                    # automatic restart request. If the run loop was still alive
+                    # it already absorbed the message and we are FINISHED here,
+                    # so the guard avoids a redundant run. A deliberate
+                    # run=False append, or an IDLE reached via another path,
+                    # never sets the flag.
+                    rerun_requested = self._rerun_requested
+                    acp_internal_rerun_requested = self._acp_internal_rerun_requested
+                    rerun_generation = self._explicit_interrupt_generation
+                    self._rerun_requested = False
+                    self._acp_internal_rerun_requested = False
+                    if rerun_requested:
+                        status = await self._get_execution_status()
+                        rerun_generation_still_valid = (
+                            self._explicit_interrupt_generation == rerun_generation
+                        )
+                        acp_internal_rerun_still_valid = (
+                            acp_internal_rerun_requested
+                            and rerun_generation_still_valid
+                        )
+                        should_restart = rerun_generation_still_valid and (
+                            status == ConversationExecutionStatus.IDLE
+                            or (
+                                acp_internal_rerun_still_valid
+                                and status == ConversationExecutionStatus.PAUSED
+                                and isinstance(conversation.agent, ACPAgent)
+                            )
+                        )
+                        if should_restart:
+                            try:
+                                await self.run(
+                                    acp_internal_rerun_generation=rerun_generation
+                                    if acp_internal_rerun_still_valid
+                                    else None
+                                )
+                            except ValueError as e:
+                                if str(e) == "conversation_already_running":
+                                    self._rerun_requested = True
+                                    self._acp_internal_rerun_requested = (
+                                        acp_internal_rerun_requested
+                                    )
+                                else:
+                                    raise
             # Create task but don't await it - runs in background
             self._run_task = asyncio.create_task(_run_and_publish())
@@ -862,12 +1013,15 @@ class EventService:
     async def pause(self):
         if self._conversation:
+            self._explicit_interrupt_generation += 1
+            self._rerun_requested = False
+            self._acp_internal_rerun_requested = False
             loop = asyncio.get_running_loop()
             await loop.run_in_executor(None, self._conversation.pause)
             # Publish state update after pause to ensure stats are updated
             await self._publish_state_update()
-    async def interrupt(self):
+    async def interrupt(self, *, internal_acp_rerun: bool = False):
         """Immediately cancel an in-flight async LLM call.
         Delegates to :meth:`LocalConversation.interrupt` which cancels the
@@ -875,12 +1029,18 @@ class EventService:
         back to :meth:`pause`.
         """
         if self._conversation:
+            if not internal_acp_rerun:
+                self._explicit_interrupt_generation += 1
+                self._rerun_requested = False
+                self._acp_internal_rerun_requested = False
             self._conversation.interrupt()
             # Wait for the run task to finish so we can publish the final
-            # state update (PAUSED + InterruptEvent) cleanly.
+            # state update (PAUSED + InterruptEvent) cleanly. The shield keeps
+            # the 5s timeout from force-cancelling a cleanup that still needs
+            # to drain its ACP prompt/cancel handshake.
             if self._run_task is not None and not self._run_task.done():
                 with suppress(Exception):
-                    await asyncio.wait_for(self._run_task, timeout=5.0)
+                    await asyncio.wait_for(asyncio.shield(self._run_task), timeout=5.0)
                 # Only clear _run_task if it actually finished; if
                 # wait_for timed out the task may still be running and
                 # clearing prematurely would allow a second run() to
@@ -940,6 +1100,10 @@ class EventService:
         await self.save_meta()
     async def close(self):
+        self._closing = True
+        self._explicit_interrupt_generation += 1
+        self._rerun_requested = False
+        self._acp_internal_rerun_requested = False
         if self._lease_task is not None:
             self._lease_task.cancel()
             with suppress(asyncio.CancelledError):

{openhands_agent_server-1.24.0 → openhands_agent_server-1.26.0}/openhands/agent_server/mcp_router.py RENAMED Viewed

@@ -6,9 +6,12 @@ to settings, where a misconfiguration would otherwise surface only at
 conversation start (and there manifest as a noisy traceback that aborts
 agent initialization).
-The endpoint is intentionally side-effect-free: it spins up the MCP
-connection, lists the advertised tools, then tears the connection down.
-It never mutates server state or touches stored settings.
+The endpoint never mutates server state or touches stored settings: it
+spins up the MCP connection, lists the advertised tools, optionally invokes
+one caller-chosen tool (``tool_call``), then tears the connection down.
+The optional tool call exists because listing tools does not exercise the
+credentials many servers only use inside tool handlers (e.g. the Slack MCP
+server starts fine with a bogus token); callers must pick a read-only tool.
 """
 from __future__ import annotations
@@ -16,12 +19,16 @@ from __future__ import annotations
 import asyncio
 from typing import Annotated, Any, Literal
-from fastapi import APIRouter
+import mcp.types
+from fastapi import APIRouter, Request
 from pydantic import BaseModel, Field, model_validator
+from openhands.agent_server._secrets_exposure import get_cipher
 from openhands.sdk.logger import get_logger
 from openhands.sdk.mcp import create_mcp_tools
 from openhands.sdk.mcp.exceptions import MCPError, MCPTimeoutError
+from openhands.sdk.utils.cipher import Cipher
+from openhands.sdk.utils.pydantic_secrets import decrypt_str_with_cipher_or_keep
 logger = get_logger(__name__)
@@ -85,6 +92,22 @@ class _RemoteMCPServerSpec(BaseModel):
         return out
+class MCPToolCallSpec(BaseModel):
+    """A single tool invocation to run as part of the connection test.
+    Listing tools does not exercise the credentials many servers only use
+    inside tool handlers, so callers can name one tool to invoke after the
+    listing succeeds. Callers are responsible for choosing a read-only tool;
+    the endpoint executes it verbatim.
+    """
+    name: str = Field(..., min_length=1, description="Name of the tool to invoke")
+    arguments: dict[str, Any] = Field(
+        default_factory=dict,
+        description="Arguments passed to the tool unchanged.",
+    )
 class MCPTestRequest(BaseModel):
     """Body for ``POST /api/mcp/test``."""
@@ -108,6 +131,15 @@ class MCPTestRequest(BaseModel):
         le=120,
         description="Seconds to wait for connection + tools/list to complete.",
     )
+    tool_call: MCPToolCallSpec | None = Field(
+        default=None,
+        description=(
+            "Optional read-only tool to invoke after listing succeeds, so "
+            "callers can verify credentials the server only exercises on "
+            "tool invocation. Its outcome is reported verbatim in "
+            "`tool_result` without affecting `ok`."
+        ),
+    )
     @model_validator(mode="after")
     def _strip_name(self) -> MCPTestRequest:
@@ -117,6 +149,19 @@ class MCPTestRequest(BaseModel):
         return self
+class MCPToolCallResult(BaseModel):
+    """Verbatim outcome of the requested ``tool_call``.
+    The endpoint stays provider-neutral: many servers report upstream
+    failures (e.g. Slack's ``{"ok": false, "error": "invalid_auth"}``)
+    as ordinary text content with ``isError`` unset, so interpreting the
+    payload is the caller's job.
+    """
+    is_error: bool = Field(description="The MCP-level isError flag of the result.")
+    text: str = Field(description="Concatenated text content of the result.")
 class MCPTestSuccess(BaseModel):
     """Response when the candidate server connects and lists its tools."""
@@ -125,6 +170,10 @@ class MCPTestSuccess(BaseModel):
         default_factory=list,
         description="Names of tools advertised by the MCP server.",
     )
+    tool_result: MCPToolCallResult | None = Field(
+        default=None,
+        description=("Outcome of the requested `tool_call`, when one was supplied."),
+    )
 class MCPTestFailure(BaseModel):
@@ -151,18 +200,81 @@ MCPTestResponse = MCPTestSuccess | MCPTestFailure
 # ---------------------------------------------------------------------------
-def _server_to_fastmcp_dict(spec: _StdioMCPServerSpec | _RemoteMCPServerSpec) -> dict:
+def _decrypt_mapping(cipher: Cipher | None, mapping: dict[str, str]) -> dict[str, str]:
+    """Decrypt Fernet-encrypted values round-tripped from settings.
+    The GUI fetches stored settings with ``X-Expose-Secrets: encrypted`` and
+    forwards the ciphertext unchanged so the edit flow can test the *real*
+    stored credentials without ever seeing them. Plaintext values (the
+    common case: freshly typed input) pass through untouched.
+    """
+    if cipher is None:
+        return dict(mapping)
+    return {
+        key: decrypt_str_with_cipher_or_keep(
+            cipher, value, description="MCP test env/headers"
+        )
+        for key, value in mapping.items()
+    }
+def _server_to_fastmcp_dict(
+    spec: _StdioMCPServerSpec | _RemoteMCPServerSpec, cipher: Cipher | None
+) -> dict:
     if isinstance(spec, _StdioMCPServerSpec):
         out: dict[str, Any] = {"command": spec.command, "args": list(spec.args)}
         if spec.env:
-            out["env"] = dict(spec.env)
+            out["env"] = _decrypt_mapping(cipher, spec.env)
         if spec.cwd:
             out["cwd"] = spec.cwd
         return out
-    return spec.to_fastmcp_dict()
+    remote = spec.to_fastmcp_dict()
+    if "headers" in remote:
+        remote["headers"] = _decrypt_mapping(cipher, remote["headers"])
+    return remote
+def _run_tool_call(
+    client: Any, spec: MCPToolCallSpec, tool_names: list[str], timeout: float
+) -> MCPToolCallResult:
+    """Invoke the requested tool on the connected client.
+    Uses ``call_tool_mcp`` (not ``call_tool``, which raises on ``isError``)
+    so in-band failures come back as data -- mirrors ``MCPToolExecutor``.
+    A timeout is reported as an errored result rather than failing the
+    whole test: the server did connect and list, which is still useful.
+    """
+    if spec.name not in tool_names:
+        return MCPToolCallResult(
+            is_error=True,
+            text=(
+                f"Tool {spec.name!r} not advertised by server "
+                f"(available: {', '.join(tool_names) or 'none'})"
+            ),
+        )
+    try:
+        result: mcp.types.CallToolResult = client.call_async_from_sync(
+            client.call_tool_mcp,
+            name=spec.name,
+            arguments=spec.arguments,
+            timeout=timeout,
+        )
+    except TimeoutError:
+        return MCPToolCallResult(
+            is_error=True,
+            text=f"Tool {spec.name!r} call timed out after {timeout} seconds",
+        )
+    text = "\n".join(
+        block.text
+        for block in result.content
+        if isinstance(block, mcp.types.TextContent)
+    )
+    return MCPToolCallResult(is_error=bool(result.isError), text=text)
-def _probe_mcp_server(request: MCPTestRequest) -> MCPTestResponse:
+def _probe_mcp_server(
+    request: MCPTestRequest, cipher: Cipher | None
+) -> MCPTestResponse:
     """Synchronous probe -- safe to run inside ``run_in_executor``.
     ``create_mcp_tools`` already runs its own event loop in a background
@@ -171,14 +283,22 @@ def _probe_mcp_server(request: MCPTestRequest) -> MCPTestResponse:
     threadpool first.
     """
-    config = {"mcpServers": {request.name: _server_to_fastmcp_dict(request.server)}}
+    config = {
+        "mcpServers": {request.name: _server_to_fastmcp_dict(request.server, cipher)}
+    }
     try:
         # ``create_mcp_tools`` returns a client that owns a background loop
         # and a (possibly long-lived) subprocess. Use the context-manager
         # form so we always tear it down, even when listing succeeded.
         with create_mcp_tools(config, timeout=request.timeout) as client:
-            return MCPTestSuccess(tools=[tool.name for tool in client.tools])
+            tool_names = [tool.name for tool in client.tools]
+            tool_result: MCPToolCallResult | None = None
+            if request.tool_call is not None:
+                tool_result = _run_tool_call(
+                    client, request.tool_call, tool_names, request.timeout
+                )
+            return MCPTestSuccess(tools=tool_names, tool_result=tool_result)
     except MCPTimeoutError as exc:
         logger.info("MCP test timed out for server %r: %s", request.name, exc)
         return MCPTestFailure(error=str(exc), error_kind="timeout")
@@ -215,11 +335,21 @@ def _probe_mcp_server(request: MCPTestRequest) -> MCPTestResponse:
         "Attempt to connect to a candidate MCP server and list its tools, "
         "without persisting any settings. Useful for validating user input "
         "in 'add MCP server' flows before storing the config. "
+        "Optionally invokes one caller-chosen (read-only) tool via "
+        "`tool_call` and reports its outcome in `tool_result`, so callers "
+        "can verify credentials that are only exercised on tool invocation. "
+        "Encrypted `env`/`headers` values round-tripped from settings are "
+        "decrypted before the connection is attempted. "
         "Returns 200 with `ok=false` for connection / timeout failures "
         "(those are expected during validation, not server errors)."
     ),
 )
-async def test_mcp_server(request: MCPTestRequest) -> MCPTestResponse:
+async def test_mcp_server(
+    request: MCPTestRequest, http_request: Request
+) -> MCPTestResponse:
     """Probe a single MCP server config and report whether it works."""
+    # Resolve the cipher here: the threadpool function below must not
+    # reach back into ``http_request.app.state``.
+    cipher = get_cipher(http_request)
     loop = asyncio.get_running_loop()
-    return await loop.run_in_executor(None, _probe_mcp_server, request)
+    return await loop.run_in_executor(None, _probe_mcp_server, request, cipher)

{openhands_agent_server-1.24.0 → openhands_agent_server-1.26.0}/openhands/agent_server/persistence/models.py RENAMED Viewed

@@ -32,23 +32,67 @@ from openhands.sdk.utils.pydantic_secrets import serialize_secret, validate_secr
 class SettingsUpdatePayload(TypedDict, total=False):
-    """Typed payload for PersistedSettings.update() method."""
+    """Typed payload for PersistedSettings.update() method.
+    The ``*_diff`` dicts are deep-merged via :func:`_deep_merge`: nested
+    objects merge recursively, and a ``None`` value *inside a nested map*
+    deletes that entry (the "unset" primitive) — e.g. send
+    ``{"acp_env": {"NAME": None}}`` to drop one env-var without re-sending the
+    whole map. A ``None`` on a top-level *field* is not treated as delete; it
+    flows to validation as before.
+    """
     agent_settings_diff: dict[str, Any]
     conversation_settings_diff: dict[str, Any]
     active_profile: str | None
-def _deep_merge(base: dict[str, Any], overlay: dict[str, Any]) -> dict[str, Any]:
-    """Recursively merge overlay dict into base dict.
-    For nested dicts, merges recursively. For other types, overlay wins.
+def _deep_merge(
+    base: dict[str, Any],
+    overlay: dict[str, Any],
+    *,
+    unset_nulls: bool = False,
+) -> dict[str, Any]:
+    """Recursively merge ``overlay`` into ``base``.
+    - Nested dicts are merged recursively.
+    - **Inside a nested map** a ``None`` value **removes** that key — the
+      "unset" primitive a plain deep-merge lacks. It lets a
+      ``PATCH /api/settings`` diff delete a single map entry (one
+      ``acp_env`` / MCP ``env`` key) without round-tripping the whole map::
+          {"agent_settings_diff": {"acp_env": {"STALE_KEY": null}}}
+    - **At the top level** (a settings *field* like ``confirmation_mode`` or
+      ``acp_env`` itself) a ``None`` is left as-is and flows to model
+      validation — exactly as before this primitive existed. So a stray
+      ``{"confirmation_mode": null}`` still fails loudly (422) instead of
+      silently resetting a field to its default. This scoping is deliberate:
+      ``unset`` is for *entries within* a map, not for nulling whole fields.
+    - For any other scalar/list value, the overlay wins.
+    ``unset_nulls`` is ``False`` for the top-level call and ``True`` for every
+    recursive (nested) call — that's what draws the field-vs-entry line above.
+    Corner case: a key whose ``base`` value is **absent or not a dict** (e.g.
+    ``None``) and whose overlay value is a dict is assigned wholesale (no
+    recursion), so any ``null`` entries inside that dict are stored as-is
+    rather than treated as deletes. This is intentional — you can't delete an
+    entry from a map that doesn't exist yet — but it means "initialize a new
+    map and unset a key within it" in one diff won't strip the null;
+    downstream validation handles the resulting value.
     """
     result = dict(base)
     for key, value in overlay.items():
-        if key in result and isinstance(result[key], dict) and isinstance(value, dict):
-            result[key] = _deep_merge(result[key], value)
+        if value is None and unset_nulls:
+            # Nested map entry: a null member removes the key (no-op if absent).
+            result.pop(key, None)
+        elif (
+            key in result and isinstance(result[key], dict) and isinstance(value, dict)
+        ):
+            result[key] = _deep_merge(result[key], value, unset_nulls=True)
         else:
+            # Top-level null (unset_nulls=False) falls here: set as-is and let
+            # model validation decide (preserves pre-existing behavior).
             result[key] = value
     return result
@@ -102,6 +146,11 @@ class PersistedSettings(BaseModel):
         apply any schema migrations if the incoming diff contains an older
         schema version.
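+        When the diff carries an ``agent_kind`` that differs from the current
+        one, the update is treated as a variant replacement: the incoming diff
+        is validated as-is rather than merged with the old variant's fields, so
+        fields both variants share (e.g. ``llm``) fall back to the new variant's
+        defaults unless the diff restates them. Same-kind updates (including
+        diffs that omit ``agent_kind``) retain deep-merge behavior for
+        incremental field edits.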
         Thread Safety:
             This method is NOT thread-safe for concurrent in-memory updates.
             The assignments to ``agent_settings`` and ``conversation_settings``
@@ -132,12 +181,35 @@ class PersistedSettings(BaseModel):
         try:
             if isinstance(agent_update, dict):
-                agent_merged = _deep_merge(
-                    self.agent_settings.model_dump(
-                        mode="json", context={"expose_secrets": "plaintext"}
-                    ),
-                    agent_update,
-                )
+                # Check if this is a variant (agent_kind) switch
+                old_kind = self.agent_settings.agent_kind
+                new_kind = agent_update.get("agent_kind")
+                is_kind_switch = new_kind is not None and new_kind != old_kind
+                if is_kind_switch:
+                    # Variant replacement: validate the diff as-is rather than
+                    # deep-merging it onto the old variant. A kind switch picks a
+                    # different member of the AgentSettingsConfig union, and the
+                    # old variant's serialized fields are not a valid base for the
+                    # new one (e.g. ACP's acp_command has no place in
+                    # OpenHandsAgentSettings and would fail validation).
+                    #
+                    # Consequence (intentional): fields the two variants happen to
+                    # share (e.g. ``llm``) are NOT carried over — they fall back to
+                    # the new variant's defaults unless the caller restates them in
+                    # this same diff. Switching kinds is a fresh start on the new
+                    # variant, mirroring the frontend's "fresh base on kind switch"
+                    # behavior. Callers that want to preserve a shared field must
+                    # include it in the switch payload.
+                    agent_merged = agent_update
+                else:
+                    # Same-kind update: deep-merge for incremental field edits
+                    agent_merged = _deep_merge(
+                        self.agent_settings.model_dump(
+                            mode="json", context={"expose_secrets": "plaintext"}
+                        ),
+                        agent_update,
+                    )
                 try:
                     new_agent = validate_agent_settings(agent_merged)
                 except Exception as e:

{openhands_agent_server-1.24.0 → openhands_agent_server-1.26.0}/openhands/agent_server/profiles_router.py RENAMED Viewed

@@ -105,38 +105,6 @@ def _has_api_key(llm: LLM) -> bool:
     return bool(llm.api_key.get_secret_value().strip())
-def _model_to_profile_name(model: str) -> str:
-    """Convert a model name to a valid profile name.
-    Transforms model names like "openai/gpt-4o" or "anthropic/claude-3-opus"
-    into valid profile names by:
-    - Taking just the model part after provider prefix (if present)
-    - Replacing invalid characters with dashes
-    - Truncating to max 64 characters
-    """
-    import re
-    # Extract model name after provider prefix (e.g., "openai/gpt-4o" -> "gpt-4o")
-    if "/" in model:
-        model = model.rsplit("/", 1)[-1]
-    # Replace any character that's not alphanumeric, dash, underscore, or dot
-    # Profile names must match: ^[A-Za-z0-9][A-Za-z0-9._-]{0,63}$
-    sanitized = re.sub(r"[^A-Za-z0-9._-]", "-", model)
-    # Ensure it starts with alphanumeric (required by profile name pattern)
-    if sanitized and not sanitized[0].isalnum():
-        sanitized = "m" + sanitized
-    # Truncate to max 64 characters
-    sanitized = sanitized[:64]
-    # Remove trailing non-alphanumeric characters
-    sanitized = sanitized.rstrip("._-")
-    return sanitized or "default"
 @profiles_router.get("", response_model=ProfileListResponse)
 async def list_profiles(request: Request) -> ProfileListResponse:
     """List all saved LLM profiles.
@@ -144,17 +112,7 @@ async def list_profiles(request: Request) -> ProfileListResponse:
     Returns the list of profiles along with the currently active profile name,
     if one has been activated. The active_profile tracks which LLM profile
     configuration is currently in use.
-    Auto-creates a profile named after the model if:
-    - No profiles exist
-    - agent_settings.llm has an API key configured
-    The API key check ensures we only auto-create when the user has actually
-    configured their LLM (not just relying on defaults). This allows users
-    with existing LLM configurations to see their settings as a profile
-    without manual creation.
     """
-    cipher = get_cipher(request)
     config = get_config(request)
     settings_store = get_settings_store(config)
     settings = settings_store.load() or PersistedSettings()
@@ -163,42 +121,9 @@ async def list_profiles(request: Request) -> ProfileListResponse:
     with _store_errors():
         summaries = store.list_summaries()
-    active_profile = settings.active_profile
-    # Auto-create profile from existing LLM settings if no profiles exist
-    # but an API key is configured. Use the model name as the profile name.
-    if not summaries and settings.llm_api_key_is_set:
-        llm = settings.agent_settings.llm
-        profile_name = _model_to_profile_name(llm.model or "default")
-        try:
-            with _store_errors():
-                store.save(
-                    profile_name,
-                    llm,
-                    include_secrets=True,
-                    cipher=cipher,
-                )
-            # Update settings to mark this as active
-            def set_active(s: PersistedSettings) -> PersistedSettings:
-                s.active_profile = profile_name
-                return s
-            settings_store.update(set_active)
-            active_profile = profile_name
-            # Refresh summaries to include the new profile
-            summaries = store.list_summaries()
-            logger.info(
-                f"Auto-created '{profile_name}' profile from existing LLM settings"
-            )
-        except Exception as e:
-            # Log but don't fail - auto-creation is a convenience feature
-            logger.warning(f"Failed to auto-create profile: {e}")
     return ProfileListResponse(
         profiles=[ProfileInfo(**s) for s in summaries],
-        active_profile=active_profile,
+        active_profile=settings.active_profile,
     )

{openhands_agent_server-1.24.0 → openhands_agent_server-1.26.0}/openhands/agent_server/settings_router.py RENAMED Viewed

@@ -170,7 +170,22 @@ async def update_settings(
     """Update settings with partial changes.
     Accepts ``agent_settings_diff`` and/or ``conversation_settings_diff``
-    for incremental updates. Values are deep-merged with existing settings.
+    for incremental updates. Diffs are deep-merged; nested objects merge
+    recursively, and a ``null`` value **inside a nested map deletes that
+    entry** — the "unset" primitive that lets a client remove a single map
+    key without round-tripping the whole map. To drop one ACP env-var::
+        PATCH /api/settings
+        {"agent_settings_diff": {"acp_env": {"STALE_KEY": null}}}
+    or to remove one MCP server's header::
+        {"agent_settings_diff":
+            {"mcp_config": {"mcpServers": {"svc": {"headers": {"X-Old": null}}}}}}
+    A ``null`` on a top-level *field* (e.g. ``{"confirmation_mode": null}``)
+    is **not** an unset — it flows to model validation as before, so it still
+    fails loudly rather than silently resetting the field to its default.
     Uses file locking to prevent concurrent updates from overwriting each other.

{openhands_agent_server-1.24.0 → openhands_agent_server-1.26.0}/openhands/agent_server/skills_service.py RENAMED Viewed

@@ -40,7 +40,7 @@ from openhands.sdk.skills import (
 )
 from openhands.sdk.skills.skill import (
     DEFAULT_MARKETPLACE_PATH,
-    PUBLIC_SKILLS_BRANCH,
+    PUBLIC_SKILLS_REF,
     PUBLIC_SKILLS_REPO,
     _invalidate_public_skills_cache,
     load_skills_from_dir,
@@ -391,7 +391,7 @@ def sync_public_skills() -> tuple[bool, str]:
     try:
         cache_dir = get_skills_cache_dir()
         result = update_skills_repository(
-            PUBLIC_SKILLS_REPO, PUBLIC_SKILLS_BRANCH, cache_dir
+            PUBLIC_SKILLS_REPO, PUBLIC_SKILLS_REF, cache_dir
         )
         if result:
@@ -634,7 +634,7 @@ def _fetch_catalog_entries(marketplace_path: str) -> list[_CatalogEntry]:
     """
     cache_dir = get_skills_cache_dir()
     repo_path = update_skills_repository(
-        PUBLIC_SKILLS_REPO, PUBLIC_SKILLS_BRANCH, cache_dir
+        PUBLIC_SKILLS_REPO, PUBLIC_SKILLS_REF, cache_dir
     )
     if repo_path is None:

{openhands_agent_server-1.24.0 → openhands_agent_server-1.26.0}/openhands_agent_server.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: openhands-agent-server
-Version: 1.24.0
+Version: 1.26.0
 Summary: OpenHands Agent Server - REST/WebSocket interface for OpenHands AI Agent
 Project-URL: Source, https://github.com/OpenHands/software-agent-sdk
 Project-URL: Homepage, https://github.com/OpenHands/software-agent-sdk

{openhands_agent_server-1.24.0 → openhands_agent_server-1.26.0}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "openhands-agent-server"
-version = "1.24.0"
+version = "1.26.0"
 description = "OpenHands Agent Server - REST/WebSocket interface for OpenHands AI Agent"
 requires-python = ">=3.12"