PyPI - bareagent-cli - Versions diffs - 0.1.0__py3-none-any.whl - Mend

bareagent-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (121) hide show

bareagent/__init__.py +10 -0
bareagent/concurrency/__init__.py +6 -0
bareagent/concurrency/background.py +97 -0
bareagent/concurrency/notification.py +61 -0
bareagent/concurrency/scheduler.py +136 -0
bareagent/config.toml +299 -0
bareagent/core/__init__.py +1 -0
bareagent/core/config_paths.py +49 -0
bareagent/core/context.py +127 -0
bareagent/core/fileutil.py +103 -0
bareagent/core/goal.py +214 -0
bareagent/core/handlers/__init__.py +1 -0
bareagent/core/handlers/bash.py +79 -0
bareagent/core/handlers/file_edit.py +47 -0
bareagent/core/handlers/file_read.py +270 -0
bareagent/core/handlers/file_write.py +34 -0
bareagent/core/handlers/glob_search.py +30 -0
bareagent/core/handlers/goal.py +60 -0
bareagent/core/handlers/grep_search.py +52 -0
bareagent/core/handlers/memory.py +71 -0
bareagent/core/handlers/plan.py +106 -0
bareagent/core/handlers/search_utils.py +77 -0
bareagent/core/handlers/skill.py +87 -0
bareagent/core/handlers/subagent_send.py +70 -0
bareagent/core/handlers/web_fetch.py +126 -0
bareagent/core/handlers/web_search.py +165 -0
bareagent/core/handlers/workflow.py +190 -0
bareagent/core/loop.py +535 -0
bareagent/core/retry.py +131 -0
bareagent/core/sandbox.py +27 -0
bareagent/core/schema.py +21 -0
bareagent/core/tools.py +779 -0
bareagent/core/workflow.py +517 -0
bareagent/core/workflow_registry.py +219 -0
bareagent/debug/__init__.py +0 -0
bareagent/debug/interaction_log.py +263 -0
bareagent/debug/viewer.html +1750 -0
bareagent/debug/web_viewer.py +157 -0
bareagent/hooks/__init__.py +32 -0
bareagent/hooks/config.py +118 -0
bareagent/hooks/engine.py +197 -0
bareagent/hooks/errors.py +14 -0
bareagent/hooks/events.py +22 -0
bareagent/lsp/__init__.py +63 -0
bareagent/lsp/config.py +134 -0
bareagent/lsp/coord.py +118 -0
bareagent/lsp/diagnostics.py +240 -0
bareagent/lsp/errors.py +24 -0
bareagent/lsp/manager.py +866 -0
bareagent/lsp/tools.py +629 -0
bareagent/lsp/workspace_edit.py +305 -0
bareagent/main.py +4205 -0
bareagent/mcp/__init__.py +69 -0
bareagent/mcp/_sse.py +69 -0
bareagent/mcp/client.py +341 -0
bareagent/mcp/config.py +169 -0
bareagent/mcp/errors.py +32 -0
bareagent/mcp/manager.py +318 -0
bareagent/mcp/protocol.py +187 -0
bareagent/mcp/registry.py +557 -0
bareagent/mcp/transport/__init__.py +15 -0
bareagent/mcp/transport/base.py +149 -0
bareagent/mcp/transport/http_legacy.py +192 -0
bareagent/mcp/transport/http_streamable.py +217 -0
bareagent/mcp/transport/stdio.py +202 -0
bareagent/memory/__init__.py +1 -0
bareagent/memory/compact.py +203 -0
bareagent/memory/conversation_io.py +226 -0
bareagent/memory/embedding.py +194 -0
bareagent/memory/persistent.py +515 -0
bareagent/memory/token_counter.py +67 -0
bareagent/memory/token_tracker.py +262 -0
bareagent/memory/transcript.py +100 -0
bareagent/permission/__init__.py +1 -0
bareagent/permission/guard.py +329 -0
bareagent/permission/rules.py +19 -0
bareagent/planning/__init__.py +19 -0
bareagent/planning/agent_types.py +169 -0
bareagent/planning/skill_gen.py +141 -0
bareagent/planning/skill_store.py +173 -0
bareagent/planning/skills.py +146 -0
bareagent/planning/subagent.py +355 -0
bareagent/planning/subagent_registry.py +77 -0
bareagent/planning/tasks.py +348 -0
bareagent/planning/todo.py +153 -0
bareagent/planning/worktree.py +122 -0
bareagent/provider/__init__.py +1 -0
bareagent/provider/anthropic.py +348 -0
bareagent/provider/base.py +136 -0
bareagent/provider/factory.py +130 -0
bareagent/provider/openai.py +881 -0
bareagent/provider/presets.py +72 -0
bareagent/provider/setup.py +356 -0
bareagent/skills/.gitkeep +1 -0
bareagent/skills/code-review/SKILL.md +68 -0
bareagent/skills/git/SKILL.md +68 -0
bareagent/skills/test/SKILL.md +70 -0
bareagent/team/__init__.py +17 -0
bareagent/team/autonomous.py +193 -0
bareagent/team/mailbox.py +239 -0
bareagent/team/manager.py +155 -0
bareagent/team/protocols.py +129 -0
bareagent/tracing/__init__.py +12 -0
bareagent/tracing/_api.py +92 -0
bareagent/tracing/_proxy.py +60 -0
bareagent/tracing/composite.py +115 -0
bareagent/tracing/json_file.py +115 -0
bareagent/tracing/langfuse.py +139 -0
bareagent/tracing/otel.py +107 -0
bareagent/tracing/setup.py +85 -0
bareagent/ui/__init__.py +24 -0
bareagent/ui/console.py +167 -0
bareagent/ui/prompt.py +78 -0
bareagent/ui/protocol.py +24 -0
bareagent/ui/stream.py +66 -0
bareagent/ui/theme.py +240 -0
bareagent_cli-0.1.0.dist-info/METADATA +331 -0
bareagent_cli-0.1.0.dist-info/RECORD +121 -0
bareagent_cli-0.1.0.dist-info/WHEEL +4 -0
bareagent_cli-0.1.0.dist-info/entry_points.txt +2 -0
bareagent_cli-0.1.0.dist-info/licenses/LICENSE +21 -0

bareagent/provider/openai.py ADDED Viewed

@@ -0,0 +1,881 @@
+from __future__ import annotations
+import itertools
+import json
+from typing import Any
+import openai
+from bareagent.provider.base import BaseLLMProvider, LLMResponse, StreamEvent, ToolCall
+# Request keys the provider fills in itself for chat.completions calls; caller
+# kwargs with these names are dropped in ``_build_chat_request_params``.
+_PROTECTED_CHAT_KEYS = frozenset({"model", "messages", "tools"})
+# Same role for the Responses API: these keys are filtered out of caller kwargs
+# in ``_build_responses_request_params``.
+_PROTECTED_RESPONSES_KEYS = frozenset({"model", "input", "tools", "instructions"})
+# Hostnames that ``OpenAIProvider._is_openai_official_api`` treats as the
+# official OpenAI endpoint.
+_OPENAI_OFFICIAL_HOSTS = frozenset({"api.openai.com"})
+def _stringify_block(value: Any) -> str:
+    """Render an arbitrary content block as compact JSON for the tool role.
+    Mirrors ``BaseLLMProvider._stringify_content`` for single items so the lift
+    helper stays a free function (and therefore reusable from both the
+    chat_completions and Responses-API code paths).
+    """
+    if isinstance(value, dict) and value.get("type") == "text":
+        return str(value.get("text", ""))
+    return json.dumps(value, ensure_ascii=False, default=str)
+def _lift_image_blocks(
+    tool_result_content: Any,
+) -> tuple[str, list[dict[str, Any]]]:
+    """Split multimodal MCP tool_result content into (text, image_blocks).
+    OpenAI's ``tool`` role (chat_completions) and the Responses API's
+    ``function_call_output`` item both refuse image attachments — image
+    content must be lifted into a follow-up ``user`` message. Both code paths
+    share this helper so the lift rules (placeholder text when there is no
+    text part, image_url shape, non-base64 source degradation) stay aligned.
+    Returns ``(text_for_tool_role, image_blocks)``. ``text_for_tool_role`` is
+    always a string: empty content yields a placeholder when images exist, or
+    a stringified fallback otherwise. ``image_blocks`` is a list of
+    chat-completion-shaped ``{type:"image_url", image_url:{url:"data:..."}}``
+    blocks; the Responses-API caller translates them to ``input_image`` parts.
+    """
+    if not isinstance(tool_result_content, list):
+        return _stringify_block(tool_result_content), []
+    text_parts: list[str] = []
+    image_blocks: list[dict[str, Any]] = []
+    for item in tool_result_content:
+        if not isinstance(item, dict):
+            text_parts.append(_stringify_block(item))
+            continue
+        item_type = item.get("type")
+        if item_type == "text":
+            text = item.get("text", "")
+            if isinstance(text, str):
+                text_parts.append(text)
+            continue
+        if item_type == "image":
+            source = item.get("source")
+            if not isinstance(source, dict) or source.get("type") != "base64":
+                text_parts.append(_stringify_block(item))
+                continue
+            data = source.get("data", "")
+            if not isinstance(data, str) or not data:
+                text_parts.append(_stringify_block(item))
+                continue
+            mime = source.get("media_type", "image/png")
+            image_blocks.append(
+                {
+                    "type": "image_url",
+                    "image_url": {"url": f"data:{mime};base64,{data}"},
+                }
+            )
+            continue
+        text_parts.append(_stringify_block(item))
+    text = "\n".join(part for part in text_parts if part)
+    if not text and image_blocks:
+        text = "[Tool returned image(s); see next message]"
+    return text, image_blocks
+class OpenAIProvider(BaseLLMProvider):
+    def __init__(
+        self,
+        api_key: str,
+        model: str,
+        base_url: str | None = None,
+        wire_api: str | None = None,
+    ) -> None:
+        # The app layer (src/core/retry.py) owns retries exclusively; disable
+        # the SDK's built-in retries to avoid 2xN compound amplification.
+        self.client = openai.OpenAI(api_key=api_key, base_url=base_url, max_retries=0)
+        self.model = model
+        self.base_url = base_url
+        self.wire_api = (wire_api or "chat_completions").strip().lower()
+    def create(
+        self,
+        messages: list[dict[str, Any]],
+        tools: list[dict[str, Any]],
+        **kwargs: Any,
+    ) -> LLMResponse:
+        if self.wire_api == "responses":
+            return self._create_via_responses(messages, tools, **kwargs)
+        params = self._build_chat_request_params(messages, tools, **kwargs)
+        response = self.client.chat.completions.create(**params)
+        return self._parse_response(response)
+    def create_stream(
+        self,
+        messages: list[dict[str, Any]],
+        tools: list[dict[str, Any]],
+        **kwargs: Any,
+    ):
+        if self.wire_api == "responses":
+            return (yield from self._create_stream_via_responses(messages, tools, **kwargs))
+        return (yield from self._create_stream_via_chat(messages, tools, **kwargs))
+    def _create_via_responses(
+        self,
+        messages: list[dict[str, Any]],
+        tools: list[dict[str, Any]],
+        **kwargs: Any,
+    ) -> LLMResponse:
+        params = self._build_responses_request_params(messages, tools, **kwargs)
+        raw_response = self.client.responses.create(**params)
+        return self._parse_responses_api_response(raw_response)
+    def _is_openai_official_api(self) -> bool:
+        if not self.base_url:
+            return True
+        from urllib.parse import urlparse
+        host = urlparse(self.base_url).hostname or ""
+        return host in _OPENAI_OFFICIAL_HOSTS
+    def _create_stream_via_chat(
+        self,
+        messages: list[dict[str, Any]],
+        tools: list[dict[str, Any]],
+        **kwargs: Any,
+    ):
+        """Stream a chat.completions request as a generator.
+        Yields a ``StreamEvent(type="text")`` for every non-empty content delta
+        and a ``StreamEvent(type="tool_call")`` for each finalized tool call.
+        The generator's return value is an ``LLMResponse`` aggregating the
+        text, the tool calls, the stop reason and the token usage seen on the
+        stream.
+        """
+        params = self._build_chat_request_params(messages, tools, **kwargs)
+        params["stream"] = True
+        # Usage reporting is requested only when the caller did not pass its own
+        # ``stream_options`` and the endpoint is the official OpenAI API.
+        if "stream_options" not in params and self._is_openai_official_api():
+            params["stream_options"] = {"include_usage": True}
+        text_parts: list[str] = []
+        # Partial tool calls keyed by the delta's ``index``; argument fragments
+        # are concatenated as they arrive.
+        pending_tool_calls: dict[int, dict[str, str]] = {}
+        emitted_tool_call_ids: set[str] = set()
+        usage_prompt_tokens = 0
+        usage_completion_tokens = 0
+        usage_cached_tokens = 0
+        stop_reason = ""
+        stream = self.client.chat.completions.create(**params)
+        for chunk in stream:
+            usage = getattr(chunk, "usage", None)
+            if usage is not None:
+                # Later usage values overwrite earlier ones; missing or zero
+                # fields leave the previous value in place.
+                val = getattr(usage, "prompt_tokens", None)
+                if val is not None:
+                    usage_prompt_tokens = val
+                val = getattr(usage, "completion_tokens", None)
+                if val is not None:
+                    usage_completion_tokens = val
+                cached = self._extract_cached_tokens(usage)
+                if cached:
+                    usage_cached_tokens = cached
+            choices = getattr(chunk, "choices", None) or []
+            if not choices:
+                continue
+            choice = choices[0]
+            if choice.finish_reason:
+                stop_reason = choice.finish_reason
+            # Some OpenAI-compatible relays/proxies (and reasoning models) emit
+            # chunks with a null ``delta`` — e.g. a trailing usage/finish chunk
+            # or a keep-alive. Guard the member accesses so a null delta does not
+            # crash the stream with "'NoneType' object has no attribute 'content'".
+            delta = getattr(choice, "delta", None)
+            if delta is not None:
+                if delta.content:
+                    text_parts.append(delta.content)
+                    yield StreamEvent(type="text", text=delta.content)
+                for tool_delta in delta.tool_calls or []:
+                    call_state = pending_tool_calls.setdefault(
+                        tool_delta.index,
+                        {"id": "", "name": "", "arguments": ""},
+                    )
+                    if tool_delta.id:
+                        call_state["id"] = tool_delta.id
+                    function = tool_delta.function
+                    if function is None:
+                        continue
+                    if function.name:
+                        call_state["name"] = function.name
+                    if function.arguments:
+                        call_state["arguments"] += function.arguments
+            # On the "tool_calls" finish signal, emit the calls assembled so
+            # far. ``_iter_new_tool_calls`` is given the emitted-id set,
+            # presumably to skip calls that were already yielded (helper not
+            # shown here; confirm).
+            if choice.finish_reason == "tool_calls":
+                for tool_call in self._iter_new_tool_calls(
+                    self._finalize_tool_calls(pending_tool_calls),
+                    emitted_tool_call_ids,
+                ):
+                    yield StreamEvent(
+                        type="tool_call",
+                        tool_call_id=tool_call.id,
+                        name=tool_call.name,
+                        input=tool_call.input,
+                    )
+        # After the stream ends, emit any tool calls that never got an explicit
+        # "tool_calls" finish signal.
+        tool_calls = self._finalize_tool_calls(pending_tool_calls)
+        for tool_call in self._iter_new_tool_calls(tool_calls, emitted_tool_call_ids):
+            yield StreamEvent(
+                type="tool_call",
+                tool_call_id=tool_call.id,
+                name=tool_call.name,
+                input=tool_call.input,
+            )
+        # Cached tokens are reported separately, so they are subtracted from the
+        # prompt total (clamped at zero) to get the input-token figure.
+        return LLMResponse(
+            text="".join(text_parts),
+            tool_calls=tool_calls,
+            stop_reason="tool_calls" if tool_calls else (stop_reason or "stop"),
+            input_tokens=max(usage_prompt_tokens - usage_cached_tokens, 0),
+            output_tokens=usage_completion_tokens,
+            cache_read_input_tokens=usage_cached_tokens,
+        )
+    def _create_stream_via_responses(
+        self,
+        messages: list[dict[str, Any]],
+        tools: list[dict[str, Any]],
+        **kwargs: Any,
+    ):
+        """Stream a Responses API request as a generator.
+        Yields a ``StreamEvent(type="text")`` per non-empty output-text delta
+        and a ``StreamEvent(type="tool_call")`` per completed function-call
+        item. The generator's return value is the parsed final response merged
+        with what was streamed.
+        Raises:
+            RuntimeError: on a ``response.failed`` or ``error`` event, or when
+                the stream ends without a ``response.completed`` /
+                ``response.incomplete`` event.
+        """
+        params = self._build_responses_request_params(messages, tools, **kwargs)
+        params["stream"] = True
+        final_payload: Any = None
+        yielded_tool_calls: set[str] = set()
+        streamed_text_parts: list[str] = []
+        streamed_tool_calls: list[ToolCall] = []
+        stream = self.client.responses.create(**params)
+        for event in stream:
+            event_type = getattr(event, "type", "")
+            if event_type == "response.output_text.delta":
+                delta = getattr(event, "delta", "")
+                if delta:
+                    streamed_text_parts.append(delta)
+                    yield StreamEvent(type="text", text=delta)
+                continue
+            if event_type == "response.output_item.done":
+                item = getattr(event, "item", None)
+                # Only function-call items become tool calls; other finished
+                # items are ignored here.
+                if getattr(item, "type", "") != "function_call":
+                    continue
+                # Prefer ``call_id`` and fall back to the item's ``id``.
+                tool_call = ToolCall(
+                    id=getattr(item, "call_id", "") or getattr(item, "id", ""),
+                    name=getattr(item, "name", ""),
+                    input=self._parse_tool_input(getattr(item, "arguments", "{}")),
+                )
+                for emitted_tool_call in self._iter_new_tool_calls(
+                    [tool_call],
+                    yielded_tool_calls,
+                ):
+                    # Keep a copy with its own ``input`` dict for the final
+                    # merge, separate from the one handed to the consumer.
+                    streamed_tool_calls.append(
+                        ToolCall(
+                            id=emitted_tool_call.id,
+                            name=emitted_tool_call.name,
+                            input=dict(emitted_tool_call.input),
+                        )
+                    )
+                    yield StreamEvent(
+                        type="tool_call",
+                        tool_call_id=emitted_tool_call.id,
+                        name=emitted_tool_call.name,
+                        input=emitted_tool_call.input,
+                    )
+                continue
+            # Both "completed" and "incomplete" carry the final response payload.
+            if event_type == "response.completed":
+                final_payload = getattr(event, "response", None)
+                continue
+            if event_type == "response.incomplete":
+                final_payload = getattr(event, "response", None)
+                continue
+            if event_type == "response.failed":
+                response = getattr(event, "response", None)
+                raise RuntimeError(self._extract_responses_error(response) or "Response failed.")
+            if event_type == "error":
+                raise RuntimeError(getattr(event, "message", "Responses stream error."))
+        if final_payload is None:
+            raise RuntimeError("Responses stream ended without a completed response.")
+        # NOTE(review): ``_merge_streamed_responses_result`` is defined outside
+        # this view; presumably it reconciles the parsed payload with the
+        # streamed text and tool calls. Confirm against its definition.
+        return self._merge_streamed_responses_result(
+            self._parse_responses_api_response(final_payload),
+            streamed_text_parts=streamed_text_parts,
+            streamed_tool_calls=streamed_tool_calls,
+        )
+    def _build_chat_request_params(
+        self,
+        messages: list[dict[str, Any]],
+        tools: list[dict[str, Any]],
+        **kwargs: Any,
+    ) -> dict[str, Any]:
+        params: dict[str, Any] = {
+            "model": self.model,
+            "messages": self._convert_messages(messages),
+        }
+        converted_tools = self._convert_tools(tools)
+        if converted_tools:
+            params["tools"] = converted_tools
+        params.update({k: v for k, v in kwargs.items() if k not in _PROTECTED_CHAT_KEYS})
+        return params
+    def _build_responses_request_params(
+        self,
+        messages: list[dict[str, Any]],
+        tools: list[dict[str, Any]],
+        **kwargs: Any,
+    ) -> dict[str, Any]:
+        instructions, input_items = self._convert_messages_for_responses(messages)
+        params: dict[str, Any] = {
+            "model": self.model,
+            "input": input_items,
+        }
+        if instructions:
+            params["instructions"] = instructions
+        converted_tools = self._convert_tools_for_responses(tools)
+        if converted_tools:
+            params["tools"] = converted_tools
+        response_kwargs = dict(kwargs)
+        if "max_tokens" in response_kwargs and "max_output_tokens" not in response_kwargs:
+            response_kwargs["max_output_tokens"] = response_kwargs.pop("max_tokens")
+        params.update(
+            {k: v for k, v in response_kwargs.items() if k not in _PROTECTED_RESPONSES_KEYS}
+        )
+        return params
+    def _convert_messages(self, messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
+        converted: list[dict[str, Any]] = []
+        for message in messages:
+            role = message["role"]
+            content = message.get("content", "")
+            if role in {"system", "user"}:
+                converted.extend(self._convert_non_assistant_message(role, content))
+                continue
+            if role == "assistant":
+                converted.append(self._convert_assistant_message(content))
+                continue
+            converted.append({"role": role, "content": self._stringify_content(content)})
+        return converted
+    def _convert_messages_for_responses(
+        self,
+        messages: list[dict[str, Any]],
+    ) -> tuple[str | None, list[dict[str, Any]]]:
+        instruction_parts: list[str] = []
+        converted: list[dict[str, Any]] = []
+        for message in messages:
+            role = message["role"]
+            content = message.get("content", "")
+            if role in {"system", "developer"}:
+                instruction_text = self._stringify_content(content)
+                if instruction_text:
+                    instruction_parts.append(instruction_text)
+                continue
+            if role in {"user", "assistant"}:
+                converted.extend(self._convert_response_message(role, content))
+                continue
+            converted.append(
+                self._make_response_text_message(role, self._stringify_content(content))
+            )
+        instructions = "\n\n".join(part for part in instruction_parts if part) or None
+        return instructions, converted
+    def _convert_response_message(self, role: str, content: Any) -> list[dict[str, Any]]:
+        if isinstance(content, str):
+            return [self._make_response_text_message(role, content)]
+        if not isinstance(content, list):
+            return [self._make_response_text_message(role, self._stringify_content(content))]
+        converted: list[dict[str, Any]] = []
+        text_parts: list[str] = []
+        # Image blocks lifted out of multimodal tool_result content. They must
+        # be attached to a user message *after* the function_call_output (the
+        # Responses API does not accept ``image_url`` inside the output value
+        # itself), mirroring the chat_completions lift logic.
+        deferred_image_messages: list[dict[str, Any]] = []
+        for block in content:
+            block_type = block.get("type")
+            if block_type == "tool_result":
+                tool_use_id = block.get("tool_use_id", "")
+                raw_content = block.get("content", "")
+                output_text, image_blocks = _lift_image_blocks(raw_content)
+                converted.append(
+                    {
+                        "type": "function_call_output",
+                        "call_id": tool_use_id,
+                        "output": output_text,
+                    }
+                )
+                if image_blocks:
+                    deferred_image_messages.append(
+                        self._build_responses_image_user_message(image_blocks)
+                    )
+                continue
+            if block_type == "tool_use":
+                converted.append(
+                    {
+                        "type": "function_call",
+                        "call_id": block.get("id", ""),
+                        "name": block.get("name", ""),
+                        "arguments": json.dumps(block.get("input", {}), ensure_ascii=False),
+                    }
+                )
+                continue
+            if block_type == "text":
+                text_parts.append(str(block.get("text", "")))
+                continue
+            text_parts.append(self._stringify_content(block))
+        text = "\n".join(part for part in text_parts if part)
+        if text:
+            converted.insert(0, self._make_response_text_message(role, text))
+        # Image-bearing follow-ups always go after the function_call_output
+        # they came from. Multiple tool_results in one message keep their
+        # relative ordering — first lifted, first appended.
+        converted.extend(deferred_image_messages)
+        return converted
+    def _build_responses_image_user_message(
+        self, image_blocks: list[dict[str, Any]]
+    ) -> dict[str, Any]:
+        """Build a Responses-API ``message`` carrying lifted image blocks.
+        The Responses API expects ``input_image`` parts (not ``image_url``),
+        so we translate from the chat_completions shape stored in
+        ``image_blocks`` into the Responses-API native shape here.
+        """
+        count = len(image_blocks)
+        parts: list[dict[str, Any]] = [
+            {
+                "type": "input_text",
+                "text": f"[Tool returned {count} image(s)]",
+            }
+        ]
+        for block in image_blocks:
+            url = block.get("image_url", {}).get("url", "")
+            parts.append({"type": "input_image", "image_url": url})
+        return {
+            "type": "message",
+            "role": "user",
+            "content": parts,
+        }
+    def _make_response_text_message(self, role: str, text: str) -> dict[str, Any]:
+        content_type = "output_text" if role == "assistant" else "input_text"
+        return {
+            "type": "message",
+            "role": role,
+            "content": [{"type": content_type, "text": text}],
+        }
+    def _convert_non_assistant_message(
+        self,
+        role: str,
+        content: Any,
+    ) -> list[dict[str, Any]]:
+        if role != "user":
+            return [{"role": role, "content": self._stringify_content(content)}]
+        if isinstance(content, str):
+            return [{"role": "user", "content": content}]
+        if not isinstance(content, list):
+            return [{"role": "user", "content": self._stringify_content(content)}]
+        converted: list[dict[str, Any]] = []
+        pending_text: list[str] = []
+        def _flush_text() -> None:
+            if pending_text:
+                text = "\n".join(p for p in pending_text if p)
+                if text:
+                    converted.append({"role": "user", "content": text})
+                pending_text.clear()
+        for block in content:
+            if block.get("type") == "tool_result":
+                _flush_text()
+                converted.extend(self._convert_tool_result_for_openai(block))
+                continue
+            if block.get("type") == "text":
+                pending_text.append(str(block.get("text", "")))
+                continue
+            pending_text.append(self._stringify_content(block))
+        _flush_text()
+        return converted
+    def _convert_tool_result_for_openai(
+        self,
+        block: dict[str, Any],
+    ) -> list[dict[str, Any]]:
+        """Convert one tool_result block into one or more chat-completion messages.
+        - ``str`` content keeps the legacy single ``tool`` message shape.
+        - ``list`` content (multimodal MCP path) puts text into the ``tool``
+          message and lifts image blocks into a follow-up ``user`` message,
+          because OpenAI's ``tool`` role does not accept ``image_url`` parts.
+          The lift logic is shared with the Responses-API path via the
+          module-level ``_lift_image_blocks`` helper.
+        """
+        tool_use_id = block.get("tool_use_id", "")
+        raw_content = block.get("content", "")
+        if not isinstance(raw_content, list):
+            return [
+                {
+                    "role": "tool",
+                    "tool_call_id": tool_use_id,
+                    "content": self._stringify_content(raw_content),
+                }
+            ]
+        tool_text, image_blocks = _lift_image_blocks(raw_content)
+        messages: list[dict[str, Any]] = [
+            {
+                "role": "tool",
+                "tool_call_id": tool_use_id,
+                "content": tool_text,
+            }
+        ]
+        if image_blocks:
+            count = len(image_blocks)
+            messages.append(
+                {
+                    "role": "user",
+                    "content": [
+                        {
+                            "type": "text",
+                            "text": f"[Tool returned {count} image(s)]",
+                        },
+                        *image_blocks,
+                    ],
+                }
+            )
+        return messages
+    def _convert_assistant_message(self, content: Any) -> dict[str, Any]:
+        if isinstance(content, str):
+            return {"role": "assistant", "content": content}
+        if not isinstance(content, list):
+            return {"role": "assistant", "content": self._stringify_content(content)}
+        text_parts: list[str] = []
+        tool_calls: list[dict[str, Any]] = []
+        for block in content:
+            block_type = block.get("type")
+            if block_type == "text":
+                text_parts.append(str(block.get("text", "")))
+                continue
+            if block_type == "tool_use":
+                tool_calls.append(
+                    {
+                        "id": block.get("id", ""),
+                        "type": "function",
+                        "function": {
+                            "name": block.get("name", ""),
+                            "arguments": json.dumps(
+                                block.get("input", {}),
+                                ensure_ascii=False,
+                            ),
+                        },
+                    }
+                )
+        assistant_message: dict[str, Any] = {
+            "role": "assistant",
+            "content": "\n".join(part for part in text_parts if part) or None,
+        }
+        if tool_calls:
+            assistant_message["tool_calls"] = tool_calls
+        return assistant_message
+    def _convert_tools(self, tools: list[dict[str, Any]]) -> list[dict[str, Any]]:
+        return [
+            {
+                "type": "function",
+                "function": {
+                    "name": tool["name"],
+                    "description": tool.get("description", ""),
+                    "parameters": tool.get(
+                        "parameters",
+                        {"type": "object", "properties": {}},
+                    ),
+                },
+            }
+            for tool in tools
+        ]
+    def _convert_tools_for_responses(
+        self,
+        tools: list[dict[str, Any]],
+    ) -> list[dict[str, Any]]:
+        return [
+            {
+                "type": "function",
+                "name": tool["name"],
+                "description": tool.get("description", ""),
+                "parameters": tool.get(
+                    "parameters",
+                    {"type": "object", "properties": {}},
+                ),
+                "strict": False,
+            }
+            for tool in tools
+        ]
+    def _parse_response(self, response: Any) -> LLMResponse:
+        if not response.choices:
+            raise RuntimeError("OpenAI returned empty choices (content may have been filtered).")
+        choice = response.choices[0]
+        message = choice.message
+        tool_calls: list[ToolCall] = []
+        for tool_call in message.tool_calls or []:
+            tool_calls.append(
+                ToolCall(
+                    id=tool_call.id,
+                    name=tool_call.function.name,
+                    input=self._parse_tool_input(tool_call.function.arguments or "{}"),
+                )
+            )
+        usage = getattr(response, "usage", None)
+        prompt_tokens = getattr(usage, "prompt_tokens", 0) or 0
+        cached_tokens = self._extract_cached_tokens(usage)
+        return LLMResponse(
+            text=message.content or "",
+            tool_calls=tool_calls,
+            stop_reason=choice.finish_reason or "",
+            input_tokens=max(prompt_tokens - cached_tokens, 0),
+            output_tokens=getattr(usage, "completion_tokens", 0) or 0,
+            cache_read_input_tokens=cached_tokens,
+        )
+    @staticmethod
+    def _extract_cached_tokens(usage: Any) -> int:
+        """Read auto-cache hit tokens from an OpenAI/DeepSeek ``usage`` object.
+        Normalizes both provider shapes into a single cached-token count (a
+        subset of ``prompt_tokens``, so callers compute full-price input as
+        ``prompt_tokens - cached``):
+          - OpenAI:   ``usage.prompt_tokens_details.cached_tokens``
+          - DeepSeek: ``usage.prompt_cache_hit_tokens``
+        Defensive against missing fields / dict-vs-attr shapes — absent fields
+        degrade to 0 (no behavior change from the pre-caching baseline).
+        """
+        if usage is None:
+            return 0
+        details = getattr(usage, "prompt_tokens_details", None)
+        if details is None and isinstance(usage, dict):
+            details = usage.get("prompt_tokens_details")
+        if details is not None:
+            cached = getattr(details, "cached_tokens", None)
+            if cached is None and isinstance(details, dict):
+                cached = details.get("cached_tokens")
+            if cached:
+                return int(cached)
+        hit = getattr(usage, "prompt_cache_hit_tokens", None)
+        if hit is None and isinstance(usage, dict):
+            hit = usage.get("prompt_cache_hit_tokens")
+        if hit:
+            return int(hit)
+        return 0
+    def _parse_responses_api_response(self, response: Any) -> LLMResponse:
+        payload = self._coerce_responses_payload(response)
+        output_items = payload.get("output", [])
+        text_parts: list[str] = []
+        content_blocks: list[dict[str, Any]] = []
+        tool_calls: list[ToolCall] = []
+        for item in output_items:
+            item_type = item.get("type")
+            if item_type == "message":
+                for part in item.get("content", []):
+                    if part.get("type") != "output_text":
+                        continue
+                    text = str(part.get("text", ""))
+                    text_parts.append(text)
+                    content_blocks.append({"type": "text", "text": text})
+                continue
+            if item_type != "function_call":
+                continue
+            call_id = str(item.get("call_id", item.get("id", "")))
+            name = str(item.get("name", ""))
+            parsed_input = self._parse_tool_input(item.get("arguments", "{}"))
+            tool_calls.append(ToolCall(id=call_id, name=name, input=parsed_input))
+            content_blocks.append(
+                {
+                    "type": "tool_use",
+                    "id": call_id,
+                    "name": name,
+                    "input": parsed_input,
+                }
+            )
+        usage = payload.get("usage", {}) or {}
+        input_tokens = int(usage.get("input_tokens", 0) or 0)
+        output_tokens = int(usage.get("output_tokens", 0) or 0)
+        # Responses API exposes auto-cache hits under input_tokens_details;
+        # input_tokens includes cached, so subtract to get full-price input.
+        cached_tokens = 0
+        details = usage.get("input_tokens_details")
+        if isinstance(details, dict):
+            cached_tokens = int(details.get("cached_tokens", 0) or 0)
+        stop_reason = "tool_calls" if tool_calls else str(payload.get("status", "completed"))
+        return LLMResponse(
+            text="".join(text_parts),
+            tool_calls=tool_calls,
+            stop_reason=stop_reason,
+            input_tokens=max(input_tokens - cached_tokens, 0),
+            output_tokens=output_tokens,
+            cache_read_input_tokens=cached_tokens,
+            content_blocks=content_blocks,
+        )
+    def _merge_streamed_responses_result(
+        self,
+        response: LLMResponse,
+        *,
+        streamed_text_parts: list[str],
+        streamed_tool_calls: list[ToolCall],
+    ) -> LLMResponse:
+        if not streamed_text_parts and not streamed_tool_calls:
+            return response
+        merged_text = response.text or "".join(streamed_text_parts)
+        merged_tool_calls = list(response.tool_calls)
+        if not merged_tool_calls and streamed_tool_calls:
+            merged_tool_calls = [
+                ToolCall(
+                    id=tool_call.id,
+                    name=tool_call.name,
+                    input=dict(tool_call.input),
+                )
+                for tool_call in streamed_tool_calls
+            ]
+        merged_content_blocks = [dict(block) for block in response.content_blocks]
+        has_text_block = any(block.get("type") == "text" for block in merged_content_blocks)
+        if merged_text and not has_text_block:
+            merged_content_blocks.insert(0, {"type": "text", "text": merged_text})
+        existing_tool_ids = {
+            str(block.get("id", ""))
+            for block in merged_content_blocks
+            if block.get("type") == "tool_use"
+        }
+        for tool_call in merged_tool_calls:
+            if tool_call.id in existing_tool_ids:
+                continue
+            merged_content_blocks.append(
+                {
+                    "type": "tool_use",
+                    "id": tool_call.id,
+                    "name": tool_call.name,
+                    "input": dict(tool_call.input),
+                }
+            )
+        stop_reason = response.stop_reason
+        if merged_tool_calls and stop_reason != "tool_calls":
+            stop_reason = "tool_calls"
+        return LLMResponse(
+            text=merged_text,
+            tool_calls=merged_tool_calls,
+            stop_reason=stop_reason,
+            input_tokens=response.input_tokens,
+            output_tokens=response.output_tokens,
+            cache_creation_input_tokens=response.cache_creation_input_tokens,
+            cache_read_input_tokens=response.cache_read_input_tokens,
+            thinking=response.thinking,
+            content_blocks=merged_content_blocks,
+        )
+    def _coerce_responses_payload(self, response: Any) -> dict[str, Any]:
+        if isinstance(response, str):
+            return self._parse_responses_sse(response)
+        if isinstance(response, dict):
+            return response
+        if hasattr(response, "to_dict"):
+            payload = response.to_dict()
+            if isinstance(payload, dict):
+                return payload
+        raise TypeError(f"Unsupported Responses API payload: {type(response).__name__}")
+    def _parse_responses_sse(self, payload: str) -> dict[str, Any]:
+        last_response: dict[str, Any] | None = None
+        for line in payload.splitlines():
+            if not line.startswith("data: "):
+                continue
+            raw_json = line[6:].strip()
+            if not raw_json:
+                continue
+            event = json.loads(raw_json)
+            if event.get("type") == "response.completed":
+                return dict(event.get("response", {}))
+            if isinstance(event.get("response"), dict):
+                last_response = dict(event["response"])
+        if last_response is not None:
+            return last_response
+        raise ValueError("Could not parse Responses API payload.")
+    def _extract_responses_error(self, response: Any) -> str:
+        error = getattr(response, "error", None)
+        if error is None:
+            return ""
+        message = getattr(error, "message", None)
+        if message:
+            return str(message)
+        return self._stringify_content(error)
+    def _finalize_tool_calls(
+        self,
+        pending_tool_calls: dict[int, dict[str, str]],
+    ) -> list[ToolCall]:
+        tool_calls: list[ToolCall] = []
+        for index in sorted(pending_tool_calls):
+            tool_call = pending_tool_calls[index]
+            tool_calls.append(
+                ToolCall(
+                    id=tool_call["id"],
+                    name=tool_call["name"],
+                    input=self._parse_tool_input(tool_call["arguments"] or "{}"),
+                )
+            )
+        return tool_calls
+    # Class attribute, so one counter is shared by every instance: it numbers the
+    # synthetic ``_fallback_<n>`` ids that ``_iter_new_tool_calls`` assigns to tool
+    # calls that arrive without an id.
+    _fallback_counter = itertools.count(1)
+    def _iter_new_tool_calls(
+        self,
+        tool_calls: list[ToolCall],
+        emitted_tool_call_ids: set[str],
+    ):
+        for tool_call in tool_calls:
+            if not tool_call.id:
+                tool_call.id = f"_fallback_{next(OpenAIProvider._fallback_counter)}"
+            if tool_call.id in emitted_tool_call_ids:
+                continue
+            emitted_tool_call_ids.add(tool_call.id)
+            yield tool_call
+    def _parse_tool_input(self, arguments: str) -> dict[str, Any]:
+        try:
+            parsed_input = json.loads(arguments)
+        except json.JSONDecodeError:
+            parsed_input = {"raw_arguments": arguments}
+        if not isinstance(parsed_input, dict):
+            parsed_input = {"value": parsed_input}
+        return parsed_input