PyPI - agentkernel-cli - Versions diffs - 0.1.0__py3-none-any.whl - Mend

agentkernel-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (74)

agentkernel/__init__.py +7 -0
agentkernel/__main__.py +5 -0
agentkernel/agent.py +311 -0
agentkernel/approval/__init__.py +23 -0
agentkernel/approval/base.py +34 -0
agentkernel/approval/cli.py +129 -0
agentkernel/approval/policy.py +58 -0
agentkernel/approval/risk.py +91 -0
agentkernel/approval/sandbox.py +201 -0
agentkernel/budget.py +64 -0
agentkernel/checkpoint.py +50 -0
agentkernel/cli.py +1482 -0
agentkernel/config.py +224 -0
agentkernel/context/__init__.py +17 -0
agentkernel/context/manager.py +216 -0
agentkernel/context/truncate.py +35 -0
agentkernel/cron.py +146 -0
agentkernel/curation.py +183 -0
agentkernel/doctor.py +141 -0
agentkernel/embeddings.py +132 -0
agentkernel/evaluation.py +186 -0
agentkernel/improvement.py +133 -0
agentkernel/insights.py +141 -0
agentkernel/kanban.py +114 -0
agentkernel/knowledge.py +383 -0
agentkernel/loops.py +145 -0
agentkernel/mcp/__init__.py +23 -0
agentkernel/mcp/client.py +181 -0
agentkernel/mcp/config.py +59 -0
agentkernel/mcp/tools.py +96 -0
agentkernel/memory.py +1208 -0
agentkernel/paths.py +73 -0
agentkernel/plugins.py +76 -0
agentkernel/profiles.py +70 -0
agentkernel/progress.py +89 -0
agentkernel/providers/__init__.py +35 -0
agentkernel/providers/_http.py +157 -0
agentkernel/providers/anthropic.py +282 -0
agentkernel/providers/base.py +38 -0
agentkernel/providers/credentials.py +65 -0
agentkernel/providers/local.py +34 -0
agentkernel/providers/openai.py +260 -0
agentkernel/redaction.py +77 -0
agentkernel/semantic_index.py +139 -0
agentkernel/semantic_memory.py +253 -0
agentkernel/skills.py +268 -0
agentkernel/subagent.py +161 -0
agentkernel/telemetry.py +199 -0
agentkernel/templates/README.md +35 -0
agentkernel/templates/SKILL.md +28 -0
agentkernel/templates/eval-suite.toml +22 -0
agentkernel/templates/loop.toml +29 -0
agentkernel/templates/mcp-servers.toml +22 -0
agentkernel/templates/profile.toml +29 -0
agentkernel/templates/tool_module.py +64 -0
agentkernel/tools/__init__.py +5 -0
agentkernel/tools/base.py +100 -0
agentkernel/tools/builtin/__init__.py +37 -0
agentkernel/tools/builtin/checkpoint_tool.py +33 -0
agentkernel/tools/builtin/clarify.py +60 -0
agentkernel/tools/builtin/files.py +221 -0
agentkernel/tools/builtin/kanban_tool.py +100 -0
agentkernel/tools/builtin/search.py +225 -0
agentkernel/tools/builtin/shell.py +67 -0
agentkernel/tools/builtin/todo.py +106 -0
agentkernel/tui/__init__.py +50 -0
agentkernel/tui/app.py +594 -0
agentkernel/types.py +127 -0
agentkernel/worktree.py +64 -0
agentkernel_cli-0.1.0.dist-info/METADATA +426 -0
agentkernel_cli-0.1.0.dist-info/RECORD +74 -0
agentkernel_cli-0.1.0.dist-info/WHEEL +4 -0
agentkernel_cli-0.1.0.dist-info/entry_points.txt +2 -0
agentkernel_cli-0.1.0.dist-info/licenses/LICENSE +201 -0

agentkernel/providers/anthropic.py ADDED Viewed

@@ -0,0 +1,282 @@
+"""Anthropic Messages API adapter (design §5, §8.1, §9.3).
+Wire shape: assistant ``tool_use`` content blocks; all tool results for a turn
+go in a single ``user`` message of ``tool_result`` blocks keyed by
+``tool_use_id``. The stable prefix (system + tool defs) carries
+``cache_control: ephemeral`` on its final element so Anthropic serves it from
+cache. No Anthropic dict escapes this module except inside ``CompletionResponse.raw``.
+"""
+from __future__ import annotations
+import json
+from collections.abc import Callable, Iterable
+from typing import Any
+from agentkernel.providers._http import ProviderError, post_json_pooled, stream_sse
+from agentkernel.providers.credentials import CredentialPool
+from agentkernel.tools import ToolSpec
+from agentkernel.types import CompletionResponse, Message, ToolCall, Usage
+API_URL = "https://api.anthropic.com/v1/messages"
+API_VERSION = "2023-06-01"
+DEFAULT_CONTEXT_WINDOW = 200_000
+_EPHEMERAL = {"type": "ephemeral"}
+# --- translation: canonical -> wire (pure, offline-testable) ---------------
+def render_tools(tools: list[ToolSpec]) -> list[dict[str, Any]]:
+    """Render tool specs to Anthropic's schema, caching the prefix at the last
+    tool. Order is preserved (never re-sorted) so the prefix stays byte-stable."""
+    wire: list[dict[str, Any]] = [
+        {"name": t.name, "description": t.description, "input_schema": t.parameters}
+        for t in tools
+    ]
+    if wire:
+        wire[-1]["cache_control"] = _EPHEMERAL  # prefix boundary (design §9.3)
+    return wire
+_THINKING_BUDGET = {"low": 1024, "medium": 4096, "high": 8192}
+def thinking_config(reasoning: str | None, max_tokens: int) -> dict[str, Any] | None:
+    """Map a reasoning level to an extended-thinking block, or None.
+    The budget is capped below ``max_tokens`` (thinking must leave room for the
+    reply); if there isn't enough room, thinking is skipped rather than erroring.
+    Thinking blocks in the response are already ignored by ``parse_response``.
+    """
+    if not reasoning:
+        return None
+    headroom = max_tokens - 1024
+    if headroom < 1024:
+        return None
+    budget = min(_THINKING_BUDGET.get(reasoning, 4096), headroom)
+    return {"type": "enabled", "budget_tokens": budget}
+def render_system(system: str | None) -> list[dict[str, Any]] | None:
+    """System prompt as a cached text block, or None when absent."""
+    if not system:
+        return None
+    return [{"type": "text", "text": system, "cache_control": _EPHEMERAL}]
+def render_messages(messages: list[Message]) -> list[dict[str, Any]]:
+    out: list[dict[str, Any]] = []
+    for m in messages:
+        if m.role == "user":
+            out.append({"role": "user", "content": m.content})
+        elif m.role == "assistant":
+            if m.tool_calls:
+                blocks: list[dict[str, Any]] = []
+                if m.content:
+                    blocks.append({"type": "text", "text": m.content})
+                blocks.extend(
+                    {
+                        "type": "tool_use",
+                        "id": tc.id,
+                        "name": tc.name,
+                        "input": tc.arguments,
+                    }
+                    for tc in m.tool_calls
+                )
+                out.append({"role": "assistant", "content": blocks})
+            else:
+                out.append({"role": "assistant", "content": m.content})
+        elif m.role == "tool":
+            # All results for the turn in one user message (design §8.1).
+            out.append(
+                {
+                    "role": "user",
+                    "content": [
+                        {
+                            "type": "tool_result",
+                            "tool_use_id": r.call_id,
+                            "content": r.content,
+                            "is_error": r.is_error,
+                        }
+                        for r in m.tool_results
+                    ],
+                }
+            )
+        # role == "system" is delivered via the `system` param, not as a message.
+    return out
+# --- translation: wire -> canonical ----------------------------------------
+def parse_response(data: dict[str, Any]) -> CompletionResponse:
+    text_parts: list[str] = []
+    tool_calls: list[ToolCall] = []
+    for block in data.get("content", []):
+        btype = block.get("type")
+        if btype == "text":
+            text_parts.append(block.get("text", ""))
+        elif btype == "tool_use":
+            args = block.get("input")
+            tool_calls.append(
+                ToolCall(
+                    id=block["id"],
+                    name=block["name"],
+                    arguments=args if isinstance(args, dict) else {},
+                )
+            )
+    u = data.get("usage", {})
+    usage = Usage(
+        input_tokens=u.get("input_tokens", 0),
+        output_tokens=u.get("output_tokens", 0),
+        cache_read_tokens=u.get("cache_read_input_tokens", 0),
+        cache_write_tokens=u.get("cache_creation_input_tokens", 0),
+    )
+    return CompletionResponse(
+        message=Message(
+            role="assistant", content="".join(text_parts), tool_calls=tool_calls
+        ),
+        usage=usage,
+        stop_reason=data.get("stop_reason", ""),
+        raw=data,
+    )
+def accumulate_stream(
+    events: Iterable[dict[str, Any]],
+    on_text: Callable[[str], None] | None = None,
+) -> dict[str, Any]:
+    """Fold Anthropic SSE events into a single non-streaming response dict.
+    Text deltas forward to ``on_text``; tool_use blocks accumulate their
+    ``input_json`` fragments and are parsed at the end. The result is exactly
+    what ``parse_response`` consumes."""
+    blocks: dict[int, dict[str, Any]] = {}
+    usage: dict[str, Any] = {}
+    stop_reason = ""
+    for event in events:
+        etype = event.get("type")
+        if etype == "message_start":
+            usage.update(event.get("message", {}).get("usage", {}) or {})
+        elif etype == "content_block_start":
+            cb = event.get("content_block", {})
+            blocks[event.get("index", 0)] = {
+                "type": cb.get("type"),
+                "text": cb.get("text", "") or "",
+                "id": cb.get("id"),
+                "name": cb.get("name"),
+                "json": "",
+            }
+        elif etype == "content_block_delta":
+            block = blocks.setdefault(
+                event.get("index", 0), {"type": "text", "text": "", "json": ""}
+            )
+            delta = event.get("delta", {})
+            if delta.get("type") == "text_delta":
+                text = delta.get("text", "")
+                block["text"] += text
+                if on_text is not None and text:
+                    on_text(text)
+            elif delta.get("type") == "input_json_delta":
+                block["json"] += delta.get("partial_json", "")
+            elif delta.get("type") == "thinking_delta":
+                # Extended thinking: shown live but not part of the answer.
+                if on_text is not None:
+                    on_text(delta.get("thinking", ""))
+        elif etype == "message_delta":
+            stop_reason = event.get("delta", {}).get("stop_reason", stop_reason)
+            usage.update(event.get("usage", {}) or {})
+    content: list[dict[str, Any]] = []
+    for _index, block in sorted(blocks.items()):
+        if block.get("type") == "text":
+            content.append({"type": "text", "text": block["text"]})
+        elif block.get("type") == "tool_use":
+            try:
+                parsed = json.loads(block["json"] or "{}")
+            except json.JSONDecodeError:
+                parsed = {}
+            content.append(
+                {"type": "tool_use", "id": block["id"], "name": block["name"], "input": parsed}
+            )
+    return {"content": content, "stop_reason": stop_reason, "usage": usage}
+class AnthropicProvider:
+    name = "anthropic"
+    def __init__(
+        self,
+        model: str,
+        *,
+        api_key: str | None = None,
+        context_window: int = DEFAULT_CONTEXT_WINDOW,
+    ) -> None:
+        self.model = model
+        self.context_window = context_window
+        self._pool = (
+            CredentialPool([api_key]) if api_key
+            else CredentialPool.from_env("ANTHROPIC_API_KEY")
+        )
+    def with_model(self, model: str) -> AnthropicProvider:
+        """A copy of this provider bound to a different model (shares credentials)."""
+        clone = AnthropicProvider.__new__(AnthropicProvider)
+        clone.model = model
+        clone.context_window = self.context_window
+        clone._pool = self._pool
+        return clone
+    def complete(
+        self,
+        messages: list[Message],
+        tools: list[ToolSpec],
+        *,
+        max_tokens: int,
+        temperature: float = 1.0,
+        system: str | None = None,
+        reasoning: str | None = None,
+        on_text: Callable[[str], None] | None = None,
+    ) -> CompletionResponse:
+        if self._pool.current() is None:
+            raise ProviderError("ANTHROPIC_API_KEY is not set in the environment")
+        thinking = thinking_config(reasoning, max_tokens)
+        payload: dict[str, Any] = {
+            "model": self.model,
+            "max_tokens": max_tokens,
+            # Extended thinking requires temperature 1; otherwise honor the caller.
+            "temperature": 1.0 if thinking else temperature,
+            "messages": render_messages(messages),
+        }
+        if thinking is not None:
+            payload["thinking"] = thinking
+        if tools:
+            payload["tools"] = render_tools(tools)
+        sys_blocks = render_system(system)
+        if sys_blocks is not None:
+            payload["system"] = sys_blocks
+        def header_for_key(key: str | None) -> dict[str, str]:
+            return {
+                "x-api-key": key or "",
+                "anthropic-version": API_VERSION,
+                "content-type": "application/json",
+            }
+        if on_text is not None:
+            # Best-effort streaming with a non-streaming fallback on any fault.
+            try:
+                events = stream_sse(
+                    API_URL,
+                    headers=header_for_key(self._pool.current()),
+                    payload={**payload, "stream": True},
+                )
+                return parse_response(accumulate_stream(events, on_text))
+            except ProviderError:
+                pass
+        return parse_response(
+            post_json_pooled(
+                API_URL, header_for_key=header_for_key, payload=payload, pool=self._pool
+            )
+        )

agentkernel/providers/base.py ADDED Viewed

@@ -0,0 +1,38 @@
+"""The Provider protocol (design §5.1).
+An adapter translates the canonical message/tool types to a provider's wire
+format, calls the API, and translates the reply back into one
+``CompletionResponse``. No provider-specific object escapes an adapter except
+inside ``CompletionResponse.raw``.
+"""
+from __future__ import annotations
+from collections.abc import Callable
+from typing import TYPE_CHECKING, Protocol
+from agentkernel.types import CompletionResponse, Message
+if TYPE_CHECKING:
+    from agentkernel.tools import ToolSpec
+class Provider(Protocol):
+    """Structural interface that every provider adapter satisfies.
+    An adapter exposes a ``name``, the ``context_window`` of its model, and a
+    single ``complete`` method that performs one model turn."""
+    name: str  # adapter identifier
+    context_window: int  # total token capacity of the selected model
+    def complete(
+        self,
+        messages: list[Message],
+        tools: list[ToolSpec],
+        *,
+        max_tokens: int,
+        temperature: float = 1.0,
+        system: str | None = None,
+        reasoning: str | None = None,
+        on_text: Callable[[str], None] | None = None,
+    ) -> CompletionResponse:
+        """Complete one turn. When ``on_text`` is given, the adapter streams and
+        calls it with each text delta; the returned ``CompletionResponse`` is the
+        same as the non-streaming result (the loop contract is unchanged).
+        ``messages`` is the canonical conversation and ``tools`` the tool specs
+        offered to the model. All remaining options are keyword-only:
+        ``max_tokens`` is required, ``system`` is an optional system prompt and
+        ``reasoning`` an optional reasoning level."""
+        ...

agentkernel/providers/credentials.py ADDED Viewed

@@ -0,0 +1,65 @@
+"""Credential pools for providers (design §18.5).
+A provider can be given several API keys and rotate to the next one when the
+current key is rate-limited or exhausted. Keys still come only from the
+environment (design §11): a pool is read from one env var that may hold a
+comma-separated list, plus numbered siblings ``<VAR>_1``, ``<VAR>_2``, …
+A single key is just a pool of one, so existing single-key setups are unchanged.
+"""
+from __future__ import annotations
+import os
+class CredentialPool:
+    """An ordered set of API keys with a rotating cursor."""
+    def __init__(self, keys: list[str]) -> None:
+        # Dedupe, preserving order; drop blanks.
+        seen: set[str] = set()
+        self._keys: list[str] = []
+        for k in keys:
+            k = (k or "").strip()
+            if k and k not in seen:
+                seen.add(k)
+                self._keys.append(k)
+        self._idx = 0
+        self._exhausted: set[int] = set()
+    @classmethod
+    def from_env(cls, env_var: str, *, env: dict[str, str] | None = None) -> CredentialPool:
+        """Collect keys from ``env_var`` (comma-separated) and ``env_var_1..N``."""
+        env = os.environ if env is None else env
+        keys: list[str] = [p.strip() for p in (env.get(env_var) or "").split(",")]
+        i = 1
+        while True:
+            value = env.get(f"{env_var}_{i}")
+            if not value:
+                break
+            keys.append(value)
+            i += 1
+        return cls(keys)
+    def __len__(self) -> int:
+        return len(self._keys)
+    def current(self) -> str | None:
+        """The active key, or None if the pool is empty."""
+        return self._keys[self._idx] if self._keys else None
+    def mark_exhausted(self) -> None:
+        """Flag the active key as exhausted (rate-limited) for this session."""
+        if self._keys:
+            self._exhausted.add(self._idx)
+    def rotate(self) -> bool:
+        """Advance to the next key that isn't exhausted. False if none remain."""
+        n = len(self._keys)
+        for step in range(1, n):
+            j = (self._idx + step) % n
+            if j not in self._exhausted:
+                self._idx = j
+                return True
+        return False

agentkernel/providers/local.py ADDED Viewed

@@ -0,0 +1,34 @@
+"""Local / OpenAI-compatible endpoint adapter (design §5.2).
+Same wire shape as OpenAI (Ollama, vLLM, LM Studio, …) with a configurable
+``base_url`` and optional auth. No prompt caching is assumed, so the turn-2
+cache check in M1 does not apply to this provider.
+"""
+from __future__ import annotations
+from agentkernel.providers.openai import OpenAIProvider
+DEFAULT_BASE_URL = "http://localhost:11434/v1"  # Ollama default
+DEFAULT_CONTEXT_WINDOW = 8192
+class LocalProvider(OpenAIProvider):
+    def __init__(
+        self,
+        model: str,
+        *,
+        base_url: str = DEFAULT_BASE_URL,
+        api_key: str | None = None,
+        context_window: int = DEFAULT_CONTEXT_WINDOW,
+    ) -> None:
+        super().__init__(
+            model,
+            api_key=api_key,
+            base_url=base_url,
+            context_window=context_window,
+            name="local",
+            require_key=False,  # local endpoints commonly need no key
+            env_key="LOCAL_API_KEY",
+            send_reasoning=False,  # arbitrary local models may reject reasoning_effort
+        )

agentkernel/providers/openai.py ADDED Viewed

@@ -0,0 +1,260 @@
+"""OpenAI Chat Completions adapter (design §5, §8.1).
+Wire shape: assistant ``tool_calls`` array (arguments are JSON *strings*); each
+tool result is its own ``role: "tool"`` message keyed by ``tool_call_id``.
+OpenAI caches the prefix automatically, so there are no explicit cache markers —
+``cache_read_tokens`` is read back from ``usage.prompt_tokens_details``.
+"""
+from __future__ import annotations
+import json
+from collections.abc import Callable, Iterable
+from typing import Any
+from agentkernel.providers._http import ProviderError, post_json_pooled, stream_sse
+from agentkernel.providers.credentials import CredentialPool
+from agentkernel.tools import ToolSpec
+from agentkernel.types import CompletionResponse, Message, ToolCall, Usage
+DEFAULT_BASE_URL = "https://api.openai.com/v1"
+DEFAULT_CONTEXT_WINDOW = 128_000
+_STOP_REASONS = {"stop": "end_turn", "tool_calls": "tool_use", "length": "max_tokens"}
+# --- translation: canonical -> wire (pure, offline-testable) ---------------
+def render_tools(tools: list[ToolSpec]) -> list[dict[str, Any]]:
+    return [
+        {
+            "type": "function",
+            "function": {
+                "name": t.name,
+                "description": t.description,
+                "parameters": t.parameters,
+            },
+        }
+        for t in tools
+    ]
+def render_messages(
+    messages: list[Message], system: str | None = None
+) -> list[dict[str, Any]]:
+    out: list[dict[str, Any]] = []
+    if system:
+        out.append({"role": "system", "content": system})
+    for m in messages:
+        if m.role == "user":
+            out.append({"role": "user", "content": m.content})
+        elif m.role == "assistant":
+            msg: dict[str, Any] = {"role": "assistant", "content": m.content or None}
+            if m.tool_calls:
+                msg["tool_calls"] = [
+                    {
+                        "id": tc.id,
+                        "type": "function",
+                        "function": {
+                            "name": tc.name,
+                            "arguments": json.dumps(tc.arguments),
+                        },
+                    }
+                    for tc in m.tool_calls
+                ]
+            out.append(msg)
+        elif m.role == "tool":
+            # One message per result, keyed by tool_call_id (design §8.1).
+            out.extend(
+                {"role": "tool", "tool_call_id": r.call_id, "content": r.content}
+                for r in m.tool_results
+            )
+        # role == "system" messages are delivered via the `system` param.
+    return out
+# --- translation: wire -> canonical ----------------------------------------
+def parse_response(data: dict[str, Any]) -> CompletionResponse:
+    choice = data["choices"][0]
+    msg = choice.get("message", {})
+    tool_calls: list[ToolCall] = []
+    for tc in msg.get("tool_calls") or []:
+        fn = tc.get("function", {})
+        try:
+            args = json.loads(fn.get("arguments") or "{}")
+        except json.JSONDecodeError:
+            args = {}  # malformed JSON surfaces as a validation error in §6
+        tool_calls.append(
+            ToolCall(id=tc["id"], name=fn.get("name", ""), arguments=args)
+        )
+    u = data.get("usage", {})
+    cached = (u.get("prompt_tokens_details") or {}).get("cached_tokens", 0)
+    usage = Usage(
+        input_tokens=u.get("prompt_tokens", 0),
+        output_tokens=u.get("completion_tokens", 0),
+        cache_read_tokens=cached,
+    )
+    finish = choice.get("finish_reason", "")
+    return CompletionResponse(
+        message=Message(
+            role="assistant",
+            content=msg.get("content") or "",
+            tool_calls=tool_calls,
+        ),
+        usage=usage,
+        stop_reason=_STOP_REASONS.get(finish, finish),
+        raw=data,
+    )
+def accumulate_stream(
+    events: Iterable[dict[str, Any]],
+    on_text: Callable[[str], None] | None = None,
+) -> dict[str, Any]:
+    """Fold OpenAI streaming chunks into a single non-streaming response dict.
+    Text deltas are forwarded to ``on_text``; ``tool_calls`` deltas are
+    accumulated by index (id/name arrive once, arguments arrive in fragments).
+    The result is exactly what ``parse_response`` consumes."""
+    content: list[str] = []
+    tool_calls: dict[int, dict[str, str]] = {}
+    finish = ""
+    usage: dict[str, Any] = {}
+    for event in events:
+        if event.get("usage"):
+            usage = event["usage"]
+        for choice in event.get("choices", []):
+            delta = choice.get("delta", {})
+            text = delta.get("content")
+            if text:
+                content.append(text)
+                if on_text is not None:
+                    on_text(text)
+            # Reasoning models (e.g. via LM Studio) stream their thinking on a
+            # separate channel — show it live, but it is not part of the answer.
+            reasoning = delta.get("reasoning_content")
+            if reasoning and on_text is not None:
+                on_text(reasoning)
+            for tc in delta.get("tool_calls") or []:
+                slot = tool_calls.setdefault(
+                    tc.get("index", 0), {"id": "", "name": "", "arguments": ""}
+                )
+                if tc.get("id"):
+                    slot["id"] = tc["id"]
+                fn = tc.get("function", {})
+                if fn.get("name"):
+                    slot["name"] = fn["name"]
+                if fn.get("arguments"):
+                    slot["arguments"] += fn["arguments"]
+            if choice.get("finish_reason"):
+                finish = choice["finish_reason"]
+    message: dict[str, Any] = {"content": "".join(content) or None}
+    if tool_calls:
+        message["tool_calls"] = [
+            {
+                "id": slot["id"],
+                "type": "function",
+                "function": {"name": slot["name"], "arguments": slot["arguments"]},
+            }
+            for _index, slot in sorted(tool_calls.items())
+        ]
+    return {"choices": [{"message": message, "finish_reason": finish}], "usage": usage}
+class OpenAIProvider:
+    def __init__(
+        self,
+        model: str,
+        *,
+        api_key: str | None = None,
+        base_url: str = DEFAULT_BASE_URL,
+        context_window: int = DEFAULT_CONTEXT_WINDOW,
+        name: str = "openai",
+        require_key: bool = True,
+        env_key: str = "OPENAI_API_KEY",
+        send_reasoning: bool = True,
+    ) -> None:
+        self.name = name
+        self.model = model
+        self.context_window = context_window
+        self._base_url = base_url.rstrip("/")
+        self._require_key = require_key
+        self._send_reasoning = send_reasoning
+        self._pool = (
+            CredentialPool([api_key]) if api_key else CredentialPool.from_env(env_key)
+        )
+    def with_model(self, model: str) -> OpenAIProvider:
+        """A copy of this provider bound to a different model (shares credentials).
+        Used to honor a profile's ``model_override`` for one run without
+        rebuilding the credential pool or re-reading the environment."""
+        clone = OpenAIProvider.__new__(OpenAIProvider)
+        clone.name = self.name
+        clone.model = model
+        clone.context_window = self.context_window
+        clone._base_url = self._base_url
+        clone._require_key = self._require_key
+        clone._send_reasoning = self._send_reasoning
+        clone._pool = self._pool
+        return clone
+    def complete(
+        self,
+        messages: list[Message],
+        tools: list[ToolSpec],
+        *,
+        max_tokens: int,
+        temperature: float = 1.0,
+        system: str | None = None,
+        reasoning: str | None = None,
+        on_text: Callable[[str], None] | None = None,
+    ) -> CompletionResponse:
+        if self._require_key and self._pool.current() is None:
+            raise ProviderError(f"API key for provider {self.name!r} is not set")
+        payload: dict[str, Any] = {
+            "model": self.model,
+            "max_tokens": max_tokens,
+            "temperature": temperature,
+            "messages": render_messages(messages, system),
+        }
+        # reasoning_effort is honored by OpenAI reasoning models; only sent when a
+        # profile asks for it, and never for local endpoints that may reject it.
+        if reasoning and self._send_reasoning:
+            payload["reasoning_effort"] = reasoning
+        if tools:
+            payload["tools"] = render_tools(tools)
+        def header_for_key(key: str | None) -> dict[str, str]:
+            headers = {"content-type": "application/json"}
+            if key:
+                headers["Authorization"] = f"Bearer {key}"
+            return headers
+        url = f"{self._base_url}/chat/completions"
+        if on_text is not None:
+            # Best-effort streaming: on any transport/protocol fault, fall back to
+            # the non-streaming path so the turn still completes correctly.
+            try:
+                events = stream_sse(
+                    url,
+                    headers=header_for_key(self._pool.current()),
+                    payload={
+                        **payload,
+                        "stream": True,
+                        "stream_options": {"include_usage": True},
+                    },
+                )
+                return parse_response(accumulate_stream(events, on_text))
+            except ProviderError:
+                pass
+        return parse_response(
+            post_json_pooled(
+                url, header_for_key=header_for_key, payload=payload, pool=self._pool
+            )
+        )