devrel_origin-0.2.14-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- devrel_origin/__init__.py +15 -0
- devrel_origin/cli/__init__.py +92 -0
- devrel_origin/cli/_common.py +243 -0
- devrel_origin/cli/analytics.py +28 -0
- devrel_origin/cli/argus.py +497 -0
- devrel_origin/cli/auth.py +227 -0
- devrel_origin/cli/config.py +108 -0
- devrel_origin/cli/content.py +259 -0
- devrel_origin/cli/cost.py +108 -0
- devrel_origin/cli/cro.py +298 -0
- devrel_origin/cli/deliverables.py +65 -0
- devrel_origin/cli/docs.py +91 -0
- devrel_origin/cli/doctor.py +178 -0
- devrel_origin/cli/experiment.py +29 -0
- devrel_origin/cli/growth.py +97 -0
- devrel_origin/cli/init.py +472 -0
- devrel_origin/cli/intel.py +27 -0
- devrel_origin/cli/kb.py +96 -0
- devrel_origin/cli/listen.py +31 -0
- devrel_origin/cli/marketing.py +66 -0
- devrel_origin/cli/migrate.py +45 -0
- devrel_origin/cli/run.py +46 -0
- devrel_origin/cli/sales.py +57 -0
- devrel_origin/cli/schedule.py +62 -0
- devrel_origin/cli/synthesize.py +28 -0
- devrel_origin/cli/triage.py +29 -0
- devrel_origin/cli/video.py +35 -0
- devrel_origin/core/__init__.py +58 -0
- devrel_origin/core/agent_config.py +75 -0
- devrel_origin/core/argus.py +964 -0
- devrel_origin/core/atlas.py +1450 -0
- devrel_origin/core/base.py +372 -0
- devrel_origin/core/cyra.py +563 -0
- devrel_origin/core/dex.py +708 -0
- devrel_origin/core/echo.py +614 -0
- devrel_origin/core/growth/__init__.py +27 -0
- devrel_origin/core/growth/recommendations.py +219 -0
- devrel_origin/core/growth/target_kinds.py +51 -0
- devrel_origin/core/iris.py +513 -0
- devrel_origin/core/kai.py +1367 -0
- devrel_origin/core/llm.py +542 -0
- devrel_origin/core/llm_backends.py +274 -0
- devrel_origin/core/mox.py +514 -0
- devrel_origin/core/nova.py +349 -0
- devrel_origin/core/pax.py +1205 -0
- devrel_origin/core/rex.py +532 -0
- devrel_origin/core/sage.py +486 -0
- devrel_origin/core/sentinel.py +385 -0
- devrel_origin/core/types.py +98 -0
- devrel_origin/core/video/__init__.py +22 -0
- devrel_origin/core/video/assembler.py +131 -0
- devrel_origin/core/video/browser_recorder.py +118 -0
- devrel_origin/core/video/desktop_recorder.py +254 -0
- devrel_origin/core/video/overlay_renderer.py +143 -0
- devrel_origin/core/video/script_parser.py +147 -0
- devrel_origin/core/video/tts_engine.py +82 -0
- devrel_origin/core/vox.py +268 -0
- devrel_origin/core/watchdog.py +321 -0
- devrel_origin/project/__init__.py +1 -0
- devrel_origin/project/config.py +75 -0
- devrel_origin/project/cost_sink.py +61 -0
- devrel_origin/project/init.py +104 -0
- devrel_origin/project/paths.py +75 -0
- devrel_origin/project/state.py +241 -0
- devrel_origin/project/templates/__init__.py +4 -0
- devrel_origin/project/templates/config.toml +24 -0
- devrel_origin/project/templates/devrel.gitignore +10 -0
- devrel_origin/project/templates/slop-blocklist.md +45 -0
- devrel_origin/project/templates/style.md +24 -0
- devrel_origin/project/templates/voice.md +29 -0
- devrel_origin/quality/__init__.py +66 -0
- devrel_origin/quality/editorial.py +357 -0
- devrel_origin/quality/persona.py +84 -0
- devrel_origin/quality/readability.py +148 -0
- devrel_origin/quality/slop.py +167 -0
- devrel_origin/quality/style.py +110 -0
- devrel_origin/quality/voice.py +15 -0
- devrel_origin/tools/__init__.py +9 -0
- devrel_origin/tools/analytics.py +304 -0
- devrel_origin/tools/api_client.py +393 -0
- devrel_origin/tools/apollo_client.py +305 -0
- devrel_origin/tools/code_validator.py +428 -0
- devrel_origin/tools/github_tools.py +297 -0
- devrel_origin/tools/instantly_client.py +412 -0
- devrel_origin/tools/kb_harvester.py +340 -0
- devrel_origin/tools/mcp_server.py +578 -0
- devrel_origin/tools/notifications.py +245 -0
- devrel_origin/tools/run_report.py +193 -0
- devrel_origin/tools/scheduler.py +231 -0
- devrel_origin/tools/search_tools.py +321 -0
- devrel_origin/tools/self_improve.py +168 -0
- devrel_origin/tools/sheets.py +236 -0
- devrel_origin-0.2.14.dist-info/METADATA +354 -0
- devrel_origin-0.2.14.dist-info/RECORD +98 -0
- devrel_origin-0.2.14.dist-info/WHEEL +5 -0
- devrel_origin-0.2.14.dist-info/entry_points.txt +2 -0
- devrel_origin-0.2.14.dist-info/licenses/LICENSE +21 -0
- devrel_origin-0.2.14.dist-info/top_level.txt +1 -0
@@ -0,0 +1,542 @@
"""Shared LLM client wrapper for all agents.

Multi-provider via the LLMBackend abstraction in core/llm_backends.py:
AnthropicBackend (default) and OpenRouterBackend. Per-agent model overrides
flow through `agent_models={agent_name: model_id}` so e.g. Argus can run on
a cheap classification model while Kai sticks to a high-quality writer.
Cost tracking, budget gating, and agent-attribution stay in this layer; the
backend's only job is the actual chat call.
"""

import asyncio
import json
import logging
from collections.abc import Awaitable
from contextlib import contextmanager
from contextvars import ContextVar
from dataclasses import dataclass, field
from typing import Any, Callable

from devrel_origin.core.base import strip_markdown_fences
from devrel_origin.core.llm_backends import (
    ANTHROPIC_DEFAULT_MODEL,
    ANTHROPIC_MODELS,
    LLMBackend,
    make_backend,
)

logger = logging.getLogger(__name__)

_current_agent_var: ContextVar[str] = ContextVar("devrel_origin_current_agent", default="")

DEFAULT_MODEL = ANTHROPIC_DEFAULT_MODEL
DEFAULT_MAX_TOKENS = 4096

# Backwards-compatible alias map. Re-exported for callers that imported it
# directly; new code should rely on the backend's resolve_alias().
MODELS = dict(ANTHROPIC_MODELS)

# Cost per million tokens (USD), used for budget tracking. Anthropic ids
# stay un-prefixed; OpenRouter pass-through entries (`anthropic/...`,
# `openai/...`) are stripped to their base model id at lookup time so we
# don't have to duplicate every Anthropic price under both keys.
MODEL_COSTS: dict[str, dict[str, float]] = {
    "claude-opus-4-0-20250514": {"input": 15.0, "output": 75.0},
    DEFAULT_MODEL: {"input": 3.0, "output": 15.0},
    "claude-haiku-4-5-20251001": {"input": 0.80, "output": 4.0},
    # OpenAI models routed via OpenRouter; pricing per OpenAI public list.
    "gpt-4o": {"input": 2.5, "output": 10.0},
    "gpt-4o-mini": {"input": 0.15, "output": 0.60},
    "gpt-4-turbo": {"input": 10.0, "output": 30.0},
}


def _lookup_cost(model: str) -> dict[str, float]:
    """Return the per-million-token pricing for a model id.

    Accepts both bare ids ('claude-sonnet-4-5-20250929') and OpenRouter-
    style provider-prefixed ids ('anthropic/claude-sonnet-4-5-20250929',
    'openai/gpt-4o-mini'); falls back to the bare id after splitting the
    provider prefix once. Unknown models price at zero so the cost ledger
    doesn't crash, but we lose accuracy.
    """
    if model in MODEL_COSTS:
        return MODEL_COSTS[model]
    if "/" in model:
        bare = model.split("/", 1)[1]
        if bare in MODEL_COSTS:
            return MODEL_COSTS[bare]
    return {"input": 0.0, "output": 0.0}
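
# A minimal sketch (not from the package source), assuming only the table and
# function above: the three id shapes _lookup_cost handles are a known bare
# id, a provider-prefixed id that strips to a known bare id, and an unknown
# id that prices at zero. The provider prefix "someprovider" is hypothetical.
def _demo_lookup_cost() -> None:
    assert _lookup_cost("gpt-4o-mini") == {"input": 0.15, "output": 0.60}
    assert _lookup_cost("openai/gpt-4o-mini") == {"input": 0.15, "output": 0.60}
    assert _lookup_cost("someprovider/unknown-model") == {"input": 0.0, "output": 0.0}
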

_CRITIQUE_CRITERIA: dict[str, str] = {
    "content": (
        "1. ACCURACY — Are claims grounded in the provided context? Any hallucinated facts?\n"
        "2. CLARITY — Is the writing clear, scannable, and free of jargon-for-jargon's-sake?\n"
        "3. ACTIONABILITY — Does the reader leave with something concrete to do?\n"
        "4. STRUCTURE — Logical flow, good heading hierarchy, appropriate length?\n"
        "5. VOICE — Developer-authentic, not marketing fluff or AI slop?\n"
        "6. CODE QUALITY — Are code examples complete, correct, and well-commented?"
    ),
    "sales": (
        "1. ACCURACY — Are claims grounded in product facts? No overpromising?\n"
        "2. CLARITY — Is the message scannable, short paragraphs, no filler?\n"
        "3. PERSUASIVENESS — Does it sell the next step, not the whole product?\n"
        "4. PERSONALIZATION — Does it reference the recipient's specific situation?\n"
        "5. VOICE — Developer-aware, not corporate marketing speak?\n"
        "6. CTA — One clear, low-friction call to action?"
    ),
    "marketing": (
        "1. ACCURACY — Are claims grounded in product knowledge base?\n"
        "2. CLARITY — Short paragraphs, clear hierarchy, mobile-readable?\n"
        "3. DIFFERENTIATION — Does it position against alternatives with evidence?\n"
        "4. STRUCTURE — Appropriate format for the content type (blog/landing/social)?\n"
        "5. VOICE — Developer-authentic, storytelling over selling?\n"
        "6. CTA — One clear next step per piece?"
    ),
}

CRITIQUE_PROMPT = """You are a senior content editor. Review the following draft and
provide a structured critique as JSON.

## Draft
{draft}

## Evaluation Criteria
{criteria}

Return ONLY a JSON object:
{{
  "overall_score": <1-10>,
  "issues": [
    {{"criterion": "...", "severity": "high|medium|low", "description": "...", "fix": "..."}}
  ],
  "strengths": ["..."]
}}"""

REVISE_PROMPT = """Revise the following draft by applying all editorial feedback.
For fixes that require additions (missing sections, examples), add them.
For fixes that require cuts (over-long paragraphs, buzzwords), remove them.
For fixes that require rewrites, rewrite only the affected section.
Do not change sections that have no associated issues.
Return ONLY the revised content, no preamble or commentary.

## Original Draft
{draft}

## Editorial Feedback
{critique}

## Revised Draft"""
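
# A small sketch (not from the package source) of the template contract
# above: str.format substitutes only {draft} and {criteria}, while the
# doubled braces survive as literal JSON braces in the rendered prompt.
def _demo_critique_prompt_format() -> None:
    rendered = CRITIQUE_PROMPT.format(draft="Hello", criteria="1. ACCURACY")
    assert '"overall_score": <1-10>' in rendered
    assert "{{" not in rendered  # doubled braces collapsed to single braces
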

@dataclass
class CritiqueResult:
    """Result of an editorial critique."""

    overall_score: int = 0
    issues: list[dict[str, str]] = field(default_factory=list)
    strengths: list[str] = field(default_factory=list)

    @property
    def revision_needed(self) -> bool:
        """Computed: revise if score < 7 or any high-severity issue exists."""
        if self.overall_score < 7:
            return True
        return any(i.get("severity") == "high" for i in self.issues)

    @classmethod
    def from_json(cls, raw: str) -> "CritiqueResult":
        cleaned = strip_markdown_fences(raw)
        try:
            data = json.loads(cleaned)
        except json.JSONDecodeError:
            return cls(overall_score=5)
        return cls(
            overall_score=data.get("overall_score", 5),
            issues=data.get("issues", []),
            strengths=data.get("strengths", []),
        )
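
# A quick sketch of the parsing contract (not from the package source):
# markdown fences are stripped before json.loads, and unparseable output
# degrades to a neutral score of 5 rather than raising. Note that 5 < 7
# still trips revision_needed, so a broken critique forces a revision pass.
def _demo_from_json() -> None:
    ok = CritiqueResult.from_json('{"overall_score": 9, "issues": [], "strengths": ["tight"]}')
    assert not ok.revision_needed
    flagged = CritiqueResult.from_json('{"overall_score": 8, "issues": [{"severity": "high"}]}')
    assert flagged.revision_needed  # a high-severity issue overrides the score
    broken = CritiqueResult.from_json("sorry, here is my critique...")
    assert broken.overall_score == 5 and broken.revision_needed
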

@dataclass
class RevisionTrace:
    """Tracks the full revision history for a generation."""

    drafts: list[str] = field(default_factory=list)
    critiques: list[CritiqueResult] = field(default_factory=list)
    final_score: int = 0
    revision_rounds: int = 0


@dataclass
class TokenUsage:
    """Tracks cumulative token usage, cost, and per-agent breakdown."""

    total_input_tokens: int = 0
    total_output_tokens: int = 0
    total_calls: int = 0
    total_cost_usd: float = 0.0
    per_agent: dict[str, dict[str, Any]] = field(default_factory=dict)

    def record(
        self,
        input_tokens: int,
        output_tokens: int,
        agent: str = "",
        model: str = "",
    ) -> None:
        self.total_input_tokens += input_tokens
        self.total_output_tokens += output_tokens
        self.total_calls += 1

        # Compute cost. _lookup_cost handles both bare Anthropic ids and
        # OpenRouter-style 'provider/model' paths so the ledger works for
        # whichever backend ran the call.
        costs = _lookup_cost(model) if model else MODEL_COSTS[DEFAULT_MODEL]
        call_cost = (
            input_tokens * costs["input"] / 1_000_000 + output_tokens * costs["output"] / 1_000_000
        )
        self.total_cost_usd += call_cost

        if agent:
            if agent not in self.per_agent:
                self.per_agent[agent] = {
                    "input_tokens": 0,
                    "output_tokens": 0,
                    "calls": 0,
                    "cost_usd": 0.0,
                }
            self.per_agent[agent]["input_tokens"] += input_tokens
            self.per_agent[agent]["output_tokens"] += output_tokens
            self.per_agent[agent]["calls"] += 1
            self.per_agent[agent]["cost_usd"] += call_cost

    def to_dict(self) -> dict:
        return {
            "total_input_tokens": self.total_input_tokens,
            "total_output_tokens": self.total_output_tokens,
            "total_calls": self.total_calls,
            "total_cost_usd": round(self.total_cost_usd, 4),
            "per_agent": {
                k: {**v, "cost_usd": round(v["cost_usd"], 4)} for k, v in self.per_agent.items()
            },
        }
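
# A worked example of the ledger arithmetic (a sketch, not package code):
# 10,000 input tokens at $3/Mtok plus 2,000 output tokens at $15/Mtok on the
# default Sonnet pricing is 0.03 + 0.03 = $0.06, attributed to agent "kai".
def _demo_token_usage() -> None:
    usage = TokenUsage()
    usage.record(10_000, 2_000, agent="kai", model=DEFAULT_MODEL)
    assert usage.total_input_tokens == 10_000 and usage.total_calls == 1
    assert round(usage.total_cost_usd, 4) == 0.06
    assert round(usage.per_agent["kai"]["cost_usd"], 4) == 0.06
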

class LLMClient:
    """Async LLM client with multi-provider routing, per-agent model
    overrides, and budget enforcement.

    Supports per-call model overrides for cost optimization:
    - Use "haiku" for classification/extraction tasks
    - Use "opus" for high-quality content generation
    - Use default "sonnet" for everything else

    Per-agent overrides via `agent_models={agent_name: model_id}` are consulted
    inside `_resolve_model` and win over the call-site default; explicit
    `model=` arguments still win over the per-agent setting. The ContextVar
    set by `agent_context()` drives the lookup so concurrent agents under
    `asyncio.gather` each get their own configured model.

    Budget enforcement: when total spend exceeds ``budget_limit_usd``, future
    calls are forced to the backend's `cheap_model` (Haiku for Anthropic,
    `anthropic/claude-haiku-4-5-...` for OpenRouter).
    """

    def __init__(
        self,
        api_key: str = "",
        model: str = "",
        max_tokens: int = DEFAULT_MAX_TOKENS,
        budget_limit_usd: float = 0.0,
        *,
        backend: LLMBackend | None = None,
        provider: str | None = None,
        openrouter_api_key: str = "",
        agent_models: dict[str, str] | None = None,
    ):
        # Backend resolution: caller-supplied wins; otherwise auto-detect from
        # explicit `provider` arg or env vars (OPENROUTER_API_KEY presence).
        self._backend: LLMBackend = backend or make_backend(
            provider=provider,
            anthropic_api_key=api_key,
            openrouter_api_key=openrouter_api_key,
        )
        # Per-instance default model. Empty defers to the backend's default,
        # which keeps callers that pass api_key but no model on the right
        # default for whichever backend was auto-selected.
        self.model = model or self._backend.default_model
        self.max_tokens = max_tokens
        self.budget_limit_usd = budget_limit_usd
        self.agent_models: dict[str, str] = dict(agent_models or {})
        self.usage = TokenUsage()
        self._current_agent: str = ""
        self._budget_exhausted = False
        self._cost_sink: "Callable[[str, str, dict[str, Any]], Awaitable[None]] | None" = None

    @property
    def backend(self) -> LLMBackend:
        return self._backend

    @property
    def _client(self):
        """Back-compat shim. Pre-multi-provider tests reached into
        ``client._client.messages.create`` to mock the Anthropic SDK; new code
        should mock ``client._backend.chat`` (returns a ``BackendResponse``)
        instead. Returns the AnthropicBackend's underlying SDK client when
        present, otherwise None."""
        return getattr(self._backend, "_client", None)

    def _resolve_model(self, model_override: str | None) -> str:
        """Resolve the model id to use for a call, in priority order:

        1. Budget downgrade (forced cheap model, overrides everything)
        2. Explicit ``model=`` argument at the call site
        3. Per-agent override from ``agent_models[current_agent]``
        4. The instance default (``self.model``)

        Whatever value comes out is then run through the backend's
        ``resolve_alias`` so shorthand ('haiku' / 'sonnet' / 'opus') ends up as
        a real provider id before the chat call.
        """
        if self._budget_exhausted:
            return self._backend.cheap_model
        if model_override:
            return self._backend.resolve_alias(model_override)
        agent = _current_agent_var.get() or self._current_agent
        if agent and agent in self.agent_models:
            return self._backend.resolve_alias(self.agent_models[agent])
        return self._backend.resolve_alias(self.model)

    def _check_budget(self) -> None:
        """Check if budget limit has been exceeded."""
        if (
            self.budget_limit_usd > 0
            and not self._budget_exhausted
            and self.usage.total_cost_usd >= self.budget_limit_usd * 0.95
        ):
            self._budget_exhausted = True
            logger.warning(
                "budget_limit_reached",
                extra={
                    "spent": round(self.usage.total_cost_usd, 4),
                    "limit": self.budget_limit_usd,
                    "action": "downgrading to haiku",
                },
            )

    async def generate(
        self,
        system_prompt: str,
        user_prompt: str,
        temperature: float = 0.7,
        max_tokens: int | None = None,
        model: str | None = None,
    ) -> str:
        """Send a prompt and return the response text.

        Args:
            model: Optional model override, "haiku" / "sonnet" / "opus", or a
                full model id (Anthropic bare or OpenRouter-style). Per-agent
                overrides apply when this is left None. Budget downgrade still
                wins over both.
        """
        resolved_model = self._resolve_model(model)
        response = await self._backend.chat(
            model=resolved_model,
            system_prompt=system_prompt,
            user_prompt=user_prompt,
            temperature=temperature,
            max_tokens=max_tokens or self.max_tokens,
        )
        # The backend may have downgraded or upgraded the model id (provider
        # routing); record against the actually-served model so cost lookups
        # match what was billed.
        served_model = response.model or resolved_model
        agent_for_record = _current_agent_var.get() or self._current_agent
        self.usage.record(
            input_tokens=response.input_tokens,
            output_tokens=response.output_tokens,
            agent=agent_for_record,
            model=served_model,
        )
        self._check_budget()
        await self._emit_cost(
            model=served_model,
            input_tokens=response.input_tokens,
            output_tokens=response.output_tokens,
            cache_creation_input_tokens=response.cache_creation_input_tokens,
            cache_read_input_tokens=response.cache_read_input_tokens,
        )
        logger.info(
            "llm_call",
            extra={
                "agent": agent_for_record or "unknown",
                "backend": self._backend.name,
                "input_tokens": response.input_tokens,
                "output_tokens": response.output_tokens,
                "model": served_model,
                "cost_usd": round(self.usage.total_cost_usd, 4),
                "cumulative_calls": self.usage.total_calls,
            },
        )
        return response.text

    async def aclose(self) -> None:
        """Release the underlying backend client (httpx pool / SDK client)."""
        await self._backend.aclose()

    def set_agent(self, agent_name: str) -> None:
        """Set the current agent name for per-agent cost tracking."""
        self._current_agent = agent_name

    @contextmanager
    def agent_context(self, agent_name: str):
        """Set the cost-attribution agent for the duration of this context.

        Async-task-local via ContextVar — safe under asyncio.gather() unlike
        set_agent(), which mutates a shared instance attribute. Prefer this
        over set_agent() when running agents concurrently.
        """
        token = _current_agent_var.set(agent_name)
        try:
            yield
        finally:
            _current_agent_var.reset(token)

    def set_cost_sink(
        self,
        sink: "Callable[[str, str, dict[str, Any]], Awaitable[None]] | None",
    ) -> None:
        """Register async callback ``(agent, model, usage_dict) -> None``.

        Called once per successful Anthropic API response. ``None`` clears
        the sink. Sink exceptions are caught and logged at WARNING — they
        never break the LLM call (cost recording is best-effort).
        """
        self._cost_sink = sink

    async def _emit_cost(
        self,
        model: str,
        input_tokens: int,
        output_tokens: int,
        cache_creation_input_tokens: int = 0,
        cache_read_input_tokens: int = 0,
    ) -> None:
        if self._cost_sink is None:
            return
        # Shield the sink call so an outer cancellation (e.g. Atlas's per-agent
        # timeout firing between the API response and this write) doesn't drop
        # the cost row. The Anthropic call already returned and we've been billed;
        # the sink coroutine has no inner awaits, so once the event loop schedules
        # it, the SQLite commit completes synchronously even if the calling task
        # is being torn down. CancelledError is BaseException in 3.8+ so it
        # bypasses `except Exception`; re-raise it to preserve cancellation
        # semantics for the caller.
        coro = self._cost_sink(
            _current_agent_var.get() or self._current_agent or "unknown",
            model,
            {
                "input_tokens": input_tokens,
                "output_tokens": output_tokens,
                "cache_creation_input_tokens": cache_creation_input_tokens,
                "cache_read_input_tokens": cache_read_input_tokens,
            },
        )
        try:
            await asyncio.shield(coro)
        except asyncio.CancelledError:
            raise
        except Exception as e:
            logger.warning("cost sink raised; ignoring: %s", e)

    async def critique(
        self,
        draft: str,
        content_type: str = "content",
    ) -> CritiqueResult:
        """Run editorial critique on a draft, return structured feedback.

        Args:
            draft: The content to critique.
            content_type: One of "content" (tutorials/blogs), "sales"
                (outreach/battle cards), "marketing" (landing pages/social).
                Selects appropriate evaluation criteria.
        """
        criteria = _CRITIQUE_CRITERIA.get(content_type, _CRITIQUE_CRITERIA["content"])
        raw = await self.generate(
            system_prompt="You are a senior content editor.",
            user_prompt=CRITIQUE_PROMPT.format(
                draft=draft[:12000],
                criteria=criteria,
            ),
            temperature=0.3,
            max_tokens=2048,
        )
        return CritiqueResult.from_json(raw)

    async def generate_with_revision(
        self,
        system_prompt: str,
        user_prompt: str,
        temperature: float = 0.7,
        max_tokens: int | None = None,
        max_rounds: int = 2,
        min_score: int = 7,
        content_type: str = "content",
    ) -> tuple[str, RevisionTrace]:
        """Generate content with a critique-then-revise loop.

        Produces a draft, critiques it, and revises if the score is below
        *min_score* or any high-severity issue is flagged. Repeats up to
        *max_rounds* times.

        Args:
            content_type: Selects critique criteria — "content", "sales",
                or "marketing".

        Returns the final content and the full revision trace.
        """
        trace = RevisionTrace()

        # Initial generation
        draft = await self.generate(
            system_prompt=system_prompt,
            user_prompt=user_prompt,
            temperature=temperature,
            max_tokens=max_tokens,
        )
        trace.drafts.append(draft)

        for _round in range(max_rounds):
            crit = await self.critique(draft, content_type=content_type)
            trace.critiques.append(crit)
            trace.final_score = crit.overall_score

            if not crit.revision_needed and crit.overall_score >= min_score:
                break

            # Revise
            critique_text = json.dumps(
                {"issues": crit.issues, "strengths": crit.strengths},
                indent=2,
            )
            draft = await self.generate(
                system_prompt=system_prompt,
                user_prompt=REVISE_PROMPT.format(
                    draft=draft[:12000],
                    critique=critique_text,
                ),
                temperature=max(temperature - 0.1, 0.2),
                max_tokens=max_tokens,
            )
            trace.drafts.append(draft)
            trace.revision_rounds += 1

        return draft, trace

    async def close(self) -> None:
        """Close the underlying HTTP client."""
        # Delegate to the backend: the Anthropic-only `_client` shim is None
        # for non-Anthropic providers, so `self._client.close()` would raise
        # AttributeError there. `aclose()` is the canonical path.
        await self._backend.aclose()
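
# A minimal end-to-end sketch of the client's documented flow, assuming only
# the definitions above. `_StubBackend` and `_BackendReply` are hypothetical
# stand-ins for the real LLMBackend / BackendResponse in core/llm_backends.py
# (not shown in this diff); their attribute names mirror how this module uses
# them. The sketch exercises per-agent model overrides, ContextVar-based
# attribution under asyncio.gather, and the cost sink.
@dataclass
class _BackendReply:
    text: str
    model: str
    input_tokens: int = 1_000
    output_tokens: int = 500
    cache_creation_input_tokens: int = 0
    cache_read_input_tokens: int = 0


class _StubBackend:
    name = "stub"
    default_model = DEFAULT_MODEL
    cheap_model = "claude-haiku-4-5-20251001"

    def resolve_alias(self, model: str) -> str:
        return {"haiku": self.cheap_model, "sonnet": DEFAULT_MODEL}.get(model, model)

    async def chat(self, *, model, system_prompt, user_prompt, temperature, max_tokens):
        return _BackendReply(text=f"[{model}] ok", model=model)

    async def aclose(self) -> None:
        pass


async def _demo_client() -> None:
    rows: list[tuple[str, str, dict[str, Any]]] = []

    async def sink(agent: str, model: str, usage: dict[str, Any]) -> None:
        rows.append((agent, model, usage))

    # Argus classifies on the cheap model; everyone else gets the default.
    client = LLMClient(backend=_StubBackend(), agent_models={"argus": "haiku"})
    client.set_cost_sink(sink)

    async def run(agent: str) -> str:
        with client.agent_context(agent):
            return await client.generate("system", "user")

    # Each gather branch carries its own ContextVar value, so the per-agent
    # override and the cost attribution stay separate per task.
    argus_reply, kai_reply = await asyncio.gather(run("argus"), run("kai"))
    assert "haiku" in argus_reply
    assert DEFAULT_MODEL in kai_reply
    assert {agent for agent, _, _ in rows} == {"argus", "kai"}
    await client.aclose()


# asyncio.run(_demo_client())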