prompture 0.0.35__py3-none-any.whl → 0.0.40.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- prompture/__init__.py +132 -3
- prompture/_version.py +2 -2
- prompture/agent.py +924 -0
- prompture/agent_types.py +156 -0
- prompture/async_agent.py +880 -0
- prompture/async_conversation.py +208 -17
- prompture/async_core.py +16 -0
- prompture/async_driver.py +63 -0
- prompture/async_groups.py +551 -0
- prompture/conversation.py +222 -18
- prompture/core.py +46 -12
- prompture/cost_mixin.py +37 -0
- prompture/discovery.py +132 -44
- prompture/driver.py +77 -0
- prompture/drivers/__init__.py +5 -1
- prompture/drivers/async_azure_driver.py +11 -5
- prompture/drivers/async_claude_driver.py +184 -9
- prompture/drivers/async_google_driver.py +222 -28
- prompture/drivers/async_grok_driver.py +11 -5
- prompture/drivers/async_groq_driver.py +11 -5
- prompture/drivers/async_lmstudio_driver.py +74 -5
- prompture/drivers/async_ollama_driver.py +13 -3
- prompture/drivers/async_openai_driver.py +162 -5
- prompture/drivers/async_openrouter_driver.py +11 -5
- prompture/drivers/async_registry.py +5 -1
- prompture/drivers/azure_driver.py +10 -4
- prompture/drivers/claude_driver.py +17 -1
- prompture/drivers/google_driver.py +227 -33
- prompture/drivers/grok_driver.py +11 -5
- prompture/drivers/groq_driver.py +11 -5
- prompture/drivers/lmstudio_driver.py +73 -8
- prompture/drivers/ollama_driver.py +16 -5
- prompture/drivers/openai_driver.py +26 -11
- prompture/drivers/openrouter_driver.py +11 -5
- prompture/drivers/vision_helpers.py +153 -0
- prompture/group_types.py +147 -0
- prompture/groups.py +530 -0
- prompture/image.py +180 -0
- prompture/ledger.py +252 -0
- prompture/model_rates.py +112 -2
- prompture/persistence.py +254 -0
- prompture/persona.py +482 -0
- prompture/serialization.py +218 -0
- prompture/settings.py +1 -0
- prompture-0.0.40.dev1.dist-info/METADATA +369 -0
- prompture-0.0.40.dev1.dist-info/RECORD +78 -0
- prompture-0.0.35.dist-info/METADATA +0 -464
- prompture-0.0.35.dist-info/RECORD +0 -66
- {prompture-0.0.35.dist-info → prompture-0.0.40.dev1.dist-info}/WHEEL +0 -0
- {prompture-0.0.35.dist-info → prompture-0.0.40.dev1.dist-info}/entry_points.txt +0 -0
- {prompture-0.0.35.dist-info → prompture-0.0.40.dev1.dist-info}/licenses/LICENSE +0 -0
- {prompture-0.0.35.dist-info → prompture-0.0.40.dev1.dist-info}/top_level.txt +0 -0
prompture/discovery.py
CHANGED
@@ -1,7 +1,11 @@
 """Discovery module for auto-detecting available models."""
 
+from __future__ import annotations
+
+import dataclasses
 import logging
 import os
+from typing import Any, overload
 
 import requests
 
@@ -22,23 +26,40 @@ from .settings import settings
 logger = logging.getLogger(__name__)
 
 
-
-def get_available_models() -> list[str]:
-    """Auto-detect available models based on configured drivers and environment variables.
+@overload
+def get_available_models(*, include_capabilities: bool = False, verified_only: bool = False) -> list[str]: ...
+
+
+@overload
+def get_available_models(*, include_capabilities: bool = True, verified_only: bool = False) -> list[dict[str, Any]]: ...
+
+
+def get_available_models(
+    *,
+    include_capabilities: bool = False,
+    verified_only: bool = False,
+) -> list[str] | list[dict[str, Any]]:
+    """Auto-detect available models based on configured drivers and environment variables.
 
-    Iterates through supported providers and checks if they are configured
-    For static drivers, returns models from their
-    For dynamic drivers (like Ollama), attempts to
+    Iterates through supported providers and checks if they are configured
+    (e.g. API key present). For static drivers, returns models from their
+    ``MODEL_PRICING`` keys. For dynamic drivers (like Ollama), attempts to
+    fetch available models from the endpoint.
+
+    Args:
+        include_capabilities: When ``True``, return enriched dicts with
+            ``model``, ``provider``, ``model_id``, and ``capabilities``
+            fields instead of plain ``"provider/model_id"`` strings.
+        verified_only: When ``True``, only return models that have been
+            successfully used (as recorded by the usage ledger).
 
     Returns:
-        A list of unique model strings
+        A sorted list of unique model strings (default) or enriched dicts.
     """
     available_models: set[str] = set()
     configured_providers: set[str] = set()
 
     # Map of provider name to driver class
-    # We need to map the registry keys to the actual classes to check MODEL_PRICING
-    # and instantiate for dynamic checks if needed.
     provider_classes = {
         "openai": OpenAIDriver,
         "azure": AzureDriver,
@@ -54,11 +75,6 @@ def get_available_models() -> list[str]:
 
     for provider, driver_cls in provider_classes.items():
         try:
-            # 1. Check if the provider is configured (has API key or endpoint)
-            # We can check this by looking at the settings or env vars that the driver uses.
-            # A simple way is to try to instantiate it with defaults, but that might fail if keys are missing.
-            # Instead, let's check the specific requirements for each known provider.
-
             is_configured = False
 
             if provider == "openai":
@@ -86,14 +102,11 @@ def get_available_models() -> list[str]:
             elif provider == "grok":
                 if settings.grok_api_key or os.getenv("GROK_API_KEY"):
                     is_configured = True
-            elif
-
-
-
-
-                # LM Studio is similar to Ollama, defaults to localhost
-                is_configured = True
-            elif provider == "local_http" and (settings.local_http_endpoint or os.getenv("LOCAL_HTTP_ENDPOINT")):
+            elif (
+                provider == "ollama"
+                or provider == "lmstudio"
+                or (provider == "local_http" and os.getenv("LOCAL_HTTP_ENDPOINT"))
+            ):
                 is_configured = True
 
             if not is_configured:
@@ -101,36 +114,20 @@ def get_available_models() -> list[str]:
 
             configured_providers.add(provider)
 
-            #
+            # Static Detection: Get models from MODEL_PRICING
             if hasattr(driver_cls, "MODEL_PRICING"):
                 pricing = driver_cls.MODEL_PRICING
                 for model_id in pricing:
-                    # Skip "default" or generic keys if they exist
                     if model_id == "default":
                         continue
-
-                    # For Azure, the model_id in pricing is usually the base model name,
-                    # but the user needs to use the deployment ID.
-                    # However, our Azure driver implementation uses the deployment_id from init
-                    # as the "model" for the request, but expects the user to pass a model name
-                    # that maps to pricing?
-                    # Looking at AzureDriver:
-                    # kwargs = {"model": self.deployment_id, ...}
-                    # model = options.get("model", self.model) -> used for pricing lookup
-                    # So we should list the keys in MODEL_PRICING as available "models"
-                    # even though for Azure specifically it's a bit weird because of deployment IDs.
-                    # But for general discovery, listing supported models is correct.
-
                     available_models.add(f"{provider}/{model_id}")
 
-            #
+            # Dynamic Detection: Specific logic for Ollama
            if provider == "ollama":
                 try:
                     endpoint = settings.ollama_endpoint or os.getenv(
                         "OLLAMA_ENDPOINT", "http://localhost:11434/api/generate"
                     )
-                    # We need the base URL for tags, usually http://localhost:11434/api/tags
-                    # The configured endpoint might be .../api/generate or .../api/chat
                     base_url = endpoint.split("/api/")[0]
                     tags_url = f"{base_url}/api/tags"
 
@@ -141,13 +138,34 @@ def get_available_models() -> list[str]:
                         for model in models:
                             name = model.get("name")
                             if name:
-                                # Ollama model names often include tags like "llama3:latest"
-                                # We can keep them as is.
                                 available_models.add(f"ollama/{name}")
                 except Exception as e:
                     logger.debug(f"Failed to fetch Ollama models: {e}")
 
-            #
+            # Dynamic Detection: LM Studio loaded models
+            if provider == "lmstudio":
+                try:
+                    endpoint = settings.lmstudio_endpoint or os.getenv(
+                        "LMSTUDIO_ENDPOINT", "http://127.0.0.1:1234/v1/chat/completions"
+                    )
+                    base_url = endpoint.split("/v1/")[0]
+                    models_url = f"{base_url}/v1/models"
+
+                    headers: dict[str, str] = {}
+                    api_key = settings.lmstudio_api_key or os.getenv("LMSTUDIO_API_KEY")
+                    if api_key:
+                        headers["Authorization"] = f"Bearer {api_key}"
+
+                    resp = requests.get(models_url, headers=headers, timeout=2)
+                    if resp.status_code == 200:
+                        data = resp.json()
+                        models = data.get("data", [])
+                        for model in models:
+                            model_id = model.get("id")
+                            if model_id:
+                                available_models.add(f"lmstudio/{model_id}")
+                except Exception as e:
+                    logger.debug(f"Failed to fetch LM Studio models: {e}")
 
         except Exception as e:
             logger.warning(f"Error detecting models for provider {provider}: {e}")
@@ -161,4 +179,74 @@ def get_available_models() -> list[str]:
         for model_id in get_all_provider_models(api_name):
             available_models.add(f"{prompture_name}/{model_id}")
 
-
+    sorted_models = sorted(available_models)
+
+    # --- verified_only filtering ---
+    verified_set: set[str] | None = None
+    if verified_only or include_capabilities:
+        try:
+            from .ledger import _get_ledger
+
+            ledger = _get_ledger()
+            verified_set = ledger.get_verified_models()
+        except Exception:
+            logger.debug("Could not load ledger for verified models", exc_info=True)
+            verified_set = set()
+
+    if verified_only and verified_set is not None:
+        sorted_models = [m for m in sorted_models if m in verified_set]
+
+    if not include_capabilities:
+        return sorted_models
+
+    # Build enriched dicts with capabilities from models.dev
+    from .model_rates import get_model_capabilities
+
+    # Fetch all ledger stats for annotation (keyed by model_name)
+    ledger_stats: dict[str, dict[str, Any]] = {}
+    try:
+        from .ledger import _get_ledger
+
+        for row in _get_ledger().get_all_stats():
+            name = row["model_name"]
+            if name not in ledger_stats:
+                ledger_stats[name] = row
+            else:
+                # Aggregate across API key hashes
+                existing = ledger_stats[name]
+                existing["use_count"] += row["use_count"]
+                existing["total_tokens"] += row["total_tokens"]
+                existing["total_cost"] += row["total_cost"]
+                if row["last_used"] > existing["last_used"]:
+                    existing["last_used"] = row["last_used"]
+    except Exception:
+        logger.debug("Could not load ledger stats for enrichment", exc_info=True)
+
+    enriched: list[dict[str, Any]] = []
+    for model_str in sorted_models:
+        parts = model_str.split("/", 1)
+        provider = parts[0]
+        model_id = parts[1] if len(parts) > 1 else parts[0]
+
+        caps = get_model_capabilities(provider, model_id)
+        caps_dict = dataclasses.asdict(caps) if caps is not None else None
+
+        entry: dict[str, Any] = {
+            "model": model_str,
+            "provider": provider,
+            "model_id": model_id,
+            "capabilities": caps_dict,
+            "verified": verified_set is not None and model_str in verified_set,
+        }
+
+        stats = ledger_stats.get(model_str)
+        if stats:
+            entry["last_used"] = stats["last_used"]
+            entry["use_count"] = stats["use_count"]
+        else:
+            entry["last_used"] = None
+            entry["use_count"] = 0
+
+        enriched.append(entry)
+
+    return enriched
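The `@overload` pair above means the return shape of `get_available_models` is driven entirely by keyword arguments. A minimal usage sketch, assuming the function is imported from `prompture.discovery` as shipped in the wheel; the enrichment keys come straight from the docstring in the diff:

```python
from prompture.discovery import get_available_models

# Default: a sorted list of "provider/model_id" strings.
names = get_available_models()

# Enriched: dicts carrying models.dev capabilities plus ledger usage stats.
for entry in get_available_models(include_capabilities=True):
    print(entry["model"], entry["verified"], entry["use_count"])

# Restrict to models the usage ledger has recorded a successful call for.
verified = get_available_models(verified_only=True)
```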
prompture/driver.py
CHANGED
@@ -35,6 +35,7 @@ class Driver:
     supports_messages: bool = False
     supports_tool_use: bool = False
     supports_streaming: bool = False
+    supports_vision: bool = False
 
     callbacks: DriverCallbacks | None = None
 
@@ -52,6 +53,7 @@ class Driver:
         support message arrays should override this method and set
         ``supports_messages = True``.
         """
+        self._check_vision_support(messages)
         prompt = self._flatten_messages(messages)
         return self.generate(prompt, options)
 
@@ -171,6 +173,69 @@ class Driver:
         except Exception:
             logger.exception("Callback %s raised an exception", event)
 
+    def _validate_model_capabilities(
+        self,
+        provider: str,
+        model: str,
+        *,
+        using_tool_use: bool = False,
+        using_json_schema: bool = False,
+        using_vision: bool = False,
+    ) -> None:
+        """Log warnings when the model may not support a requested feature.
+
+        Uses models.dev metadata as a secondary signal. Warnings only — the
+        API is the final authority and models.dev data may be stale.
+        """
+        from .model_rates import get_model_capabilities
+
+        caps = get_model_capabilities(provider, model)
+        if caps is None:
+            return
+
+        if using_tool_use and caps.supports_tool_use is False:
+            logger.warning(
+                "Model %s/%s may not support tool use according to models.dev metadata",
+                provider,
+                model,
+            )
+        if using_json_schema and caps.supports_structured_output is False:
+            logger.warning(
+                "Model %s/%s may not support structured output / JSON schema according to models.dev metadata",
+                provider,
+                model,
+            )
+        if using_vision and caps.supports_vision is False:
+            logger.warning(
+                "Model %s/%s may not support vision/image inputs according to models.dev metadata",
+                provider,
+                model,
+            )
+
+    def _check_vision_support(self, messages: list[dict[str, Any]]) -> None:
+        """Raise if messages contain image blocks and the driver lacks vision support."""
+        if self.supports_vision:
+            return
+        for msg in messages:
+            content = msg.get("content")
+            if isinstance(content, list):
+                for block in content:
+                    if isinstance(block, dict) and block.get("type") == "image":
+                        raise NotImplementedError(
+                            f"{self.__class__.__name__} does not support vision/image inputs. "
+                            "Use a vision-capable model."
+                        )
+
+    def _prepare_messages(self, messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
+        """Transform universal message format into provider-specific wire format.
+
+        Vision-capable drivers override this to convert the universal image
+        blocks into their provider-specific format. The base implementation
+        validates vision support and returns messages unchanged.
+        """
+        self._check_vision_support(messages)
+        return messages
+
     @staticmethod
     def _flatten_messages(messages: list[dict[str, Any]]) -> str:
         """Join messages into a single prompt string with role prefixes."""
@@ -178,6 +243,18 @@ class Driver:
         for msg in messages:
             role = msg.get("role", "user")
             content = msg.get("content", "")
+            # Handle content that is a list of blocks (vision messages)
+            if isinstance(content, list):
+                text_parts = []
+                for block in content:
+                    if isinstance(block, dict):
+                        if block.get("type") == "text":
+                            text_parts.append(block.get("text", ""))
+                        elif block.get("type") == "image":
+                            text_parts.append("[image]")
+                    elif isinstance(block, str):
+                        text_parts.append(block)
+                content = " ".join(text_parts)
             if role == "system":
                 parts.append(f"[System]: {content}")
             elif role == "assistant":
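The new base-class hooks define a universal message format in which `content` may be a list of typed blocks. A short sketch of how a non-vision driver reacts to it; the fields of an image block beyond `"type"` are not shown in this diff, so the `source` key below is a hypothetical placeholder:

```python
messages = [
    {
        "role": "user",
        "content": [
            {"type": "text", "text": "What is in this picture?"},
            {"type": "image", "source": "..."},  # "source" is a hypothetical field
        ],
    }
]

# On a driver with supports_vision = False, _check_vision_support() raises
# NotImplementedError as soon as it sees the image block; _flatten_messages()
# would instead render it as the "[image]" placeholder in the joined prompt.
```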
prompture/drivers/__init__.py
CHANGED
@@ -84,7 +84,11 @@ register_driver(
 )
 register_driver(
     "lmstudio",
-    lambda model=None: LMStudioDriver(
+    lambda model=None: LMStudioDriver(
+        endpoint=settings.lmstudio_endpoint,
+        model=model or settings.lmstudio_model,
+        api_key=settings.lmstudio_api_key,
+    ),
     overwrite=True,
 )
 register_driver(
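The LM Studio registration now threads endpoint, model, and API key from settings into the driver factory. A hedged sketch of the same factory pattern, assuming `register_driver` and `LMStudioDriver` are importable from `prompture.drivers` (where the diff calls them); the endpoint value is illustrative:

```python
from prompture.drivers import LMStudioDriver, register_driver  # assumed exports
from prompture.settings import settings

# Re-register "lmstudio" pointing at a non-default endpoint.
register_driver(
    "lmstudio",
    lambda model=None: LMStudioDriver(
        endpoint="http://127.0.0.1:1234/v1/chat/completions",  # illustrative
        model=model or settings.lmstudio_model,
        api_key=settings.lmstudio_api_key,
    ),
    overwrite=True,
)
```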
prompture/drivers/async_azure_driver.py
CHANGED
@@ -18,6 +18,7 @@ from .azure_driver import AzureDriver
 class AsyncAzureDriver(CostMixin, AsyncDriver):
     supports_json_mode = True
     supports_json_schema = True
+    supports_vision = True
 
     MODEL_PRICING = AzureDriver.MODEL_PRICING
 
@@ -52,21 +53,26 @@ class AsyncAzureDriver(CostMixin, AsyncDriver):
 
     supports_messages = True
 
+    def _prepare_messages(self, messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
+        from .vision_helpers import _prepare_openai_vision_messages
+
+        return _prepare_openai_vision_messages(messages)
+
     async def generate(self, prompt: str, options: dict[str, Any]) -> dict[str, Any]:
         messages = [{"role": "user", "content": prompt}]
         return await self._do_generate(messages, options)
 
     async def generate_messages(self, messages: list[dict[str, str]], options: dict[str, Any]) -> dict[str, Any]:
-        return await self._do_generate(messages, options)
+        return await self._do_generate(self._prepare_messages(messages), options)
 
     async def _do_generate(self, messages: list[dict[str, str]], options: dict[str, Any]) -> dict[str, Any]:
         if self.client is None:
             raise RuntimeError("openai package (>=1.0.0) with AsyncAzureOpenAI not installed")
 
         model = options.get("model", self.model)
-
-        tokens_param =
-        supports_temperature =
+        model_config = self._get_model_config("azure", model)
+        tokens_param = model_config["tokens_param"]
+        supports_temperature = model_config["supports_temperature"]
 
         opts = {"temperature": 1.0, "max_tokens": 512, **options}
 
@@ -107,7 +113,7 @@ class AsyncAzureDriver(CostMixin, AsyncDriver):
             "prompt_tokens": prompt_tokens,
             "completion_tokens": completion_tokens,
             "total_tokens": total_tokens,
-            "cost": total_cost,
+            "cost": round(total_cost, 6),
             "raw_response": resp.model_dump(),
             "model_name": model,
             "deployment_id": self.deployment_id,
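With `_prepare_messages` in place, callers pass universal image blocks and the Azure driver converts them via the shared OpenAI-style helper before the request goes out. A minimal async sketch; the driver construction is elided, the model name is illustrative, and `_prepare_openai_vision_messages` internals are not part of this diff:

```python
from typing import Any


async def describe_chart(driver: Any, image_block: dict) -> str:
    # driver: a configured AsyncAzureDriver; image_block: a universal
    # {"type": "image", ...} block (payload fields are provider-agnostic).
    result = await driver.generate_messages(
        [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "Describe this chart."},
                    image_block,
                ],
            }
        ],
        {"model": "gpt-4o"},  # illustrative deployment/model name
    )
    print(result["meta"]["cost"])  # cost is now rounded to 6 decimal places
    return result["text"]
```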
prompture/drivers/async_claude_driver.py
CHANGED
@@ -4,6 +4,7 @@ from __future__ import annotations
 
 import json
 import os
+from collections.abc import AsyncIterator
 from typing import Any
 
 try:
@@ -19,6 +20,9 @@ from .claude_driver import ClaudeDriver
 class AsyncClaudeDriver(CostMixin, AsyncDriver):
     supports_json_mode = True
     supports_json_schema = True
+    supports_tool_use = True
+    supports_streaming = True
+    supports_vision = True
 
     MODEL_PRICING = ClaudeDriver.MODEL_PRICING
 
@@ -28,12 +32,17 @@ class AsyncClaudeDriver(CostMixin, AsyncDriver):
 
     supports_messages = True
 
+    def _prepare_messages(self, messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
+        from .vision_helpers import _prepare_claude_vision_messages
+
+        return _prepare_claude_vision_messages(messages)
+
     async def generate(self, prompt: str, options: dict[str, Any]) -> dict[str, Any]:
         messages = [{"role": "user", "content": prompt}]
         return await self._do_generate(messages, options)
 
     async def generate_messages(self, messages: list[dict[str, str]], options: dict[str, Any]) -> dict[str, Any]:
-        return await self._do_generate(messages, options)
+        return await self._do_generate(self._prepare_messages(messages), options)
 
     async def _do_generate(self, messages: list[dict[str, str]], options: dict[str, Any]) -> dict[str, Any]:
         if anthropic is None:
@@ -42,16 +51,17 @@ class AsyncClaudeDriver(CostMixin, AsyncDriver):
         opts = {**{"temperature": 0.0, "max_tokens": 512}, **options}
         model = options.get("model", self.model)
 
+        # Validate capabilities against models.dev metadata
+        self._validate_model_capabilities(
+            "claude",
+            model,
+            using_json_schema=bool(options.get("json_schema")),
+        )
+
         client = anthropic.AsyncAnthropic(api_key=self.api_key)
 
         # Anthropic requires system messages as a top-level parameter
-        system_content = None
-        api_messages = []
-        for msg in messages:
-            if msg.get("role") == "system":
-                system_content = msg.get("content", "")
-            else:
-                api_messages.append(msg)
+        system_content, api_messages = self._extract_system_and_messages(messages)
 
         # Build common kwargs
         common_kwargs: dict[str, Any] = {
@@ -99,9 +109,174 @@ class AsyncClaudeDriver(CostMixin, AsyncDriver):
             "prompt_tokens": prompt_tokens,
             "completion_tokens": completion_tokens,
             "total_tokens": total_tokens,
-            "cost": total_cost,
+            "cost": round(total_cost, 6),
             "raw_response": dict(resp),
             "model_name": model,
         }
 
         return {"text": text, "meta": meta}
+
+    # ------------------------------------------------------------------
+    # Helpers
+    # ------------------------------------------------------------------
+
+    def _extract_system_and_messages(
+        self, messages: list[dict[str, Any]]
+    ) -> tuple[str | None, list[dict[str, Any]]]:
+        """Separate system message from conversation messages for Anthropic API."""
+        system_content = None
+        api_messages: list[dict[str, Any]] = []
+        for msg in messages:
+            if msg.get("role") == "system":
+                system_content = msg.get("content", "")
+            else:
+                api_messages.append(msg)
+        return system_content, api_messages
+
+    # ------------------------------------------------------------------
+    # Tool use
+    # ------------------------------------------------------------------
+
+    async def generate_messages_with_tools(
+        self,
+        messages: list[dict[str, Any]],
+        tools: list[dict[str, Any]],
+        options: dict[str, Any],
+    ) -> dict[str, Any]:
+        """Generate a response that may include tool calls (Anthropic)."""
+        if anthropic is None:
+            raise RuntimeError("anthropic package not installed")
+
+        opts = {**{"temperature": 0.0, "max_tokens": 512}, **options}
+        model = options.get("model", self.model)
+
+        self._validate_model_capabilities("claude", model, using_tool_use=True)
+
+        client = anthropic.AsyncAnthropic(api_key=self.api_key)
+
+        system_content, api_messages = self._extract_system_and_messages(messages)
+
+        # Convert tools from OpenAI format to Anthropic format if needed
+        anthropic_tools = []
+        for t in tools:
+            if "type" in t and t["type"] == "function":
+                # OpenAI format -> Anthropic format
+                fn = t["function"]
+                anthropic_tools.append({
+                    "name": fn["name"],
+                    "description": fn.get("description", ""),
+                    "input_schema": fn.get("parameters", {"type": "object", "properties": {}}),
+                })
+            elif "input_schema" in t:
+                # Already Anthropic format
+                anthropic_tools.append(t)
+            else:
+                anthropic_tools.append(t)
+
+        kwargs: dict[str, Any] = {
+            "model": model,
+            "messages": api_messages,
+            "temperature": opts["temperature"],
+            "max_tokens": opts["max_tokens"],
+            "tools": anthropic_tools,
+        }
+        if system_content:
+            kwargs["system"] = system_content
+
+        resp = await client.messages.create(**kwargs)
+
+        prompt_tokens = resp.usage.input_tokens
+        completion_tokens = resp.usage.output_tokens
+        total_tokens = prompt_tokens + completion_tokens
+        total_cost = self._calculate_cost("claude", model, prompt_tokens, completion_tokens)
+
+        meta = {
+            "prompt_tokens": prompt_tokens,
+            "completion_tokens": completion_tokens,
+            "total_tokens": total_tokens,
+            "cost": round(total_cost, 6),
+            "raw_response": dict(resp),
+            "model_name": model,
+        }
+
+        text = ""
+        tool_calls_out: list[dict[str, Any]] = []
+        for block in resp.content:
+            if block.type == "text":
+                text += block.text
+            elif block.type == "tool_use":
+                tool_calls_out.append({
+                    "id": block.id,
+                    "name": block.name,
+                    "arguments": block.input,
+                })
+
+        return {
+            "text": text,
+            "meta": meta,
+            "tool_calls": tool_calls_out,
+            "stop_reason": resp.stop_reason,
+        }
+
+    # ------------------------------------------------------------------
+    # Streaming
+    # ------------------------------------------------------------------
+
+    async def generate_messages_stream(
+        self,
+        messages: list[dict[str, Any]],
+        options: dict[str, Any],
+    ) -> AsyncIterator[dict[str, Any]]:
+        """Yield response chunks via Anthropic streaming API."""
+        if anthropic is None:
+            raise RuntimeError("anthropic package not installed")
+
+        opts = {**{"temperature": 0.0, "max_tokens": 512}, **options}
+        model = options.get("model", self.model)
+        client = anthropic.AsyncAnthropic(api_key=self.api_key)
+
+        system_content, api_messages = self._extract_system_and_messages(messages)
+
+        kwargs: dict[str, Any] = {
+            "model": model,
+            "messages": api_messages,
+            "temperature": opts["temperature"],
+            "max_tokens": opts["max_tokens"],
+        }
+        if system_content:
+            kwargs["system"] = system_content
+
+        full_text = ""
+        prompt_tokens = 0
+        completion_tokens = 0
+
+        async with client.messages.stream(**kwargs) as stream:
+            async for event in stream:
+                if hasattr(event, "type"):
+                    if event.type == "content_block_delta" and hasattr(event, "delta"):
+                        delta_text = getattr(event.delta, "text", "")
+                        if delta_text:
+                            full_text += delta_text
+                            yield {"type": "delta", "text": delta_text}
+                    elif event.type == "message_delta" and hasattr(event, "usage"):
+                        completion_tokens = getattr(event.usage, "output_tokens", 0)
+                    elif event.type == "message_start" and hasattr(event, "message"):
+                        usage = getattr(event.message, "usage", None)
+                        if usage:
+                            prompt_tokens = getattr(usage, "input_tokens", 0)
+
+        total_tokens = prompt_tokens + completion_tokens
+        total_cost = self._calculate_cost("claude", model, prompt_tokens, completion_tokens)
+
+        yield {
+            "type": "done",
+            "text": full_text,
+            "meta": {
+                "prompt_tokens": prompt_tokens,
+                "completion_tokens": completion_tokens,
+                "total_tokens": total_tokens,
+                "cost": round(total_cost, 6),
+                "raw_response": {},
+                "model_name": model,
+            },
+        }