prompture 0.0.38.dev2__py3-none-any.whl → 0.0.42__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. prompture/__init__.py +12 -1
  2. prompture/_version.py +2 -2
  3. prompture/agent.py +11 -11
  4. prompture/async_agent.py +11 -11
  5. prompture/async_conversation.py +9 -0
  6. prompture/async_core.py +16 -0
  7. prompture/async_driver.py +39 -0
  8. prompture/async_groups.py +63 -0
  9. prompture/conversation.py +9 -0
  10. prompture/core.py +16 -0
  11. prompture/cost_mixin.py +62 -0
  12. prompture/discovery.py +108 -43
  13. prompture/driver.py +39 -0
  14. prompture/drivers/__init__.py +39 -0
  15. prompture/drivers/async_azure_driver.py +7 -6
  16. prompture/drivers/async_claude_driver.py +177 -8
  17. prompture/drivers/async_google_driver.py +10 -0
  18. prompture/drivers/async_grok_driver.py +4 -4
  19. prompture/drivers/async_groq_driver.py +4 -4
  20. prompture/drivers/async_modelscope_driver.py +286 -0
  21. prompture/drivers/async_moonshot_driver.py +312 -0
  22. prompture/drivers/async_openai_driver.py +158 -6
  23. prompture/drivers/async_openrouter_driver.py +196 -7
  24. prompture/drivers/async_registry.py +30 -0
  25. prompture/drivers/async_zai_driver.py +303 -0
  26. prompture/drivers/azure_driver.py +6 -5
  27. prompture/drivers/claude_driver.py +10 -0
  28. prompture/drivers/google_driver.py +10 -0
  29. prompture/drivers/grok_driver.py +4 -4
  30. prompture/drivers/groq_driver.py +4 -4
  31. prompture/drivers/modelscope_driver.py +303 -0
  32. prompture/drivers/moonshot_driver.py +342 -0
  33. prompture/drivers/openai_driver.py +22 -12
  34. prompture/drivers/openrouter_driver.py +248 -44
  35. prompture/drivers/zai_driver.py +318 -0
  36. prompture/groups.py +42 -0
  37. prompture/ledger.py +252 -0
  38. prompture/model_rates.py +114 -2
  39. prompture/settings.py +16 -1
  40. {prompture-0.0.38.dev2.dist-info → prompture-0.0.42.dist-info}/METADATA +1 -1
  41. prompture-0.0.42.dist-info/RECORD +84 -0
  42. prompture-0.0.38.dev2.dist-info/RECORD +0 -77
  43. {prompture-0.0.38.dev2.dist-info → prompture-0.0.42.dist-info}/WHEEL +0 -0
  44. {prompture-0.0.38.dev2.dist-info → prompture-0.0.42.dist-info}/entry_points.txt +0 -0
  45. {prompture-0.0.38.dev2.dist-info → prompture-0.0.42.dist-info}/licenses/LICENSE +0 -0
  46. {prompture-0.0.38.dev2.dist-info → prompture-0.0.42.dist-info}/top_level.txt +0 -0
prompture/drivers/async_zai_driver.py (new file)
@@ -0,0 +1,303 @@
+"""Async Z.ai (Zhipu AI) driver using httpx.
+
+All pricing comes from models.dev (provider: "zai") — no hardcoded pricing.
+"""
+
+from __future__ import annotations
+
+import json
+import os
+from collections.abc import AsyncIterator
+from typing import Any
+
+import httpx
+
+from ..async_driver import AsyncDriver
+from ..cost_mixin import CostMixin, prepare_strict_schema
+from .zai_driver import ZaiDriver
+
+
+class AsyncZaiDriver(CostMixin, AsyncDriver):
+    supports_json_mode = True
+    supports_json_schema = True
+    supports_tool_use = True
+    supports_streaming = True
+    supports_vision = True
+
+    MODEL_PRICING = ZaiDriver.MODEL_PRICING
+
+    def __init__(
+        self,
+        api_key: str | None = None,
+        model: str = "glm-4.7",
+        endpoint: str = "https://api.z.ai/api/paas/v4",
+    ):
+        self.api_key = api_key or os.getenv("ZHIPU_API_KEY")
+        if not self.api_key:
+            raise ValueError("Zhipu API key not found. Set ZHIPU_API_KEY env var.")
+        self.model = model
+        self.base_url = endpoint.rstrip("/")
+        self.headers = {
+            "Authorization": f"Bearer {self.api_key}",
+            "Content-Type": "application/json",
+        }
+
+    supports_messages = True
+
+    def _prepare_messages(self, messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
+        from .vision_helpers import _prepare_openai_vision_messages
+
+        return _prepare_openai_vision_messages(messages)
+
+    async def generate(self, prompt: str, options: dict[str, Any]) -> dict[str, Any]:
+        messages = [{"role": "user", "content": prompt}]
+        return await self._do_generate(messages, options)
+
+    async def generate_messages(self, messages: list[dict[str, str]], options: dict[str, Any]) -> dict[str, Any]:
+        return await self._do_generate(self._prepare_messages(messages), options)
+
+    async def _do_generate(self, messages: list[dict[str, str]], options: dict[str, Any]) -> dict[str, Any]:
+        model = options.get("model", self.model)
+
+        model_config = self._get_model_config("zai", model)
+        tokens_param = model_config["tokens_param"]
+        supports_temperature = model_config["supports_temperature"]
+
+        self._validate_model_capabilities(
+            "zai",
+            model,
+            using_json_schema=bool(options.get("json_schema")),
+        )
+
+        opts = {"temperature": 1.0, "max_tokens": 512, **options}
+
+        data: dict[str, Any] = {
+            "model": model,
+            "messages": messages,
+        }
+        data[tokens_param] = opts.get("max_tokens", 512)
+
+        if supports_temperature and "temperature" in opts:
+            data["temperature"] = opts["temperature"]
+
+        if options.get("json_mode"):
+            json_schema = options.get("json_schema")
+            if json_schema:
+                schema_copy = prepare_strict_schema(json_schema)
+                data["response_format"] = {
+                    "type": "json_schema",
+                    "json_schema": {
+                        "name": "extraction",
+                        "strict": True,
+                        "schema": schema_copy,
+                    },
+                }
+            else:
+                data["response_format"] = {"type": "json_object"}
+
+        async with httpx.AsyncClient() as client:
+            try:
+                response = await client.post(
+                    f"{self.base_url}/chat/completions",
+                    headers=self.headers,
+                    json=data,
+                    timeout=120,
+                )
+                response.raise_for_status()
+                resp = response.json()
+            except httpx.HTTPStatusError as e:
+                error_msg = f"Z.ai API request failed: {e!s}"
+                raise RuntimeError(error_msg) from e
+            except Exception as e:
+                raise RuntimeError(f"Z.ai API request failed: {e!s}") from e
+
+        usage = resp.get("usage", {})
+        prompt_tokens = usage.get("prompt_tokens", 0)
+        completion_tokens = usage.get("completion_tokens", 0)
+        total_tokens = usage.get("total_tokens", 0)
+
+        total_cost = self._calculate_cost("zai", model, prompt_tokens, completion_tokens)
+
+        meta = {
+            "prompt_tokens": prompt_tokens,
+            "completion_tokens": completion_tokens,
+            "total_tokens": total_tokens,
+            "cost": round(total_cost, 6),
+            "raw_response": resp,
+            "model_name": model,
+        }
+
+        text = resp["choices"][0]["message"]["content"]
+        return {"text": text, "meta": meta}
+
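For orientation, a minimal usage sketch of the driver above (not part of the diff; the prompt is illustrative and a valid ZHIPU_API_KEY is assumed):

    # Hypothetical usage of AsyncZaiDriver.generate with strict JSON schema output.
    import asyncio
    from prompture.drivers.async_zai_driver import AsyncZaiDriver

    async def main():
        driver = AsyncZaiDriver(model="glm-4.7")  # reads ZHIPU_API_KEY from the environment
        schema = {
            "type": "object",
            "properties": {"name": {"type": "string"}},
            "required": ["name"],
        }
        result = await driver.generate(
            "Extract the person's name: 'Ada Lovelace wrote the first program.'",
            {"json_mode": True, "json_schema": schema},
        )
        print(result["text"])          # JSON string conforming to the schema
        print(result["meta"]["cost"])  # cost computed from models.dev rates

    asyncio.run(main())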
+    # ------------------------------------------------------------------
+    # Tool use
+    # ------------------------------------------------------------------
+
+    async def generate_messages_with_tools(
+        self,
+        messages: list[dict[str, Any]],
+        tools: list[dict[str, Any]],
+        options: dict[str, Any],
+    ) -> dict[str, Any]:
+        """Generate a response that may include tool calls."""
+        model = options.get("model", self.model)
+        model_config = self._get_model_config("zai", model)
+        tokens_param = model_config["tokens_param"]
+        supports_temperature = model_config["supports_temperature"]
+
+        self._validate_model_capabilities("zai", model, using_tool_use=True)
+
+        opts = {"temperature": 1.0, "max_tokens": 512, **options}
+
+        data: dict[str, Any] = {
+            "model": model,
+            "messages": messages,
+            "tools": tools,
+        }
+        data[tokens_param] = opts.get("max_tokens", 512)
+
+        if supports_temperature and "temperature" in opts:
+            data["temperature"] = opts["temperature"]
+
+        if "tool_choice" in options:
+            data["tool_choice"] = options["tool_choice"]
+
+        async with httpx.AsyncClient() as client:
+            try:
+                response = await client.post(
+                    f"{self.base_url}/chat/completions",
+                    headers=self.headers,
+                    json=data,
+                    timeout=120,
+                )
+                response.raise_for_status()
+                resp = response.json()
+            except httpx.HTTPStatusError as e:
+                error_msg = f"Z.ai API request failed: {e!s}"
+                raise RuntimeError(error_msg) from e
+            except Exception as e:
+                raise RuntimeError(f"Z.ai API request failed: {e!s}") from e
+
+        usage = resp.get("usage", {})
+        prompt_tokens = usage.get("prompt_tokens", 0)
+        completion_tokens = usage.get("completion_tokens", 0)
+        total_tokens = usage.get("total_tokens", 0)
+        total_cost = self._calculate_cost("zai", model, prompt_tokens, completion_tokens)
+
+        meta = {
+            "prompt_tokens": prompt_tokens,
+            "completion_tokens": completion_tokens,
+            "total_tokens": total_tokens,
+            "cost": round(total_cost, 6),
+            "raw_response": resp,
+            "model_name": model,
+        }
+
+        choice = resp["choices"][0]
+        text = choice["message"].get("content") or ""
+        stop_reason = choice.get("finish_reason")
+
+        tool_calls_out: list[dict[str, Any]] = []
+        for tc in choice["message"].get("tool_calls", []):
+            try:
+                args = json.loads(tc["function"]["arguments"])
+            except (json.JSONDecodeError, TypeError):
+                args = {}
+            tool_calls_out.append(
+                {
+                    "id": tc["id"],
+                    "name": tc["function"]["name"],
+                    "arguments": args,
+                }
+            )
+
+        return {
+            "text": text,
+            "meta": meta,
+            "tool_calls": tool_calls_out,
+            "stop_reason": stop_reason,
+        }
+
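A hedged sketch of calling the tool-use path above, to be run inside an async function with `driver` as constructed in the earlier sketch (the OpenAI-style tools payload is an assumption based on the request body the driver builds; get_weather is a made-up tool):

    # Hypothetical tool-use call against AsyncZaiDriver.
    tools = [{
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Look up current weather for a city.",
            "parameters": {
                "type": "object",
                "properties": {"city": {"type": "string"}},
                "required": ["city"],
            },
        },
    }]
    result = await driver.generate_messages_with_tools(
        [{"role": "user", "content": "What's the weather in Paris?"}],
        tools,
        {"tool_choice": "auto"},
    )
    for call in result["tool_calls"]:
        print(call["id"], call["name"], call["arguments"])  # arguments arrive parsed as a dict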
+    # ------------------------------------------------------------------
+    # Streaming
+    # ------------------------------------------------------------------
+
+    async def generate_messages_stream(
+        self,
+        messages: list[dict[str, Any]],
+        options: dict[str, Any],
+    ) -> AsyncIterator[dict[str, Any]]:
+        """Yield response chunks via Z.ai streaming API."""
+        model = options.get("model", self.model)
+        model_config = self._get_model_config("zai", model)
+        tokens_param = model_config["tokens_param"]
+        supports_temperature = model_config["supports_temperature"]
+
+        opts = {"temperature": 1.0, "max_tokens": 512, **options}
+
+        data: dict[str, Any] = {
+            "model": model,
+            "messages": messages,
+            "stream": True,
+            "stream_options": {"include_usage": True},
+        }
+        data[tokens_param] = opts.get("max_tokens", 512)
+
+        if supports_temperature and "temperature" in opts:
+            data["temperature"] = opts["temperature"]
+
+        full_text = ""
+        prompt_tokens = 0
+        completion_tokens = 0
+
+        async with (
+            httpx.AsyncClient() as client,
+            client.stream(
+                "POST",
+                f"{self.base_url}/chat/completions",
+                headers=self.headers,
+                json=data,
+                timeout=120,
+            ) as response,
+        ):
+            response.raise_for_status()
+            async for line in response.aiter_lines():
+                if not line or not line.startswith("data: "):
+                    continue
+                payload = line[len("data: ") :]
+                if payload.strip() == "[DONE]":
+                    break
+                try:
+                    chunk = json.loads(payload)
+                except json.JSONDecodeError:
+                    continue
+
+                usage = chunk.get("usage")
+                if usage:
+                    prompt_tokens = usage.get("prompt_tokens", 0)
+                    completion_tokens = usage.get("completion_tokens", 0)
+
+                choices = chunk.get("choices", [])
+                if choices:
+                    delta = choices[0].get("delta", {})
+                    content = delta.get("content", "")
+                    if content:
+                        full_text += content
+                        yield {"type": "delta", "text": content}
+
+        total_tokens = prompt_tokens + completion_tokens
+        total_cost = self._calculate_cost("zai", model, prompt_tokens, completion_tokens)
+
+        yield {
+            "type": "done",
+            "text": full_text,
+            "meta": {
+                "prompt_tokens": prompt_tokens,
+                "completion_tokens": completion_tokens,
+                "total_tokens": total_tokens,
+                "cost": round(total_cost, 6),
+                "raw_response": {},
+                "model_name": model,
+            },
+        }
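Consuming the stream is straightforward given the chunk shapes yielded above; a minimal sketch, again inside an async function with the same `driver` instance:

    # Print deltas as they arrive, then the usage summary from the final "done" chunk.
    async for chunk in driver.generate_messages_stream(
        [{"role": "user", "content": "Tell me a short joke."}],
        {},
    ):
        if chunk["type"] == "delta":
            print(chunk["text"], end="", flush=True)
        elif chunk["type"] == "done":
            meta = chunk["meta"]
            print(f"\n[{meta['total_tokens']} tokens, ${meta['cost']}]")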
prompture/drivers/azure_driver.py
@@ -10,7 +10,7 @@ try:
 except Exception:
     AzureOpenAI = None
 
-from ..cost_mixin import CostMixin
+from ..cost_mixin import CostMixin, prepare_strict_schema
 from ..driver import Driver
 
 
@@ -108,9 +108,9 @@ class AzureDriver(CostMixin, Driver):
             raise RuntimeError("openai package (>=1.0.0) with AzureOpenAI not installed")
 
         model = options.get("model", self.model)
-        model_info = self.MODEL_PRICING.get(model, {})
-        tokens_param = model_info.get("tokens_param", "max_tokens")
-        supports_temperature = model_info.get("supports_temperature", True)
+        model_config = self._get_model_config("azure", model)
+        tokens_param = model_config["tokens_param"]
+        supports_temperature = model_config["supports_temperature"]
 
         opts = {"temperature": 1.0, "max_tokens": 512, **options}
 
@@ -128,12 +128,13 @@ class AzureDriver(CostMixin, Driver):
         if options.get("json_mode"):
             json_schema = options.get("json_schema")
             if json_schema:
+                schema_copy = prepare_strict_schema(json_schema)
                 kwargs["response_format"] = {
                     "type": "json_schema",
                     "json_schema": {
                         "name": "extraction",
                         "strict": True,
-                        "schema": json_schema,
+                        "schema": schema_copy,
                     },
                 }
             else:
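The prepare_strict_schema helper imported here lives in the new cost_mixin.py, which this excerpt does not show. As an assumption only: strict-mode JSON schemas for OpenAI-compatible APIs typically require additionalProperties: false and every property listed as required, so a plausible sketch of such a helper is:

    # Speculative sketch of prepare_strict_schema; NOT the actual implementation.
    import copy

    def prepare_strict_schema(schema: dict) -> dict:
        out = copy.deepcopy(schema)  # never mutate the caller's schema

        def walk(node: dict) -> None:
            if node.get("type") == "object":
                node.setdefault("additionalProperties", False)
                props = node.get("properties", {})
                node["required"] = list(props)  # strict mode: every key required
                for child in props.values():
                    walk(child)
            elif node.get("type") == "array" and isinstance(node.get("items"), dict):
                walk(node["items"])

        walk(out)
        return out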
prompture/drivers/claude_driver.py
@@ -77,6 +77,13 @@ class ClaudeDriver(CostMixin, Driver):
         opts = {**{"temperature": 0.0, "max_tokens": 512}, **options}
         model = options.get("model", self.model)
 
+        # Validate capabilities against models.dev metadata
+        self._validate_model_capabilities(
+            "claude",
+            model,
+            using_json_schema=bool(options.get("json_schema")),
+        )
+
         client = anthropic.Anthropic(api_key=self.api_key)
 
         # Anthropic requires system messages as a top-level parameter
@@ -177,6 +184,9 @@ class ClaudeDriver(CostMixin, Driver):
 
         opts = {**{"temperature": 0.0, "max_tokens": 512}, **options}
         model = options.get("model", self.model)
+
+        self._validate_model_capabilities("claude", model, using_tool_use=True)
+
         client = anthropic.Anthropic(api_key=self.api_key)
 
         system_content, api_messages = self._extract_system_and_messages(messages)
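_validate_model_capabilities is also defined in the unshown cost_mixin.py. Judging only from its call sites in this diff, a plausible shape (assumption; the metadata lookup helper name is hypothetical) is:

    # Speculative sketch; the real helper lives in prompture/cost_mixin.py.
    def _validate_model_capabilities(
        self,
        provider: str,
        model: str,
        using_json_schema: bool = False,
        using_tool_use: bool = False,
    ) -> None:
        caps = self._lookup_models_dev(provider, model) or {}  # hypothetical lookup
        if using_json_schema and caps.get("supports_json_schema") is False:
            raise ValueError(f"{provider}/{model} does not support JSON schema output")
        if using_tool_use and caps.get("supports_tool_use") is False:
            raise ValueError(f"{provider}/{model} does not support tool use")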
prompture/drivers/google_driver.py
@@ -228,6 +228,13 @@ class GoogleDriver(CostMixin, Driver):
     def _do_generate(self, messages: list[dict[str, str]], options: Optional[dict[str, Any]] = None) -> dict[str, Any]:
         gen_input, gen_kwargs, model_kwargs = self._build_generation_args(messages, options)
 
+        # Validate capabilities against models.dev metadata
+        self._validate_model_capabilities(
+            "google",
+            self.model,
+            using_json_schema=bool((options or {}).get("json_schema")),
+        )
+
         try:
             logger.debug(f"Initializing {self.model} for generation")
             model = genai.GenerativeModel(self.model, **model_kwargs)
@@ -263,6 +270,9 @@ class GoogleDriver(CostMixin, Driver):
         options: dict[str, Any],
     ) -> dict[str, Any]:
         """Generate a response that may include tool/function calls."""
+        model = options.get("model", self.model)
+        self._validate_model_capabilities("google", model, using_tool_use=True)
+
         gen_input, gen_kwargs, model_kwargs = self._build_generation_args(
             self._prepare_messages(messages), options
         )
prompture/drivers/grok_driver.py
@@ -99,10 +99,10 @@ class GrokDriver(CostMixin, Driver):
 
         model = options.get("model", self.model)
 
-        # Lookup model-specific config
-        model_info = self.MODEL_PRICING.get(model, {})
-        tokens_param = model_info.get("tokens_param", "max_tokens")
-        supports_temperature = model_info.get("supports_temperature", True)
+        # Lookup model-specific config (live models.dev data + hardcoded fallback)
+        model_config = self._get_model_config("grok", model)
+        tokens_param = model_config["tokens_param"]
+        supports_temperature = model_config["supports_temperature"]
 
         # Defaults
         opts = {"temperature": 1.0, "max_tokens": 512, **options}
prompture/drivers/groq_driver.py
@@ -69,10 +69,10 @@ class GroqDriver(CostMixin, Driver):
 
         model = options.get("model", self.model)
 
-        # Lookup model-specific config
-        model_info = self.MODEL_PRICING.get(model, {})
-        tokens_param = model_info.get("tokens_param", "max_tokens")
-        supports_temperature = model_info.get("supports_temperature", True)
+        # Lookup model-specific config (live models.dev data + hardcoded fallback)
+        model_config = self._get_model_config("groq", model)
+        tokens_param = model_config["tokens_param"]
+        supports_temperature = model_config["supports_temperature"]
 
         # Base configuration
         opts = {"temperature": 0.7, "max_tokens": 512, **options}
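The drivers above now route model metadata through CostMixin._get_model_config instead of reading MODEL_PRICING directly. That helper is also not shown in this excerpt; judging from the lines it replaces, a plausible sketch (assumption; the models.dev lookup helper is hypothetical) is:

    # Speculative sketch; mirrors the defaults the old inline lookups used.
    def _get_model_config(self, provider: str, model: str) -> dict:
        info = self._lookup_models_dev(provider, model) or self.MODEL_PRICING.get(model, {})
        return {
            "tokens_param": info.get("tokens_param", "max_tokens"),
            "supports_temperature": info.get("supports_temperature", True),
        }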