prompture 0.0.40.dev1__py3-none-any.whl → 0.0.41__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- prompture/_version.py +2 -2
- prompture/drivers/__init__.py +39 -0
- prompture/drivers/async_modelscope_driver.py +286 -0
- prompture/drivers/async_moonshot_driver.py +311 -0
- prompture/drivers/async_openrouter_driver.py +190 -2
- prompture/drivers/async_registry.py +30 -0
- prompture/drivers/async_zai_driver.py +302 -0
- prompture/drivers/modelscope_driver.py +303 -0
- prompture/drivers/moonshot_driver.py +341 -0
- prompture/drivers/openrouter_driver.py +235 -39
- prompture/drivers/zai_driver.py +317 -0
- prompture/model_rates.py +2 -0
- prompture/settings.py +15 -0
- {prompture-0.0.40.dev1.dist-info → prompture-0.0.41.dist-info}/METADATA +1 -1
- {prompture-0.0.40.dev1.dist-info → prompture-0.0.41.dist-info}/RECORD +19 -13
- {prompture-0.0.40.dev1.dist-info → prompture-0.0.41.dist-info}/WHEEL +0 -0
- {prompture-0.0.40.dev1.dist-info → prompture-0.0.41.dist-info}/entry_points.txt +0 -0
- {prompture-0.0.40.dev1.dist-info → prompture-0.0.41.dist-info}/licenses/LICENSE +0 -0
- {prompture-0.0.40.dev1.dist-info → prompture-0.0.41.dist-info}/top_level.txt +0 -0
prompture/drivers/openrouter_driver.py CHANGED

@@ -2,7 +2,9 @@
 Requires the `requests` package. Uses OPENROUTER_API_KEY env var.
 """
 
+import json
 import os
+from collections.abc import Iterator
 from typing import Any
 
 import requests
@@ -13,43 +15,52 @@ from ..driver import Driver
 
 class OpenRouterDriver(CostMixin, Driver):
     supports_json_mode = True
+    supports_json_schema = True
+    supports_tool_use = True
+    supports_streaming = True
     supports_vision = True
 
     # Approximate pricing per 1K tokens based on OpenRouter's pricing
     # https://openrouter.ai/docs#pricing
     MODEL_PRICING = {
-        "openai/gpt-
-            "prompt": 0.
-            "completion": 0.
+        "openai/gpt-4o": {
+            "prompt": 0.005,
+            "completion": 0.015,
             "tokens_param": "max_tokens",
             "supports_temperature": True,
         },
-        "
-            "prompt": 0.
-            "completion": 0.
+        "openai/gpt-4o-mini": {
+            "prompt": 0.00015,
+            "completion": 0.0006,
             "tokens_param": "max_tokens",
             "supports_temperature": True,
         },
-        "
-            "prompt": 0.
-            "completion": 0.
+        "anthropic/claude-sonnet-4-20250514": {
+            "prompt": 0.003,
+            "completion": 0.015,
             "tokens_param": "max_tokens",
             "supports_temperature": True,
         },
-        "
-            "prompt": 0.
-            "completion": 0.
+        "google/gemini-2.0-flash-001": {
+            "prompt": 0.0001,
+            "completion": 0.0004,
+            "tokens_param": "max_tokens",
+            "supports_temperature": True,
+        },
+        "meta-llama/llama-3.1-70b-instruct": {
+            "prompt": 0.0004,
+            "completion": 0.0004,
             "tokens_param": "max_tokens",
             "supports_temperature": True,
         },
     }
 
-    def __init__(self, api_key: str | None = None, model: str = "openai/gpt-
+    def __init__(self, api_key: str | None = None, model: str = "openai/gpt-4o-mini"):
         """Initialize OpenRouter driver.
 
         Args:
             api_key: OpenRouter API key. If not provided, will look for OPENROUTER_API_KEY env var
-            model: Model to use. Defaults to openai/gpt-
+            model: Model to use. Defaults to openai/gpt-4o-mini
         """
         self.api_key = api_key or os.getenv("OPENROUTER_API_KEY")
         if not self.api_key:
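The MODEL_PRICING table above is keyed in USD per 1K tokens. As a quick sanity check, here is a minimal sketch of the arithmetic the table implies, assuming CostMixin._calculate_cost (whose body this diff does not show) applies the rates this way:

# Hedged sketch: cost of a request against "openai/gpt-4o-mini" at the rates above
pricing = {"prompt": 0.00015, "completion": 0.0006}  # USD per 1K tokens
prompt_tokens, completion_tokens = 1200, 300
cost = (prompt_tokens / 1000) * pricing["prompt"] + (completion_tokens / 1000) * pricing["completion"]
print(round(cost, 6))  # 0.00036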
@@ -90,6 +101,13 @@ class OpenRouterDriver(CostMixin, Driver):
         tokens_param = model_config["tokens_param"]
         supports_temperature = model_config["supports_temperature"]
 
+        # Validate capabilities against models.dev metadata
+        self._validate_model_capabilities(
+            "openrouter",
+            model,
+            using_json_schema=bool(options.get("json_schema")),
+        )
+
         # Defaults
         opts = {"temperature": 1.0, "max_tokens": 512, **options}
 
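The new _validate_model_capabilities call gates each request on models.dev metadata. The diff shows only the call sites, so this is a hedged sketch of the call shape; the method body and its exact failure mode (raise vs. warn) are assumptions:

from prompture.drivers.openrouter_driver import OpenRouterDriver

driver = OpenRouterDriver(api_key="sk-or-demo")  # hypothetical key
# Assumed behavior: complains when the model lacks the requested feature
# (structured output here) according to models.dev metadata.
driver._validate_model_capabilities(
    "openrouter",
    "openai/gpt-4o-mini",
    using_json_schema=True,
)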
@@ -108,45 +126,223 @@ class OpenRouterDriver(CostMixin, Driver):
 
         # Native JSON mode support
         if options.get("json_mode"):
-
+            json_schema = options.get("json_schema")
+            if json_schema:
+                data["response_format"] = {
+                    "type": "json_schema",
+                    "json_schema": {
+                        "name": "extraction",
+                        "strict": True,
+                        "schema": json_schema,
+                    },
+                }
+            else:
+                data["response_format"] = {"type": "json_object"}
+
+        try:
+            response = requests.post(
+                f"{self.base_url}/chat/completions",
+                headers=self.headers,
+                json=data,
+                timeout=120,
+            )
+            response.raise_for_status()
+            resp = response.json()
+        except requests.exceptions.HTTPError as e:
+            error_msg = f"OpenRouter API request failed: {e!s}"
+            raise RuntimeError(error_msg) from e
+        except requests.exceptions.RequestException as e:
+            raise RuntimeError(f"OpenRouter API request failed: {e!s}") from e
+
+        # Extract usage info
+        usage = resp.get("usage", {})
+        prompt_tokens = usage.get("prompt_tokens", 0)
+        completion_tokens = usage.get("completion_tokens", 0)
+        total_tokens = usage.get("total_tokens", 0)
+
+        # Calculate cost via shared mixin
+        total_cost = self._calculate_cost("openrouter", model, prompt_tokens, completion_tokens)
+
+        # Standardized meta object
+        meta = {
+            "prompt_tokens": prompt_tokens,
+            "completion_tokens": completion_tokens,
+            "total_tokens": total_tokens,
+            "cost": round(total_cost, 6),
+            "raw_response": resp,
+            "model_name": model,
+        }
+
+        text = resp["choices"][0]["message"]["content"]
+        return {"text": text, "meta": meta}
+
+    # ------------------------------------------------------------------
+    # Tool use
+    # ------------------------------------------------------------------
+
+    def generate_messages_with_tools(
+        self,
+        messages: list[dict[str, Any]],
+        tools: list[dict[str, Any]],
+        options: dict[str, Any],
+    ) -> dict[str, Any]:
+        """Generate a response that may include tool calls."""
+        if not self.api_key:
+            raise RuntimeError("OpenRouter API key not found")
+
+        model = options.get("model", self.model)
+        model_config = self._get_model_config("openrouter", model)
+        tokens_param = model_config["tokens_param"]
+        supports_temperature = model_config["supports_temperature"]
+
+        self._validate_model_capabilities("openrouter", model, using_tool_use=True)
+
+        opts = {"temperature": 1.0, "max_tokens": 512, **options}
+
+        data: dict[str, Any] = {
+            "model": model,
+            "messages": messages,
+            "tools": tools,
+        }
+        data[tokens_param] = opts.get("max_tokens", 512)
+
+        if supports_temperature and "temperature" in opts:
+            data["temperature"] = opts["temperature"]
 
         try:
             response = requests.post(
                 f"{self.base_url}/chat/completions",
                 headers=self.headers,
                 json=data,
+                timeout=120,
             )
             response.raise_for_status()
             resp = response.json()
+        except requests.exceptions.HTTPError as e:
+            error_msg = f"OpenRouter API request failed: {e!s}"
+            raise RuntimeError(error_msg) from e
+        except requests.exceptions.RequestException as e:
+            raise RuntimeError(f"OpenRouter API request failed: {e!s}") from e
+
+        usage = resp.get("usage", {})
+        prompt_tokens = usage.get("prompt_tokens", 0)
+        completion_tokens = usage.get("completion_tokens", 0)
+        total_tokens = usage.get("total_tokens", 0)
+        total_cost = self._calculate_cost("openrouter", model, prompt_tokens, completion_tokens)
+
+        meta = {
+            "prompt_tokens": prompt_tokens,
+            "completion_tokens": completion_tokens,
+            "total_tokens": total_tokens,
+            "cost": round(total_cost, 6),
+            "raw_response": resp,
+            "model_name": model,
+        }
+
+        choice = resp["choices"][0]
+        text = choice["message"].get("content") or ""
+        stop_reason = choice.get("finish_reason")
+
+        tool_calls_out: list[dict[str, Any]] = []
+        for tc in choice["message"].get("tool_calls", []):
+            try:
+                args = json.loads(tc["function"]["arguments"])
+            except (json.JSONDecodeError, TypeError):
+                args = {}
+            tool_calls_out.append({
+                "id": tc["id"],
+                "name": tc["function"]["name"],
+                "arguments": args,
+            })
+
+        return {
+            "text": text,
+            "meta": meta,
+            "tool_calls": tool_calls_out,
+            "stop_reason": stop_reason,
+        }
+
+    # ------------------------------------------------------------------
+    # Streaming
+    # ------------------------------------------------------------------
+
+    def generate_messages_stream(
+        self,
+        messages: list[dict[str, Any]],
+        options: dict[str, Any],
+    ) -> Iterator[dict[str, Any]]:
+        """Yield response chunks via OpenRouter streaming API."""
+        if not self.api_key:
+            raise RuntimeError("OpenRouter API key not found")
+
+        model = options.get("model", self.model)
+        model_config = self._get_model_config("openrouter", model)
+        tokens_param = model_config["tokens_param"]
+        supports_temperature = model_config["supports_temperature"]
 
-
-
-
-
-
+        opts = {"temperature": 1.0, "max_tokens": 512, **options}
+
+        data: dict[str, Any] = {
+            "model": model,
+            "messages": messages,
+            "stream": True,
+            "stream_options": {"include_usage": True},
+        }
+        data[tokens_param] = opts.get("max_tokens", 512)
+
+        if supports_temperature and "temperature" in opts:
+            data["temperature"] = opts["temperature"]
+
+        response = requests.post(
+            f"{self.base_url}/chat/completions",
+            headers=self.headers,
+            json=data,
+            stream=True,
+            timeout=120,
+        )
+        response.raise_for_status()
 
-
-
+        full_text = ""
+        prompt_tokens = 0
+        completion_tokens = 0
 
-
-
+        for line in response.iter_lines(decode_unicode=True):
+            if not line or not line.startswith("data: "):
+                continue
+            payload = line[len("data: "):]
+            if payload.strip() == "[DONE]":
+                break
+            try:
+                chunk = json.loads(payload)
+            except json.JSONDecodeError:
+                continue
+
+            # Usage comes in the final chunk
+            usage = chunk.get("usage")
+            if usage:
+                prompt_tokens = usage.get("prompt_tokens", 0)
+                completion_tokens = usage.get("completion_tokens", 0)
+
+            choices = chunk.get("choices", [])
+            if choices:
+                delta = choices[0].get("delta", {})
+                content = delta.get("content", "")
+                if content:
+                    full_text += content
+                    yield {"type": "delta", "text": content}
+
+        total_tokens = prompt_tokens + completion_tokens
+        total_cost = self._calculate_cost("openrouter", model, prompt_tokens, completion_tokens)
+
+        yield {
+            "type": "done",
+            "text": full_text,
+            "meta": {
                 "prompt_tokens": prompt_tokens,
                 "completion_tokens": completion_tokens,
                 "total_tokens": total_tokens,
                 "cost": round(total_cost, 6),
-            "raw_response":
+                "raw_response": {},
                 "model_name": model,
-        }
-
-        text = resp["choices"][0]["message"]["content"]
-        return {"text": text, "meta": meta}
-
-        except requests.exceptions.RequestException as e:
-            error_msg = f"OpenRouter API request failed: {e!s}"
-            if hasattr(e.response, "json"):
-                try:
-                    error_details = e.response.json()
-                    error_msg = f"{error_msg} - {error_details.get('error', {}).get('message', '')}"
-                except Exception:
-                    pass
-            raise RuntimeError(error_msg) from e
+            },
+        }
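Taken together, the additions above give the OpenRouter driver strict JSON-schema output, tool calls, and SSE streaming. A minimal usage sketch, assuming generate(prompt, options) is the public entry point wired to this code path (as in the sibling drivers added in this release) and that OPENROUTER_API_KEY is set:

from prompture.drivers.openrouter_driver import OpenRouterDriver

driver = OpenRouterDriver(model="openai/gpt-4o-mini")  # reads OPENROUTER_API_KEY

# Strict structured output via the new json_schema branch
result = driver.generate(
    "Extract name and age from: Ada Lovelace, 36.",
    {
        "json_mode": True,
        "json_schema": {
            "type": "object",
            "properties": {"name": {"type": "string"}, "age": {"type": "integer"}},
            "required": ["name", "age"],
        },
    },
)
print(result["text"], result["meta"]["cost"])

# Streaming: "delta" chunks carry text; the final "done" chunk carries usage
for chunk in driver.generate_messages_stream(
    [{"role": "user", "content": "Say hello."}],
    {"max_tokens": 32},
):
    if chunk["type"] == "delta":
        print(chunk["text"], end="", flush=True)
    else:
        print("\ntokens:", chunk["meta"]["total_tokens"])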
prompture/drivers/zai_driver.py ADDED

@@ -0,0 +1,317 @@
+"""Z.ai (Zhipu AI) driver implementation.
+Requires the `requests` package. Uses ZHIPU_API_KEY env var.
+
+The Z.ai API is fully OpenAI-compatible (/chat/completions).
+All pricing comes from models.dev (provider: "zai") — no hardcoded pricing.
+"""
+
+import json
+import os
+from collections.abc import Iterator
+from typing import Any
+
+import requests
+
+from ..cost_mixin import CostMixin
+from ..driver import Driver
+
+
+class ZaiDriver(CostMixin, Driver):
+    supports_json_mode = True
+    supports_json_schema = True
+    supports_tool_use = True
+    supports_streaming = True
+    supports_vision = True
+
+    # All pricing resolved live from models.dev (provider: "zai")
+    MODEL_PRICING: dict[str, dict[str, Any]] = {}
+
+    def __init__(
+        self,
+        api_key: str | None = None,
+        model: str = "glm-4.7",
+        endpoint: str = "https://api.z.ai/api/paas/v4",
+    ):
+        """Initialize Z.ai driver.
+
+        Args:
+            api_key: Zhipu API key. If not provided, will look for ZHIPU_API_KEY env var.
+            model: Model to use. Defaults to glm-4.7.
+            endpoint: API base URL. Defaults to https://api.z.ai/api/paas/v4.
+        """
+        self.api_key = api_key or os.getenv("ZHIPU_API_KEY")
+        if not self.api_key:
+            raise ValueError("Zhipu API key not found. Set ZHIPU_API_KEY env var.")
+
+        self.model = model
+        self.base_url = endpoint.rstrip("/")
+
+        self.headers = {
+            "Authorization": f"Bearer {self.api_key}",
+            "Content-Type": "application/json",
+        }
+
+    supports_messages = True
+
+    def _prepare_messages(self, messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
+        from .vision_helpers import _prepare_openai_vision_messages
+
+        return _prepare_openai_vision_messages(messages)
+
+    def generate(self, prompt: str, options: dict[str, Any]) -> dict[str, Any]:
+        messages = [{"role": "user", "content": prompt}]
+        return self._do_generate(messages, options)
+
+    def generate_messages(self, messages: list[dict[str, str]], options: dict[str, Any]) -> dict[str, Any]:
+        return self._do_generate(self._prepare_messages(messages), options)
+
+    def _do_generate(self, messages: list[dict[str, str]], options: dict[str, Any]) -> dict[str, Any]:
+        if not self.api_key:
+            raise RuntimeError("Zhipu API key not found")
+
+        model = options.get("model", self.model)
+
+        model_config = self._get_model_config("zai", model)
+        tokens_param = model_config["tokens_param"]
+        supports_temperature = model_config["supports_temperature"]
+
+        self._validate_model_capabilities(
+            "zai",
+            model,
+            using_json_schema=bool(options.get("json_schema")),
+        )
+
+        opts = {"temperature": 1.0, "max_tokens": 512, **options}
+
+        data: dict[str, Any] = {
+            "model": model,
+            "messages": messages,
+        }
+        data[tokens_param] = opts.get("max_tokens", 512)
+
+        if supports_temperature and "temperature" in opts:
+            data["temperature"] = opts["temperature"]
+
+        # Native JSON mode support
+        if options.get("json_mode"):
+            json_schema = options.get("json_schema")
+            if json_schema:
+                data["response_format"] = {
+                    "type": "json_schema",
+                    "json_schema": {
+                        "name": "extraction",
+                        "strict": True,
+                        "schema": json_schema,
+                    },
+                }
+            else:
+                data["response_format"] = {"type": "json_object"}
+
+        try:
+            response = requests.post(
+                f"{self.base_url}/chat/completions",
+                headers=self.headers,
+                json=data,
+                timeout=120,
+            )
+            response.raise_for_status()
+            resp = response.json()
+        except requests.exceptions.HTTPError as e:
+            error_msg = f"Z.ai API request failed: {e!s}"
+            raise RuntimeError(error_msg) from e
+        except requests.exceptions.RequestException as e:
+            raise RuntimeError(f"Z.ai API request failed: {e!s}") from e
+
+        usage = resp.get("usage", {})
+        prompt_tokens = usage.get("prompt_tokens", 0)
+        completion_tokens = usage.get("completion_tokens", 0)
+        total_tokens = usage.get("total_tokens", 0)
+
+        total_cost = self._calculate_cost("zai", model, prompt_tokens, completion_tokens)
+
+        meta = {
+            "prompt_tokens": prompt_tokens,
+            "completion_tokens": completion_tokens,
+            "total_tokens": total_tokens,
+            "cost": round(total_cost, 6),
+            "raw_response": resp,
+            "model_name": model,
+        }
+
+        text = resp["choices"][0]["message"]["content"]
+        return {"text": text, "meta": meta}
+
+    # ------------------------------------------------------------------
+    # Tool use
+    # ------------------------------------------------------------------
+
+    def generate_messages_with_tools(
+        self,
+        messages: list[dict[str, Any]],
+        tools: list[dict[str, Any]],
+        options: dict[str, Any],
+    ) -> dict[str, Any]:
+        """Generate a response that may include tool calls."""
+        if not self.api_key:
+            raise RuntimeError("Zhipu API key not found")
+
+        model = options.get("model", self.model)
+        model_config = self._get_model_config("zai", model)
+        tokens_param = model_config["tokens_param"]
+        supports_temperature = model_config["supports_temperature"]
+
+        self._validate_model_capabilities("zai", model, using_tool_use=True)
+
+        opts = {"temperature": 1.0, "max_tokens": 512, **options}
+
+        data: dict[str, Any] = {
+            "model": model,
+            "messages": messages,
+            "tools": tools,
+        }
+        data[tokens_param] = opts.get("max_tokens", 512)
+
+        if supports_temperature and "temperature" in opts:
+            data["temperature"] = opts["temperature"]
+
+        if "tool_choice" in options:
+            data["tool_choice"] = options["tool_choice"]
+
+        try:
+            response = requests.post(
+                f"{self.base_url}/chat/completions",
+                headers=self.headers,
+                json=data,
+                timeout=120,
+            )
+            response.raise_for_status()
+            resp = response.json()
+        except requests.exceptions.HTTPError as e:
+            error_msg = f"Z.ai API request failed: {e!s}"
+            raise RuntimeError(error_msg) from e
+        except requests.exceptions.RequestException as e:
+            raise RuntimeError(f"Z.ai API request failed: {e!s}") from e
+
+        usage = resp.get("usage", {})
+        prompt_tokens = usage.get("prompt_tokens", 0)
+        completion_tokens = usage.get("completion_tokens", 0)
+        total_tokens = usage.get("total_tokens", 0)
+        total_cost = self._calculate_cost("zai", model, prompt_tokens, completion_tokens)
+
+        meta = {
+            "prompt_tokens": prompt_tokens,
+            "completion_tokens": completion_tokens,
+            "total_tokens": total_tokens,
+            "cost": round(total_cost, 6),
+            "raw_response": resp,
+            "model_name": model,
+        }
+
+        choice = resp["choices"][0]
+        text = choice["message"].get("content") or ""
+        stop_reason = choice.get("finish_reason")
+
+        tool_calls_out: list[dict[str, Any]] = []
+        for tc in choice["message"].get("tool_calls", []):
+            try:
+                args = json.loads(tc["function"]["arguments"])
+            except (json.JSONDecodeError, TypeError):
+                args = {}
+            tool_calls_out.append(
+                {
+                    "id": tc["id"],
+                    "name": tc["function"]["name"],
+                    "arguments": args,
+                }
+            )
+
+        return {
+            "text": text,
+            "meta": meta,
+            "tool_calls": tool_calls_out,
+            "stop_reason": stop_reason,
+        }
+
+    # ------------------------------------------------------------------
+    # Streaming
+    # ------------------------------------------------------------------
+
+    def generate_messages_stream(
+        self,
+        messages: list[dict[str, Any]],
+        options: dict[str, Any],
+    ) -> Iterator[dict[str, Any]]:
+        """Yield response chunks via Z.ai streaming API."""
+        if not self.api_key:
+            raise RuntimeError("Zhipu API key not found")
+
+        model = options.get("model", self.model)
+        model_config = self._get_model_config("zai", model)
+        tokens_param = model_config["tokens_param"]
+        supports_temperature = model_config["supports_temperature"]
+
+        opts = {"temperature": 1.0, "max_tokens": 512, **options}
+
+        data: dict[str, Any] = {
+            "model": model,
+            "messages": messages,
+            "stream": True,
+            "stream_options": {"include_usage": True},
+        }
+        data[tokens_param] = opts.get("max_tokens", 512)
+
+        if supports_temperature and "temperature" in opts:
+            data["temperature"] = opts["temperature"]
+
+        response = requests.post(
+            f"{self.base_url}/chat/completions",
+            headers=self.headers,
+            json=data,
+            stream=True,
+            timeout=120,
+        )
+        response.raise_for_status()
+
+        full_text = ""
+        prompt_tokens = 0
+        completion_tokens = 0
+
+        for line in response.iter_lines(decode_unicode=True):
+            if not line or not line.startswith("data: "):
+                continue
+            payload = line[len("data: ") :]
+            if payload.strip() == "[DONE]":
+                break
+            try:
+                chunk = json.loads(payload)
+            except json.JSONDecodeError:
+                continue
+
+            usage = chunk.get("usage")
+            if usage:
+                prompt_tokens = usage.get("prompt_tokens", 0)
+                completion_tokens = usage.get("completion_tokens", 0)
+
+            choices = chunk.get("choices", [])
+            if choices:
+                delta = choices[0].get("delta", {})
+                content = delta.get("content", "")
+                if content:
+                    full_text += content
+                    yield {"type": "delta", "text": content}
+
+        total_tokens = prompt_tokens + completion_tokens
+        total_cost = self._calculate_cost("zai", model, prompt_tokens, completion_tokens)
+
+        yield {
+            "type": "done",
+            "text": full_text,
+            "meta": {
+                "prompt_tokens": prompt_tokens,
+                "completion_tokens": completion_tokens,
+                "total_tokens": total_tokens,
+                "cost": round(total_cost, 6),
+                "raw_response": {},
+                "model_name": model,
+            },
+        }
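For the new driver, a minimal tool-use sketch, assuming the import path prompture.drivers.zai_driver and a hypothetical get_weather tool; note the driver already json.loads() each tool call's arguments, falling back to {} on bad JSON:

from prompture.drivers.zai_driver import ZaiDriver

driver = ZaiDriver()  # reads ZHIPU_API_KEY

tools = [{
    "type": "function",
    "function": {
        "name": "get_weather",  # hypothetical tool
        "description": "Look up current weather for a city",
        "parameters": {
            "type": "object",
            "properties": {"city": {"type": "string"}},
            "required": ["city"],
        },
    },
}]

result = driver.generate_messages_with_tools(
    [{"role": "user", "content": "What's the weather in Madrid?"}],
    tools,
    {"tool_choice": "auto"},
)
for call in result["tool_calls"]:
    print(call["id"], call["name"], call["arguments"])  # arguments pre-parsed
print("stop_reason:", result["stop_reason"])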
prompture/model_rates.py CHANGED

prompture/settings.py CHANGED
@@ -51,6 +51,21 @@ class Settings(BaseSettings):
     grok_api_key: Optional[str] = None
     grok_model: str = "grok-4-fast-reasoning"
 
+    # Moonshot AI (Kimi)
+    moonshot_api_key: Optional[str] = None
+    moonshot_model: str = "kimi-k2-0905-preview"
+    moonshot_endpoint: str = "https://api.moonshot.ai/v1"
+
+    # Z.ai (Zhipu AI)
+    zhipu_api_key: Optional[str] = None
+    zhipu_model: str = "glm-4.7"
+    zhipu_endpoint: str = "https://api.z.ai/api/paas/v4"
+
+    # ModelScope (Alibaba Cloud)
+    modelscope_api_key: Optional[str] = None
+    modelscope_model: str = "Qwen/Qwen3-235B-A22B-Instruct-2507"
+    modelscope_endpoint: str = "https://api-inference.modelscope.cn/v1"
+
     # AirLLM
     airllm_model: str = "meta-llama/Llama-2-7b-hf"
     airllm_compression: Optional[str] = None  # "4bit" or "8bit"
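These fields are read by pydantic BaseSettings, so the three new providers can be configured entirely from the environment (field zhipu_api_key maps to ZHIPU_API_KEY, and so on, under pydantic-settings' default env mapping). A minimal sketch, assuming Settings is importable from prompture.settings and that the remaining fields all have defaults:

from prompture.settings import Settings

settings = Settings()  # env vars override the defaults shown above
print(settings.zhipu_model)        # "glm-4.7"
print(settings.moonshot_endpoint)  # "https://api.moonshot.ai/v1"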
|