prompture 0.0.29.dev8__py3-none-any.whl → 0.0.35__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- prompture/__init__.py +146 -23
- prompture/_version.py +34 -0
- prompture/aio/__init__.py +74 -0
- prompture/async_conversation.py +607 -0
- prompture/async_core.py +803 -0
- prompture/async_driver.py +169 -0
- prompture/cache.py +469 -0
- prompture/callbacks.py +55 -0
- prompture/cli.py +63 -4
- prompture/conversation.py +631 -0
- prompture/core.py +876 -263
- prompture/cost_mixin.py +51 -0
- prompture/discovery.py +164 -0
- prompture/driver.py +168 -5
- prompture/drivers/__init__.py +173 -69
- prompture/drivers/airllm_driver.py +109 -0
- prompture/drivers/async_airllm_driver.py +26 -0
- prompture/drivers/async_azure_driver.py +117 -0
- prompture/drivers/async_claude_driver.py +107 -0
- prompture/drivers/async_google_driver.py +132 -0
- prompture/drivers/async_grok_driver.py +91 -0
- prompture/drivers/async_groq_driver.py +84 -0
- prompture/drivers/async_hugging_driver.py +61 -0
- prompture/drivers/async_lmstudio_driver.py +79 -0
- prompture/drivers/async_local_http_driver.py +44 -0
- prompture/drivers/async_ollama_driver.py +125 -0
- prompture/drivers/async_openai_driver.py +96 -0
- prompture/drivers/async_openrouter_driver.py +96 -0
- prompture/drivers/async_registry.py +129 -0
- prompture/drivers/azure_driver.py +36 -9
- prompture/drivers/claude_driver.py +251 -34
- prompture/drivers/google_driver.py +107 -38
- prompture/drivers/grok_driver.py +29 -32
- prompture/drivers/groq_driver.py +27 -26
- prompture/drivers/hugging_driver.py +6 -6
- prompture/drivers/lmstudio_driver.py +26 -13
- prompture/drivers/local_http_driver.py +6 -6
- prompture/drivers/ollama_driver.py +157 -23
- prompture/drivers/openai_driver.py +178 -9
- prompture/drivers/openrouter_driver.py +31 -25
- prompture/drivers/registry.py +306 -0
- prompture/field_definitions.py +106 -96
- prompture/logging.py +80 -0
- prompture/model_rates.py +217 -0
- prompture/runner.py +49 -47
- prompture/scaffold/__init__.py +1 -0
- prompture/scaffold/generator.py +84 -0
- prompture/scaffold/templates/Dockerfile.j2 +12 -0
- prompture/scaffold/templates/README.md.j2 +41 -0
- prompture/scaffold/templates/config.py.j2 +21 -0
- prompture/scaffold/templates/env.example.j2 +8 -0
- prompture/scaffold/templates/main.py.j2 +86 -0
- prompture/scaffold/templates/models.py.j2 +40 -0
- prompture/scaffold/templates/requirements.txt.j2 +5 -0
- prompture/server.py +183 -0
- prompture/session.py +117 -0
- prompture/settings.py +18 -1
- prompture/tools.py +219 -267
- prompture/tools_schema.py +254 -0
- prompture/validator.py +3 -3
- {prompture-0.0.29.dev8.dist-info → prompture-0.0.35.dist-info}/METADATA +117 -21
- prompture-0.0.35.dist-info/RECORD +66 -0
- {prompture-0.0.29.dev8.dist-info → prompture-0.0.35.dist-info}/WHEEL +1 -1
- prompture-0.0.29.dev8.dist-info/RECORD +0 -27
- {prompture-0.0.29.dev8.dist-info → prompture-0.0.35.dist-info}/entry_points.txt +0 -0
- {prompture-0.0.29.dev8.dist-info → prompture-0.0.35.dist-info}/licenses/LICENSE +0 -0
- {prompture-0.0.29.dev8.dist-info → prompture-0.0.35.dist-info}/top_level.txt +0 -0
prompture/drivers/grok_driver.py
CHANGED
@@ -1,15 +1,21 @@
 """xAI Grok driver.
 Requires the `requests` package. Uses GROK_API_KEY env var.
 """
+
 import os
-from typing import Any
+from typing import Any
+
 import requests

+from ..cost_mixin import CostMixin
 from ..driver import Driver


-class GrokDriver(Driver):
+class GrokDriver(CostMixin, Driver):
+    supports_json_mode = True
+
     # Pricing per 1M tokens based on xAI's documentation
+    _PRICING_UNIT = 1_000_000
     MODEL_PRICING = {
         "grok-code-fast-1": {
             "prompt": 0.20,
@@ -72,19 +78,16 @@ class GrokDriver(Driver):
         self.model = model
         self.api_base = "https://api.x.ai/v1"

-
-    """Generate completion using Grok API.
+    supports_messages = True

-
-
-
-
-
-
-
-
-        RuntimeError: If API key is missing or request fails
-    """
+    def generate(self, prompt: str, options: dict[str, Any]) -> dict[str, Any]:
+        messages = [{"role": "user", "content": prompt}]
+        return self._do_generate(messages, options)
+
+    def generate_messages(self, messages: list[dict[str, str]], options: dict[str, Any]) -> dict[str, Any]:
+        return self._do_generate(messages, options)
+
+    def _do_generate(self, messages: list[dict[str, str]], options: dict[str, Any]) -> dict[str, Any]:
         if not self.api_key:
             raise RuntimeError("GROK_API_KEY environment variable is required")

@@ -101,7 +104,7 @@ class GrokDriver(Driver):
         # Base request payload
         payload = {
             "model": model,
-            "messages":
+            "messages": messages,
         }

         # Add token limit with correct parameter name
@@ -111,33 +114,27 @@
         if supports_temperature and "temperature" in opts:
             payload["temperature"] = opts["temperature"]

-
-
-            "
-
+        # Native JSON mode support
+        if options.get("json_mode"):
+            payload["response_format"] = {"type": "json_object"}
+
+        headers = {"Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json"}

         try:
-            response = requests.post(
-                f"{self.api_base}/chat/completions",
-                headers=headers,
-                json=payload
-            )
+            response = requests.post(f"{self.api_base}/chat/completions", headers=headers, json=payload)
             response.raise_for_status()
             resp = response.json()
         except requests.exceptions.RequestException as e:
-            raise RuntimeError(f"Grok API request failed: {
+            raise RuntimeError(f"Grok API request failed: {e!s}") from e

         # Extract usage info
         usage = resp.get("usage", {})
         prompt_tokens = usage.get("prompt_tokens", 0)
-        completion_tokens = usage.get("completion_tokens", 0)
+        completion_tokens = usage.get("completion_tokens", 0)
         total_tokens = usage.get("total_tokens", 0)

-        # Calculate cost
-
-        prompt_cost = (prompt_tokens / 1000000) * model_pricing["prompt"]
-        completion_cost = (completion_tokens / 1000000) * model_pricing["completion"]
-        total_cost = prompt_cost + completion_cost
+        # Calculate cost via shared mixin
+        total_cost = self._calculate_cost("grok", model, prompt_tokens, completion_tokens)

         # Standardized meta object
         meta = {
@@ -150,4 +147,4 @@ class GrokDriver(Driver):
         }

         text = resp["choices"][0]["message"]["content"]
-        return {"text": text, "meta": meta}
+        return {"text": text, "meta": meta}
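The cost math that used to live inline in each driver now goes through the shared CostMixin. The mixin's own code is not part of this diff, so the snippet below is only a sketch of what such a helper could look like, inferred from the pieces visible here (the `_calculate_cost("grok", model, prompt_tokens, completion_tokens)` call, the per-driver MODEL_PRICING tables, and Grok's `_PRICING_UNIT = 1_000_000` override); the real prompture/cost_mixin.py may differ.

# Hypothetical sketch only -- not copied from prompture/cost_mixin.py.
class CostMixin:
    _PRICING_UNIT = 1_000  # drivers with per-1M pricing (e.g. Grok) override this
    MODEL_PRICING: dict = {"default": {"prompt": 0.0, "completion": 0.0}}

    def _calculate_cost(self, provider: str, model: str,
                        prompt_tokens: int, completion_tokens: int) -> float:
        # Unknown models fall back to a zero-cost default entry.
        pricing = self.MODEL_PRICING.get(model) or self.MODEL_PRICING.get(
            "default", {"prompt": 0.0, "completion": 0.0}
        )
        prompt_cost = (prompt_tokens / self._PRICING_UNIT) * pricing["prompt"]
        completion_cost = (completion_tokens / self._PRICING_UNIT) * pricing["completion"]
        # The provider name is accepted for parity with the call sites above,
        # but the arithmetic in this sketch does not need it.
        return prompt_cost + completion_cost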
prompture/drivers/groq_driver.py
CHANGED
@@ -1,18 +1,22 @@
 """Groq driver for prompture.
 Requires the `groq` package. Uses GROQ_API_KEY env var.
 """
+
 import os
-from typing import Any
+from typing import Any

 try:
     import groq
 except Exception:
     groq = None

+from ..cost_mixin import CostMixin
 from ..driver import Driver


-class GroqDriver(Driver):
+class GroqDriver(CostMixin, Driver):
+    supports_json_mode = True
+
     # Approximate pricing per 1K tokens (to be updated with official pricing)
     # Each model entry defines token parameters and temperature support
     MODEL_PRICING = {
@@ -32,7 +36,7 @@ class GroqDriver(Driver):

     def __init__(self, api_key: str | None = None, model: str = "llama2-70b-4096"):
         """Initialize Groq driver.
-
+
         Args:
             api_key: Groq API key (defaults to GROQ_API_KEY env var)
             model: Model to use (defaults to llama2-70b-4096)
@@ -44,20 +48,16 @@ class GroqDriver(Driver):
         else:
             self.client = None

-
-
-
-
-
-
-
-
-
-
-        Raises:
-            RuntimeError: If groq package is not installed
-            groq.error.*: Various Groq API errors
-        """
+    supports_messages = True
+
+    def generate(self, prompt: str, options: dict[str, Any]) -> dict[str, Any]:
+        messages = [{"role": "user", "content": prompt}]
+        return self._do_generate(messages, options)
+
+    def generate_messages(self, messages: list[dict[str, str]], options: dict[str, Any]) -> dict[str, Any]:
+        return self._do_generate(messages, options)
+
+    def _do_generate(self, messages: list[dict[str, str]], options: dict[str, Any]) -> dict[str, Any]:
         if self.client is None:
             raise RuntimeError("groq package is not installed")

@@ -74,7 +74,7 @@ class GroqDriver(Driver):
         # Base kwargs for API call
         kwargs = {
             "model": model,
-            "messages":
+            "messages": messages,
         }

         # Set token limit with correct parameter name
@@ -84,23 +84,24 @@ class GroqDriver(Driver):
         if supports_temperature and "temperature" in opts:
             kwargs["temperature"] = opts["temperature"]

+        # Native JSON mode support
+        if options.get("json_mode"):
+            kwargs["response_format"] = {"type": "json_object"}
+
         try:
             resp = self.client.chat.completions.create(**kwargs)
-        except Exception
+        except Exception:
             # Re-raise any Groq API errors
             raise

         # Extract usage statistics
         usage = getattr(resp, "usage", None)
         prompt_tokens = getattr(usage, "prompt_tokens", 0)
-        completion_tokens = getattr(usage, "completion_tokens", 0)
+        completion_tokens = getattr(usage, "completion_tokens", 0)
         total_tokens = getattr(usage, "total_tokens", 0)

-        # Calculate
-
-        prompt_cost = (prompt_tokens / 1000) * model_pricing["prompt"]
-        completion_cost = (completion_tokens / 1000) * model_pricing["completion"]
-        total_cost = prompt_cost + completion_cost
+        # Calculate cost via shared mixin
+        total_cost = self._calculate_cost("groq", model, prompt_tokens, completion_tokens)

         # Standard metadata object
         meta = {
@@ -114,4 +115,4 @@ class GroqDriver(Driver):

         # Extract generated text
         text = resp.choices[0].message.content
-        return {"text": text, "meta": meta}
+        return {"text": text, "meta": meta}
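The Grok and Groq drivers now share one shape: generate() wraps a plain prompt into a single-message chat, generate_messages() takes a full message list, and both delegate to _do_generate(), which maps the json_mode option onto the provider's native response_format. A usage sketch, assuming GROQ_API_KEY is set and the default model is available on your account:

# Usage sketch only; model availability and environment are assumptions.
from prompture.drivers.groq_driver import GroqDriver

driver = GroqDriver()  # picks up GROQ_API_KEY from the environment

# Single prompt: internally becomes [{"role": "user", "content": prompt}].
result = driver.generate("Return a JSON object with a 'color' key.", {"json_mode": True})
print(result["text"], result["meta"])

# Multi-turn: the message list is forwarded to _do_generate() unchanged.
chat = driver.generate_messages(
    [
        {"role": "system", "content": "Answer in strict JSON."},
        {"role": "user", "content": "Name one planet."},
    ],
    {"json_mode": True},
)
print(chat["text"])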
prompture/drivers/hugging_driver.py
CHANGED

@@ -1,14 +1,14 @@
 import os
+from typing import Any
+
 import requests
+
 from ..driver import Driver
-from typing import Any, Dict


 class HuggingFaceDriver(Driver):
     # Hugging Face is usage-based (credits/subscription), but we set costs to 0 for now.
-    MODEL_PRICING = {
-        "default": {"prompt": 0.0, "completion": 0.0}
-    }
+    MODEL_PRICING = {"default": {"prompt": 0.0, "completion": 0.0}}

     def __init__(self, endpoint: str | None = None, token: str | None = None, model: str = "bert-base-uncased"):
         self.endpoint = endpoint or os.getenv("HF_ENDPOINT")
@@ -22,7 +22,7 @@ class HuggingFaceDriver(Driver):

         self.headers = {"Authorization": f"Bearer {self.token}"}

-    def generate(self, prompt: str, options:
+    def generate(self, prompt: str, options: dict[str, Any]) -> dict[str, Any]:
         payload = {
             "inputs": prompt,
             "parameters": options,  # HF allows temperature, max_new_tokens, etc. here
@@ -32,7 +32,7 @@ class HuggingFaceDriver(Driver):
             r.raise_for_status()
             response_data = r.json()
         except Exception as e:
-            raise RuntimeError(f"HuggingFaceDriver request failed: {e}")
+            raise RuntimeError(f"HuggingFaceDriver request failed: {e}") from e

         # Different HF models return slightly different response formats
         # Text-generation models usually return [{"generated_text": "..."}]
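A change that repeats across these smaller drivers (HuggingFace, LM Studio, local HTTP, Ollama) is re-raising wrapped errors with "from e". That keeps the original requests exception reachable on __cause__ instead of discarding it. A minimal illustration of the difference, in plain Python rather than prompture code:

# Plain-Python illustration of exception chaining with "raise ... from e".
def fetch():
    try:
        raise ConnectionError("connection refused")
    except ConnectionError as e:
        raise RuntimeError("HuggingFaceDriver request failed: connection refused") from e

try:
    fetch()
except RuntimeError as err:
    # The underlying network error stays available to callers and tracebacks.
    assert isinstance(err.__cause__, ConnectionError)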
prompture/drivers/lmstudio_driver.py
CHANGED

@@ -1,26 +1,26 @@
-import os
 import json
-import requests
 import logging
+import os
+from typing import Any, Optional
+
+import requests
+
 from ..driver import Driver
-from typing import Any, Dict

 logger = logging.getLogger(__name__)


 class LMStudioDriver(Driver):
+    supports_json_mode = True
+
     # LM Studio is local – costs are always zero.
-    MODEL_PRICING = {
-        "default": {"prompt": 0.0, "completion": 0.0}
-    }
+    MODEL_PRICING = {"default": {"prompt": 0.0, "completion": 0.0}}

     def __init__(self, endpoint: str | None = None, model: str = "deepseek/deepseek-r1-0528-qwen3-8b"):
         # Allow override via env var
-        self.endpoint = endpoint or os.getenv(
-            "LMSTUDIO_ENDPOINT", "http://127.0.0.1:1234/v1/chat/completions"
-        )
+        self.endpoint = endpoint or os.getenv("LMSTUDIO_ENDPOINT", "http://127.0.0.1:1234/v1/chat/completions")
         self.model = model
-        self.options:
+        self.options: dict[str, Any] = {}

         # Validate connection to LM Studio server
         self._validate_connection()
@@ -38,17 +38,30 @@ class LMStudioDriver(Driver):
         except requests.exceptions.RequestException as e:
             logger.warning(f"Could not validate connection to LM Studio server: {e}")

-
+    supports_messages = True
+
+    def generate(self, prompt: str, options: Optional[dict[str, Any]] = None) -> dict[str, Any]:
+        messages = [{"role": "user", "content": prompt}]
+        return self._do_generate(messages, options)
+
+    def generate_messages(self, messages: list[dict[str, str]], options: dict[str, Any]) -> dict[str, Any]:
+        return self._do_generate(messages, options)
+
+    def _do_generate(self, messages: list[dict[str, str]], options: Optional[dict[str, Any]] = None) -> dict[str, Any]:
         merged_options = self.options.copy()
         if options:
             merged_options.update(options)

         payload = {
             "model": merged_options.get("model", self.model),
-            "messages":
+            "messages": messages,
             "temperature": merged_options.get("temperature", 0.7),
         }

+        # Native JSON mode support
+        if merged_options.get("json_mode"):
+            payload["response_format"] = {"type": "json_object"}
+
         try:
             logger.debug(f"Sending request to LM Studio endpoint: {self.endpoint}")
             logger.debug(f"Request payload: {payload}")
@@ -70,7 +83,7 @@ class LMStudioDriver(Driver):
             raise
         except Exception as e:
             logger.error(f"Unexpected error in LM Studio request: {e}")
-            raise RuntimeError(f"LM Studio request failed: {e}")
+            raise RuntimeError(f"LM Studio request failed: {e}") from e

         # Extract text
         text = response_data["choices"][0]["message"]["content"]
prompture/drivers/local_http_driver.py
CHANGED

@@ -1,27 +1,27 @@
 import os
+from typing import Any
+
 import requests
+
 from ..driver import Driver
-from typing import Any, Dict


 class LocalHTTPDriver(Driver):
     # Default: no cost; extend if your local service has pricing logic
-    MODEL_PRICING = {
-        "default": {"prompt": 0.0, "completion": 0.0}
-    }
+    MODEL_PRICING = {"default": {"prompt": 0.0, "completion": 0.0}}

     def __init__(self, endpoint: str | None = None, model: str = "local-model"):
         self.endpoint = endpoint or os.getenv("LOCAL_HTTP_ENDPOINT", "http://localhost:8000/generate")
         self.model = model

-    def generate(self, prompt: str, options:
+    def generate(self, prompt: str, options: dict[str, Any]) -> dict[str, Any]:
         payload = {"prompt": prompt, "options": options}
         try:
             r = requests.post(self.endpoint, json=payload, timeout=options.get("timeout", 30))
             r.raise_for_status()
             response_data = r.json()
         except Exception as e:
-            raise RuntimeError(f"LocalHTTPDriver request failed: {e}")
+            raise RuntimeError(f"LocalHTTPDriver request failed: {e}") from e

         # If the local API already provides {"text": "...", "meta": {...}}, just return it
         if "text" in response_data and "meta" in response_data:
prompture/drivers/ollama_driver.py
CHANGED

@@ -1,38 +1,40 @@
-import os
 import json
-import requests
 import logging
+import os
+from collections.abc import Iterator
+from typing import Any, Optional
+
+import requests
+
 from ..driver import Driver
-from typing import Any, Dict

 logger = logging.getLogger(__name__)


 class OllamaDriver(Driver):
+    supports_json_mode = True
+    supports_streaming = True
+
     # Ollama is free – costs are always zero.
-    MODEL_PRICING = {
-        "default": {"prompt": 0.0, "completion": 0.0}
-    }
+    MODEL_PRICING = {"default": {"prompt": 0.0, "completion": 0.0}}

     def __init__(self, endpoint: str | None = None, model: str = "llama3"):
         # Allow override via env var
-        self.endpoint = endpoint or os.getenv(
-            "OLLAMA_ENDPOINT", "http://localhost:11434/api/generate"
-        )
+        self.endpoint = endpoint or os.getenv("OLLAMA_ENDPOINT", "http://localhost:11434/api/generate")
         self.model = model
         self.options = {}  # Initialize empty options dict
-
+
         # Validate connection to Ollama server
         self._validate_connection()
-
+
     def _validate_connection(self):
         """Validate connection to the Ollama server."""
         try:
             # Send a simple HEAD request to check if server is accessible
             # Use the base API endpoint without the specific path
-            base_url = self.endpoint.split(
+            base_url = self.endpoint.split("/api/")[0]
             health_url = f"{base_url}/api/version"
-
+
             logger.debug(f"Validating connection to Ollama server at: {health_url}")
             response = requests.head(health_url, timeout=5)
             response.raise_for_status()
@@ -42,7 +44,9 @@ class OllamaDriver(Driver):
         # We don't raise an error here to allow for delayed server startup
         # The actual error will be raised when generate() is called

-
+    supports_messages = True
+
+    def generate(self, prompt: str, options: Optional[dict[str, Any]] = None) -> dict[str, Any]:
         # Merge instance options with call-specific options
         merged_options = self.options.copy()
         if options:
@@ -54,6 +58,10 @@ class OllamaDriver(Driver):
             "stream": False,
         }

+        # Native JSON mode support
+        if merged_options.get("json_mode"):
+            payload["format"] = "json"
+
         # Add any Ollama-specific options from merged_options
         if "temperature" in merged_options:
             payload["temperature"] = merged_options["temperature"]
@@ -65,21 +73,21 @@ class OllamaDriver(Driver):
         try:
             logger.debug(f"Sending request to Ollama endpoint: {self.endpoint}")
             logger.debug(f"Request payload: {payload}")
-
+
             r = requests.post(self.endpoint, json=payload, timeout=120)
             logger.debug(f"Response status code: {r.status_code}")
-
+
             r.raise_for_status()
-
+
             response_text = r.text
             logger.debug(f"Raw response text: {response_text}")
-
+
             response_data = r.json()
             logger.debug(f"Parsed response data: {response_data}")
-
+
             if not isinstance(response_data, dict):
                 raise ValueError(f"Expected dict response, got {type(response_data)}")
-
+
         except requests.exceptions.ConnectionError as e:
             logger.error(f"Connection error to Ollama endpoint: {e}")
             # Preserve original exception
@@ -91,11 +99,11 @@ class OllamaDriver(Driver):
         except json.JSONDecodeError as e:
             logger.error(f"Failed to decode JSON response: {e}")
             # Re-raise JSONDecodeError with more context
-            raise json.JSONDecodeError(f"Invalid JSON response from Ollama: {e.msg}", e.doc, e.pos)
+            raise json.JSONDecodeError(f"Invalid JSON response from Ollama: {e.msg}", e.doc, e.pos) from e
         except Exception as e:
             logger.error(f"Unexpected error in Ollama request: {e}")
             # Only wrap unknown exceptions in RuntimeError
-            raise RuntimeError(f"Ollama request failed: {e}")
+            raise RuntimeError(f"Ollama request failed: {e}") from e

         # Extract token counts
         prompt_tokens = response_data.get("prompt_eval_count", 0)
@@ -113,4 +121,130 @@ class OllamaDriver(Driver):
         }

         # Ollama returns text in "response"
-        return {"text": response_data.get("response", ""), "meta": meta}
+        return {"text": response_data.get("response", ""), "meta": meta}
+
+    # ------------------------------------------------------------------
+    # Streaming
+    # ------------------------------------------------------------------
+
+    def generate_messages_stream(
+        self,
+        messages: list[dict[str, Any]],
+        options: dict[str, Any],
+    ) -> Iterator[dict[str, Any]]:
+        """Yield response chunks via Ollama streaming API."""
+        merged_options = self.options.copy()
+        if options:
+            merged_options.update(options)
+
+        chat_endpoint = self.endpoint.replace("/api/generate", "/api/chat")
+
+        payload: dict[str, Any] = {
+            "model": merged_options.get("model", self.model),
+            "messages": messages,
+            "stream": True,
+        }
+
+        if merged_options.get("json_mode"):
+            payload["format"] = "json"
+        if "temperature" in merged_options:
+            payload["temperature"] = merged_options["temperature"]
+        if "top_p" in merged_options:
+            payload["top_p"] = merged_options["top_p"]
+        if "top_k" in merged_options:
+            payload["top_k"] = merged_options["top_k"]
+
+        full_text = ""
+        prompt_tokens = 0
+        completion_tokens = 0
+
+        r = requests.post(chat_endpoint, json=payload, timeout=120, stream=True)
+        r.raise_for_status()
+
+        for line in r.iter_lines():
+            if not line:
+                continue
+            chunk = json.loads(line)
+            if chunk.get("done"):
+                prompt_tokens = chunk.get("prompt_eval_count", 0)
+                completion_tokens = chunk.get("eval_count", 0)
+            else:
+                content = chunk.get("message", {}).get("content", "")
+                if content:
+                    full_text += content
+                    yield {"type": "delta", "text": content}
+
+        total_tokens = prompt_tokens + completion_tokens
+        yield {
+            "type": "done",
+            "text": full_text,
+            "meta": {
+                "prompt_tokens": prompt_tokens,
+                "completion_tokens": completion_tokens,
+                "total_tokens": total_tokens,
+                "cost": 0.0,
+                "raw_response": {},
+                "model_name": merged_options.get("model", self.model),
+            },
+        }
+
+    def generate_messages(self, messages: list[dict[str, Any]], options: dict[str, Any]) -> dict[str, Any]:
+        """Use Ollama's /api/chat endpoint for multi-turn conversations."""
+        merged_options = self.options.copy()
+        if options:
+            merged_options.update(options)
+
+        # Derive the chat endpoint from the generate endpoint
+        chat_endpoint = self.endpoint.replace("/api/generate", "/api/chat")
+
+        payload: dict[str, Any] = {
+            "model": merged_options.get("model", self.model),
+            "messages": messages,
+            "stream": False,
+        }
+
+        # Native JSON mode support
+        if merged_options.get("json_mode"):
+            payload["format"] = "json"
+
+        if "temperature" in merged_options:
+            payload["temperature"] = merged_options["temperature"]
+        if "top_p" in merged_options:
+            payload["top_p"] = merged_options["top_p"]
+        if "top_k" in merged_options:
+            payload["top_k"] = merged_options["top_k"]
+
+        try:
+            logger.debug(f"Sending chat request to Ollama endpoint: {chat_endpoint}")
+            r = requests.post(chat_endpoint, json=payload, timeout=120)
+            r.raise_for_status()
+            response_data = r.json()
+
+            if not isinstance(response_data, dict):
+                raise ValueError(f"Expected dict response, got {type(response_data)}")
+        except requests.exceptions.ConnectionError:
+            raise
+        except requests.exceptions.HTTPError:
+            raise
+        except json.JSONDecodeError as e:
+            raise json.JSONDecodeError(f"Invalid JSON response from Ollama: {e.msg}", e.doc, e.pos) from e
+        except Exception as e:
+            raise RuntimeError(f"Ollama chat request failed: {e}") from e
+
+        prompt_tokens = response_data.get("prompt_eval_count", 0)
+        completion_tokens = response_data.get("eval_count", 0)
+        total_tokens = prompt_tokens + completion_tokens
+
+        meta = {
+            "prompt_tokens": prompt_tokens,
+            "completion_tokens": completion_tokens,
+            "total_tokens": total_tokens,
+            "cost": 0.0,
+            "raw_response": response_data,
+            "model_name": merged_options.get("model", self.model),
+        }
+
+        # Chat endpoint returns response in message.content
+        message = response_data.get("message", {})
+        text = message.get("content", "")
+        return {"text": text, "meta": meta}
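The new generate_messages_stream() yields {"type": "delta", "text": ...} chunks as tokens arrive and finishes with a single {"type": "done", ...} chunk carrying the full text plus usage metadata. A consumption sketch, assuming a local Ollama server and the default llama3 model:

# Usage sketch only; the running server and model are assumptions.
from prompture.drivers.ollama_driver import OllamaDriver

driver = OllamaDriver()  # defaults to http://localhost:11434/api/generate
messages = [{"role": "user", "content": "Write one sentence about diffs."}]

for chunk in driver.generate_messages_stream(messages, {"temperature": 0.7}):
    if chunk["type"] == "delta":
        print(chunk["text"], end="", flush=True)  # partial text as it streams
    else:  # the final "done" chunk
        meta = chunk["meta"]
        print(f"\n[{meta['total_tokens']} tokens from {meta['model_name']}]")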