prompture 0.0.35__py3-none-any.whl → 0.0.40.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- prompture/__init__.py +132 -3
- prompture/_version.py +2 -2
- prompture/agent.py +924 -0
- prompture/agent_types.py +156 -0
- prompture/async_agent.py +880 -0
- prompture/async_conversation.py +208 -17
- prompture/async_core.py +16 -0
- prompture/async_driver.py +63 -0
- prompture/async_groups.py +551 -0
- prompture/conversation.py +222 -18
- prompture/core.py +46 -12
- prompture/cost_mixin.py +37 -0
- prompture/discovery.py +132 -44
- prompture/driver.py +77 -0
- prompture/drivers/__init__.py +5 -1
- prompture/drivers/async_azure_driver.py +11 -5
- prompture/drivers/async_claude_driver.py +184 -9
- prompture/drivers/async_google_driver.py +222 -28
- prompture/drivers/async_grok_driver.py +11 -5
- prompture/drivers/async_groq_driver.py +11 -5
- prompture/drivers/async_lmstudio_driver.py +74 -5
- prompture/drivers/async_ollama_driver.py +13 -3
- prompture/drivers/async_openai_driver.py +162 -5
- prompture/drivers/async_openrouter_driver.py +11 -5
- prompture/drivers/async_registry.py +5 -1
- prompture/drivers/azure_driver.py +10 -4
- prompture/drivers/claude_driver.py +17 -1
- prompture/drivers/google_driver.py +227 -33
- prompture/drivers/grok_driver.py +11 -5
- prompture/drivers/groq_driver.py +11 -5
- prompture/drivers/lmstudio_driver.py +73 -8
- prompture/drivers/ollama_driver.py +16 -5
- prompture/drivers/openai_driver.py +26 -11
- prompture/drivers/openrouter_driver.py +11 -5
- prompture/drivers/vision_helpers.py +153 -0
- prompture/group_types.py +147 -0
- prompture/groups.py +530 -0
- prompture/image.py +180 -0
- prompture/ledger.py +252 -0
- prompture/model_rates.py +112 -2
- prompture/persistence.py +254 -0
- prompture/persona.py +482 -0
- prompture/serialization.py +218 -0
- prompture/settings.py +1 -0
- prompture-0.0.40.dev1.dist-info/METADATA +369 -0
- prompture-0.0.40.dev1.dist-info/RECORD +78 -0
- prompture-0.0.35.dist-info/METADATA +0 -464
- prompture-0.0.35.dist-info/RECORD +0 -66
- {prompture-0.0.35.dist-info → prompture-0.0.40.dev1.dist-info}/WHEEL +0 -0
- {prompture-0.0.35.dist-info → prompture-0.0.40.dev1.dist-info}/entry_points.txt +0 -0
- {prompture-0.0.35.dist-info → prompture-0.0.40.dev1.dist-info}/licenses/LICENSE +0 -0
- {prompture-0.0.35.dist-info → prompture-0.0.40.dev1.dist-info}/top_level.txt +0 -0
prompture/drivers/async_google_driver.py

@@ -4,6 +4,8 @@ from __future__ import annotations
 
 import logging
 import os
+import uuid
+from collections.abc import AsyncIterator
 from typing import Any
 
 import google.generativeai as genai
@@ -20,6 +22,9 @@ class AsyncGoogleDriver(CostMixin, AsyncDriver):
 
     supports_json_mode = True
     supports_json_schema = True
+    supports_vision = True
+    supports_tool_use = True
+    supports_streaming = True
 
     MODEL_PRICING = GoogleDriver.MODEL_PRICING
     _PRICING_UNIT = 1_000_000
@@ -48,18 +53,51 @@ class AsyncGoogleDriver(CostMixin, AsyncDriver):
         completion_cost = (completion_chars / 1_000_000) * model_pricing["completion"]
         return round(prompt_cost + completion_cost, 6)
 
+    def _extract_usage_metadata(self, response: Any, messages: list[dict[str, Any]]) -> dict[str, Any]:
+        """Extract token counts from response, falling back to character estimation."""
+        usage = getattr(response, "usage_metadata", None)
+        if usage:
+            prompt_tokens = getattr(usage, "prompt_token_count", 0) or 0
+            completion_tokens = getattr(usage, "candidates_token_count", 0) or 0
+            total_tokens = getattr(usage, "total_token_count", 0) or (prompt_tokens + completion_tokens)
+            cost = self._calculate_cost("google", self.model, prompt_tokens, completion_tokens)
+        else:
+            # Fallback: estimate from character counts
+            total_prompt_chars = 0
+            for msg in messages:
+                c = msg.get("content", "")
+                if isinstance(c, str):
+                    total_prompt_chars += len(c)
+                elif isinstance(c, list):
+                    for part in c:
+                        if isinstance(part, str):
+                            total_prompt_chars += len(part)
+                        elif isinstance(part, dict) and "text" in part:
+                            total_prompt_chars += len(part["text"])
+            completion_chars = len(response.text) if response.text else 0
+            prompt_tokens = total_prompt_chars // 4
+            completion_tokens = completion_chars // 4
+            total_tokens = prompt_tokens + completion_tokens
+            cost = self._calculate_cost_chars(total_prompt_chars, completion_chars)
+
+        return {
+            "prompt_tokens": prompt_tokens,
+            "completion_tokens": completion_tokens,
+            "total_tokens": total_tokens,
+            "cost": round(cost, 6),
+        }
+
     supports_messages = True
 
-
-
-        return await self._do_generate(messages, options)
+    def _prepare_messages(self, messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
+        from .vision_helpers import _prepare_google_vision_messages
 
-
-        return await self._do_generate(messages, options)
+        return _prepare_google_vision_messages(messages)
 
-
-        self, messages: list[dict[str,
-    ) -> dict[str, Any]:
+    def _build_generation_args(
+        self, messages: list[dict[str, Any]], options: dict[str, Any] | None = None
+    ) -> tuple[Any, dict[str, Any], dict[str, Any]]:
+        """Parse messages and options into (gen_input, gen_kwargs, model_kwargs)."""
         merged_options = self.options.copy()
         if options:
             merged_options.update(options)
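The `_extract_usage_metadata` helper added above prefers the SDK's `usage_metadata` token counts and only falls back to a character-based estimate (characters divided by four) when the response carries no usage data. A small illustration of that fallback arithmetic, with an assumed completion length:

```python
# Fallback arithmetic from _extract_usage_metadata above: tokens are estimated as
# chars // 4 when the Gemini response exposes no usage_metadata.
prompt_chars = len("Summarize the plot of Dracula in one paragraph.")  # 47 characters
completion_chars = 600  # assumed length of the model's reply

estimate = {
    "prompt_tokens": prompt_chars // 4,                           # 11
    "completion_tokens": completion_chars // 4,                   # 150
    "total_tokens": prompt_chars // 4 + completion_chars // 4,    # 161
}
print(estimate)
```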
prompture/drivers/async_google_driver.py (continued)

@@ -90,37 +128,65 @@ class AsyncGoogleDriver(CostMixin, AsyncDriver):
             role = msg.get("role", "user")
             content = msg.get("content", "")
             if role == "system":
-                system_instruction = content
+                system_instruction = content if isinstance(content, str) else str(content)
             else:
                 gemini_role = "model" if role == "assistant" else "user"
-
+                if msg.get("_vision_parts"):
+                    contents.append({"role": gemini_role, "parts": content})
+                else:
+                    contents.append({"role": gemini_role, "parts": [content]})
+
+        # For a single message, unwrap only if it has exactly one string part
+        if len(contents) == 1:
+            parts = contents[0]["parts"]
+            if len(parts) == 1 and isinstance(parts[0], str):
+                gen_input = parts[0]
+            else:
+                gen_input = contents
+        else:
+            gen_input = contents
+
+        model_kwargs: dict[str, Any] = {}
+        if system_instruction:
+            model_kwargs["system_instruction"] = system_instruction
+
+        gen_kwargs: dict[str, Any] = {
+            "generation_config": generation_config if generation_config else None,
+            "safety_settings": safety_settings if safety_settings else None,
+        }
+
+        return gen_input, gen_kwargs, model_kwargs
+
+    async def generate(self, prompt: str, options: dict[str, Any] | None = None) -> dict[str, Any]:
+        messages = [{"role": "user", "content": prompt}]
+        return await self._do_generate(messages, options)
+
+    async def generate_messages(self, messages: list[dict[str, str]], options: dict[str, Any]) -> dict[str, Any]:
+        return await self._do_generate(self._prepare_messages(messages), options)
+
+    async def _do_generate(
+        self, messages: list[dict[str, str]], options: dict[str, Any] | None = None
+    ) -> dict[str, Any]:
+        gen_input, gen_kwargs, model_kwargs = self._build_generation_args(messages, options)
+
+        # Validate capabilities against models.dev metadata
+        self._validate_model_capabilities(
+            "google",
+            self.model,
+            using_json_schema=bool((options or {}).get("json_schema")),
+        )
 
         try:
-            model_kwargs: dict[str, Any] = {}
-            if system_instruction:
-                model_kwargs["system_instruction"] = system_instruction
             model = genai.GenerativeModel(self.model, **model_kwargs)
-
-            gen_input: Any = contents if len(contents) != 1 else contents[0]["parts"][0]
-            response = await model.generate_content_async(
-                gen_input,
-                generation_config=generation_config if generation_config else None,
-                safety_settings=safety_settings if safety_settings else None,
-            )
+            response = await model.generate_content_async(gen_input, **gen_kwargs)
 
             if not response.text:
                 raise ValueError("Empty response from model")
 
-
-            completion_chars = len(response.text)
-
-            total_cost = self._calculate_cost_chars(total_prompt_chars, completion_chars)
+            usage_meta = self._extract_usage_metadata(response, messages)
 
             meta = {
-
-                "completion_chars": completion_chars,
-                "total_chars": total_prompt_chars + completion_chars,
-                "cost": total_cost,
+                **usage_meta,
                 "raw_response": response.prompt_feedback if hasattr(response, "prompt_feedback") else None,
                 "model_name": self.model,
             }
@@ -130,3 +196,131 @@ class AsyncGoogleDriver(CostMixin, AsyncDriver):
         except Exception as e:
             logger.error(f"Google API request failed: {e}")
             raise RuntimeError(f"Google API request failed: {e}") from e
+
+    # ------------------------------------------------------------------
+    # Tool use
+    # ------------------------------------------------------------------
+
+    async def generate_messages_with_tools(
+        self,
+        messages: list[dict[str, Any]],
+        tools: list[dict[str, Any]],
+        options: dict[str, Any],
+    ) -> dict[str, Any]:
+        """Generate a response that may include tool/function calls (async)."""
+        model = options.get("model", self.model)
+        self._validate_model_capabilities("google", model, using_tool_use=True)
+
+        gen_input, gen_kwargs, model_kwargs = self._build_generation_args(
+            self._prepare_messages(messages), options
+        )
+
+        # Convert tools from OpenAI format to Gemini function declarations
+        function_declarations = []
+        for t in tools:
+            if "type" in t and t["type"] == "function":
+                fn = t["function"]
+                decl = {
+                    "name": fn["name"],
+                    "description": fn.get("description", ""),
+                }
+                params = fn.get("parameters")
+                if params:
+                    decl["parameters"] = params
+                function_declarations.append(decl)
+            elif "name" in t:
+                decl = {"name": t["name"], "description": t.get("description", "")}
+                params = t.get("parameters") or t.get("input_schema")
+                if params:
+                    decl["parameters"] = params
+                function_declarations.append(decl)
+
+        try:
+            model = genai.GenerativeModel(self.model, **model_kwargs)
+
+            gemini_tools = [genai.types.Tool(function_declarations=function_declarations)]
+            response = await model.generate_content_async(gen_input, tools=gemini_tools, **gen_kwargs)
+
+            usage_meta = self._extract_usage_metadata(response, messages)
+            meta = {
+                **usage_meta,
+                "raw_response": response.prompt_feedback if hasattr(response, "prompt_feedback") else None,
+                "model_name": self.model,
+            }
+
+            text = ""
+            tool_calls_out: list[dict[str, Any]] = []
+            stop_reason = "stop"
+
+            for candidate in response.candidates:
+                for part in candidate.content.parts:
+                    if hasattr(part, "text") and part.text:
+                        text += part.text
+                    if hasattr(part, "function_call") and part.function_call.name:
+                        fc = part.function_call
+                        tool_calls_out.append({
+                            "id": str(uuid.uuid4()),
+                            "name": fc.name,
+                            "arguments": dict(fc.args) if fc.args else {},
+                        })
+
+                finish_reason = getattr(candidate, "finish_reason", None)
+                if finish_reason is not None:
+                    reason_map = {1: "stop", 2: "max_tokens", 3: "safety", 4: "recitation", 5: "other"}
+                    stop_reason = reason_map.get(finish_reason, "stop")
+
+            if tool_calls_out:
+                stop_reason = "tool_use"
+
+            return {
+                "text": text,
+                "meta": meta,
+                "tool_calls": tool_calls_out,
+                "stop_reason": stop_reason,
+            }
+
+        except Exception as e:
+            logger.error(f"Google API tool call request failed: {e}")
+            raise RuntimeError(f"Google API tool call request failed: {e}") from e
+
+    # ------------------------------------------------------------------
+    # Streaming
+    # ------------------------------------------------------------------
+
+    async def generate_messages_stream(
+        self,
+        messages: list[dict[str, Any]],
+        options: dict[str, Any],
+    ) -> AsyncIterator[dict[str, Any]]:
+        """Yield response chunks via Gemini async streaming API."""
+        gen_input, gen_kwargs, model_kwargs = self._build_generation_args(
+            self._prepare_messages(messages), options
+        )
+
+        try:
+            model = genai.GenerativeModel(self.model, **model_kwargs)
+            response = await model.generate_content_async(gen_input, stream=True, **gen_kwargs)
+
+            full_text = ""
+            async for chunk in response:
+                chunk_text = getattr(chunk, "text", None) or ""
+                if chunk_text:
+                    full_text += chunk_text
+                    yield {"type": "delta", "text": chunk_text}
+
+            # After iteration completes, usage_metadata should be available
+            usage_meta = self._extract_usage_metadata(response, messages)
+
+            yield {
+                "type": "done",
+                "text": full_text,
+                "meta": {
+                    **usage_meta,
+                    "raw_response": {},
+                    "model_name": self.model,
+                },
+            }
+
+        except Exception as e:
+            logger.error(f"Google API streaming request failed: {e}")
+            raise RuntimeError(f"Google API streaming request failed: {e}") from e
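Taken together, these hunks give the async Google driver tool calling (`generate_messages_with_tools`) and streaming (`generate_messages_stream`). The sketch below shows how those entry points might be exercised; it assumes an already-constructed `AsyncGoogleDriver` instance (construction is not part of these hunks), and the `get_weather` tool and its schema are purely illustrative.

```python
from typing import Any


async def demo(driver: Any) -> None:
    """Minimal sketch against the new tool-use and streaming methods shown above."""
    messages = [{"role": "user", "content": "What's the weather in Paris?"}]

    # OpenAI-style tool definition; the driver converts this into a Gemini
    # function declaration (see generate_messages_with_tools in the diff).
    tools = [{
        "type": "function",
        "function": {
            "name": "get_weather",  # hypothetical tool, for illustration only
            "description": "Look up the current weather for a city",
            "parameters": {
                "type": "object",
                "properties": {"city": {"type": "string"}},
                "required": ["city"],
            },
        },
    }]

    result = await driver.generate_messages_with_tools(messages, tools, options={})
    if result["stop_reason"] == "tool_use":
        for call in result["tool_calls"]:
            print(call["id"], call["name"], call["arguments"])

    # Streaming: "delta" chunks first, then a final "done" chunk with usage metadata.
    async for chunk in driver.generate_messages_stream(messages, options={}):
        if chunk["type"] == "delta":
            print(chunk["text"], end="", flush=True)
        elif chunk["type"] == "done":
            print("\n", chunk["meta"]["total_tokens"], "tokens")
```

Note that tool-call ids are synthesized with `uuid.uuid4()` in the driver, so they are only stable within a single response.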
prompture/drivers/async_grok_driver.py

@@ -14,6 +14,7 @@ from .grok_driver import GrokDriver
 
 class AsyncGrokDriver(CostMixin, AsyncDriver):
     supports_json_mode = True
+    supports_vision = True
 
     MODEL_PRICING = GrokDriver.MODEL_PRICING
     _PRICING_UNIT = 1_000_000
@@ -25,12 +26,17 @@ class AsyncGrokDriver(CostMixin, AsyncDriver):
 
     supports_messages = True
 
+    def _prepare_messages(self, messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
+        from .vision_helpers import _prepare_openai_vision_messages
+
+        return _prepare_openai_vision_messages(messages)
+
     async def generate(self, prompt: str, options: dict[str, Any]) -> dict[str, Any]:
         messages = [{"role": "user", "content": prompt}]
         return await self._do_generate(messages, options)
 
     async def generate_messages(self, messages: list[dict[str, str]], options: dict[str, Any]) -> dict[str, Any]:
-        return await self._do_generate(messages, options)
+        return await self._do_generate(self._prepare_messages(messages), options)
 
     async def _do_generate(self, messages: list[dict[str, str]], options: dict[str, Any]) -> dict[str, Any]:
         if not self.api_key:
@@ -38,9 +44,9 @@ class AsyncGrokDriver(CostMixin, AsyncDriver):
 
         model = options.get("model", self.model)
 
-
-        tokens_param =
-        supports_temperature =
+        model_config = self._get_model_config("grok", model)
+        tokens_param = model_config["tokens_param"]
+        supports_temperature = model_config["supports_temperature"]
 
         opts = {"temperature": 1.0, "max_tokens": 512, **options}
 
@@ -82,7 +88,7 @@ class AsyncGrokDriver(CostMixin, AsyncDriver):
             "prompt_tokens": prompt_tokens,
             "completion_tokens": completion_tokens,
             "total_tokens": total_tokens,
-            "cost": total_cost,
+            "cost": round(total_cost, 6),
             "raw_response": resp,
             "model_name": model,
         }
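The Grok driver (and the Groq and LM Studio drivers below) now routes `generate_messages` through `_prepare_openai_vision_messages`. That helper lives in the new `vision_helpers.py`, which is not captured in these hunks, so the snippet below only illustrates the OpenAI-style content-part layout such a helper conventionally targets; treat the exact shape as an assumption.

```python
# Assumed OpenAI-style vision message layout (vision_helpers itself is not shown
# in this diff): multimodal content is expressed as a list of typed parts.
vision_message = {
    "role": "user",
    "content": [
        {"type": "text", "text": "Describe this chart."},
        {
            "type": "image_url",
            "image_url": {"url": "data:image/png;base64,<BASE64_IMAGE_BYTES>"},
        },
    ],
}
```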
prompture/drivers/async_groq_driver.py

@@ -17,6 +17,7 @@ from .groq_driver import GroqDriver
 
 class AsyncGroqDriver(CostMixin, AsyncDriver):
     supports_json_mode = True
+    supports_vision = True
 
     MODEL_PRICING = GroqDriver.MODEL_PRICING
 
@@ -30,12 +31,17 @@ class AsyncGroqDriver(CostMixin, AsyncDriver):
 
     supports_messages = True
 
+    def _prepare_messages(self, messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
+        from .vision_helpers import _prepare_openai_vision_messages
+
+        return _prepare_openai_vision_messages(messages)
+
     async def generate(self, prompt: str, options: dict[str, Any]) -> dict[str, Any]:
         messages = [{"role": "user", "content": prompt}]
         return await self._do_generate(messages, options)
 
     async def generate_messages(self, messages: list[dict[str, str]], options: dict[str, Any]) -> dict[str, Any]:
-        return await self._do_generate(messages, options)
+        return await self._do_generate(self._prepare_messages(messages), options)
 
     async def _do_generate(self, messages: list[dict[str, str]], options: dict[str, Any]) -> dict[str, Any]:
         if self.client is None:
@@ -43,9 +49,9 @@ class AsyncGroqDriver(CostMixin, AsyncDriver):
 
         model = options.get("model", self.model)
 
-
-        tokens_param =
-        supports_temperature =
+        model_config = self._get_model_config("groq", model)
+        tokens_param = model_config["tokens_param"]
+        supports_temperature = model_config["supports_temperature"]
 
         opts = {"temperature": 0.7, "max_tokens": 512, **options}
 
@@ -75,7 +81,7 @@ class AsyncGroqDriver(CostMixin, AsyncDriver):
             "prompt_tokens": prompt_tokens,
             "completion_tokens": completion_tokens,
             "total_tokens": total_tokens,
-            "cost": total_cost,
+            "cost": round(total_cost, 6),
             "raw_response": resp.model_dump(),
             "model_name": model,
         }
prompture/drivers/async_lmstudio_driver.py

@@ -15,22 +15,48 @@ logger = logging.getLogger(__name__)
 
 class AsyncLMStudioDriver(AsyncDriver):
     supports_json_mode = True
+    supports_json_schema = True
+    supports_vision = True
 
     MODEL_PRICING = {"default": {"prompt": 0.0, "completion": 0.0}}
 
-    def __init__(
+    def __init__(
+        self,
+        endpoint: str | None = None,
+        model: str = "deepseek/deepseek-r1-0528-qwen3-8b",
+        api_key: str | None = None,
+    ):
         self.endpoint = endpoint or os.getenv("LMSTUDIO_ENDPOINT", "http://127.0.0.1:1234/v1/chat/completions")
         self.model = model
         self.options: dict[str, Any] = {}
 
+        # Derive base_url once for reuse across management endpoints
+        self.base_url = self.endpoint.split("/v1/")[0]
+
+        # API key for LM Studio 0.4.0+ authentication
+        self.api_key = api_key or os.getenv("LMSTUDIO_API_KEY")
+        self._headers = self._build_headers()
+
     supports_messages = True
 
+    def _build_headers(self) -> dict[str, str]:
+        """Build request headers, including auth if an API key is configured."""
+        headers: dict[str, str] = {"Content-Type": "application/json"}
+        if self.api_key:
+            headers["Authorization"] = f"Bearer {self.api_key}"
+        return headers
+
+    def _prepare_messages(self, messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
+        from .vision_helpers import _prepare_openai_vision_messages
+
+        return _prepare_openai_vision_messages(messages)
+
     async def generate(self, prompt: str, options: dict[str, Any] | None = None) -> dict[str, Any]:
         messages = [{"role": "user", "content": prompt}]
         return await self._do_generate(messages, options)
 
     async def generate_messages(self, messages: list[dict[str, str]], options: dict[str, Any]) -> dict[str, Any]:
-        return await self._do_generate(messages, options)
+        return await self._do_generate(self._prepare_messages(messages), options)
 
     async def _do_generate(
         self, messages: list[dict[str, str]], options: dict[str, Any] | None = None
@@ -45,13 +71,25 @@ class AsyncLMStudioDriver(AsyncDriver):
             "temperature": merged_options.get("temperature", 0.7),
         }
 
-        # Native JSON mode support
+        # Native JSON mode support (LM Studio requires json_schema, not json_object)
         if merged_options.get("json_mode"):
-
+            json_schema = merged_options.get("json_schema")
+            if json_schema:
+                payload["response_format"] = {
+                    "type": "json_schema",
+                    "json_schema": {
+                        "name": "extraction",
+                        "schema": json_schema,
+                    },
+                }
+            else:
+                # No schema provided — omit response_format entirely;
+                # LM Studio rejects "json_object" type.
+                pass
 
         async with httpx.AsyncClient() as client:
             try:
-                r = await client.post(self.endpoint, json=payload, timeout=120)
+                r = await client.post(self.endpoint, json=payload, headers=self._headers, timeout=120)
                 r.raise_for_status()
                 response_data = r.json()
             except Exception as e:
@@ -77,3 +115,34 @@ class AsyncLMStudioDriver(AsyncDriver):
             }
 
         return {"text": text, "meta": meta}
+
+    # -- Model management (LM Studio 0.4.0+) ----------------------------------
+
+    async def list_models(self) -> list[dict[str, Any]]:
+        """List currently loaded models via GET /v1/models (OpenAI-compatible)."""
+        url = f"{self.base_url}/v1/models"
+        async with httpx.AsyncClient() as client:
+            r = await client.get(url, headers=self._headers, timeout=10)
+            r.raise_for_status()
+            data = r.json()
+            return data.get("data", [])
+
+    async def load_model(self, model: str, context_length: int | None = None) -> dict[str, Any]:
+        """Load a model into LM Studio via POST /api/v1/models/load."""
+        url = f"{self.base_url}/api/v1/models/load"
+        payload: dict[str, Any] = {"model": model}
+        if context_length is not None:
+            payload["context_length"] = context_length
+        async with httpx.AsyncClient() as client:
+            r = await client.post(url, json=payload, headers=self._headers, timeout=120)
+            r.raise_for_status()
+            return r.json()
+
+    async def unload_model(self, model: str) -> dict[str, Any]:
+        """Unload a model from LM Studio via POST /api/v1/models/unload."""
+        url = f"{self.base_url}/api/v1/models/unload"
+        payload = {"instance_id": model}
+        async with httpx.AsyncClient() as client:
+            r = await client.post(url, json=payload, headers=self._headers, timeout=30)
+            r.raise_for_status()
+            return r.json()
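Beyond vision and schema-constrained JSON, the LM Studio driver gains bearer-token auth and a small model-management surface (`list_models`, `load_model`, `unload_model`). A usage sketch, assuming a local LM Studio server is running; the model id and API key below are placeholders, not values from this diff.

```python
import asyncio

from prompture.drivers.async_lmstudio_driver import AsyncLMStudioDriver


async def main() -> None:
    driver = AsyncLMStudioDriver(
        endpoint="http://127.0.0.1:1234/v1/chat/completions",
        api_key="sk-local-example",  # placeholder; only needed when LM Studio auth is enabled
    )

    loaded = await driver.list_models()  # GET {base_url}/v1/models
    print([m.get("id") for m in loaded])

    # Placeholder model id; load_model POSTs to /api/v1/models/load.
    await driver.load_model("qwen2.5-7b-instruct", context_length=8192)
    result = await driver.generate("Say hello in one sentence.")
    print(result["text"])
    await driver.unload_model("qwen2.5-7b-instruct")


if __name__ == "__main__":
    asyncio.run(main())
```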
prompture/drivers/async_ollama_driver.py

@@ -15,6 +15,8 @@ logger = logging.getLogger(__name__)
 
 class AsyncOllamaDriver(AsyncDriver):
     supports_json_mode = True
+    supports_json_schema = True
+    supports_vision = True
 
     MODEL_PRICING = {"default": {"prompt": 0.0, "completion": 0.0}}
 
@@ -25,6 +27,11 @@ class AsyncOllamaDriver(AsyncDriver):
 
     supports_messages = True
 
+    def _prepare_messages(self, messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
+        from .vision_helpers import _prepare_ollama_vision_messages
+
+        return _prepare_ollama_vision_messages(messages)
+
     async def generate(self, prompt: str, options: dict[str, Any] | None = None) -> dict[str, Any]:
         merged_options = self.options.copy()
         if options:
@@ -36,9 +43,10 @@ class AsyncOllamaDriver(AsyncDriver):
             "stream": False,
         }
 
-        # Native JSON mode support
+        # Native JSON mode / structured output support
        if merged_options.get("json_mode"):
-
+            json_schema = merged_options.get("json_schema")
+            payload["format"] = json_schema if json_schema else "json"
 
         if "temperature" in merged_options:
             payload["temperature"] = merged_options["temperature"]
@@ -74,6 +82,7 @@ class AsyncOllamaDriver(AsyncDriver):
 
     async def generate_messages(self, messages: list[dict[str, str]], options: dict[str, Any]) -> dict[str, Any]:
         """Use Ollama's /api/chat endpoint for multi-turn conversations."""
+        messages = self._prepare_messages(messages)
         merged_options = self.options.copy()
         if options:
             merged_options.update(options)
@@ -88,7 +97,8 @@ class AsyncOllamaDriver(AsyncDriver):
         }
 
         if merged_options.get("json_mode"):
-
+            json_schema = merged_options.get("json_schema")
+            payload["format"] = json_schema if json_schema else "json"
 
         if "temperature" in merged_options:
             payload["temperature"] = merged_options["temperature"]
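The Ollama changes mirror the LM Studio ones: when `json_mode` is set and a `json_schema` is supplied, the schema is passed straight through as Ollama's `format` field, otherwise the driver falls back to `format="json"`. A sketch of requesting schema-constrained output; the default constructor and the shape of the returned dict are assumptions, since neither appears in these hunks.

```python
import asyncio

from prompture.drivers.async_ollama_driver import AsyncOllamaDriver


async def main() -> None:
    driver = AsyncOllamaDriver()  # assumption: default constructor, not shown in this diff

    schema = {
        "type": "object",
        "properties": {"name": {"type": "string"}, "age": {"type": "integer"}},
        "required": ["name", "age"],
    }
    result = await driver.generate(
        "Extract the person: Ada Lovelace, 36 years old.",
        {"json_mode": True, "json_schema": schema},  # becomes payload["format"] = schema
    )
    print(result["text"])  # assumed {"text": ..., "meta": ...} shape, as in the other drivers


if __name__ == "__main__":
    asyncio.run(main())
```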