prompture 0.0.38.dev1__py3-none-any.whl → 0.0.38.dev3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- prompture/_version.py +2 -2
- prompture/drivers/async_azure_driver.py +1 -1
- prompture/drivers/async_claude_driver.py +167 -8
- prompture/drivers/async_google_driver.py +203 -39
- prompture/drivers/async_grok_driver.py +1 -1
- prompture/drivers/async_groq_driver.py +1 -1
- prompture/drivers/async_openai_driver.py +143 -1
- prompture/drivers/async_openrouter_driver.py +1 -1
- prompture/drivers/google_driver.py +207 -43
- {prompture-0.0.38.dev1.dist-info → prompture-0.0.38.dev3.dist-info}/METADATA +1 -1
- {prompture-0.0.38.dev1.dist-info → prompture-0.0.38.dev3.dist-info}/RECORD +15 -15
- {prompture-0.0.38.dev1.dist-info → prompture-0.0.38.dev3.dist-info}/WHEEL +0 -0
- {prompture-0.0.38.dev1.dist-info → prompture-0.0.38.dev3.dist-info}/entry_points.txt +0 -0
- {prompture-0.0.38.dev1.dist-info → prompture-0.0.38.dev3.dist-info}/licenses/LICENSE +0 -0
- {prompture-0.0.38.dev1.dist-info → prompture-0.0.38.dev3.dist-info}/top_level.txt +0 -0
prompture/_version.py
CHANGED
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
 commit_id: COMMIT_ID
 __commit_id__: COMMIT_ID

-__version__ = version = '0.0.38.dev1'
-__version_tuple__ = version_tuple = (0, 0, 38, 'dev1')
+__version__ = version = '0.0.38.dev3'
+__version_tuple__ = version_tuple = (0, 0, 38, 'dev3')

 __commit_id__ = commit_id = None
prompture/drivers/async_azure_driver.py
CHANGED

@@ -113,7 +113,7 @@ class AsyncAzureDriver(CostMixin, AsyncDriver):
             "prompt_tokens": prompt_tokens,
             "completion_tokens": completion_tokens,
             "total_tokens": total_tokens,
-            "cost": total_cost,
+            "cost": round(total_cost, 6),
             "raw_response": resp.model_dump(),
             "model_name": model,
             "deployment_id": self.deployment_id,
prompture/drivers/async_claude_driver.py
CHANGED

@@ -4,6 +4,7 @@ from __future__ import annotations

 import json
 import os
+from collections.abc import AsyncIterator
 from typing import Any

 try:
@@ -19,6 +20,8 @@ from .claude_driver import ClaudeDriver
 class AsyncClaudeDriver(CostMixin, AsyncDriver):
     supports_json_mode = True
     supports_json_schema = True
+    supports_tool_use = True
+    supports_streaming = True
     supports_vision = True

     MODEL_PRICING = ClaudeDriver.MODEL_PRICING
@@ -51,13 +54,7 @@ class AsyncClaudeDriver(CostMixin, AsyncDriver):
         client = anthropic.AsyncAnthropic(api_key=self.api_key)

         # Anthropic requires system messages as a top-level parameter
-        system_content =
-        api_messages = []
-        for msg in messages:
-            if msg.get("role") == "system":
-                system_content = msg.get("content", "")
-            else:
-                api_messages.append(msg)
+        system_content, api_messages = self._extract_system_and_messages(messages)

         # Build common kwargs
         common_kwargs: dict[str, Any] = {
@@ -105,9 +102,171 @@ class AsyncClaudeDriver(CostMixin, AsyncDriver):
             "prompt_tokens": prompt_tokens,
             "completion_tokens": completion_tokens,
             "total_tokens": total_tokens,
-            "cost": total_cost,
+            "cost": round(total_cost, 6),
             "raw_response": dict(resp),
             "model_name": model,
         }

         return {"text": text, "meta": meta}
+
+    # ------------------------------------------------------------------
+    # Helpers
+    # ------------------------------------------------------------------
+
+    def _extract_system_and_messages(
+        self, messages: list[dict[str, Any]]
+    ) -> tuple[str | None, list[dict[str, Any]]]:
+        """Separate system message from conversation messages for Anthropic API."""
+        system_content = None
+        api_messages: list[dict[str, Any]] = []
+        for msg in messages:
+            if msg.get("role") == "system":
+                system_content = msg.get("content", "")
+            else:
+                api_messages.append(msg)
+        return system_content, api_messages
+
+    # ------------------------------------------------------------------
+    # Tool use
+    # ------------------------------------------------------------------
+
+    async def generate_messages_with_tools(
+        self,
+        messages: list[dict[str, Any]],
+        tools: list[dict[str, Any]],
+        options: dict[str, Any],
+    ) -> dict[str, Any]:
+        """Generate a response that may include tool calls (Anthropic)."""
+        if anthropic is None:
+            raise RuntimeError("anthropic package not installed")
+
+        opts = {**{"temperature": 0.0, "max_tokens": 512}, **options}
+        model = options.get("model", self.model)
+        client = anthropic.AsyncAnthropic(api_key=self.api_key)
+
+        system_content, api_messages = self._extract_system_and_messages(messages)
+
+        # Convert tools from OpenAI format to Anthropic format if needed
+        anthropic_tools = []
+        for t in tools:
+            if "type" in t and t["type"] == "function":
+                # OpenAI format -> Anthropic format
+                fn = t["function"]
+                anthropic_tools.append({
+                    "name": fn["name"],
+                    "description": fn.get("description", ""),
+                    "input_schema": fn.get("parameters", {"type": "object", "properties": {}}),
+                })
+            elif "input_schema" in t:
+                # Already Anthropic format
+                anthropic_tools.append(t)
+            else:
+                anthropic_tools.append(t)
+
+        kwargs: dict[str, Any] = {
+            "model": model,
+            "messages": api_messages,
+            "temperature": opts["temperature"],
+            "max_tokens": opts["max_tokens"],
+            "tools": anthropic_tools,
+        }
+        if system_content:
+            kwargs["system"] = system_content
+
+        resp = await client.messages.create(**kwargs)
+
+        prompt_tokens = resp.usage.input_tokens
+        completion_tokens = resp.usage.output_tokens
+        total_tokens = prompt_tokens + completion_tokens
+        total_cost = self._calculate_cost("claude", model, prompt_tokens, completion_tokens)
+
+        meta = {
+            "prompt_tokens": prompt_tokens,
+            "completion_tokens": completion_tokens,
+            "total_tokens": total_tokens,
+            "cost": round(total_cost, 6),
+            "raw_response": dict(resp),
+            "model_name": model,
+        }
+
+        text = ""
+        tool_calls_out: list[dict[str, Any]] = []
+        for block in resp.content:
+            if block.type == "text":
+                text += block.text
+            elif block.type == "tool_use":
+                tool_calls_out.append({
+                    "id": block.id,
+                    "name": block.name,
+                    "arguments": block.input,
+                })
+
+        return {
+            "text": text,
+            "meta": meta,
+            "tool_calls": tool_calls_out,
+            "stop_reason": resp.stop_reason,
+        }
+
+    # ------------------------------------------------------------------
+    # Streaming
+    # ------------------------------------------------------------------
+
+    async def generate_messages_stream(
+        self,
+        messages: list[dict[str, Any]],
+        options: dict[str, Any],
+    ) -> AsyncIterator[dict[str, Any]]:
+        """Yield response chunks via Anthropic streaming API."""
+        if anthropic is None:
+            raise RuntimeError("anthropic package not installed")
+
+        opts = {**{"temperature": 0.0, "max_tokens": 512}, **options}
+        model = options.get("model", self.model)
+        client = anthropic.AsyncAnthropic(api_key=self.api_key)
+
+        system_content, api_messages = self._extract_system_and_messages(messages)
+
+        kwargs: dict[str, Any] = {
+            "model": model,
+            "messages": api_messages,
+            "temperature": opts["temperature"],
+            "max_tokens": opts["max_tokens"],
+        }
+        if system_content:
+            kwargs["system"] = system_content
+
+        full_text = ""
+        prompt_tokens = 0
+        completion_tokens = 0
+
+        async with client.messages.stream(**kwargs) as stream:
+            async for event in stream:
+                if hasattr(event, "type"):
+                    if event.type == "content_block_delta" and hasattr(event, "delta"):
+                        delta_text = getattr(event.delta, "text", "")
+                        if delta_text:
+                            full_text += delta_text
+                            yield {"type": "delta", "text": delta_text}
+                    elif event.type == "message_delta" and hasattr(event, "usage"):
+                        completion_tokens = getattr(event.usage, "output_tokens", 0)
+                    elif event.type == "message_start" and hasattr(event, "message"):
+                        usage = getattr(event.message, "usage", None)
+                        if usage:
+                            prompt_tokens = getattr(usage, "input_tokens", 0)
+
+        total_tokens = prompt_tokens + completion_tokens
+        total_cost = self._calculate_cost("claude", model, prompt_tokens, completion_tokens)
+
+        yield {
+            "type": "done",
+            "text": full_text,
+            "meta": {
+                "prompt_tokens": prompt_tokens,
+                "completion_tokens": completion_tokens,
+                "total_tokens": total_tokens,
+                "cost": round(total_cost, 6),
+                "raw_response": {},
+                "model_name": model,
+            },
+        }
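The new AsyncClaudeDriver streaming method yields {"type": "delta"} chunks followed by a single {"type": "done"} chunk carrying the accumulated text and usage meta. A minimal consumption sketch follows; the constructor arguments and model name are assumptions for illustration only, since the diff shows the driver reading self.api_key and self.model but not its __init__:

    import asyncio

    from prompture.drivers.async_claude_driver import AsyncClaudeDriver

    async def main() -> None:
        # Constructor arguments are an assumption, not shown in this diff.
        driver = AsyncClaudeDriver(api_key="sk-ant-...", model="claude-3-5-sonnet-latest")
        messages = [{"role": "user", "content": "Summarize this changelog in one sentence."}]

        async for chunk in driver.generate_messages_stream(messages, {"max_tokens": 128}):
            if chunk["type"] == "delta":
                print(chunk["text"], end="", flush=True)   # incremental text
            elif chunk["type"] == "done":
                print("\ncost:", chunk["meta"]["cost"])    # rounded to 6 decimals

    asyncio.run(main())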
prompture/drivers/async_google_driver.py
CHANGED

@@ -4,6 +4,8 @@ from __future__ import annotations

 import logging
 import os
+import uuid
+from collections.abc import AsyncIterator
 from typing import Any

 import google.generativeai as genai
@@ -21,6 +23,8 @@ class AsyncGoogleDriver(CostMixin, AsyncDriver):
     supports_json_mode = True
     supports_json_schema = True
     supports_vision = True
+    supports_tool_use = True
+    supports_streaming = True

     MODEL_PRICING = GoogleDriver.MODEL_PRICING
     _PRICING_UNIT = 1_000_000
@@ -49,6 +53,40 @@ class AsyncGoogleDriver(CostMixin, AsyncDriver):
         completion_cost = (completion_chars / 1_000_000) * model_pricing["completion"]
         return round(prompt_cost + completion_cost, 6)

+    def _extract_usage_metadata(self, response: Any, messages: list[dict[str, Any]]) -> dict[str, Any]:
+        """Extract token counts from response, falling back to character estimation."""
+        usage = getattr(response, "usage_metadata", None)
+        if usage:
+            prompt_tokens = getattr(usage, "prompt_token_count", 0) or 0
+            completion_tokens = getattr(usage, "candidates_token_count", 0) or 0
+            total_tokens = getattr(usage, "total_token_count", 0) or (prompt_tokens + completion_tokens)
+            cost = self._calculate_cost("google", self.model, prompt_tokens, completion_tokens)
+        else:
+            # Fallback: estimate from character counts
+            total_prompt_chars = 0
+            for msg in messages:
+                c = msg.get("content", "")
+                if isinstance(c, str):
+                    total_prompt_chars += len(c)
+                elif isinstance(c, list):
+                    for part in c:
+                        if isinstance(part, str):
+                            total_prompt_chars += len(part)
+                        elif isinstance(part, dict) and "text" in part:
+                            total_prompt_chars += len(part["text"])
+            completion_chars = len(response.text) if response.text else 0
+            prompt_tokens = total_prompt_chars // 4
+            completion_tokens = completion_chars // 4
+            total_tokens = prompt_tokens + completion_tokens
+            cost = self._calculate_cost_chars(total_prompt_chars, completion_chars)
+
+        return {
+            "prompt_tokens": prompt_tokens,
+            "completion_tokens": completion_tokens,
+            "total_tokens": total_tokens,
+            "cost": round(cost, 6),
+        }
+
     supports_messages = True

     def _prepare_messages(self, messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
@@ -56,16 +94,10 @@ class AsyncGoogleDriver(CostMixin, AsyncDriver):

         return _prepare_google_vision_messages(messages)

-
-        messages
-
-
-    async def generate_messages(self, messages: list[dict[str, str]], options: dict[str, Any]) -> dict[str, Any]:
-        return await self._do_generate(self._prepare_messages(messages), options)
-
-    async def _do_generate(
-        self, messages: list[dict[str, str]], options: dict[str, Any] | None = None
-    ) -> dict[str, Any]:
+    def _build_generation_args(
+        self, messages: list[dict[str, Any]], options: dict[str, Any] | None = None
+    ) -> tuple[Any, dict[str, Any], dict[str, Any]]:
+        """Parse messages and options into (gen_input, gen_kwargs, model_kwargs)."""
         merged_options = self.options.copy()
         if options:
             merged_options.update(options)
@@ -100,47 +132,54 @@ class AsyncGoogleDriver(CostMixin, AsyncDriver):
             else:
                 gemini_role = "model" if role == "assistant" else "user"
                 if msg.get("_vision_parts"):
-                    # Already converted to Gemini parts by _prepare_messages
                     contents.append({"role": gemini_role, "parts": content})
                 else:
                     contents.append({"role": gemini_role, "parts": [content]})

+        # For a single message, unwrap only if it has exactly one string part
+        if len(contents) == 1:
+            parts = contents[0]["parts"]
+            if len(parts) == 1 and isinstance(parts[0], str):
+                gen_input = parts[0]
+            else:
+                gen_input = contents
+        else:
+            gen_input = contents
+
+        model_kwargs: dict[str, Any] = {}
+        if system_instruction:
+            model_kwargs["system_instruction"] = system_instruction
+
+        gen_kwargs: dict[str, Any] = {
+            "generation_config": generation_config if generation_config else None,
+            "safety_settings": safety_settings if safety_settings else None,
+        }
+
+        return gen_input, gen_kwargs, model_kwargs
+
+    async def generate(self, prompt: str, options: dict[str, Any] | None = None) -> dict[str, Any]:
+        messages = [{"role": "user", "content": prompt}]
+        return await self._do_generate(messages, options)
+
+    async def generate_messages(self, messages: list[dict[str, str]], options: dict[str, Any]) -> dict[str, Any]:
+        return await self._do_generate(self._prepare_messages(messages), options)
+
+    async def _do_generate(
+        self, messages: list[dict[str, str]], options: dict[str, Any] | None = None
+    ) -> dict[str, Any]:
+        gen_input, gen_kwargs, model_kwargs = self._build_generation_args(messages, options)
+
         try:
-            model_kwargs: dict[str, Any] = {}
-            if system_instruction:
-                model_kwargs["system_instruction"] = system_instruction
             model = genai.GenerativeModel(self.model, **model_kwargs)
-
-            gen_input: Any = contents if len(contents) != 1 else contents[0]["parts"][0]
-            response = await model.generate_content_async(
-                gen_input,
-                generation_config=generation_config if generation_config else None,
-                safety_settings=safety_settings if safety_settings else None,
-            )
+            response = await model.generate_content_async(gen_input, **gen_kwargs)

             if not response.text:
                 raise ValueError("Empty response from model")

-
-            for msg in messages:
-                c = msg.get("content", "")
-                if isinstance(c, str):
-                    total_prompt_chars += len(c)
-                elif isinstance(c, list):
-                    for part in c:
-                        if isinstance(part, str):
-                            total_prompt_chars += len(part)
-                        elif isinstance(part, dict) and "text" in part:
-                            total_prompt_chars += len(part["text"])
-            completion_chars = len(response.text)
-
-            total_cost = self._calculate_cost_chars(total_prompt_chars, completion_chars)
+            usage_meta = self._extract_usage_metadata(response, messages)

             meta = {
-
-                "completion_chars": completion_chars,
-                "total_chars": total_prompt_chars + completion_chars,
-                "cost": total_cost,
+                **usage_meta,
                 "raw_response": response.prompt_feedback if hasattr(response, "prompt_feedback") else None,
                 "model_name": self.model,
             }
@@ -150,3 +189,128 @@ class AsyncGoogleDriver(CostMixin, AsyncDriver):
         except Exception as e:
             logger.error(f"Google API request failed: {e}")
             raise RuntimeError(f"Google API request failed: {e}") from e
+
+    # ------------------------------------------------------------------
+    # Tool use
+    # ------------------------------------------------------------------
+
+    async def generate_messages_with_tools(
+        self,
+        messages: list[dict[str, Any]],
+        tools: list[dict[str, Any]],
+        options: dict[str, Any],
+    ) -> dict[str, Any]:
+        """Generate a response that may include tool/function calls (async)."""
+        gen_input, gen_kwargs, model_kwargs = self._build_generation_args(
+            self._prepare_messages(messages), options
+        )
+
+        # Convert tools from OpenAI format to Gemini function declarations
+        function_declarations = []
+        for t in tools:
+            if "type" in t and t["type"] == "function":
+                fn = t["function"]
+                decl = {
+                    "name": fn["name"],
+                    "description": fn.get("description", ""),
+                }
+                params = fn.get("parameters")
+                if params:
+                    decl["parameters"] = params
+                function_declarations.append(decl)
+            elif "name" in t:
+                decl = {"name": t["name"], "description": t.get("description", "")}
+                params = t.get("parameters") or t.get("input_schema")
+                if params:
+                    decl["parameters"] = params
+                function_declarations.append(decl)
+
+        try:
+            model = genai.GenerativeModel(self.model, **model_kwargs)
+
+            gemini_tools = [genai.types.Tool(function_declarations=function_declarations)]
+            response = await model.generate_content_async(gen_input, tools=gemini_tools, **gen_kwargs)
+
+            usage_meta = self._extract_usage_metadata(response, messages)
+            meta = {
+                **usage_meta,
+                "raw_response": response.prompt_feedback if hasattr(response, "prompt_feedback") else None,
+                "model_name": self.model,
+            }
+
+            text = ""
+            tool_calls_out: list[dict[str, Any]] = []
+            stop_reason = "stop"
+
+            for candidate in response.candidates:
+                for part in candidate.content.parts:
+                    if hasattr(part, "text") and part.text:
+                        text += part.text
+                    if hasattr(part, "function_call") and part.function_call.name:
+                        fc = part.function_call
+                        tool_calls_out.append({
+                            "id": str(uuid.uuid4()),
+                            "name": fc.name,
+                            "arguments": dict(fc.args) if fc.args else {},
+                        })
+
+                finish_reason = getattr(candidate, "finish_reason", None)
+                if finish_reason is not None:
+                    reason_map = {1: "stop", 2: "max_tokens", 3: "safety", 4: "recitation", 5: "other"}
+                    stop_reason = reason_map.get(finish_reason, "stop")
+
+            if tool_calls_out:
+                stop_reason = "tool_use"
+
+            return {
+                "text": text,
+                "meta": meta,
+                "tool_calls": tool_calls_out,
+                "stop_reason": stop_reason,
+            }
+
+        except Exception as e:
+            logger.error(f"Google API tool call request failed: {e}")
+            raise RuntimeError(f"Google API tool call request failed: {e}") from e
+
+    # ------------------------------------------------------------------
+    # Streaming
+    # ------------------------------------------------------------------
+
+    async def generate_messages_stream(
+        self,
+        messages: list[dict[str, Any]],
+        options: dict[str, Any],
+    ) -> AsyncIterator[dict[str, Any]]:
+        """Yield response chunks via Gemini async streaming API."""
+        gen_input, gen_kwargs, model_kwargs = self._build_generation_args(
+            self._prepare_messages(messages), options
+        )
+
+        try:
+            model = genai.GenerativeModel(self.model, **model_kwargs)
+            response = await model.generate_content_async(gen_input, stream=True, **gen_kwargs)

+            full_text = ""
+            async for chunk in response:
+                chunk_text = getattr(chunk, "text", None) or ""
+                if chunk_text:
+                    full_text += chunk_text
+                    yield {"type": "delta", "text": chunk_text}
+
+            # After iteration completes, usage_metadata should be available
+            usage_meta = self._extract_usage_metadata(response, messages)
+
+            yield {
+                "type": "done",
+                "text": full_text,
+                "meta": {
+                    **usage_meta,
+                    "raw_response": {},
+                    "model_name": self.model,
+                },
+            }
+
+        except Exception as e:
+            logger.error(f"Google API streaming request failed: {e}")
+            raise RuntimeError(f"Google API streaming request failed: {e}") from e
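Both Google drivers accept tools in the OpenAI function format and convert them to Gemini function declarations, as the loop above shows. A sketch of calling the new async tool-use method with one such tool; the constructor arguments and model name are assumptions, since the driver's __init__ is not part of this diff:

    import asyncio

    from prompture.drivers.async_google_driver import AsyncGoogleDriver

    WEATHER_TOOL = {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Look up current weather for a city.",
            "parameters": {
                "type": "object",
                "properties": {"city": {"type": "string"}},
                "required": ["city"],
            },
        },
    }

    async def main() -> None:
        driver = AsyncGoogleDriver(api_key="...", model="gemini-1.5-flash")  # assumed constructor
        result = await driver.generate_messages_with_tools(
            [{"role": "user", "content": "What is the weather in Lisbon?"}],
            tools=[WEATHER_TOOL],
            options={},
        )
        # stop_reason is forced to "tool_use" whenever tool_calls is non-empty.
        for call in result["tool_calls"]:
            print(call["id"], call["name"], call["arguments"])

    asyncio.run(main())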
prompture/drivers/async_grok_driver.py
CHANGED

@@ -88,7 +88,7 @@ class AsyncGrokDriver(CostMixin, AsyncDriver):
             "prompt_tokens": prompt_tokens,
             "completion_tokens": completion_tokens,
             "total_tokens": total_tokens,
-            "cost": total_cost,
+            "cost": round(total_cost, 6),
             "raw_response": resp,
             "model_name": model,
         }
prompture/drivers/async_groq_driver.py
CHANGED

@@ -81,7 +81,7 @@ class AsyncGroqDriver(CostMixin, AsyncDriver):
             "prompt_tokens": prompt_tokens,
             "completion_tokens": completion_tokens,
             "total_tokens": total_tokens,
-            "cost": total_cost,
+            "cost": round(total_cost, 6),
             "raw_response": resp.model_dump(),
             "model_name": model,
         }
prompture/drivers/async_openai_driver.py
CHANGED

@@ -2,7 +2,9 @@

 from __future__ import annotations

+import json
 import os
+from collections.abc import AsyncIterator
 from typing import Any

 try:
@@ -18,6 +20,8 @@ from .openai_driver import OpenAIDriver
 class AsyncOpenAIDriver(CostMixin, AsyncDriver):
     supports_json_mode = True
     supports_json_schema = True
+    supports_tool_use = True
+    supports_streaming = True
     supports_vision = True

     MODEL_PRICING = OpenAIDriver.MODEL_PRICING
@@ -93,10 +97,148 @@ class AsyncOpenAIDriver(CostMixin, AsyncDriver):
             "prompt_tokens": prompt_tokens,
             "completion_tokens": completion_tokens,
             "total_tokens": total_tokens,
-            "cost": total_cost,
+            "cost": round(total_cost, 6),
             "raw_response": resp.model_dump(),
             "model_name": model,
         }

         text = resp.choices[0].message.content
         return {"text": text, "meta": meta}
+
+    # ------------------------------------------------------------------
+    # Tool use
+    # ------------------------------------------------------------------
+
+    async def generate_messages_with_tools(
+        self,
+        messages: list[dict[str, Any]],
+        tools: list[dict[str, Any]],
+        options: dict[str, Any],
+    ) -> dict[str, Any]:
+        """Generate a response that may include tool calls."""
+        if self.client is None:
+            raise RuntimeError("openai package (>=1.0.0) is not installed")
+
+        model = options.get("model", self.model)
+        model_info = self.MODEL_PRICING.get(model, {})
+        tokens_param = model_info.get("tokens_param", "max_tokens")
+        supports_temperature = model_info.get("supports_temperature", True)
+
+        opts = {"temperature": 1.0, "max_tokens": 512, **options}
+
+        kwargs: dict[str, Any] = {
+            "model": model,
+            "messages": messages,
+            "tools": tools,
+        }
+        kwargs[tokens_param] = opts.get("max_tokens", 512)
+
+        if supports_temperature and "temperature" in opts:
+            kwargs["temperature"] = opts["temperature"]
+
+        resp = await self.client.chat.completions.create(**kwargs)
+
+        usage = getattr(resp, "usage", None)
+        prompt_tokens = getattr(usage, "prompt_tokens", 0)
+        completion_tokens = getattr(usage, "completion_tokens", 0)
+        total_tokens = getattr(usage, "total_tokens", 0)
+        total_cost = self._calculate_cost("openai", model, prompt_tokens, completion_tokens)
+
+        meta = {
+            "prompt_tokens": prompt_tokens,
+            "completion_tokens": completion_tokens,
+            "total_tokens": total_tokens,
+            "cost": round(total_cost, 6),
+            "raw_response": resp.model_dump(),
+            "model_name": model,
+        }
+
+        choice = resp.choices[0]
+        text = choice.message.content or ""
+        stop_reason = choice.finish_reason
+
+        tool_calls_out: list[dict[str, Any]] = []
+        if choice.message.tool_calls:
+            for tc in choice.message.tool_calls:
+                try:
+                    args = json.loads(tc.function.arguments)
+                except (json.JSONDecodeError, TypeError):
+                    args = {}
+                tool_calls_out.append({
+                    "id": tc.id,
+                    "name": tc.function.name,
+                    "arguments": args,
+                })
+
+        return {
+            "text": text,
+            "meta": meta,
+            "tool_calls": tool_calls_out,
+            "stop_reason": stop_reason,
+        }
+
+    # ------------------------------------------------------------------
+    # Streaming
+    # ------------------------------------------------------------------
+
+    async def generate_messages_stream(
+        self,
+        messages: list[dict[str, Any]],
+        options: dict[str, Any],
+    ) -> AsyncIterator[dict[str, Any]]:
+        """Yield response chunks via OpenAI streaming API."""
+        if self.client is None:
+            raise RuntimeError("openai package (>=1.0.0) is not installed")
+
+        model = options.get("model", self.model)
+        model_info = self.MODEL_PRICING.get(model, {})
+        tokens_param = model_info.get("tokens_param", "max_tokens")
+        supports_temperature = model_info.get("supports_temperature", True)
+
+        opts = {"temperature": 1.0, "max_tokens": 512, **options}
+
+        kwargs: dict[str, Any] = {
+            "model": model,
+            "messages": messages,
+            "stream": True,
+            "stream_options": {"include_usage": True},
+        }
+        kwargs[tokens_param] = opts.get("max_tokens", 512)
+
+        if supports_temperature and "temperature" in opts:
+            kwargs["temperature"] = opts["temperature"]
+
+        stream = await self.client.chat.completions.create(**kwargs)
+
+        full_text = ""
+        prompt_tokens = 0
+        completion_tokens = 0
+
+        async for chunk in stream:
+            # Usage comes in the final chunk
+            if getattr(chunk, "usage", None):
+                prompt_tokens = chunk.usage.prompt_tokens or 0
+                completion_tokens = chunk.usage.completion_tokens or 0
+
+            if chunk.choices:
+                delta = chunk.choices[0].delta
+                content = getattr(delta, "content", None) or ""
+                if content:
+                    full_text += content
+                    yield {"type": "delta", "text": content}
+
+        total_tokens = prompt_tokens + completion_tokens
+        total_cost = self._calculate_cost("openai", model, prompt_tokens, completion_tokens)
+
+        yield {
+            "type": "done",
+            "text": full_text,
+            "meta": {
+                "prompt_tokens": prompt_tokens,
+                "completion_tokens": completion_tokens,
+                "total_tokens": total_tokens,
+                "cost": round(total_cost, 6),
+                "raw_response": {},
+                "model_name": model,
+            },
+        }
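The OpenAI driver returns tool calls with their JSON arguments already decoded into dicts, so dispatching them to local callables is straightforward. A sketch of such a dispatcher; the registry and the example function are hypothetical and not part of the package:

    from typing import Any, Callable

    # Hypothetical registry mapping tool names to local Python callables.
    TOOL_REGISTRY: dict[str, Callable[..., Any]] = {
        "get_weather": lambda city: {"city": city, "temp_c": 21},
    }

    def dispatch_tool_calls(result: dict[str, Any]) -> list[dict[str, Any]]:
        """Run each tool call returned by generate_messages_with_tools."""
        outputs: list[dict[str, Any]] = []
        for call in result.get("tool_calls", []):
            fn = TOOL_REGISTRY.get(call["name"])
            if fn is None:
                continue  # unknown tool; the caller decides how to report this
            outputs.append({"id": call["id"], "output": fn(**call["arguments"])})
        return outputs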
prompture/drivers/async_openrouter_driver.py
CHANGED

@@ -93,7 +93,7 @@ class AsyncOpenRouterDriver(CostMixin, AsyncDriver):
             "prompt_tokens": prompt_tokens,
             "completion_tokens": completion_tokens,
             "total_tokens": total_tokens,
-            "cost": total_cost,
+            "cost": round(total_cost, 6),
             "raw_response": resp,
             "model_name": model,
         }
prompture/drivers/google_driver.py
CHANGED

@@ -1,5 +1,7 @@
 import logging
 import os
+import uuid
+from collections.abc import Iterator
 from typing import Any, Optional

 import google.generativeai as genai
@@ -16,6 +18,8 @@ class GoogleDriver(CostMixin, Driver):
     supports_json_mode = True
     supports_json_schema = True
     supports_vision = True
+    supports_tool_use = True
+    supports_streaming = True

     # Based on current Gemini pricing (as of 2025)
     # Source: https://cloud.google.com/vertex-ai/pricing#gemini_models
@@ -106,6 +110,40 @@ class GoogleDriver(CostMixin, Driver):
         completion_cost = (completion_chars / 1_000_000) * model_pricing["completion"]
         return round(prompt_cost + completion_cost, 6)

+    def _extract_usage_metadata(self, response: Any, messages: list[dict[str, Any]]) -> dict[str, Any]:
+        """Extract token counts from response, falling back to character estimation."""
+        usage = getattr(response, "usage_metadata", None)
+        if usage:
+            prompt_tokens = getattr(usage, "prompt_token_count", 0) or 0
+            completion_tokens = getattr(usage, "candidates_token_count", 0) or 0
+            total_tokens = getattr(usage, "total_token_count", 0) or (prompt_tokens + completion_tokens)
+            cost = self._calculate_cost("google", self.model, prompt_tokens, completion_tokens)
+        else:
+            # Fallback: estimate from character counts
+            total_prompt_chars = 0
+            for msg in messages:
+                c = msg.get("content", "")
+                if isinstance(c, str):
+                    total_prompt_chars += len(c)
+                elif isinstance(c, list):
+                    for part in c:
+                        if isinstance(part, str):
+                            total_prompt_chars += len(part)
+                        elif isinstance(part, dict) and "text" in part:
+                            total_prompt_chars += len(part["text"])
+            completion_chars = len(response.text) if response.text else 0
+            prompt_tokens = total_prompt_chars // 4
+            completion_tokens = completion_chars // 4
+            total_tokens = prompt_tokens + completion_tokens
+            cost = self._calculate_cost_chars(total_prompt_chars, completion_chars)
+
+        return {
+            "prompt_tokens": prompt_tokens,
+            "completion_tokens": completion_tokens,
+            "total_tokens": total_tokens,
+            "cost": round(cost, 6),
+        }
+
     supports_messages = True

     def _prepare_messages(self, messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
@@ -113,23 +151,21 @@ class GoogleDriver(CostMixin, Driver):

         return _prepare_google_vision_messages(messages)

-    def
-        messages
-
-
-    def generate_messages(self, messages: list[dict[str, str]], options: dict[str, Any]) -> dict[str, Any]:
-        return self._do_generate(self._prepare_messages(messages), options)
+    def _build_generation_args(
+        self, messages: list[dict[str, Any]], options: Optional[dict[str, Any]] = None
+    ) -> tuple[Any, dict[str, Any]]:
+        """Parse messages and options into (gen_input, kwargs) for generate_content.

-
+        Returns the content input and a dict of keyword arguments
+        (generation_config, safety_settings, model kwargs including system_instruction).
+        """
         merged_options = self.options.copy()
         if options:
             merged_options.update(options)

-        # Extract specific options for Google's API
         generation_config = merged_options.get("generation_config", {})
         safety_settings = merged_options.get("safety_settings", {})

-        # Map common options to generation_config if not present
         if "temperature" in merged_options and "temperature" not in generation_config:
             generation_config["temperature"] = merged_options["temperature"]
         if "max_tokens" in merged_options and "max_output_tokens" not in generation_config:
@@ -155,56 +191,57 @@ class GoogleDriver(CostMixin, Driver):
             if role == "system":
                 system_instruction = content if isinstance(content, str) else str(content)
             else:
-                # Gemini uses "model" for assistant role
                 gemini_role = "model" if role == "assistant" else "user"
                 if msg.get("_vision_parts"):
-                    # Already converted to Gemini parts by _prepare_messages
                     contents.append({"role": gemini_role, "parts": content})
                 else:
                     contents.append({"role": gemini_role, "parts": [content]})

+        # For a single message, unwrap only if it has exactly one string part
+        if len(contents) == 1:
+            parts = contents[0]["parts"]
+            if len(parts) == 1 and isinstance(parts[0], str):
+                gen_input = parts[0]
+            else:
+                gen_input = contents
+        else:
+            gen_input = contents
+
+        model_kwargs: dict[str, Any] = {}
+        if system_instruction:
+            model_kwargs["system_instruction"] = system_instruction
+
+        gen_kwargs: dict[str, Any] = {
+            "generation_config": generation_config if generation_config else None,
+            "safety_settings": safety_settings if safety_settings else None,
+        }
+
+        return gen_input, gen_kwargs, model_kwargs
+
+    def generate(self, prompt: str, options: Optional[dict[str, Any]] = None) -> dict[str, Any]:
+        messages = [{"role": "user", "content": prompt}]
+        return self._do_generate(messages, options)
+
+    def generate_messages(self, messages: list[dict[str, str]], options: dict[str, Any]) -> dict[str, Any]:
+        return self._do_generate(self._prepare_messages(messages), options)
+
+    def _do_generate(self, messages: list[dict[str, str]], options: Optional[dict[str, Any]] = None) -> dict[str, Any]:
+        gen_input, gen_kwargs, model_kwargs = self._build_generation_args(messages, options)
+
         try:
             logger.debug(f"Initializing {self.model} for generation")
-            model_kwargs: dict[str, Any] = {}
-            if system_instruction:
-                model_kwargs["system_instruction"] = system_instruction
             model = genai.GenerativeModel(self.model, **model_kwargs)

-
-
-            # If single user message, pass content directly for backward compatibility
-            gen_input: Any = contents if len(contents) != 1 else contents[0]["parts"][0]
-            response = model.generate_content(
-                gen_input,
-                generation_config=generation_config if generation_config else None,
-                safety_settings=safety_settings if safety_settings else None,
-            )
+            logger.debug(f"Generating with model {self.model}")
+            response = model.generate_content(gen_input, **gen_kwargs)

             if not response.text:
                 raise ValueError("Empty response from model")

-
-            total_prompt_chars = 0
-            for msg in messages:
-                c = msg.get("content", "")
-                if isinstance(c, str):
-                    total_prompt_chars += len(c)
-                elif isinstance(c, list):
-                    for part in c:
-                        if isinstance(part, str):
-                            total_prompt_chars += len(part)
-                        elif isinstance(part, dict) and "text" in part:
-                            total_prompt_chars += len(part["text"])
-            completion_chars = len(response.text)
-
-            # Google uses character-based cost estimation
-            total_cost = self._calculate_cost_chars(total_prompt_chars, completion_chars)
+            usage_meta = self._extract_usage_metadata(response, messages)

             meta = {
-
-                "completion_chars": completion_chars,
-                "total_chars": total_prompt_chars + completion_chars,
-                "cost": total_cost,
+                **usage_meta,
                 "raw_response": response.prompt_feedback if hasattr(response, "prompt_feedback") else None,
                 "model_name": self.model,
             }
@@ -214,3 +251,130 @@ class GoogleDriver(CostMixin, Driver):
         except Exception as e:
             logger.error(f"Google API request failed: {e}")
             raise RuntimeError(f"Google API request failed: {e}") from e
+
+    # ------------------------------------------------------------------
+    # Tool use
+    # ------------------------------------------------------------------
+
+    def generate_messages_with_tools(
+        self,
+        messages: list[dict[str, Any]],
+        tools: list[dict[str, Any]],
+        options: dict[str, Any],
+    ) -> dict[str, Any]:
+        """Generate a response that may include tool/function calls."""
+        gen_input, gen_kwargs, model_kwargs = self._build_generation_args(
+            self._prepare_messages(messages), options
+        )
+
+        # Convert tools from OpenAI format to Gemini function declarations
+        function_declarations = []
+        for t in tools:
+            if "type" in t and t["type"] == "function":
+                fn = t["function"]
+                decl = {
+                    "name": fn["name"],
+                    "description": fn.get("description", ""),
+                }
+                params = fn.get("parameters")
+                if params:
+                    decl["parameters"] = params
+                function_declarations.append(decl)
+            elif "name" in t:
+                # Already in a generic format
+                decl = {"name": t["name"], "description": t.get("description", "")}
+                params = t.get("parameters") or t.get("input_schema")
+                if params:
+                    decl["parameters"] = params
+                function_declarations.append(decl)
+
+        try:
+            model = genai.GenerativeModel(self.model, **model_kwargs)
+
+            gemini_tools = [genai.types.Tool(function_declarations=function_declarations)]
+            response = model.generate_content(gen_input, tools=gemini_tools, **gen_kwargs)
+
+            usage_meta = self._extract_usage_metadata(response, messages)
+            meta = {
+                **usage_meta,
+                "raw_response": response.prompt_feedback if hasattr(response, "prompt_feedback") else None,
+                "model_name": self.model,
+            }
+
+            text = ""
+            tool_calls_out: list[dict[str, Any]] = []
+            stop_reason = "stop"
+
+            for candidate in response.candidates:
+                for part in candidate.content.parts:
+                    if hasattr(part, "text") and part.text:
+                        text += part.text
+                    if hasattr(part, "function_call") and part.function_call.name:
+                        fc = part.function_call
+                        tool_calls_out.append({
+                            "id": str(uuid.uuid4()),
+                            "name": fc.name,
+                            "arguments": dict(fc.args) if fc.args else {},
+                        })
+
+                finish_reason = getattr(candidate, "finish_reason", None)
+                if finish_reason is not None:
+                    # Map Gemini finish reasons to standard stop reasons
+                    reason_map = {1: "stop", 2: "max_tokens", 3: "safety", 4: "recitation", 5: "other"}
+                    stop_reason = reason_map.get(finish_reason, "stop")
+
+            if tool_calls_out:
+                stop_reason = "tool_use"
+
+            return {
+                "text": text,
+                "meta": meta,
+                "tool_calls": tool_calls_out,
+                "stop_reason": stop_reason,
+            }
+
+        except Exception as e:
+            logger.error(f"Google API tool call request failed: {e}")
+            raise RuntimeError(f"Google API tool call request failed: {e}") from e
+
+    # ------------------------------------------------------------------
+    # Streaming
+    # ------------------------------------------------------------------
+
+    def generate_messages_stream(
+        self,
+        messages: list[dict[str, Any]],
+        options: dict[str, Any],
+    ) -> Iterator[dict[str, Any]]:
+        """Yield response chunks via Gemini streaming API."""
+        gen_input, gen_kwargs, model_kwargs = self._build_generation_args(
+            self._prepare_messages(messages), options
+        )
+
+        try:
+            model = genai.GenerativeModel(self.model, **model_kwargs)
+            response = model.generate_content(gen_input, stream=True, **gen_kwargs)
+
+            full_text = ""
+            for chunk in response:
+                chunk_text = getattr(chunk, "text", None) or ""
+                if chunk_text:
+                    full_text += chunk_text
+                    yield {"type": "delta", "text": chunk_text}
+
+            # After iteration completes, resolve() has been called on the response
+            usage_meta = self._extract_usage_metadata(response, messages)
+
+            yield {
+                "type": "done",
+                "text": full_text,
+                "meta": {
+                    **usage_meta,
+                    "raw_response": {},
+                    "model_name": self.model,
+                },
+            }
+
+        except Exception as e:
+            logger.error(f"Google API streaming request failed: {e}")
+            raise RuntimeError(f"Google API streaming request failed: {e}") from e
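The synchronous GoogleDriver gains the same tool-use and streaming surface, and its streaming generator can be consumed with a plain for loop. A sketch, again with assumed constructor arguments and model name:

    from prompture.drivers.google_driver import GoogleDriver

    driver = GoogleDriver(api_key="...", model="gemini-1.5-flash")  # assumed constructor
    collected: list[str] = []
    for chunk in driver.generate_messages_stream(
        [{"role": "user", "content": "Stream a one-line haiku about diffs."}],
        options={},
    ):
        if chunk["type"] == "delta":
            collected.append(chunk["text"])
        elif chunk["type"] == "done":
            # Token counts come from usage_metadata when present,
            # otherwise from the character-based estimate shown above.
            print("".join(collected))
            print("total_tokens:", chunk["meta"]["total_tokens"])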
{prompture-0.0.38.dev1.dist-info → prompture-0.0.38.dev3.dist-info}/RECORD
CHANGED

@@ -1,5 +1,5 @@
 prompture/__init__.py,sha256=RrpHZlLPpzntUOp2tL2II2DdVxQRoCxY6JBF_b4k3s0,7213
-prompture/_version.py,sha256=
+prompture/_version.py,sha256=e1uep7-PEqCFbKHaF3uTPcu4UaXdHJjkYrnGcuFmFZM,719
 prompture/agent.py,sha256=xe_yFHGDzTxaU4tmaLt5AQnzrN0I72hBGwGVrCxg2D0,34704
 prompture/agent_types.py,sha256=Icl16PQI-ThGLMFCU43adtQA6cqETbsPn4KssKBI4xc,4664
 prompture/async_agent.py,sha256=nOLOQCNkg0sKKTpryIiidmIcAAlA3FR2NfnZwrNBuCg,33066
@@ -35,21 +35,21 @@ prompture/aio/__init__.py,sha256=bKqTu4Jxld16aP_7SP9wU5au45UBIb041ORo4E4HzVo,181
 prompture/drivers/__init__.py,sha256=VuEBZPqaQzXLl_Lvn_c5mRlJJrrlObZCLeHaR8n2eJ4,7050
 prompture/drivers/airllm_driver.py,sha256=SaTh7e7Plvuct_TfRqQvsJsKHvvM_3iVqhBtlciM-Kw,3858
 prompture/drivers/async_airllm_driver.py,sha256=1hIWLXfyyIg9tXaOE22tLJvFyNwHnOi1M5BIKnV8ysk,908
-prompture/drivers/async_azure_driver.py,sha256=
-prompture/drivers/async_claude_driver.py,sha256=
-prompture/drivers/async_google_driver.py,sha256=
-prompture/drivers/async_grok_driver.py,sha256=
-prompture/drivers/async_groq_driver.py,sha256=
+prompture/drivers/async_azure_driver.py,sha256=lGZICROspP2_o2XlwIZZvrCDenSJZPNYTu7clCgRD68,4473
+prompture/drivers/async_claude_driver.py,sha256=dbUHH2EEotxUWz8cTXVCWtf4ExtiLv3FzzNenvHSVVI,10275
+prompture/drivers/async_google_driver.py,sha256=MIemYcE0ppSWfvVaxv4V-Tqjmy6BKO7sRG6UfZqtdV8,13349
+prompture/drivers/async_grok_driver.py,sha256=fvqEK-mrAx4U4_0C1RePGdZ-TUmQI9Qvj-x1f_uGI5c,3556
+prompture/drivers/async_groq_driver.py,sha256=PEAAj7QHjVqT9UtLfnFY4i__Mk-QpngmHGvbaBNEUrE,3085
 prompture/drivers/async_hugging_driver.py,sha256=IblxqU6TpNUiigZ0BCgNkAgzpUr2FtPHJOZnOZMnHF0,2152
 prompture/drivers/async_lmstudio_driver.py,sha256=rPn2qVPm6UE2APzAn7ZHYTELUwr0dQMi8XHv6gAhyH8,5782
 prompture/drivers/async_local_http_driver.py,sha256=qoigIf-w3_c2dbVdM6m1e2RMAWP4Gk4VzVs5hM3lPvQ,1609
 prompture/drivers/async_ollama_driver.py,sha256=FaSXtFXrgeVHIe0b90Vg6rGeSTWLpPnjaThh9Ai7qQo,5042
-prompture/drivers/async_openai_driver.py,sha256=
-prompture/drivers/async_openrouter_driver.py,sha256=
+prompture/drivers/async_openai_driver.py,sha256=6p538rPlfAWhsTZ5HKAg8KEW1xM4WEFzXVPZsigz_P4,8704
+prompture/drivers/async_openrouter_driver.py,sha256=qvvwJADjnEj6J9f8m0eGlfWTBEm6oXTjwrgt_Im4K7w,3793
 prompture/drivers/async_registry.py,sha256=syervbb7THneJ-NUVSuxy4cnxGW6VuNzKv-Aqqn2ysU,4329
 prompture/drivers/azure_driver.py,sha256=QZr7HEvgSKT9LOTCtCjuBdHl57yvrnWmeTHtmewuJQY,5727
 prompture/drivers/claude_driver.py,sha256=8XnCBHtk6N_PzHStwxIUlcvekdPN896BqOLShmgxU9k,11536
-prompture/drivers/google_driver.py,sha256=
+prompture/drivers/google_driver.py,sha256=8bnAcve1xtgpUXrCdVzWpU_yAqwaeuiBWk8-PbG1cmM,15956
 prompture/drivers/grok_driver.py,sha256=AIwuzNAQyOhmVDA07ISWt2e-rsv5aYk3I5AM4HkLM7o,5294
 prompture/drivers/groq_driver.py,sha256=9cZI21RsgYJTjnrtX2fVA0AadDL-VklhY4ugjDCutwM,4195
 prompture/drivers/hugging_driver.py,sha256=gZir3XnM77VfYIdnu3S1pRftlZJM6G3L8bgGn5esg-Q,2346
@@ -69,9 +69,9 @@ prompture/scaffold/templates/env.example.j2,sha256=eESKr1KWgyrczO6d-nwAhQwSpf_G-
 prompture/scaffold/templates/main.py.j2,sha256=TEgc5OvsZOEX0JthkSW1NI_yLwgoeVN_x97Ibg-vyWY,2632
 prompture/scaffold/templates/models.py.j2,sha256=JrZ99GCVK6TKWapskVRSwCssGrTu5cGZ_r46fOhY2GE,858
 prompture/scaffold/templates/requirements.txt.j2,sha256=m3S5fi1hq9KG9l_9j317rjwWww0a43WMKd8VnUWv2A4,102
-prompture-0.0.38.
-prompture-0.0.38.
-prompture-0.0.38.
-prompture-0.0.38.
-prompture-0.0.38.
-prompture-0.0.38.
+prompture-0.0.38.dev3.dist-info/licenses/LICENSE,sha256=0HgDepH7aaHNFhHF-iXuW6_GqDfYPnVkjtiCAZ4yS8I,1060
+prompture-0.0.38.dev3.dist-info/METADATA,sha256=ejIH91dOyVKrmJ4nKEbsutiI5Gb2xMRiqKuhzgz04Kw,10842
+prompture-0.0.38.dev3.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+prompture-0.0.38.dev3.dist-info/entry_points.txt,sha256=AFPG3lJR86g4IJMoWQUW5Ph7G6MLNWG3A2u2Tp9zkp8,48
+prompture-0.0.38.dev3.dist-info/top_level.txt,sha256=to86zq_kjfdoLeAxQNr420UWqT0WzkKoZ509J7Qr2t4,10
+prompture-0.0.38.dev3.dist-info/RECORD,,
{prompture-0.0.38.dev1.dist-info → prompture-0.0.38.dev3.dist-info}/WHEEL
File without changes

{prompture-0.0.38.dev1.dist-info → prompture-0.0.38.dev3.dist-info}/entry_points.txt
File without changes

{prompture-0.0.38.dev1.dist-info → prompture-0.0.38.dev3.dist-info}/licenses/LICENSE
File without changes

{prompture-0.0.38.dev1.dist-info → prompture-0.0.38.dev3.dist-info}/top_level.txt
File without changes