prompture 0.0.47.dev2__py3-none-any.whl → 0.0.48__py3-none-any.whl
This diff shows the changes between package versions as published to a supported public registry. It is provided for informational purposes only.
- prompture/_version.py +2 -2
- prompture/async_conversation.py +16 -0
- prompture/conversation.py +16 -0
- prompture/drivers/async_claude_driver.py +32 -7
- prompture/drivers/async_grok_driver.py +23 -9
- prompture/drivers/async_groq_driver.py +23 -9
- prompture/drivers/async_lmstudio_driver.py +10 -2
- prompture/drivers/async_moonshot_driver.py +20 -9
- prompture/drivers/async_ollama_driver.py +27 -3
- prompture/drivers/async_openrouter_driver.py +43 -17
- prompture/drivers/claude_driver.py +43 -7
- prompture/drivers/grok_driver.py +23 -9
- prompture/drivers/groq_driver.py +23 -9
- prompture/drivers/lmstudio_driver.py +11 -2
- prompture/drivers/moonshot_driver.py +27 -16
- prompture/drivers/ollama_driver.py +42 -9
- prompture/drivers/openrouter_driver.py +34 -10
- {prompture-0.0.47.dev2.dist-info → prompture-0.0.48.dist-info}/METADATA +1 -1
- {prompture-0.0.47.dev2.dist-info → prompture-0.0.48.dist-info}/RECORD +23 -23
- {prompture-0.0.47.dev2.dist-info → prompture-0.0.48.dist-info}/WHEEL +0 -0
- {prompture-0.0.47.dev2.dist-info → prompture-0.0.48.dist-info}/entry_points.txt +0 -0
- {prompture-0.0.47.dev2.dist-info → prompture-0.0.48.dist-info}/licenses/LICENSE +0 -0
- {prompture-0.0.47.dev2.dist-info → prompture-0.0.48.dist-info}/top_level.txt +0 -0
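The common thread of this release: every driver now surfaces an optional `reasoning_content` key next to `text` and `meta`, falls back to reasoning text when `content` is empty, and the Moonshot and Ollama drivers gain a caller-overridable request `timeout` (default 300 s). A minimal sketch of the result shape a consumer sees — the concrete values are invented for illustration, not taken from the diff:

```python
from typing import Any

# Shape of the dicts the drivers below now build: "reasoning_content" sits
# next to "text" and "meta", and is omitted entirely when the model emitted
# no reasoning tokens.
result: dict[str, Any] = {
    "text": "The sky is blue because of Rayleigh scattering.",
    "meta": {"total_tokens": 42},
    "reasoning_content": "Shorter wavelengths scatter more strongly...",
}

answer = result["text"]
reasoning = result.get("reasoning_content")  # None when the key is absent
if reasoning is not None:
    print("model reasoning:", reasoning)
```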
prompture/drivers/claude_driver.py
CHANGED

```diff
@@ -131,6 +131,13 @@ class ClaudeDriver(CostMixin, Driver):
         resp = client.messages.create(**common_kwargs)
         text = resp.content[0].text
 
+        # Extract reasoning/thinking content from content blocks
+        reasoning_content = self._extract_thinking(resp.content)
+
+        # Fallback: use reasoning as text if content is empty
+        if not text and reasoning_content:
+            text = reasoning_content
+
         # Extract token usage from Claude response
         prompt_tokens = resp.usage.input_tokens
         completion_tokens = resp.usage.output_tokens
@@ -149,12 +156,26 @@ class ClaudeDriver(CostMixin, Driver):
             "model_name": model,
         }
 
-
+        result: dict[str, Any] = {"text": text, "meta": meta}
+        if reasoning_content is not None:
+            result["reasoning_content"] = reasoning_content
+        return result
 
     # ------------------------------------------------------------------
     # Helpers
     # ------------------------------------------------------------------
 
+    @staticmethod
+    def _extract_thinking(content_blocks: list[Any]) -> str | None:
+        """Extract thinking/reasoning text from Claude content blocks."""
+        parts: list[str] = []
+        for block in content_blocks:
+            if getattr(block, "type", None) == "thinking":
+                thinking_text = getattr(block, "thinking", "")
+                if thinking_text:
+                    parts.append(thinking_text)
+        return "\n".join(parts) if parts else None
+
     def _extract_system_and_messages(
         self, messages: list[dict[str, Any]]
     ) -> tuple[str | None, list[dict[str, Any]]]:
@@ -246,12 +267,17 @@ class ClaudeDriver(CostMixin, Driver):
                     "arguments": block.input,
                 })
 
-
+        reasoning_content = self._extract_thinking(resp.content)
+
+        result: dict[str, Any] = {
             "text": text,
             "meta": meta,
             "tool_calls": tool_calls_out,
             "stop_reason": resp.stop_reason,
         }
+        if reasoning_content is not None:
+            result["reasoning_content"] = reasoning_content
+        return result
 
     # ------------------------------------------------------------------
     # Streaming
@@ -282,6 +308,7 @@ class ClaudeDriver(CostMixin, Driver):
             kwargs["system"] = system_content
 
         full_text = ""
+        full_reasoning = ""
         prompt_tokens = 0
         completion_tokens = 0
 
@@ -289,10 +316,16 @@ class ClaudeDriver(CostMixin, Driver):
         for event in stream:
             if hasattr(event, "type"):
                 if event.type == "content_block_delta" and hasattr(event, "delta"):
-
-                    if
-
-
+                    delta_type = getattr(event.delta, "type", "")
+                    if delta_type == "thinking_delta":
+                        thinking_text = getattr(event.delta, "thinking", "")
+                        if thinking_text:
+                            full_reasoning += thinking_text
+                    else:
+                        delta_text = getattr(event.delta, "text", "")
+                        if delta_text:
+                            full_text += delta_text
+                            yield {"type": "delta", "text": delta_text}
                 elif event.type == "message_delta" and hasattr(event, "usage"):
                     completion_tokens = getattr(event.usage, "output_tokens", 0)
                 elif event.type == "message_start" and hasattr(event, "message"):
@@ -303,7 +336,7 @@ class ClaudeDriver(CostMixin, Driver):
         total_tokens = prompt_tokens + completion_tokens
         total_cost = self._calculate_cost("claude", model, prompt_tokens, completion_tokens)
 
-
+        done_chunk: dict[str, Any] = {
             "type": "done",
             "text": full_text,
             "meta": {
@@ -315,3 +348,6 @@ class ClaudeDriver(CostMixin, Driver):
                 "model_name": model,
             },
         }
+        if full_reasoning:
+            done_chunk["reasoning_content"] = full_reasoning
+        yield done_chunk
```
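The new `_extract_thinking` helper can be exercised on its own. A minimal sketch over stand-in content blocks — the `SimpleNamespace` mocks are illustrative, not the Anthropic SDK types:

```python
from types import SimpleNamespace
from typing import Any

def extract_thinking(content_blocks: list[Any]) -> str | None:
    """Standalone copy of ClaudeDriver._extract_thinking from the hunk above."""
    parts: list[str] = []
    for block in content_blocks:
        if getattr(block, "type", None) == "thinking":
            thinking_text = getattr(block, "thinking", "")
            if thinking_text:
                parts.append(thinking_text)
    return "\n".join(parts) if parts else None

# Mock Claude content blocks: one thinking block, one ordinary text block.
blocks = [
    SimpleNamespace(type="thinking", thinking="First, recall that..."),
    SimpleNamespace(type="text", text="The answer is 42."),
]
assert extract_thinking(blocks) == "First, recall that..."
assert extract_thinking([blocks[1]]) is None  # no thinking blocks -> None
```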
prompture/drivers/grok_driver.py
CHANGED

```diff
@@ -154,8 +154,17 @@ class GrokDriver(CostMixin, Driver):
             "model_name": model,
         }
 
-
-
+        message = resp["choices"][0]["message"]
+        text = message.get("content") or ""
+        reasoning_content = message.get("reasoning_content")
+
+        if not text and reasoning_content:
+            text = reasoning_content
+
+        result: dict[str, Any] = {"text": text, "meta": meta}
+        if reasoning_content is not None:
+            result["reasoning_content"] = reasoning_content
+        return result
 
     # ------------------------------------------------------------------
     # Tool use
@@ -227,15 +236,20 @@ class GrokDriver(CostMixin, Driver):
                 args = json.loads(tc["function"]["arguments"])
             except (json.JSONDecodeError, TypeError):
                 args = {}
-            tool_calls_out.append(
-
-
-
-
-
+            tool_calls_out.append(
+                {
+                    "id": tc["id"],
+                    "name": tc["function"]["name"],
+                    "arguments": args,
+                }
+            )
+
+        result: dict[str, Any] = {
             "text": text,
             "meta": meta,
             "tool_calls": tool_calls_out,
             "stop_reason": stop_reason,
        }
+        if choice["message"].get("reasoning_content") is not None:
+            result["reasoning_content"] = choice["message"]["reasoning_content"]
+        return result
```
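The same unpacking logic, lifted out of the driver as a standalone sketch over a plain response dict (the function name `unpack_message` is ours, not the driver's):

```python
from typing import Any

def unpack_message(resp: dict[str, Any], meta: dict[str, Any]) -> dict[str, Any]:
    """Mirrors the GrokDriver logic above on a raw chat-completions payload."""
    message = resp["choices"][0]["message"]
    text = message.get("content") or ""
    reasoning_content = message.get("reasoning_content")
    if not text and reasoning_content:
        text = reasoning_content  # fall back to reasoning when content is empty
    result: dict[str, Any] = {"text": text, "meta": meta}
    if reasoning_content is not None:
        result["reasoning_content"] = reasoning_content
    return result

# A reasoning model may leave "content" empty and answer in "reasoning_content".
resp = {"choices": [{"message": {"content": "", "reasoning_content": "Thinking..."}}]}
out = unpack_message(resp, meta={})
assert out["text"] == "Thinking..." and out["reasoning_content"] == "Thinking..."
```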
prompture/drivers/groq_driver.py
CHANGED

```diff
@@ -122,8 +122,16 @@ class GroqDriver(CostMixin, Driver):
         }
 
         # Extract generated text
-        text = resp.choices[0].message.content
-
+        text = resp.choices[0].message.content or ""
+        reasoning_content = getattr(resp.choices[0].message, "reasoning_content", None)
+
+        if not text and reasoning_content:
+            text = reasoning_content
+
+        result: dict[str, Any] = {"text": text, "meta": meta}
+        if reasoning_content is not None:
+            result["reasoning_content"] = reasoning_content
+        return result
 
     # ------------------------------------------------------------------
     # Tool use
@@ -186,15 +194,21 @@ class GroqDriver(CostMixin, Driver):
                 args = json.loads(tc.function.arguments)
             except (json.JSONDecodeError, TypeError):
                 args = {}
-            tool_calls_out.append(
-
-
-
-
-
+            tool_calls_out.append(
+                {
+                    "id": tc.id,
+                    "name": tc.function.name,
+                    "arguments": args,
+                }
+            )
+
+        result: dict[str, Any] = {
             "text": text,
             "meta": meta,
             "tool_calls": tool_calls_out,
             "stop_reason": stop_reason,
         }
+        reasoning_content = getattr(choice.message, "reasoning_content", None)
+        if reasoning_content is not None:
+            result["reasoning_content"] = reasoning_content
+        return result
```
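Groq differs from Grok and OpenRouter only in transport: its SDK hands back objects rather than dicts, hence the `getattr` probing for the optional field. A stand-in illustration using `SimpleNamespace` mocks (not the real SDK types):

```python
from types import SimpleNamespace

# Stand-in for resp.choices[0].message from a reasoning model whose final
# answer arrived only in reasoning_content.
message = SimpleNamespace(content=None, reasoning_content="step 1: ...")

text = message.content or ""
reasoning_content = getattr(message, "reasoning_content", None)
if not text and reasoning_content:
    text = reasoning_content

assert text == "step 1: ..."
# Non-reasoning models simply lack the attribute; getattr keeps that safe:
assert getattr(SimpleNamespace(content="hi"), "reasoning_content", None) is None
```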
prompture/drivers/lmstudio_driver.py
CHANGED

```diff
@@ -123,7 +123,13 @@ class LMStudioDriver(Driver):
             raise RuntimeError(f"LM Studio request failed: {e}") from e
 
         # Extract text
-
+        message = response_data["choices"][0]["message"]
+        text = message.get("content") or ""
+        reasoning_content = message.get("reasoning_content")
+
+        # Reasoning models (e.g. DeepSeek R1) may return content in reasoning_content
+        if not text and reasoning_content:
+            text = reasoning_content
 
         # Meta info
         usage = response_data.get("usage", {})
@@ -140,7 +146,10 @@ class LMStudioDriver(Driver):
             "model_name": merged_options.get("model", self.model),
         }
 
-
+        result: dict[str, Any] = {"text": text, "meta": meta}
+        if reasoning_content is not None:
+            result["reasoning_content"] = reasoning_content
+        return result
 
    # -- Model management (LM Studio 0.4.0+) ----------------------------------
 
```
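Note the result-shaping convention shared across all seven drivers: `reasoning_content` is added only when present, so its absence is itself meaningful. A sketch of that pattern (the helper name `shape_result` is ours):

```python
from typing import Any

def shape_result(text: str, meta: dict[str, Any], reasoning: str | None) -> dict[str, Any]:
    # The key is set only when reasoning exists, so downstream code can use
    # `"reasoning_content" in result` as a capability check.
    result: dict[str, Any] = {"text": text, "meta": meta}
    if reasoning is not None:
        result["reasoning_content"] = reasoning
    return result

assert "reasoning_content" not in shape_result("hi", {}, None)
assert shape_result("", {}, "because...")["reasoning_content"] == "because..."
```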
prompture/drivers/moonshot_driver.py
CHANGED

```diff
@@ -167,7 +167,7 @@ class MoonshotDriver(CostMixin, Driver):
             using_json_schema=bool(options.get("json_schema")),
         )
 
-        opts = {"temperature": 1.0, "max_tokens": 512, **options}
+        opts = {"temperature": 1.0, "max_tokens": 512, "timeout": 300, **options}
         opts = self._clamp_temperature(opts)
 
         data: dict[str, Any] = {
@@ -210,7 +210,7 @@ class MoonshotDriver(CostMixin, Driver):
             f"{self.base_url}/chat/completions",
             headers=self.headers,
             json=data,
-            timeout=
+            timeout=opts.get("timeout", 300),
         )
         response.raise_for_status()
         resp = response.json()
@@ -228,10 +228,11 @@ class MoonshotDriver(CostMixin, Driver):
 
         message = resp["choices"][0]["message"]
         text = message.get("content") or ""
+        reasoning_content = message.get("reasoning_content")
 
         # Reasoning models may return content in reasoning_content when content is empty
-        if not text and
-        text =
+        if not text and reasoning_content:
+            text = reasoning_content
 
         # Structured output fallback: if we used json_schema mode and got an
         # empty response, retry with json_object mode and schema in the prompt.
@@ -260,7 +261,7 @@ class MoonshotDriver(CostMixin, Driver):
             f"{self.base_url}/chat/completions",
             headers=self.headers,
             json=fallback_data,
-            timeout=
+            timeout=opts.get("timeout", 300),
         )
         fb_response.raise_for_status()
         fb_resp = fb_response.json()
@@ -275,8 +276,9 @@ class MoonshotDriver(CostMixin, Driver):
             resp = fb_resp
             fb_message = fb_resp["choices"][0]["message"]
             text = fb_message.get("content") or ""
-
-
+            reasoning_content = fb_message.get("reasoning_content")
+            if not text and reasoning_content:
+                text = reasoning_content
 
         total_cost = self._calculate_cost("moonshot", model, prompt_tokens, completion_tokens)
 
@@ -289,7 +291,10 @@ class MoonshotDriver(CostMixin, Driver):
             "model_name": model,
         }
 
-
+        result: dict[str, Any] = {"text": text, "meta": meta}
+        if reasoning_content is not None:
+            result["reasoning_content"] = reasoning_content
+        return result
 
     # ------------------------------------------------------------------
     # Tool use
@@ -312,7 +317,7 @@ class MoonshotDriver(CostMixin, Driver):
 
         self._validate_model_capabilities("moonshot", model, using_tool_use=True)
 
-        opts = {"temperature": 1.0, "max_tokens": 512, **options}
+        opts = {"temperature": 1.0, "max_tokens": 512, "timeout": 300, **options}
         opts = self._clamp_temperature(opts)
 
         sanitized_tools = self._sanitize_tools(tools)
@@ -337,7 +342,7 @@ class MoonshotDriver(CostMixin, Driver):
             f"{self.base_url}/chat/completions",
             headers=self.headers,
             json=data,
-            timeout=
+            timeout=opts.get("timeout", 300),
         )
         response.raise_for_status()
         resp = response.json()
@@ -415,7 +420,7 @@ class MoonshotDriver(CostMixin, Driver):
         tokens_param = model_config["tokens_param"]
         supports_temperature = model_config["supports_temperature"]
 
-        opts = {"temperature": 1.0, "max_tokens": 512, **options}
+        opts = {"temperature": 1.0, "max_tokens": 512, "timeout": 300, **options}
         opts = self._clamp_temperature(opts)
 
         data: dict[str, Any] = {
@@ -434,11 +439,12 @@ class MoonshotDriver(CostMixin, Driver):
             headers=self.headers,
             json=data,
             stream=True,
-            timeout=
+            timeout=opts.get("timeout", 300),
         )
         response.raise_for_status()
 
         full_text = ""
+        full_reasoning = ""
         prompt_tokens = 0
         completion_tokens = 0
 
@@ -462,9 +468,11 @@ class MoonshotDriver(CostMixin, Driver):
                 if choices:
                     delta = choices[0].get("delta", {})
                     content = delta.get("content") or ""
-
-                    if
-
+                    reasoning_chunk = delta.get("reasoning_content") or ""
+                    if reasoning_chunk:
+                        full_reasoning += reasoning_chunk
+                    if not content and reasoning_chunk:
+                        content = reasoning_chunk
                     if content:
                         full_text += content
                         yield {"type": "delta", "text": content}
@@ -472,7 +480,7 @@ class MoonshotDriver(CostMixin, Driver):
         total_tokens = prompt_tokens + completion_tokens
         total_cost = self._calculate_cost("moonshot", model, prompt_tokens, completion_tokens)
 
-
+        done_chunk: dict[str, Any] = {
             "type": "done",
             "text": full_text,
             "meta": {
@@ -484,3 +492,6 @@ class MoonshotDriver(CostMixin, Driver):
                 "model_name": model,
             },
         }
+        if full_reasoning:
+            done_chunk["reasoning_content"] = full_reasoning
+        yield done_chunk
```
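The Moonshot changes also thread a caller-overridable `timeout` (default 300 s) through `opts` into every `requests.post` call site. A quick demonstration of why the merge order makes a caller's value win:

```python
def merge_opts(options: dict) -> dict:
    # Mirrors the updated driver lines: defaults first, caller options last,
    # so a caller-supplied "timeout" overrides the new 300 s default.
    return {"temperature": 1.0, "max_tokens": 512, "timeout": 300, **options}

assert merge_opts({})["timeout"] == 300              # default applies
assert merge_opts({"timeout": 30})["timeout"] == 30  # caller override wins
# Each request site then reads it back with: timeout=opts.get("timeout", 300)
```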
prompture/drivers/ollama_driver.py
CHANGED

```diff
@@ -84,7 +84,7 @@ class OllamaDriver(Driver):
         logger.debug(f"Sending request to Ollama endpoint: {self.endpoint}")
         logger.debug(f"Request payload: {payload}")
 
-        r = requests.post(self.endpoint, json=payload, timeout=
+        r = requests.post(self.endpoint, json=payload, timeout=merged_options.get("timeout", 300))
         logger.debug(f"Response status code: {r.status_code}")
 
         r.raise_for_status()
@@ -131,7 +131,17 @@ class OllamaDriver(Driver):
         }
 
         # Ollama returns text in "response"
-
+        text = response_data.get("response", "")
+        reasoning_content = response_data.get("thinking") or None
+
+        # Reasoning models may return content only in thinking
+        if not text and reasoning_content:
+            text = reasoning_content
+
+        result: dict[str, Any] = {"text": text, "meta": meta}
+        if reasoning_content is not None:
+            result["reasoning_content"] = reasoning_content
+        return result
 
     # ------------------------------------------------------------------
     # Tool use
@@ -166,7 +176,7 @@ class OllamaDriver(Driver):
 
         try:
             logger.debug(f"Sending tool use request to Ollama endpoint: {chat_endpoint}")
-            r = requests.post(chat_endpoint, json=payload, timeout=
+            r = requests.post(chat_endpoint, json=payload, timeout=merged_options.get("timeout", 300))
             r.raise_for_status()
             response_data = r.json()
 
@@ -196,8 +206,12 @@ class OllamaDriver(Driver):
 
         message = response_data.get("message", {})
         text = message.get("content") or ""
+        reasoning_content = message.get("thinking") or None
         stop_reason = response_data.get("done_reason", "stop")
 
+        if not text and reasoning_content:
+            text = reasoning_content
+
         tool_calls_out: list[dict[str, Any]] = []
         for tc in message.get("tool_calls", []):
             func = tc.get("function", {})
@@ -215,12 +229,15 @@ class OllamaDriver(Driver):
                 "arguments": args,
             })
 
-
+        result: dict[str, Any] = {
             "text": text,
             "meta": meta,
             "tool_calls": tool_calls_out,
             "stop_reason": stop_reason,
         }
+        if reasoning_content is not None:
+            result["reasoning_content"] = reasoning_content
+        return result
 
     # ------------------------------------------------------------------
     # Streaming
@@ -255,10 +272,11 @@ class OllamaDriver(Driver):
             payload["top_k"] = merged_options["top_k"]
 
         full_text = ""
+        full_reasoning = ""
         prompt_tokens = 0
         completion_tokens = 0
 
-        r = requests.post(chat_endpoint, json=payload, timeout=
+        r = requests.post(chat_endpoint, json=payload, timeout=merged_options.get("timeout", 300), stream=True)
         r.raise_for_status()
 
         for line in r.iter_lines():
@@ -269,13 +287,17 @@ class OllamaDriver(Driver):
                 prompt_tokens = chunk.get("prompt_eval_count", 0)
                 completion_tokens = chunk.get("eval_count", 0)
             else:
-
+                msg = chunk.get("message", {})
+                thinking = msg.get("thinking", "")
+                if thinking:
+                    full_reasoning += thinking
+                content = msg.get("content", "")
                 if content:
                     full_text += content
                     yield {"type": "delta", "text": content}
 
         total_tokens = prompt_tokens + completion_tokens
-
+        done_chunk: dict[str, Any] = {
             "type": "done",
             "text": full_text,
             "meta": {
@@ -287,6 +309,9 @@ class OllamaDriver(Driver):
                 "model_name": merged_options.get("model", self.model),
             },
         }
+        if full_reasoning:
+            done_chunk["reasoning_content"] = full_reasoning
+        yield done_chunk
 
     def generate_messages(self, messages: list[dict[str, Any]], options: dict[str, Any]) -> dict[str, Any]:
         """Use Ollama's /api/chat endpoint for multi-turn conversations."""
@@ -318,7 +343,7 @@ class OllamaDriver(Driver):
 
         try:
             logger.debug(f"Sending chat request to Ollama endpoint: {chat_endpoint}")
-            r = requests.post(chat_endpoint, json=payload, timeout=
+            r = requests.post(chat_endpoint, json=payload, timeout=merged_options.get("timeout", 300))
             r.raise_for_status()
             response_data = r.json()
 
@@ -349,4 +374,12 @@ class OllamaDriver(Driver):
         # Chat endpoint returns response in message.content
         message = response_data.get("message", {})
         text = message.get("content", "")
-
+        reasoning_content = message.get("thinking") or None
+
+        if not text and reasoning_content:
+            text = reasoning_content
+
+        result: dict[str, Any] = {"text": text, "meta": meta}
+        if reasoning_content is not None:
+            result["reasoning_content"] = reasoning_content
+        return result
```
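Ollama's streaming loop accumulates `thinking` into `full_reasoning` but, unlike the Moonshot and OpenRouter loops, does not promote it into the yielded text deltas; only `content` reaches the caller incrementally. A self-contained simulation over mock stream chunks (the chunk values are invented):

```python
# Mock /api/chat stream chunks shaped like Ollama's NDJSON lines.
chunks = [
    {"message": {"thinking": "Let me think. "}},
    {"message": {"thinking": "Blue light scatters."}},
    {"message": {"content": "Rayleigh scattering."}},
    {"done": True, "prompt_eval_count": 10, "eval_count": 7},
]

full_text, full_reasoning = "", ""
prompt_tokens = completion_tokens = 0
for chunk in chunks:
    if chunk.get("done"):
        prompt_tokens = chunk.get("prompt_eval_count", 0)
        completion_tokens = chunk.get("eval_count", 0)
    else:
        msg = chunk.get("message", {})
        thinking = msg.get("thinking", "")
        if thinking:
            full_reasoning += thinking  # kept aside, never yielded as a delta
        content = msg.get("content", "")
        if content:
            full_text += content        # only content becomes delta text

assert full_text == "Rayleigh scattering."
assert full_reasoning == "Let me think. Blue light scatters."
assert (prompt_tokens, completion_tokens) == (10, 7)
```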
prompture/drivers/openrouter_driver.py
CHANGED

```diff
@@ -181,8 +181,18 @@ class OpenRouterDriver(CostMixin, Driver):
             "model_name": model,
         }
 
-
-
+        message = resp["choices"][0]["message"]
+        text = message.get("content") or ""
+        reasoning_content = message.get("reasoning_content")
+
+        # Reasoning models may return content in reasoning_content when content is empty
+        if not text and reasoning_content:
+            text = reasoning_content
+
+        result: dict[str, Any] = {"text": text, "meta": meta}
+        if reasoning_content is not None:
+            result["reasoning_content"] = reasoning_content
+        return result
 
     # ------------------------------------------------------------------
     # Tool use
@@ -257,18 +267,23 @@ class OpenRouterDriver(CostMixin, Driver):
                 args = json.loads(tc["function"]["arguments"])
             except (json.JSONDecodeError, TypeError):
                 args = {}
-            tool_calls_out.append(
-
-
-
-
+            tool_calls_out.append(
+                {
+                    "id": tc["id"],
+                    "name": tc["function"]["name"],
+                    "arguments": args,
+                }
+            )
 
-
+        result: dict[str, Any] = {
             "text": text,
             "meta": meta,
             "tool_calls": tool_calls_out,
             "stop_reason": stop_reason,
         }
+        if choice["message"].get("reasoning_content") is not None:
+            result["reasoning_content"] = choice["message"]["reasoning_content"]
+        return result
 
     # ------------------------------------------------------------------
     # Streaming
@@ -311,13 +326,14 @@ class OpenRouterDriver(CostMixin, Driver):
         response.raise_for_status()
 
         full_text = ""
+        full_reasoning = ""
         prompt_tokens = 0
         completion_tokens = 0
 
         for line in response.iter_lines(decode_unicode=True):
             if not line or not line.startswith("data: "):
                 continue
-            payload = line[len("data: "):]
+            payload = line[len("data: ") :]
             if payload.strip() == "[DONE]":
                 break
             try:
@@ -335,6 +351,11 @@ class OpenRouterDriver(CostMixin, Driver):
                 if choices:
                     delta = choices[0].get("delta", {})
                     content = delta.get("content", "")
+                    reasoning_chunk = delta.get("reasoning_content") or ""
+                    if reasoning_chunk:
+                        full_reasoning += reasoning_chunk
+                    if not content and reasoning_chunk:
+                        content = reasoning_chunk
                     if content:
                         full_text += content
                         yield {"type": "delta", "text": content}
@@ -342,7 +363,7 @@ class OpenRouterDriver(CostMixin, Driver):
         total_tokens = prompt_tokens + completion_tokens
         total_cost = self._calculate_cost("openrouter", model, prompt_tokens, completion_tokens)
 
-
+        done_chunk: dict[str, Any] = {
             "type": "done",
             "text": full_text,
             "meta": {
@@ -354,3 +375,6 @@ class OpenRouterDriver(CostMixin, Driver):
                 "model_name": model,
             },
         }
+        if full_reasoning:
+            done_chunk["reasoning_content"] = full_reasoning
+        yield done_chunk
```
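OpenRouter's SSE handling, reduced to a runnable simulation: here reasoning deltas are accumulated *and* surfaced as text deltas whenever the regular content delta is empty. The mock `lines` stand in for `response.iter_lines()`:

```python
import json

# Mock SSE lines shaped like the OpenRouter stream.
lines = [
    'data: {"choices": [{"delta": {"reasoning_content": "hmm..."}}]}',
    'data: {"choices": [{"delta": {"content": "Answer."}}]}',
    "data: [DONE]",
]

full_text, full_reasoning = "", ""
for line in lines:
    if not line.startswith("data: "):
        continue
    payload = line[len("data: ") :]
    if payload.strip() == "[DONE]":
        break
    delta = json.loads(payload)["choices"][0].get("delta", {})
    content = delta.get("content", "")
    reasoning_chunk = delta.get("reasoning_content") or ""
    if reasoning_chunk:
        full_reasoning += reasoning_chunk
    if not content and reasoning_chunk:
        content = reasoning_chunk  # reasoning doubles as the visible delta
    if content:
        full_text += content

assert full_text == "hmm...Answer."
assert full_reasoning == "hmm..."
```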