inferencesh 0.2.16__tar.gz → 0.2.18__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of inferencesh may be problematic.
- {inferencesh-0.2.16/src/inferencesh.egg-info → inferencesh-0.2.18}/PKG-INFO +1 -1
- {inferencesh-0.2.16 → inferencesh-0.2.18}/pyproject.toml +1 -1
- {inferencesh-0.2.16 → inferencesh-0.2.18}/src/inferencesh/models/llm.py +36 -9
- {inferencesh-0.2.16 → inferencesh-0.2.18/src/inferencesh.egg-info}/PKG-INFO +1 -1
- {inferencesh-0.2.16 → inferencesh-0.2.18}/LICENSE +0 -0
- {inferencesh-0.2.16 → inferencesh-0.2.18}/README.md +0 -0
- {inferencesh-0.2.16 → inferencesh-0.2.18}/setup.cfg +0 -0
- {inferencesh-0.2.16 → inferencesh-0.2.18}/setup.py +0 -0
- {inferencesh-0.2.16 → inferencesh-0.2.18}/src/inferencesh/__init__.py +0 -0
- {inferencesh-0.2.16 → inferencesh-0.2.18}/src/inferencesh/models/__init__.py +0 -0
- {inferencesh-0.2.16 → inferencesh-0.2.18}/src/inferencesh/models/base.py +0 -0
- {inferencesh-0.2.16 → inferencesh-0.2.18}/src/inferencesh/models/file.py +0 -0
- {inferencesh-0.2.16 → inferencesh-0.2.18}/src/inferencesh/utils/__init__.py +0 -0
- {inferencesh-0.2.16 → inferencesh-0.2.18}/src/inferencesh/utils/download.py +0 -0
- {inferencesh-0.2.16 → inferencesh-0.2.18}/src/inferencesh/utils/storage.py +0 -0
- {inferencesh-0.2.16 → inferencesh-0.2.18}/src/inferencesh.egg-info/SOURCES.txt +0 -0
- {inferencesh-0.2.16 → inferencesh-0.2.18}/src/inferencesh.egg-info/dependency_links.txt +0 -0
- {inferencesh-0.2.16 → inferencesh-0.2.18}/src/inferencesh.egg-info/entry_points.txt +0 -0
- {inferencesh-0.2.16 → inferencesh-0.2.18}/src/inferencesh.egg-info/requires.txt +0 -0
- {inferencesh-0.2.16 → inferencesh-0.2.18}/src/inferencesh.egg-info/top_level.txt +0 -0
- {inferencesh-0.2.16 → inferencesh-0.2.18}/tests/test_sdk.py +0 -0
src/inferencesh/models/llm.py (+36 -9):

@@ -89,6 +89,8 @@ class LLMInput(BaseAppInput):
 
     # Model specific flags
     reasoning: bool = Field(default=False)
+
+    tools: List[Dict[str, Any]] = Field(default=[])
 
 class LLMUsage(BaseAppOutput):
     stop_reason: str = ""
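The new `tools` field feeds straight through to `model.create_chat_completion` (see the `stream_generate` hunks below). A minimal sketch of what it might hold, assuming OpenAI-style function-calling schemas as accepted by llama-cpp-python; the tool name and parameters are hypothetical:

```python
# Hypothetical tool schema in OpenAI function-calling format; the diff
# does not define the expected schema, it only forwards the list.
weather_tool = {
    "type": "function",
    "function": {
        "name": "get_weather",  # hypothetical tool name
        "description": "Look up the current weather for a city",
        "parameters": {
            "type": "object",
            "properties": {"city": {"type": "string"}},
            "required": ["city"],
        },
    },
}

tools = [weather_tool]  # e.g. LLMInput(tools=tools) or stream_generate(..., tools=tools)
```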
@@ -104,6 +106,7 @@ class LLMUsage(BaseAppOutput):
 class LLMOutput(BaseAppOutput):
     response: str
     reasoning: Optional[str] = None
+    tool_calls: Optional[List[Dict[str, Any]]] = None
     usage: Optional[LLMUsage] = None
 
 
@@ -228,6 +231,7 @@ class ResponseTransformer:
     def __init__(self, output_cls: type[LLMOutput] = LLMOutput):
         self.state = ResponseState()
         self.output_cls = output_cls
+        self.timing = None  # Will be set by stream_generate
 
     def clean_text(self, text: str) -> str:
         """Clean common tokens from the text and apply model-specific cleaning.
@@ -264,10 +268,17 @@ class ResponseTransformer:
            text: Cleaned text to process for reasoning
        """
        # Default implementation for <think> style reasoning
-        if "<think>" in text:
+        if "<think>" in text and not self.state.state_changes["reasoning_started"]:
            self.state.state_changes["reasoning_started"] = True
-
+            if self.timing:
+                self.timing.start_reasoning()
+
+        if "</think>" in text and not self.state.state_changes["reasoning_ended"]:
            self.state.state_changes["reasoning_ended"] = True
+            if self.timing:
+                # Estimate token count from character count (rough approximation)
+                token_count = len(self.state.buffer.split("<think>")[1].split("</think>")[0]) // 4
+                self.timing.end_reasoning(token_count)
 
        if "<think>" in self.state.buffer:
            parts = self.state.buffer.split("</think>", 1)
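The `// 4` above is a characters-to-tokens heuristic (roughly four characters per English token), applied to whatever sits between the `<think>` markers in the accumulated buffer. A standalone illustration of the same arithmetic, with invented values:

```python
# Same estimate as the diff: measure the reasoning span in characters,
# then divide by 4.
buffer = "<think>The user wants the weather, so I should call the tool.</think>Sure!"
reasoning_span = buffer.split("<think>")[1].split("</think>")[0]
token_estimate = len(reasoning_span) // 4
print(token_estimate)  # 13 (54 characters // 4)
```

Note the indexing assumes `<think>` is already in the buffer when `</think>` arrives; a `</think>` with no matching `<think>` would make the `[1]` lookup raise an IndexError.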
@@ -354,6 +365,8 @@ class ResponseTransformer:
 def stream_generate(
     model: Any,
     messages: List[Dict[str, Any]],
+    tools: List[Dict[str, Any]],
+    tool_choice: Dict[str, Any],
     transformer: ResponseTransformer,
     temperature: float = 0.7,
     top_p: float = 0.95,
@@ -371,7 +384,7 @@ def stream_generate(
        max_tokens: Maximum tokens to generate
        stop: Optional list of stop sequences
    """
-    response_queue: Queue[Optional[tuple[str, dict]]] = Queue()
+    response_queue: Queue[Optional[tuple[str, dict, Optional[List[Dict[str, Any]]]]]] = Queue()
    thread_exception = None
    usage_stats = {
        "prompt_tokens": 0,
@@ -381,11 +394,16 @@ def stream_generate(
    }
 
    with timing_context() as timing:
+        # Set timing context in transformer
+        transformer.timing = timing
+
        def generation_thread():
            nonlocal thread_exception, usage_stats
            try:
                completion = model.create_chat_completion(
                    messages=messages,
+                    tools=tools,
+                    tool_choice=tool_choice,
                    stream=True,
                    temperature=temperature,
                    top_p=top_p,
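`timing_context` itself is not part of this diff. From the calls the new code makes on it (`first_token_time`, `mark_first_token()`, `start_reasoning()`, `end_reasoning(token_count)`), a compatible stand-in might look like the following sketch; every name and detail here is an assumption:

```python
import time
from contextlib import contextmanager

class _Timing:
    """Hypothetical stand-in for the object yielded by timing_context();
    only the attributes and methods this diff touches are sketched."""
    def __init__(self):
        self.start_time = time.monotonic()
        self.first_token_time = None   # checked before mark_first_token()
        self.reasoning_start = None
        self.reasoning_time = 0.0
        self.reasoning_tokens = 0

    def mark_first_token(self):
        self.first_token_time = time.monotonic()

    def start_reasoning(self):
        self.reasoning_start = time.monotonic()

    def end_reasoning(self, token_count):
        if self.reasoning_start is not None:
            self.reasoning_time = time.monotonic() - self.reasoning_start
        self.reasoning_tokens = token_count

@contextmanager
def timing_context():
    yield _Timing()
```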
@@ -400,18 +418,23 @@ def stream_generate(
                delta = chunk.get("choices", [{}])[0]
                content = None
                finish_reason = None
+                tool_calls = None
 
                if "message" in delta:
-
+                    message = delta["message"]
+                    content = message.get("content", "")
+                    tool_calls = message.get("tool_calls")
                    finish_reason = delta.get("finish_reason")
                elif "delta" in delta:
-
+                    delta_content = delta["delta"]
+                    content = delta_content.get("content", "")
+                    tool_calls = delta_content.get("tool_calls")
                    finish_reason = delta.get("finish_reason")
 
-                if content:
+                if content or tool_calls:
                    if not timing.first_token_time:
                        timing.mark_first_token()
-                    response_queue.put((content, {}))
+                    response_queue.put((content or "", {}, tool_calls))
 
                if finish_reason:
                    usage_stats["stop_reason"] = finish_reason
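The two branches mirror OpenAI-style response payloads: a full `message` object for non-streamed responses and incremental `delta` objects for streamed chunks. Judging only from the keys the code reads, the chunks are shaped roughly like this (values invented for illustration):

```python
# Streamed chunk carrying text (shape inferred from the key lookups above).
text_chunk = {
    "choices": [{
        "delta": {"content": "Hello"},
        "finish_reason": None,
    }]
}

# Chunk carrying a tool call instead of text.
tool_chunk = {
    "choices": [{
        "delta": {
            "content": "",
            "tool_calls": [{
                "id": "call_0",
                "type": "function",
                "function": {"name": "get_weather",
                             "arguments": '{"city": "Oslo"}'},
            }],
        },
        "finish_reason": "tool_calls",
    }]
}
```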
@@ -427,7 +450,7 @@ def stream_generate(
                    "tokens_per_second": tokens_per_second,
                    "reasoning_time": timing_stats["reasoning_time"],
                    "reasoning_tokens": timing_stats["reasoning_tokens"]
-                }))
+                }, None))
 
        thread = Thread(target=generation_thread, daemon=True)
        thread.start()
@@ -440,7 +463,7 @@ def stream_generate(
                if thread_exception:
                    raise thread_exception
 
-                piece, timing_stats = result
+                piece, timing_stats, tool_calls = result
                if piece is None:
                    # Final yield with complete usage stats
                    usage = LLMUsage(
@@ -456,10 +479,14 @@ def stream_generate(
 
                    buffer, output, _ = transformer(piece or "", buffer)
                    output.usage = usage
+                    if tool_calls:
+                        output.tool_calls = tool_calls
                    yield output
                    break
 
                buffer, output, _ = transformer(piece, buffer)
+                if tool_calls:
+                    output.tool_calls = tool_calls
                yield output
 
        except Exception as e:
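Taken together, a caller can now thread tool schemas through `stream_generate` and watch the yielded outputs for `tool_calls`. A minimal usage sketch; `model` is assumed to be a llama-cpp-python style object exposing `create_chat_completion`, `weather_tool` is the hypothetical schema sketched earlier, and the `tool_choice` format is assumed to follow the OpenAI convention:

```python
transformer = ResponseTransformer()
for output in stream_generate(
    model=model,  # assumed: llama-cpp-python style model
    messages=[{"role": "user", "content": "What's the weather in Oslo?"}],
    tools=[weather_tool],
    tool_choice={"type": "function", "function": {"name": "get_weather"}},
    transformer=transformer,
):
    if output.tool_calls:
        print("tool call:", output.tool_calls)
    elif output.response:
        print(output.response, end="", flush=True)
```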