inferencesh 0.2.19__py3-none-any.whl → 0.2.21__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of inferencesh might be problematic.
- inferencesh/models/llm.py +46 -20
- {inferencesh-0.2.19.dist-info → inferencesh-0.2.21.dist-info}/METADATA +1 -1
- {inferencesh-0.2.19.dist-info → inferencesh-0.2.21.dist-info}/RECORD +7 -7
- {inferencesh-0.2.19.dist-info → inferencesh-0.2.21.dist-info}/WHEEL +0 -0
- {inferencesh-0.2.19.dist-info → inferencesh-0.2.21.dist-info}/entry_points.txt +0 -0
- {inferencesh-0.2.19.dist-info → inferencesh-0.2.21.dist-info}/licenses/LICENSE +0 -0
- {inferencesh-0.2.19.dist-info → inferencesh-0.2.21.dist-info}/top_level.txt +0 -0
inferencesh/models/llm.py
CHANGED
@@ -216,7 +216,8 @@ class ResponseState:
         self.response = ""
         self.reasoning = None
         self.function_calls = None  # For future function calling support
-        self.tool_calls =
+        self.tool_calls = []  # List to accumulate tool calls
+        self.current_tool_call = None  # Track current tool call being built
         self.state_changes = {
             "reasoning_started": False,
             "reasoning_ended": False,
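For context, the list that `self.tool_calls` accumulates holds OpenAI-style tool-call dicts, the same shape the streaming loop later in this diff builds up. A minimal sketch of one accumulated entry (the id, name, and arguments values here are hypothetical):

    # Hypothetical example of one entry in ResponseState.tool_calls after a
    # complete tool call has streamed in; the field layout matches the
    # construction in the stream_generate hunk below.
    accumulated_tool_call = {
        "id": "call_abc123",                   # hypothetical id assigned by the model
        "type": "function",
        "function": {
            "name": "get_weather",             # hypothetical function name
            "arguments": '{"city": "Paris"}',  # JSON string, concatenated chunk by chunk
        },
    }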
@@ -373,17 +374,7 @@ def stream_generate(
     max_tokens: int = 4096,
     stop: Optional[List[str]] = None,
 ) -> Generator[LLMOutput, None, None]:
-    """Stream generate from LLaMA.cpp model with timing and usage tracking.
-
-    Args:
-        model: The LLaMA.cpp model instance
-        messages: List of messages to send to the model
-        transformer: ResponseTransformer instance to use for processing output
-        temperature: Sampling temperature
-        top_p: Top-p sampling threshold
-        max_tokens: Maximum tokens to generate
-        stop: Optional list of stop sequences
-    """
+    """Stream generate from LLaMA.cpp model with timing and usage tracking."""
     response_queue: Queue[Optional[tuple[str, dict, Optional[List[Dict[str, Any]]]]]] = Queue()
     thread_exception = None
     usage_stats = {
@@ -394,7 +385,6 @@ def stream_generate(
     }
 
     with timing_context() as timing:
-        # Set timing context in transformer
         transformer.timing = timing
 
        def generation_thread():
@@ -411,30 +401,66 @@ def stream_generate(
                stop=stop
            )
 
+           tool_calls = []
+           current_tool = None
+
            for chunk in completion:
                if "usage" in chunk and chunk["usage"] is not None:
                    usage_stats.update(chunk["usage"])
 
                delta = chunk.get("choices", [{}])[0]
-               content =
+               content = ""
                finish_reason = None
-               tool_calls = None
 
+               # Extract delta content from either message or delta
                if "message" in delta:
                    message = delta["message"]
                    content = message.get("content", "")
-
+                   if message.get("tool_calls"):
+                       for tool in message["tool_calls"]:
+                           if tool.get("id") not in {t.get("id") for t in tool_calls}:
+                               tool_calls.append(tool)
                    finish_reason = delta.get("finish_reason")
                elif "delta" in delta:
                    delta_content = delta["delta"]
                    content = delta_content.get("content", "")
-
+
+                   # Handle streaming tool calls
+                   if delta_content.get("tool_calls"):
+                       for tool_delta in delta_content["tool_calls"]:
+                           tool_id = tool_delta.get("id")
+
+                           # Find or create tool call
+                           if tool_id:
+                               current_tool = next((t for t in tool_calls if t["id"] == tool_id), None)
+                               if not current_tool:
+                                   current_tool = {
+                                       "id": tool_id,
+                                       "type": tool_delta.get("type", "function"),
+                                       "function": {"name": "", "arguments": ""}
+                                   }
+                                   tool_calls.append(current_tool)
+
+                           # Update tool call
+                           if current_tool and "function" in tool_delta:
+                               func_delta = tool_delta["function"]
+                               if "name" in func_delta:
+                                   current_tool["function"]["name"] = func_delta["name"]
+                               if "arguments" in func_delta:
+                                   current_tool["function"]["arguments"] += func_delta["arguments"]
+
                    finish_reason = delta.get("finish_reason")
 
-
+               has_update = bool(content)
+               has_tool_update = bool(
+                   (delta.get("message", {}) or {}).get("tool_calls") or
+                   (delta.get("delta", {}) or {}).get("tool_calls")
+               )
+
+               if has_update or has_tool_update:
                    if not timing.first_token_time:
                        timing.mark_first_token()
-                   response_queue.put((content
+                   response_queue.put((content, {}, tool_calls[:] if tool_calls else None))
 
                if finish_reason:
                    usage_stats["stop_reason"] = finish_reason
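To make the new accumulation logic concrete, here is a self-contained replay of the find-or-create-by-id loop from the hunk above, run over a few hypothetical streamed tool-call fragments (the fragment shapes mirror the `delta_content["tool_calls"]` entries handled above; the id and argument values are made up):

    # Replays the tool-call accumulation from the diff: fragments sharing an
    # id are merged into one entry, and "arguments" strings are concatenated.
    deltas = [
        [{"id": "call_1", "type": "function",
          "function": {"name": "get_weather", "arguments": ""}}],
        [{"id": "call_1", "function": {"arguments": '{"city": "'}}],
        [{"id": "call_1", "function": {"arguments": 'Paris"}'}}],
    ]

    tool_calls = []
    current_tool = None
    for chunk_tool_calls in deltas:
        for tool_delta in chunk_tool_calls:
            tool_id = tool_delta.get("id")
            if tool_id:
                # Find or create the entry for this id
                current_tool = next((t for t in tool_calls if t["id"] == tool_id), None)
                if not current_tool:
                    current_tool = {"id": tool_id,
                                    "type": tool_delta.get("type", "function"),
                                    "function": {"name": "", "arguments": ""}}
                    tool_calls.append(current_tool)
            # Merge this fragment into the current entry
            if current_tool and "function" in tool_delta:
                func_delta = tool_delta["function"]
                if "name" in func_delta:
                    current_tool["function"]["name"] = func_delta["name"]
                if "arguments" in func_delta:
                    current_tool["function"]["arguments"] += func_delta["arguments"]

    print(tool_calls)
    # [{'id': 'call_1', 'type': 'function',
    #   'function': {'name': 'get_weather', 'arguments': '{"city": "Paris"}'}}]

Note that later fragments may omit the id entirely; the loop then falls through to `current_tool`, which still points at the entry created by the first fragment.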
@@ -450,7 +476,7 @@ def stream_generate(
                "tokens_per_second": tokens_per_second,
                "reasoning_time": timing_stats["reasoning_time"],
                "reasoning_tokens": timing_stats["reasoning_tokens"]
-           }, None))
+           }, tool_calls if tool_calls else None))
 
    thread = Thread(target=generation_thread, daemon=True)
    thread.start()
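Per the `Queue` annotation earlier in the diff, each queued item is a `(content, stats, tool_calls)` tuple. A hypothetical consumer loop, for illustration only (treating `None` as the end-of-stream sentinel is an assumption, since the sentinel handling sits outside the shown hunks):

    # Hypothetical drain of response_queue; the tuple shape follows the Queue
    # type annotation in the diff. None-as-sentinel is an assumption about
    # code outside these hunks.
    while True:
        item = response_queue.get()
        if item is None:
            break
        content, stats, tool_calls = item
        if content:
            print(content, end="")
        if tool_calls:
            for call in tool_calls:
                print(f'\n[tool] {call["function"]["name"]}({call["function"]["arguments"]})')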
{inferencesh-0.2.19.dist-info → inferencesh-0.2.21.dist-info}/RECORD
CHANGED
@@ -2,13 +2,13 @@ inferencesh/__init__.py,sha256=WdADtOhfa3HDOunoE9HLFCTFlXRykYstBIH1FpyWvj8,613
 inferencesh/models/__init__.py,sha256=FDwcdtT6c4hbRitymjmN-hZMlQa8RbKSftkZZyjtUXA,536
 inferencesh/models/base.py,sha256=4gZQRi8J7y9U6PrGD9pRIehd1MJVJAqGakPQDs2AKFM,3251
 inferencesh/models/file.py,sha256=5xnpypcRahM1YcEjj64rv9g2gTimxrZb41YT4r440hU,7393
-inferencesh/models/llm.py,sha256=
+inferencesh/models/llm.py,sha256=jzTpOp65DtZSqQUtnwNF-_OBQVqCQHX3GOhOvSqkmbc,21695
 inferencesh/utils/__init__.py,sha256=-xiD6uo2XzcrPAWFb_fUbaimmnW4KFKc-8IvBzaxNd4,148
 inferencesh/utils/download.py,sha256=7n5twvoNYDcFnKJyefImaj2YfzRI7vddQw4usZbj38c,1521
 inferencesh/utils/storage.py,sha256=E4J8emd4eFKdmdDgAqzz3TpaaDd3n0l8gYlMHuY8yIU,519
-inferencesh-0.2.
-inferencesh-0.2.
-inferencesh-0.2.
-inferencesh-0.2.
-inferencesh-0.2.
-inferencesh-0.2.
+inferencesh-0.2.21.dist-info/licenses/LICENSE,sha256=OsgqEWIh2el_QMj0y8O1A5Q5Dl-dxqqYbFE6fszuR4s,1086
+inferencesh-0.2.21.dist-info/METADATA,sha256=qMs9bH6l5e194tUwq6egASxXTbEehhPZF_4QsQlQZrA,2757
+inferencesh-0.2.21.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+inferencesh-0.2.21.dist-info/entry_points.txt,sha256=6IC-fyozAqW3ljsMLGCXxJ0_ui2Jb-2fLHtoH1RTnEE,45
+inferencesh-0.2.21.dist-info/top_level.txt,sha256=TSMHg3T1ThMl1HGAWmzBClwOYH1ump5neof9BfHIwaA,12
+inferencesh-0.2.21.dist-info/RECORD,,
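For reference, each RECORD row is `path,sha256=<digest>,size`, where the digest is the URL-safe base64 encoding of the file's SHA-256 hash with the `=` padding stripped, per the wheel RECORD format. A small sketch of how an entry could be verified (the file path is illustrative):

    import base64
    import hashlib

    def record_digest(path: str) -> str:
        """Return the sha256=... token RECORD uses: URL-safe base64, unpadded."""
        with open(path, "rb") as f:
            digest = hashlib.sha256(f.read()).digest()
        return "sha256=" + base64.urlsafe_b64encode(digest).rstrip(b"=").decode("ascii")

    # e.g. record_digest("inferencesh/models/llm.py") should equal
    # "sha256=jzTpOp65DtZSqQUtnwNF-_OBQVqCQHX3GOhOvSqkmbc" for the 0.2.21 wheel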
{inferencesh-0.2.19.dist-info → inferencesh-0.2.21.dist-info}/WHEEL
File without changes
{inferencesh-0.2.19.dist-info → inferencesh-0.2.21.dist-info}/entry_points.txt
File without changes
{inferencesh-0.2.19.dist-info → inferencesh-0.2.21.dist-info}/licenses/LICENSE
File without changes
{inferencesh-0.2.19.dist-info → inferencesh-0.2.21.dist-info}/top_level.txt
File without changes