inferencesh 0.2.23__py3-none-any.whl → 0.2.24__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of inferencesh might be problematic.
- inferencesh/models/llm.py +68 -28
- {inferencesh-0.2.23.dist-info → inferencesh-0.2.24.dist-info}/METADATA +1 -1
- {inferencesh-0.2.23.dist-info → inferencesh-0.2.24.dist-info}/RECORD +7 -7
- {inferencesh-0.2.23.dist-info → inferencesh-0.2.24.dist-info}/WHEEL +0 -0
- {inferencesh-0.2.23.dist-info → inferencesh-0.2.24.dist-info}/entry_points.txt +0 -0
- {inferencesh-0.2.23.dist-info → inferencesh-0.2.24.dist-info}/licenses/LICENSE +0 -0
- {inferencesh-0.2.23.dist-info → inferencesh-0.2.24.dist-info}/top_level.txt +0 -0
inferencesh/models/llm.py
CHANGED
@@ -10,7 +10,6 @@ import base64
 from .base import BaseAppInput, BaseAppOutput
 from .file import File
 
-
 class ContextMessageRole(str, Enum):
     USER = "user"
     ASSISTANT = "assistant"
@@ -116,7 +115,7 @@ def timing_context():
 class TimingInfo:
     def __init__(self):
         self.start_time = time.time()
-        self.first_token_time =
+        self.first_token_time = None
         self.reasoning_start_time = None
         self.total_reasoning_time = 0.0
         self.reasoning_tokens = 0
@@ -140,12 +139,17 @@ def timing_context():
 
     @property
     def stats(self):
-
+        current_time = time.time()
         if self.first_token_time is None:
-
+            return {
+                "time_to_first_token": 0.0,
+                "generation_time": 0.0,
+                "reasoning_time": self.total_reasoning_time,
+                "reasoning_tokens": self.reasoning_tokens
+            }
 
         time_to_first = self.first_token_time - self.start_time
-        generation_time =
+        generation_time = current_time - self.first_token_time
 
         return {
             "time_to_first_token": time_to_first,
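For readers tracking the TimingInfo change above: the stats property now returns zeroed timings until a first token has been recorded, instead of computing time deltas from an unset first_token_time. Below is a minimal, self-contained sketch of that behaviour; the class is trimmed to the fields visible in this hunk, and the usage at the end is illustrative only.

import time

class TimingInfo:
    """Reduced mock mirroring only the fields used by the diffed stats property."""
    def __init__(self):
        self.start_time = time.time()
        self.first_token_time = None
        self.total_reasoning_time = 0.0
        self.reasoning_tokens = 0

    @property
    def stats(self):
        current_time = time.time()
        if self.first_token_time is None:
            # No token seen yet: report zeroed timings rather than computing
            # deltas against an unset first_token_time.
            return {
                "time_to_first_token": 0.0,
                "generation_time": 0.0,
                "reasoning_time": self.total_reasoning_time,
                "reasoning_tokens": self.reasoning_tokens,
            }
        return {
            "time_to_first_token": self.first_token_time - self.start_time,
            "generation_time": current_time - self.first_token_time,
            "reasoning_time": self.total_reasoning_time,
            "reasoning_tokens": self.reasoning_tokens,
        }

timing = TimingInfo()
print(timing.stats)                    # zeroed: no token has arrived yet
timing.first_token_time = time.time()  # normally recorded when the first chunk lands
print(timing.stats)                    # real time_to_first_token / generation_time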
@@ -216,7 +220,7 @@ class StreamResponse:
         self.tool_calls = None # Changed from [] to None
         self.finish_reason = None
         self.timing_stats = {
-            "time_to_first_token": 0.0
+            "time_to_first_token": None, # Changed from 0.0 to None
             "generation_time": 0.0,
             "reasoning_time": 0.0,
             "reasoning_tokens": 0,
@@ -231,9 +235,22 @@ class StreamResponse:
 
     def update_from_chunk(self, chunk: Dict[str, Any], timing: Any) -> None:
         """Update response state from a chunk."""
+        print("DEBUG: Entering update_from_chunk")
+        print(f"DEBUG: Current usage stats: {self.usage_stats}")
+        print(f"DEBUG: Chunk: {chunk}")
+
         # Update usage stats if present
-        if "usage" in chunk
-
+        if "usage" in chunk:
+            usage = chunk["usage"]
+            if usage is not None:
+                print(f"DEBUG: Updating usage stats with: {usage}")
+                # Update usage stats preserving existing values if not provided
+                self.usage_stats.update({
+                    "prompt_tokens": usage.get("prompt_tokens", self.usage_stats["prompt_tokens"]),
+                    "completion_tokens": usage.get("completion_tokens", self.usage_stats["completion_tokens"]),
+                    "total_tokens": usage.get("total_tokens", self.usage_stats["total_tokens"])
+                })
+                print(f"DEBUG: Updated usage stats: {self.usage_stats}")
 
         # Get the delta from the chunk
         delta = chunk.get("choices", [{}])[0]
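The new usage-stats block tolerates streaming chunks whose "usage" key is present but null (as OpenAI-compatible servers typically send until the final chunk) and merges partial counts without discarding earlier values. Here is a small sketch of that merge, written as a free function for illustration; merge_usage is not part of the package.

from typing import Any, Dict, Optional

def merge_usage(usage_stats: Dict[str, int], usage: Optional[Dict[str, Any]]) -> Dict[str, int]:
    """Mirror of the diffed merge: keep existing counters when a chunk omits a field."""
    if usage is not None:
        usage_stats.update({
            "prompt_tokens": usage.get("prompt_tokens", usage_stats["prompt_tokens"]),
            "completion_tokens": usage.get("completion_tokens", usage_stats["completion_tokens"]),
            "total_tokens": usage.get("total_tokens", usage_stats["total_tokens"]),
        })
    return usage_stats

stats = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}
merge_usage(stats, None)                              # mid-stream chunk: usage is null, nothing changes
merge_usage(stats, {"prompt_tokens": 12})             # partial usage: other counters preserved
merge_usage(stats, {"completion_tokens": 40, "total_tokens": 52})
print(stats)  # {'prompt_tokens': 12, 'completion_tokens': 40, 'total_tokens': 52}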
@@ -245,23 +262,35 @@ class StreamResponse:
             if message.get("tool_calls"):
                 self._update_tool_calls(message["tool_calls"])
             self.finish_reason = delta.get("finish_reason")
+            if self.finish_reason:
+                self.usage_stats["stop_reason"] = self.finish_reason
         elif "delta" in delta:
             delta_content = delta["delta"]
             self.content = delta_content.get("content", "")
             if delta_content.get("tool_calls"):
                 self._update_tool_calls(delta_content["tool_calls"])
             self.finish_reason = delta.get("finish_reason")
+            if self.finish_reason:
+                self.usage_stats["stop_reason"] = self.finish_reason
 
-        # Update timing stats
+        # Update timing stats
         timing_stats = timing.stats
-
-
-        tokens_per_second = (completion_tokens / generation_time) if generation_time > 0 and completion_tokens > 0 else 0.0
+        if self.timing_stats["time_to_first_token"] is None:
+            self.timing_stats["time_to_first_token"] = timing_stats["time_to_first_token"]
 
         self.timing_stats.update({
-
-            "
+            "generation_time": timing_stats["generation_time"],
+            "reasoning_time": timing_stats["reasoning_time"],
+            "reasoning_tokens": timing_stats["reasoning_tokens"]
         })
+
+        # Calculate tokens per second only if we have valid completion tokens and generation time
+        if self.usage_stats["completion_tokens"] > 0 and timing_stats["generation_time"] > 0:
+            self.timing_stats["tokens_per_second"] = (
+                self.usage_stats["completion_tokens"] / timing_stats["generation_time"]
+            )
+
+        print(f"DEBUG: Final usage stats in update_from_chunk: {self.usage_stats}")
 
     def _update_tool_calls(self, new_tool_calls: List[Dict[str, Any]]) -> None:
         """Update tool calls, handling both full and partial updates."""
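Two behavioural points in this hunk: time_to_first_token is latched only the first time it is still None, and tokens_per_second is recomputed only when both completion_tokens and generation_time are positive (otherwise it is left at its previous value). A short sketch of the throughput guard; throughput is a hypothetical helper, not a package API.

def throughput(completion_tokens: int, generation_time: float) -> float:
    """Tokens per second, computed only when both figures are positive."""
    if completion_tokens > 0 and generation_time > 0:
        return completion_tokens / generation_time
    return 0.0  # the real code simply skips the update in this case

print(throughput(0, 1.5))   # 0.0 -- no completion tokens reported yet
print(throughput(48, 1.5))  # 32.0 tokens/s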
@@ -292,29 +321,40 @@ class StreamResponse:
                     current_tool["function"]["arguments"] += func_delta["arguments"]
 
     def has_updates(self) -> bool:
-        """Check if this response has any content
-
+        """Check if this response has any content, tool call, or usage updates."""
+        has_content = bool(self.content)
+        has_tool_calls = bool(self.tool_calls)
+        has_usage = self.usage_stats["prompt_tokens"] > 0 or self.usage_stats["completion_tokens"] > 0
+        has_finish = bool(self.finish_reason)
+
+        print(f"DEBUG: has_updates check - content: {has_content}, tool_calls: {has_tool_calls}, usage: {has_usage}, finish: {has_finish}")
+
+        return has_content or has_tool_calls or has_usage or has_finish
 
     def to_output(self, buffer: str, transformer: Any) -> LLMOutput:
         """Convert current state to LLMOutput."""
+        print("DEBUG: Entering to_output")
+        print(f"DEBUG: Usage stats before conversion: {self.usage_stats}")
+
         buffer, output, _ = transformer(self.content, buffer)
 
         # Add tool calls if present
         if self.tool_calls:
             output.tool_calls = self.tool_calls
 
-        # Add usage stats
-
-
-
-
-
-
-
-
-
-
-
+        # Add usage stats
+        print(f"DEBUG: Creating LLMUsage with stats: {self.usage_stats}")
+        output.usage = LLMUsage(
+            stop_reason=self.usage_stats["stop_reason"],
+            time_to_first_token=self.timing_stats["time_to_first_token"] or 0.0,
+            tokens_per_second=self.timing_stats["tokens_per_second"],
+            prompt_tokens=self.usage_stats["prompt_tokens"],
+            completion_tokens=self.usage_stats["completion_tokens"],
+            total_tokens=self.usage_stats["total_tokens"],
+            reasoning_time=self.timing_stats["reasoning_time"],
+            reasoning_tokens=self.timing_stats["reasoning_tokens"]
+        )
+        print(f"DEBUG: Created output usage: {output.usage}")
 
         return output, buffer
 
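One subtlety in the LLMUsage construction above: because timing_stats["time_to_first_token"] now starts as None, the `or 0.0` coerces an unset value back to a number before it reaches LLMUsage. A one-line illustration of the coercion:

# `x or 0.0` maps both None and 0.0 to 0.0; any positive latency passes through unchanged.
for value in (None, 0.0, 0.37):
    print(value, "->", value or 0.0)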
{inferencesh-0.2.23.dist-info → inferencesh-0.2.24.dist-info}/RECORD
CHANGED
@@ -2,13 +2,13 @@ inferencesh/__init__.py,sha256=WdADtOhfa3HDOunoE9HLFCTFlXRykYstBIH1FpyWvj8,613
 inferencesh/models/__init__.py,sha256=FDwcdtT6c4hbRitymjmN-hZMlQa8RbKSftkZZyjtUXA,536
 inferencesh/models/base.py,sha256=4gZQRi8J7y9U6PrGD9pRIehd1MJVJAqGakPQDs2AKFM,3251
 inferencesh/models/file.py,sha256=5xnpypcRahM1YcEjj64rv9g2gTimxrZb41YT4r440hU,7393
-inferencesh/models/llm.py,sha256=
+inferencesh/models/llm.py,sha256=ycg20sSx3UJevjoTVukBZXwRyXY06tFZKAmlVp0MBzQ,23168
 inferencesh/utils/__init__.py,sha256=-xiD6uo2XzcrPAWFb_fUbaimmnW4KFKc-8IvBzaxNd4,148
 inferencesh/utils/download.py,sha256=7n5twvoNYDcFnKJyefImaj2YfzRI7vddQw4usZbj38c,1521
 inferencesh/utils/storage.py,sha256=E4J8emd4eFKdmdDgAqzz3TpaaDd3n0l8gYlMHuY8yIU,519
-inferencesh-0.2.
-inferencesh-0.2.
-inferencesh-0.2.
-inferencesh-0.2.
-inferencesh-0.2.
-inferencesh-0.2.
+inferencesh-0.2.24.dist-info/licenses/LICENSE,sha256=OsgqEWIh2el_QMj0y8O1A5Q5Dl-dxqqYbFE6fszuR4s,1086
+inferencesh-0.2.24.dist-info/METADATA,sha256=kQq9qN65EU9DS-SAQHm3Sw73yzz-FZVQX6ueHSgktW8,2757
+inferencesh-0.2.24.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+inferencesh-0.2.24.dist-info/entry_points.txt,sha256=6IC-fyozAqW3ljsMLGCXxJ0_ui2Jb-2fLHtoH1RTnEE,45
+inferencesh-0.2.24.dist-info/top_level.txt,sha256=TSMHg3T1ThMl1HGAWmzBClwOYH1ump5neof9BfHIwaA,12
+inferencesh-0.2.24.dist-info/RECORD,,
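To check an installed copy of inferencesh/models/llm.py against the new hash in this RECORD diff: wheel RECORD entries store the urlsafe-base64 SHA-256 digest of the file with padding stripped. A small verification sketch; record_hash and the path are illustrative, not part of the package.

import base64
import hashlib
from pathlib import Path

def record_hash(path: Path) -> str:
    """Compute a RECORD-style hash field (urlsafe base64 of SHA-256, '=' padding removed)."""
    digest = hashlib.sha256(path.read_bytes()).digest()
    return "sha256=" + base64.urlsafe_b64encode(digest).rstrip(b"=").decode("ascii")

target = Path("inferencesh/models/llm.py")  # point this at an installed copy of the module
if target.exists():
    print(record_hash(target))
    # Expected for 0.2.24: sha256=ycg20sSx3UJevjoTVukBZXwRyXY06tFZKAmlVp0MBzQ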