inferencesh 0.2.23.tar.gz → 0.2.25.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of inferencesh might be problematic.

Files changed (21)
  1. {inferencesh-0.2.23/src/inferencesh.egg-info → inferencesh-0.2.25}/PKG-INFO +1 -1
  2. {inferencesh-0.2.23 → inferencesh-0.2.25}/pyproject.toml +1 -1
  3. {inferencesh-0.2.23 → inferencesh-0.2.25}/src/inferencesh/models/llm.py +55 -29
  4. {inferencesh-0.2.23 → inferencesh-0.2.25/src/inferencesh.egg-info}/PKG-INFO +1 -1
  5. {inferencesh-0.2.23 → inferencesh-0.2.25}/LICENSE +0 -0
  6. {inferencesh-0.2.23 → inferencesh-0.2.25}/README.md +0 -0
  7. {inferencesh-0.2.23 → inferencesh-0.2.25}/setup.cfg +0 -0
  8. {inferencesh-0.2.23 → inferencesh-0.2.25}/setup.py +0 -0
  9. {inferencesh-0.2.23 → inferencesh-0.2.25}/src/inferencesh/__init__.py +0 -0
  10. {inferencesh-0.2.23 → inferencesh-0.2.25}/src/inferencesh/models/__init__.py +0 -0
  11. {inferencesh-0.2.23 → inferencesh-0.2.25}/src/inferencesh/models/base.py +0 -0
  12. {inferencesh-0.2.23 → inferencesh-0.2.25}/src/inferencesh/models/file.py +0 -0
  13. {inferencesh-0.2.23 → inferencesh-0.2.25}/src/inferencesh/utils/__init__.py +0 -0
  14. {inferencesh-0.2.23 → inferencesh-0.2.25}/src/inferencesh/utils/download.py +0 -0
  15. {inferencesh-0.2.23 → inferencesh-0.2.25}/src/inferencesh/utils/storage.py +0 -0
  16. {inferencesh-0.2.23 → inferencesh-0.2.25}/src/inferencesh.egg-info/SOURCES.txt +0 -0
  17. {inferencesh-0.2.23 → inferencesh-0.2.25}/src/inferencesh.egg-info/dependency_links.txt +0 -0
  18. {inferencesh-0.2.23 → inferencesh-0.2.25}/src/inferencesh.egg-info/entry_points.txt +0 -0
  19. {inferencesh-0.2.23 → inferencesh-0.2.25}/src/inferencesh.egg-info/requires.txt +0 -0
  20. {inferencesh-0.2.23 → inferencesh-0.2.25}/src/inferencesh.egg-info/top_level.txt +0 -0
  21. {inferencesh-0.2.23 → inferencesh-0.2.25}/tests/test_sdk.py +0 -0
{inferencesh-0.2.23/src/inferencesh.egg-info → inferencesh-0.2.25}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: inferencesh
-Version: 0.2.23
+Version: 0.2.25
 Summary: inference.sh Python SDK
 Author: Inference Shell Inc.
 Author-email: "Inference Shell Inc." <hello@inference.sh>
{inferencesh-0.2.23 → inferencesh-0.2.25}/pyproject.toml

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "inferencesh"
-version = "0.2.23"
+version = "0.2.25"
 description = "inference.sh Python SDK"
 authors = [
     {name = "Inference Shell Inc.", email = "hello@inference.sh"},
{inferencesh-0.2.23 → inferencesh-0.2.25}/src/inferencesh/models/llm.py

@@ -10,7 +10,6 @@ import base64
 from .base import BaseAppInput, BaseAppOutput
 from .file import File
 
-
 class ContextMessageRole(str, Enum):
     USER = "user"
     ASSISTANT = "assistant"
@@ -116,7 +115,7 @@ def timing_context():
     class TimingInfo:
         def __init__(self):
             self.start_time = time.time()
-            self.first_token_time = 0
+            self.first_token_time = None
            self.reasoning_start_time = None
            self.total_reasoning_time = 0.0
            self.reasoning_tokens = 0
@@ -140,12 +139,17 @@ def timing_context():
 
         @property
         def stats(self):
-            end_time = time.time()
+            current_time = time.time()
             if self.first_token_time is None:
-                self.first_token_time = end_time
+                return {
+                    "time_to_first_token": 0.0,
+                    "generation_time": 0.0,
+                    "reasoning_time": self.total_reasoning_time,
+                    "reasoning_tokens": self.reasoning_tokens
+                }
 
             time_to_first = self.first_token_time - self.start_time
-            generation_time = end_time - self.first_token_time
+            generation_time = current_time - self.first_token_time
 
             return {
                 "time_to_first_token": time_to_first,
@@ -216,7 +220,7 @@ class StreamResponse:
         self.tool_calls = None  # Changed from [] to None
         self.finish_reason = None
         self.timing_stats = {
-            "time_to_first_token": 0.0,
+            "time_to_first_token": None,  # Changed from 0.0 to None
             "generation_time": 0.0,
             "reasoning_time": 0.0,
             "reasoning_tokens": 0,
@@ -232,8 +236,15 @@ class StreamResponse:
     def update_from_chunk(self, chunk: Dict[str, Any], timing: Any) -> None:
         """Update response state from a chunk."""
         # Update usage stats if present
-        if "usage" in chunk and chunk["usage"] is not None:
-            self.usage_stats.update(chunk["usage"])
+        if "usage" in chunk:
+            usage = chunk["usage"]
+            if usage is not None:
+                # Update usage stats preserving existing values if not provided
+                self.usage_stats.update({
+                    "prompt_tokens": usage.get("prompt_tokens", self.usage_stats["prompt_tokens"]),
+                    "completion_tokens": usage.get("completion_tokens", self.usage_stats["completion_tokens"]),
+                    "total_tokens": usage.get("total_tokens", self.usage_stats["total_tokens"])
+                })
 
         # Get the delta from the chunk
         delta = chunk.get("choices", [{}])[0]
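The per-field merge above means a usage payload that omits a counter keeps the previously accumulated value, and stray keys in the payload are no longer copied into usage_stats the way a bare dict.update would. A hedged illustration with a hypothetical chunk payload:

    usage_stats = {"prompt_tokens": 12, "completion_tokens": 7, "total_tokens": 19}

    # Hypothetical later chunk that reports only completion_tokens:
    usage = {"completion_tokens": 9}

    # Per-field merge: absent keys fall back to the values already accumulated.
    usage_stats.update({
        "prompt_tokens": usage.get("prompt_tokens", usage_stats["prompt_tokens"]),
        "completion_tokens": usage.get("completion_tokens", usage_stats["completion_tokens"]),
        "total_tokens": usage.get("total_tokens", usage_stats["total_tokens"]),
    })
    assert usage_stats == {"prompt_tokens": 12, "completion_tokens": 9, "total_tokens": 19}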
@@ -245,23 +256,34 @@ class StreamResponse:
             if message.get("tool_calls"):
                 self._update_tool_calls(message["tool_calls"])
             self.finish_reason = delta.get("finish_reason")
+            if self.finish_reason:
+                self.usage_stats["stop_reason"] = self.finish_reason
         elif "delta" in delta:
             delta_content = delta["delta"]
             self.content = delta_content.get("content", "")
             if delta_content.get("tool_calls"):
                 self._update_tool_calls(delta_content["tool_calls"])
             self.finish_reason = delta.get("finish_reason")
+            if self.finish_reason:
+                self.usage_stats["stop_reason"] = self.finish_reason
 
-        # Update timing stats while preserving tokens_per_second
+        # Update timing stats
         timing_stats = timing.stats
-        generation_time = timing_stats["generation_time"]
-        completion_tokens = self.usage_stats.get("completion_tokens", 0)
-        tokens_per_second = (completion_tokens / generation_time) if generation_time > 0 and completion_tokens > 0 else 0.0
+        if self.timing_stats["time_to_first_token"] is None:
+            self.timing_stats["time_to_first_token"] = timing_stats["time_to_first_token"]
 
         self.timing_stats.update({
-            **timing_stats,
-            "tokens_per_second": tokens_per_second
+            "generation_time": timing_stats["generation_time"],
+            "reasoning_time": timing_stats["reasoning_time"],
+            "reasoning_tokens": timing_stats["reasoning_tokens"]
         })
+
+        # Calculate tokens per second only if we have valid completion tokens and generation time
+        if self.usage_stats["completion_tokens"] > 0 and timing_stats["generation_time"] > 0:
+            self.timing_stats["tokens_per_second"] = (
+                self.usage_stats["completion_tokens"] / timing_stats["generation_time"]
+            )
+
 
     def _update_tool_calls(self, new_tool_calls: List[Dict[str, Any]]) -> None:
         """Update tool calls, handling both full and partial updates."""
@@ -292,29 +314,33 @@ class StreamResponse:
                 current_tool["function"]["arguments"] += func_delta["arguments"]
 
     def has_updates(self) -> bool:
-        """Check if this response has any content or tool call updates."""
-        return bool(self.content) or bool(self.tool_calls)
+        """Check if this response has any content, tool call, or usage updates."""
+        has_content = bool(self.content)
+        has_tool_calls = bool(self.tool_calls)
+        has_usage = self.usage_stats["prompt_tokens"] > 0 or self.usage_stats["completion_tokens"] > 0
+        has_finish = bool(self.finish_reason)
+
+        return has_content or has_tool_calls or has_usage or has_finish
 
     def to_output(self, buffer: str, transformer: Any) -> LLMOutput:
-        """Convert current state to LLMOutput."""
+        """Convert current state to LLMOutput."""
         buffer, output, _ = transformer(self.content, buffer)
 
         # Add tool calls if present
         if self.tool_calls:
             output.tool_calls = self.tool_calls
 
-        # Add usage stats if this is final
-        if self.finish_reason:
-            output.usage = LLMUsage(
-                stop_reason=self.usage_stats["stop_reason"],
-                time_to_first_token=self.timing_stats["time_to_first_token"],
-                tokens_per_second=self.timing_stats["tokens_per_second"],
-                prompt_tokens=self.usage_stats["prompt_tokens"],
-                completion_tokens=self.usage_stats["completion_tokens"],
-                total_tokens=self.usage_stats["total_tokens"],
-                reasoning_time=self.timing_stats["reasoning_time"],
-                reasoning_tokens=self.timing_stats["reasoning_tokens"]
-            )
+        # Add usage stats
+        output.usage = LLMUsage(
+            stop_reason=self.usage_stats["stop_reason"],
+            time_to_first_token=self.timing_stats["time_to_first_token"] or 0.0,
+            tokens_per_second=self.timing_stats["tokens_per_second"],
+            prompt_tokens=self.usage_stats["prompt_tokens"],
+            completion_tokens=self.usage_stats["completion_tokens"],
+            total_tokens=self.usage_stats["total_tokens"],
+            reasoning_time=self.timing_stats["reasoning_time"],
+            reasoning_tokens=self.timing_stats["reasoning_tokens"]
+        )
 
         return output, buffer
 
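With the if self.finish_reason gate removed, to_output attaches an LLMUsage to every streamed output rather than only the final one, and the or 0.0 fallback normalizes a time_to_first_token that has not been latched yet. One caveat of that idiom, shown in isolation:

    # `x or 0.0` maps both None and 0.0 to 0.0, so consumers of LLMUsage
    # cannot distinguish "no token yet" from a genuinely zero latency.
    for value in (None, 0.0, 0.42):
        print(value, "->", value or 0.0)
    # None -> 0.0
    # 0.0  -> 0.0
    # 0.42 -> 0.42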
{inferencesh-0.2.23 → inferencesh-0.2.25/src/inferencesh.egg-info}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: inferencesh
-Version: 0.2.23
+Version: 0.2.25
 Summary: inference.sh Python SDK
 Author: Inference Shell Inc.
 Author-email: "Inference Shell Inc." <hello@inference.sh>