inferencesh 0.2.16__tar.gz → 0.2.18__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of inferencesh might be problematic.

Files changed (21)
  1. {inferencesh-0.2.16/src/inferencesh.egg-info → inferencesh-0.2.18}/PKG-INFO +1 -1
  2. {inferencesh-0.2.16 → inferencesh-0.2.18}/pyproject.toml +1 -1
  3. {inferencesh-0.2.16 → inferencesh-0.2.18}/src/inferencesh/models/llm.py +36 -9
  4. {inferencesh-0.2.16 → inferencesh-0.2.18/src/inferencesh.egg-info}/PKG-INFO +1 -1
  5. {inferencesh-0.2.16 → inferencesh-0.2.18}/LICENSE +0 -0
  6. {inferencesh-0.2.16 → inferencesh-0.2.18}/README.md +0 -0
  7. {inferencesh-0.2.16 → inferencesh-0.2.18}/setup.cfg +0 -0
  8. {inferencesh-0.2.16 → inferencesh-0.2.18}/setup.py +0 -0
  9. {inferencesh-0.2.16 → inferencesh-0.2.18}/src/inferencesh/__init__.py +0 -0
  10. {inferencesh-0.2.16 → inferencesh-0.2.18}/src/inferencesh/models/__init__.py +0 -0
  11. {inferencesh-0.2.16 → inferencesh-0.2.18}/src/inferencesh/models/base.py +0 -0
  12. {inferencesh-0.2.16 → inferencesh-0.2.18}/src/inferencesh/models/file.py +0 -0
  13. {inferencesh-0.2.16 → inferencesh-0.2.18}/src/inferencesh/utils/__init__.py +0 -0
  14. {inferencesh-0.2.16 → inferencesh-0.2.18}/src/inferencesh/utils/download.py +0 -0
  15. {inferencesh-0.2.16 → inferencesh-0.2.18}/src/inferencesh/utils/storage.py +0 -0
  16. {inferencesh-0.2.16 → inferencesh-0.2.18}/src/inferencesh.egg-info/SOURCES.txt +0 -0
  17. {inferencesh-0.2.16 → inferencesh-0.2.18}/src/inferencesh.egg-info/dependency_links.txt +0 -0
  18. {inferencesh-0.2.16 → inferencesh-0.2.18}/src/inferencesh.egg-info/entry_points.txt +0 -0
  19. {inferencesh-0.2.16 → inferencesh-0.2.18}/src/inferencesh.egg-info/requires.txt +0 -0
  20. {inferencesh-0.2.16 → inferencesh-0.2.18}/src/inferencesh.egg-info/top_level.txt +0 -0
  21. {inferencesh-0.2.16 → inferencesh-0.2.18}/tests/test_sdk.py +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: inferencesh
-Version: 0.2.16
+Version: 0.2.18
 Summary: inference.sh Python SDK
 Author: Inference Shell Inc.
 Author-email: "Inference Shell Inc." <hello@inference.sh>
pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "inferencesh"
-version = "0.2.16"
+version = "0.2.18"
 description = "inference.sh Python SDK"
 authors = [
     {name = "Inference Shell Inc.", email = "hello@inference.sh"},
src/inferencesh/models/llm.py
@@ -89,6 +89,8 @@ class LLMInput(BaseAppInput):
 
     # Model specific flags
     reasoning: bool = Field(default=False)
+
+    tools: List[Dict[str, Any]] = Field(default=[])
 
 class LLMUsage(BaseAppOutput):
     stop_reason: str = ""
@@ -104,6 +106,7 @@ class LLMUsage(BaseAppOutput):
 class LLMOutput(BaseAppOutput):
     response: str
     reasoning: Optional[str] = None
+    tool_calls: Optional[List[Dict[str, Any]]] = None
    usage: Optional[LLMUsage] = None
 
 
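The new tools input field and tool_calls output field are typed only as lists of dicts, so the diff itself does not pin down their schema. A minimal sketch of what they might hold, assuming the OpenAI-style function-calling format that llama-cpp-python's create_chat_completion accepts; the tool name and argument values below are purely illustrative:

# Hypothetical tool definition passed via LLMInput.tools (schema assumed, not shown in this diff)
weather_tool = {
    "type": "function",
    "function": {
        "name": "get_weather",
        "description": "Look up the current weather for a city.",
        "parameters": {
            "type": "object",
            "properties": {"city": {"type": "string"}},
            "required": ["city"],
        },
    },
}

# A tool invocation surfaced on LLMOutput.tool_calls would then look roughly like:
# [{"id": "call_0", "type": "function",
#   "function": {"name": "get_weather", "arguments": "{\"city\": \"Berlin\"}"}}]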
@@ -228,6 +231,7 @@ class ResponseTransformer:
     def __init__(self, output_cls: type[LLMOutput] = LLMOutput):
         self.state = ResponseState()
         self.output_cls = output_cls
+        self.timing = None  # Will be set by stream_generate
 
     def clean_text(self, text: str) -> str:
         """Clean common tokens from the text and apply model-specific cleaning.
@@ -264,10 +268,17 @@ class ResponseTransformer:
             text: Cleaned text to process for reasoning
         """
         # Default implementation for <think> style reasoning
-        if "<think>" in text:
+        if "<think>" in text and not self.state.state_changes["reasoning_started"]:
             self.state.state_changes["reasoning_started"] = True
-        if "</think>" in text:
+            if self.timing:
+                self.timing.start_reasoning()
+
+        if "</think>" in text and not self.state.state_changes["reasoning_ended"]:
             self.state.state_changes["reasoning_ended"] = True
+            if self.timing:
+                # Estimate token count from character count (rough approximation)
+                token_count = len(self.state.buffer.split("<think>")[1].split("</think>")[0]) // 4
+                self.timing.end_reasoning(token_count)
 
         if "<think>" in self.state.buffer:
             parts = self.state.buffer.split("</think>", 1)
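process_reasoning now calls start_reasoning() and end_reasoning(token_count) on whatever timing_context() yields; that context manager lives elsewhere in llm.py and is not part of this diff. A hypothetical stand-in that satisfies the interface used here, handy for exercising the transformer in isolation (all names are illustrative, not the SDK's real implementation):

import time
from contextlib import contextmanager

class FakeTiming:
    """Illustrative stand-in for the object yielded by timing_context()."""
    def __init__(self):
        self.first_token_time = None
        self._reasoning_start = None
        self.reasoning_time = 0.0
        self.reasoning_tokens = 0

    def mark_first_token(self):
        self.first_token_time = time.monotonic()

    def start_reasoning(self):
        self._reasoning_start = time.monotonic()

    def end_reasoning(self, token_count):
        if self._reasoning_start is not None:
            self.reasoning_time += time.monotonic() - self._reasoning_start
        self.reasoning_tokens = token_count

@contextmanager
def fake_timing_context():
    # Mirrors how stream_generate uses the real context manager.
    yield FakeTiming()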
@@ -354,6 +365,8 @@ class ResponseTransformer:
 def stream_generate(
     model: Any,
     messages: List[Dict[str, Any]],
+    tools: List[Dict[str, Any]],
+    tool_choice: Dict[str, Any],
     transformer: ResponseTransformer,
     temperature: float = 0.7,
     top_p: float = 0.95,
@@ -371,7 +384,7 @@ def stream_generate(
         max_tokens: Maximum tokens to generate
         stop: Optional list of stop sequences
     """
-    response_queue: Queue[Optional[tuple[str, dict]]] = Queue()
+    response_queue: Queue[Optional[tuple[str, dict, Optional[List[Dict[str, Any]]]]]] = Queue()
     thread_exception = None
     usage_stats = {
         "prompt_tokens": 0,
@@ -381,11 +394,16 @@ def stream_generate(
     }
 
     with timing_context() as timing:
+        # Set timing context in transformer
+        transformer.timing = timing
+
         def generation_thread():
             nonlocal thread_exception, usage_stats
             try:
                 completion = model.create_chat_completion(
                     messages=messages,
+                    tools=tools,
+                    tool_choice=tool_choice,
                     stream=True,
                     temperature=temperature,
                     top_p=top_p,
@@ -400,18 +418,23 @@ def stream_generate(
                     delta = chunk.get("choices", [{}])[0]
                     content = None
                     finish_reason = None
+                    tool_calls = None
 
                     if "message" in delta:
-                        content = delta["message"].get("content", "")
+                        message = delta["message"]
+                        content = message.get("content", "")
+                        tool_calls = message.get("tool_calls")
                         finish_reason = delta.get("finish_reason")
                     elif "delta" in delta:
-                        content = delta["delta"].get("content", "")
+                        delta_content = delta["delta"]
+                        content = delta_content.get("content", "")
+                        tool_calls = delta_content.get("tool_calls")
                         finish_reason = delta.get("finish_reason")
 
-                    if content:
+                    if content or tool_calls:
                         if not timing.first_token_time:
                             timing.mark_first_token()
-                        response_queue.put((content, {}))
+                        response_queue.put((content or "", {}, tool_calls))
 
                     if finish_reason:
                         usage_stats["stop_reason"] = finish_reason
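The producer thread now pulls tool_calls out of both chunk layouts it already handled. Reduced to the fields this code actually reads, the two layouts are roughly as follows (inferred from the code above, not from backend documentation):

# "message" branch: {"message": {"content": "...", "tool_calls": [...]}, "finish_reason": "stop"}
# "delta" branch:   {"delta": {"content": "...", "tool_calls": [...]}, "finish_reason": None}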
@@ -427,7 +450,7 @@ def stream_generate(
                     "tokens_per_second": tokens_per_second,
                     "reasoning_time": timing_stats["reasoning_time"],
                     "reasoning_tokens": timing_stats["reasoning_tokens"]
-                }))
+                }, None))
 
         thread = Thread(target=generation_thread, daemon=True)
         thread.start()
@@ -440,7 +463,7 @@ def stream_generate(
             if thread_exception:
                 raise thread_exception
 
-            piece, timing_stats = result
+            piece, timing_stats, tool_calls = result
             if piece is None:
                 # Final yield with complete usage stats
                 usage = LLMUsage(
@@ -456,10 +479,14 @@ def stream_generate(
 
                 buffer, output, _ = transformer(piece or "", buffer)
                 output.usage = usage
+                if tool_calls:
+                    output.tool_calls = tool_calls
                 yield output
                 break
 
             buffer, output, _ = transformer(piece, buffer)
+            if tool_calls:
+                output.tool_calls = tool_calls
             yield output
 
     except Exception as e:
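Putting the llm.py changes together, a call site might look roughly like the sketch below; the model object, tool definition, and tool_choice value are placeholders, and whether tool_choice may be empty or must name a tool is not settled by this diff:

# Hypothetical caller; `llm` is assumed to expose a llama-cpp-python style create_chat_completion,
# and weather_tool has the shape of the earlier sketch.
transformer = ResponseTransformer()
for output in stream_generate(
    model=llm,
    messages=[{"role": "user", "content": "What's the weather in Berlin?"}],
    tools=[weather_tool],
    tool_choice={"type": "function", "function": {"name": "get_weather"}},  # shape assumed
    transformer=transformer,
):
    if output.tool_calls:
        print("tool call:", output.tool_calls)
    elif output.response:
        print(output.response, end="")
    if output.usage:
        print("\nstop reason:", output.usage.stop_reason)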
src/inferencesh.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: inferencesh
-Version: 0.2.16
+Version: 0.2.18
 Summary: inference.sh Python SDK
 Author: Inference Shell Inc.
 Author-email: "Inference Shell Inc." <hello@inference.sh>