inferencesh 0.2.17__tar.gz → 0.2.18__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of inferencesh has been flagged as possibly problematic.

Files changed (21)
  1. {inferencesh-0.2.17/src/inferencesh.egg-info → inferencesh-0.2.18}/PKG-INFO +1 -1
  2. {inferencesh-0.2.17 → inferencesh-0.2.18}/pyproject.toml +1 -1
  3. {inferencesh-0.2.17 → inferencesh-0.2.18}/src/inferencesh/models/llm.py +23 -7
  4. {inferencesh-0.2.17 → inferencesh-0.2.18/src/inferencesh.egg-info}/PKG-INFO +1 -1
  5. {inferencesh-0.2.17 → inferencesh-0.2.18}/LICENSE +0 -0
  6. {inferencesh-0.2.17 → inferencesh-0.2.18}/README.md +0 -0
  7. {inferencesh-0.2.17 → inferencesh-0.2.18}/setup.cfg +0 -0
  8. {inferencesh-0.2.17 → inferencesh-0.2.18}/setup.py +0 -0
  9. {inferencesh-0.2.17 → inferencesh-0.2.18}/src/inferencesh/__init__.py +0 -0
  10. {inferencesh-0.2.17 → inferencesh-0.2.18}/src/inferencesh/models/__init__.py +0 -0
  11. {inferencesh-0.2.17 → inferencesh-0.2.18}/src/inferencesh/models/base.py +0 -0
  12. {inferencesh-0.2.17 → inferencesh-0.2.18}/src/inferencesh/models/file.py +0 -0
  13. {inferencesh-0.2.17 → inferencesh-0.2.18}/src/inferencesh/utils/__init__.py +0 -0
  14. {inferencesh-0.2.17 → inferencesh-0.2.18}/src/inferencesh/utils/download.py +0 -0
  15. {inferencesh-0.2.17 → inferencesh-0.2.18}/src/inferencesh/utils/storage.py +0 -0
  16. {inferencesh-0.2.17 → inferencesh-0.2.18}/src/inferencesh.egg-info/SOURCES.txt +0 -0
  17. {inferencesh-0.2.17 → inferencesh-0.2.18}/src/inferencesh.egg-info/dependency_links.txt +0 -0
  18. {inferencesh-0.2.17 → inferencesh-0.2.18}/src/inferencesh.egg-info/entry_points.txt +0 -0
  19. {inferencesh-0.2.17 → inferencesh-0.2.18}/src/inferencesh.egg-info/requires.txt +0 -0
  20. {inferencesh-0.2.17 → inferencesh-0.2.18}/src/inferencesh.egg-info/top_level.txt +0 -0
  21. {inferencesh-0.2.17 → inferencesh-0.2.18}/tests/test_sdk.py +0 -0
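
Apart from the version bump in the two PKG-INFO files and pyproject.toml, the only substantive change is src/inferencesh/models/llm.py (+23 -7), which threads tool-calling support through the streaming generation path, as the hunks below show.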
{inferencesh-0.2.17/src/inferencesh.egg-info → inferencesh-0.2.18}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: inferencesh
-Version: 0.2.17
+Version: 0.2.18
 Summary: inference.sh Python SDK
 Author: Inference Shell Inc.
 Author-email: "Inference Shell Inc." <hello@inference.sh>
{inferencesh-0.2.17 → inferencesh-0.2.18}/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "inferencesh"
-version = "0.2.17"
+version = "0.2.18"
 description = "inference.sh Python SDK"
 authors = [
     {name = "Inference Shell Inc.", email = "hello@inference.sh"},
{inferencesh-0.2.17 → inferencesh-0.2.18}/src/inferencesh/models/llm.py
@@ -89,6 +89,8 @@ class LLMInput(BaseAppInput):
 
     # Model specific flags
     reasoning: bool = Field(default=False)
+
+    tools: List[Dict[str, Any]] = Field(default=[])
 
 class LLMUsage(BaseAppOutput):
     stop_reason: str = ""
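
The new `tools` field is typed only as a bare list of dicts, so callers presumably supply tool definitions in the OpenAI-style function-calling schema that llama.cpp-compatible backends accept; the diff itself does not pin down the shape. A minimal sketch under that assumption (the `get_weather` tool is hypothetical, not part of the SDK):

# Hypothetical tool definition in the OpenAI-style schema; an assumption,
# since the SDK only types the field as List[Dict[str, Any]].
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Look up the current weather for a city.",
            "parameters": {
                "type": "object",
                "properties": {"city": {"type": "string"}},
                "required": ["city"],
            },
        },
    }
]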
@@ -104,6 +106,7 @@ class LLMUsage(BaseAppOutput):
 class LLMOutput(BaseAppOutput):
     response: str
     reasoning: Optional[str] = None
+    tool_calls: Optional[List[Dict[str, Any]]] = None
     usage: Optional[LLMUsage] = None
 
 
@@ -362,6 +365,8 @@ class ResponseTransformer:
 def stream_generate(
     model: Any,
     messages: List[Dict[str, Any]],
+    tools: List[Dict[str, Any]],
+    tool_choice: Dict[str, Any],
     transformer: ResponseTransformer,
     temperature: float = 0.7,
     top_p: float = 0.95,
@@ -379,7 +384,7 @@ def stream_generate(
         max_tokens: Maximum tokens to generate
         stop: Optional list of stop sequences
     """
-    response_queue: Queue[Optional[tuple[str, dict]]] = Queue()
+    response_queue: Queue[Optional[tuple[str, dict, Optional[List[Dict[str, Any]]]]]] = Queue()
     thread_exception = None
     usage_stats = {
         "prompt_tokens": 0,
@@ -397,6 +402,8 @@ def stream_generate(
        try:
            completion = model.create_chat_completion(
                messages=messages,
+                tools=tools,
+                tool_choice=tool_choice,
                stream=True,
                temperature=temperature,
                top_p=top_p,
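
`create_chat_completion(messages=..., tools=..., tool_choice=..., stream=True, ...)` matches the signature of llama-cpp-python's Llama.create_chat_completion, which is presumably the backend behind the duck-typed `model: Any`. A hedged sketch of the call under that assumption (the model path is made up):

from llama_cpp import Llama  # assumption: llama-cpp-python backend

llm = Llama(model_path="model.gguf")  # hypothetical GGUF file
completion = llm.create_chat_completion(
    messages=[{"role": "user", "content": "Weather in Paris?"}],
    tools=tools,  # OpenAI-style schema list, as sketched earlier
    tool_choice={"type": "function", "function": {"name": "get_weather"}},
    stream=True,
    temperature=0.7,
    top_p=0.95,
)
for chunk in completion:
    print(chunk["choices"][0])  # dict shapes as parsed in the next hunk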
@@ -411,18 +418,23 @@ def stream_generate(
                delta = chunk.get("choices", [{}])[0]
                content = None
                finish_reason = None
+                tool_calls = None
 
                if "message" in delta:
-                    content = delta["message"].get("content", "")
+                    message = delta["message"]
+                    content = message.get("content", "")
+                    tool_calls = message.get("tool_calls")
                    finish_reason = delta.get("finish_reason")
                elif "delta" in delta:
-                    content = delta["delta"].get("content", "")
+                    delta_content = delta["delta"]
+                    content = delta_content.get("content", "")
+                    tool_calls = delta_content.get("tool_calls")
                    finish_reason = delta.get("finish_reason")
 
-                if content:
+                if content or tool_calls:
                    if not timing.first_token_time:
                        timing.mark_first_token()
-                    response_queue.put((content, {}))
+                    response_queue.put((content or "", {}, tool_calls))
 
                if finish_reason:
                    usage_stats["stop_reason"] = finish_reason
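
The parser accepts two choice shapes: a complete "message" dict (non-streamed style) and an incremental "delta" dict (streamed style), either of which may now carry "tool_calls". Illustrative examples of both shapes, with made-up field values:

# Streamed-style choice: incremental "delta" dict.
delta_chunk = {
    "choices": [{
        "delta": {
            "content": None,
            "tool_calls": [{
                "index": 0,
                "id": "call_0",
                "type": "function",
                "function": {"name": "get_weather",
                             "arguments": '{"city": "Paris"}'},
            }],
        },
        "finish_reason": None,
    }]
}

# Message-style choice: a complete "message" dict in one chunk.
message_chunk = {
    "choices": [{
        "message": {"content": "It is sunny.", "tool_calls": None},
        "finish_reason": "stop",
    }]
}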
@@ -438,7 +450,7 @@ def stream_generate(
                "tokens_per_second": tokens_per_second,
                "reasoning_time": timing_stats["reasoning_time"],
                "reasoning_tokens": timing_stats["reasoning_tokens"]
-            }))
+            }, None))
 
    thread = Thread(target=generation_thread, daemon=True)
    thread.start()
@@ -451,7 +463,7 @@ def stream_generate(
        if thread_exception:
            raise thread_exception
 
-        piece, timing_stats = result
+        piece, timing_stats, tool_calls = result
        if piece is None:
            # Final yield with complete usage stats
            usage = LLMUsage(
@@ -467,10 +479,14 @@ def stream_generate(
 
            buffer, output, _ = transformer(piece or "", buffer)
            output.usage = usage
+            if tool_calls:
+                output.tool_calls = tool_calls
            yield output
            break
 
        buffer, output, _ = transformer(piece, buffer)
+        if tool_calls:
+            output.tool_calls = tool_calls
        yield output
 
    except Exception as e:
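
Downstream, every yielded LLMOutput can now carry the tool_calls observed on its chunk, alongside the usual text and final usage stats. A hedged usage sketch, assuming `model`, `transformer`, and the `tools` list from earlier are already set up:

# Illustrative consumer of the changed stream_generate; the forced
# tool_choice dict is an example value, per the new Dict[str, Any] parameter.
response_text = ""
tool_calls_seen = []

for output in stream_generate(
    model,
    messages=[{"role": "user", "content": "Weather in Paris?"}],
    tools=tools,
    tool_choice={"type": "function", "function": {"name": "get_weather"}},
    transformer=transformer,
):
    response_text += output.response
    if output.tool_calls:
        tool_calls_seen.extend(output.tool_calls)
    if output.usage:  # final chunk carries the usage stats
        print(output.usage.stop_reason)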
{inferencesh-0.2.17 → inferencesh-0.2.18/src/inferencesh.egg-info}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: inferencesh
-Version: 0.2.17
+Version: 0.2.18
 Summary: inference.sh Python SDK
 Author: Inference Shell Inc.
 Author-email: "Inference Shell Inc." <hello@inference.sh>