inferencesh 0.2.31__tar.gz → 0.2.32__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of inferencesh has been flagged as potentially problematic.
Files changed (21)
  1. {inferencesh-0.2.31/src/inferencesh.egg-info → inferencesh-0.2.32}/PKG-INFO +1 -1
  2. {inferencesh-0.2.31 → inferencesh-0.2.32}/pyproject.toml +1 -1
  3. {inferencesh-0.2.31 → inferencesh-0.2.32}/src/inferencesh/models/llm.py +40 -16
  4. {inferencesh-0.2.31 → inferencesh-0.2.32/src/inferencesh.egg-info}/PKG-INFO +1 -1
  5. {inferencesh-0.2.31 → inferencesh-0.2.32}/LICENSE +0 -0
  6. {inferencesh-0.2.31 → inferencesh-0.2.32}/README.md +0 -0
  7. {inferencesh-0.2.31 → inferencesh-0.2.32}/setup.cfg +0 -0
  8. {inferencesh-0.2.31 → inferencesh-0.2.32}/setup.py +0 -0
  9. {inferencesh-0.2.31 → inferencesh-0.2.32}/src/inferencesh/__init__.py +0 -0
  10. {inferencesh-0.2.31 → inferencesh-0.2.32}/src/inferencesh/models/__init__.py +0 -0
  11. {inferencesh-0.2.31 → inferencesh-0.2.32}/src/inferencesh/models/base.py +0 -0
  12. {inferencesh-0.2.31 → inferencesh-0.2.32}/src/inferencesh/models/file.py +0 -0
  13. {inferencesh-0.2.31 → inferencesh-0.2.32}/src/inferencesh/utils/__init__.py +0 -0
  14. {inferencesh-0.2.31 → inferencesh-0.2.32}/src/inferencesh/utils/download.py +0 -0
  15. {inferencesh-0.2.31 → inferencesh-0.2.32}/src/inferencesh/utils/storage.py +0 -0
  16. {inferencesh-0.2.31 → inferencesh-0.2.32}/src/inferencesh.egg-info/SOURCES.txt +0 -0
  17. {inferencesh-0.2.31 → inferencesh-0.2.32}/src/inferencesh.egg-info/dependency_links.txt +0 -0
  18. {inferencesh-0.2.31 → inferencesh-0.2.32}/src/inferencesh.egg-info/entry_points.txt +0 -0
  19. {inferencesh-0.2.31 → inferencesh-0.2.32}/src/inferencesh.egg-info/requires.txt +0 -0
  20. {inferencesh-0.2.31 → inferencesh-0.2.32}/src/inferencesh.egg-info/top_level.txt +0 -0
  21. {inferencesh-0.2.31 → inferencesh-0.2.32}/tests/test_sdk.py +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: inferencesh
-Version: 0.2.31
+Version: 0.2.32
 Summary: inference.sh Python SDK
 Author: Inference Shell Inc.
 Author-email: "Inference Shell Inc." <hello@inference.sh>
pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "inferencesh"
-version = "0.2.31"
+version = "0.2.32"
 description = "inference.sh Python SDK"
 authors = [
     {name = "Inference Shell Inc.", email = "hello@inference.sh"},
src/inferencesh/models/llm.py
@@ -238,7 +238,13 @@ def build_messages(
         image = images[0] if images else None  # TODO: handle multiple images
         return ContextMessage(role=messages[0].role, text=text, image=image)
 
-    user_msg = ContextMessage(role=ContextMessageRole.USER, text=input_data.text, image=input_data.image)
+    user_input_text = ""
+    if hasattr(input_data, "text"):
+        user_input_text = transform_user_message(input_data.text) if transform_user_message else input_data.text
+    user_input_image = None
+    if hasattr(input_data, "image"):
+        user_input_image = input_data.image
+    user_msg = ContextMessage(role=ContextMessageRole.USER, text=user_input_text, image=user_input_image)
 
     input_data.context.append(user_msg)
 
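The build_messages change above swaps direct attribute access for hasattr guards and routes the user text through an optional transform_user_message hook, so input schemas that lack a text or image field no longer raise AttributeError. A minimal sketch of that defensive pattern, where Input is a hypothetical stand-in for input_data's type (ContextMessage, ContextMessageRole, and transform_user_message mirror names from the diff):

from dataclasses import dataclass, field
from enum import Enum
from typing import Callable, List, Optional

class ContextMessageRole(str, Enum):
    USER = "user"

@dataclass
class ContextMessage:
    role: ContextMessageRole
    text: str = ""
    image: Optional[str] = None

@dataclass
class Input:  # hypothetical stand-in; note: no `image` attribute at all
    text: str = "hello"
    context: List[ContextMessage] = field(default_factory=list)

def append_user_message(input_data, transform_user_message: Optional[Callable[[str], str]] = None) -> None:
    # hasattr/getattr guards keep schemas lacking `text`/`image` from raising
    # AttributeError, which the old one-line constructor would have done
    user_input_text = ""
    if hasattr(input_data, "text"):
        user_input_text = transform_user_message(input_data.text) if transform_user_message else input_data.text
    user_input_image = getattr(input_data, "image", None)
    input_data.context.append(
        ContextMessage(role=ContextMessageRole.USER, text=user_input_text, image=user_input_image)
    )

data = Input()
append_user_message(data, transform_user_message=str.strip)
assert data.context[0].text == "hello" and data.context[0].image is None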
@@ -585,7 +591,7 @@ def stream_generate(
     output_cls: type[BaseLLMOutput] = LLMOutput,
 ) -> Generator[BaseLLMOutput, None, None]:
     """Stream generate from LLaMA.cpp model with timing and usage tracking."""
-
+
     # Create queues for communication between threads
     response_queue = Queue()
     error_queue = Queue()
@@ -617,8 +623,6 @@ def stream_generate(
         completion = model.create_chat_completion(**completion_kwargs)
 
         for chunk in completion:
-            if verbose:
-                print(chunk)
             response_queue.put(("chunk", chunk))
             # Update keep-alive timestamp
             keep_alive_queue.put(("alive", time.time()))
@@ -627,7 +631,9 @@ def stream_generate(
         response_queue.put(("done", None))
 
     except Exception as e:
-        error_queue.put(e)
+        # Preserve the full exception with traceback
+        import sys
+        error_queue.put((e, sys.exc_info()[2]))
         response_queue.put(("error", str(e)))
 
     with timing_context() as timing:
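The worker's except block now ships the traceback object alongside the exception, so the consuming side can re-raise with the worker's original stack instead of a bare exception that points at the queue. A small self-contained sketch of that handoff, assuming the producer runs on its own thread as in stream_generate:

import sys
import threading
from queue import Queue

error_queue: Queue = Queue()

def worker() -> None:
    try:
        raise ValueError("boom inside worker")
    except Exception as e:
        # ship (exception, traceback) so the consumer can rebuild the stack
        error_queue.put((e, sys.exc_info()[2]))

t = threading.Thread(target=worker)
t.start()
t.join()

exc, tb = error_queue.get()
try:
    raise exc.with_traceback(tb)  # traceback now points back into worker()
except ValueError as e:
    print(f"re-raised with original traceback: {e}")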
@@ -657,14 +663,22 @@ def stream_generate(
                 raise RuntimeError(f"Model failed to initialize within {init_timeout} seconds")
 
             while True:
-                # Check for errors
+                # Check for errors - now with proper exception chaining
                 if not error_queue.empty():
-                    raise error_queue.get()
+                    exc, tb = error_queue.get()
+                    if isinstance(exc, Exception):
+                        raise exc.with_traceback(tb)
+                    else:
+                        raise RuntimeError(f"Unknown error in worker thread: {exc}")
 
                 # Check keep-alive
-                while not keep_alive_queue.empty():
-                    _, timestamp = keep_alive_queue.get_nowait()
-                    last_activity = timestamp
+                try:
+                    while not keep_alive_queue.empty():
+                        _, timestamp = keep_alive_queue.get_nowait()
+                        last_activity = timestamp
+                except Empty:
+                    # Ignore empty queue - this is expected
+                    pass
 
                 # Check for timeout
                 if time.time() - last_activity > chunk_timeout:
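The try/except Empty added around the keep-alive drain guards a real race: Queue.empty() is only a snapshot, and get_nowait() can still raise queue.Empty if another consumer drains the queue between the two calls. A condensed sketch of the guarded drain (Empty comes from the stdlib queue module, which the diff assumes is already imported in llm.py):

from queue import Empty, Queue

keep_alive_queue: Queue = Queue()
keep_alive_queue.put(("alive", 1700000000.0))
last_activity = 0.0

try:
    while not keep_alive_queue.empty():
        # empty() may lie under contention; get_nowait() is the real test
        _, timestamp = keep_alive_queue.get_nowait()
        last_activity = timestamp
except Empty:
    pass  # expected under contention; the next poll simply retries

print(last_activity)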
@@ -677,12 +691,17 @@ def stream_generate(
                     continue
 
                 if msg_type == "error":
+                    # If we get an error message but no exception in error_queue,
+                    # create a new error
                     raise RuntimeError(f"Generation error: {data}")
                 elif msg_type == "done":
                     break
 
                 chunk = data
 
+                if verbose:
+                    print(chunk)
+
                 # Mark first token time
                 if not timing.first_token_time:
                     timing.mark_first_token()
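Together with the earlier hunk that deleted the print from the producer loop, this moves verbose chunk logging onto the consuming side, after the chunk is dequeued. A stripped-down sketch of the ("chunk" | "done" | "error") message protocol the two threads share, with hypothetical string chunks standing in for llama.cpp completion dicts:

import threading
from queue import Queue

response_queue: Queue = Queue()

def producer() -> None:
    # generation thread: emit chunks, then a terminal "done" or "error"
    try:
        for chunk in ("hel", "lo"):
            response_queue.put(("chunk", chunk))
        response_queue.put(("done", None))
    except Exception as e:
        response_queue.put(("error", str(e)))

threading.Thread(target=producer).start()

verbose = True
while True:
    msg_type, data = response_queue.get(timeout=5.0)
    if msg_type == "error":
        raise RuntimeError(f"Generation error: {data}")
    if msg_type == "done":
        break
    if verbose:
        print(data)  # logging now happens consumer-side, as in the diff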
@@ -700,12 +719,17 @@ def stream_generate(
                     break
 
             # Wait for generation thread to finish
-            generation_thread.join(timeout=5.0)  # Increased timeout to 5 seconds
             if generation_thread.is_alive():
-                # Thread didn't finish - this shouldn't happen normally
-                # but we handle it gracefully
-                raise RuntimeError("Generation thread failed to finish")
+                generation_thread.join(timeout=5.0)  # Increased timeout to 5 seconds
+                if generation_thread.is_alive():
+                    # Thread didn't finish - this shouldn't happen normally
+                    raise RuntimeError("Generation thread failed to finish")
 
         except Exception as e:
-            # Ensure any error is properly propagated
-            raise e
+            # Check if there's a thread error we should chain with
+            if not error_queue.empty():
+                thread_exc, thread_tb = error_queue.get()
+                if isinstance(thread_exc, Exception):
+                    raise e from thread_exc
+            # If no thread error, raise the original exception
+            raise
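The rewritten except clause uses `raise e from thread_exc`, so a consumer-side failure (for example a chunk timeout) carries the worker's exception as its __cause__ instead of discarding it; a bare `raise` then preserves the original traceback when there is nothing to chain. A small sketch of the chaining behavior, with a hypothetical worker_error standing in for the exception pulled off error_queue:

worker_error = ValueError("worker failed first")

try:
    raise TimeoutError("no chunk within chunk_timeout")
except TimeoutError as e:
    try:
        raise e from worker_error  # chain the root cause explicitly
    except TimeoutError as chained:
        # the printed traceback would show worker_error under
        # "The above exception was the direct cause of ..."
        assert chained.__cause__ is worker_error
        print(f"cause: {chained.__cause__!r}")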
src/inferencesh.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: inferencesh
-Version: 0.2.31
+Version: 0.2.32
 Summary: inference.sh Python SDK
 Author: Inference Shell Inc.
 Author-email: "Inference Shell Inc." <hello@inference.sh>