inferencesh-0.2.31-py3-none-any.whl → inferencesh-0.2.33-py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release: this version of inferencesh might be problematic.

inferencesh/models/llm.py CHANGED
@@ -229,7 +229,6 @@ def build_messages(
             return parts[0]["text"]
         raise ValueError("Image content requires multipart support")

-    multipart = any(m.image for m in input_data.context) or input_data.image is not None
     messages = [{"role": "system", "content": input_data.system_prompt}] if input_data.system_prompt is not None and input_data.system_prompt != "" else []

     def merge_messages(messages: List[ContextMessage]) -> ContextMessage:
@@ -238,7 +237,17 @@ def build_messages(
         image = images[0] if images else None # TODO: handle multiple images
         return ContextMessage(role=messages[0].role, text=text, image=image)

-    user_msg = ContextMessage(role=ContextMessageRole.USER, text=input_data.text, image=input_data.image)
+    user_input_text = ""
+    if hasattr(input_data, "text"):
+        user_input_text = transform_user_message(input_data.text) if transform_user_message else input_data.text
+
+    user_input_image = None
+    multipart = any(m.image for m in input_data.context)
+    if hasattr(input_data, "image"):
+        user_input_image = input_data.image
+        multipart = multipart or input_data.image is not None
+
+    user_msg = ContextMessage(role=ContextMessageRole.USER, text=user_input_text, image=user_input_image)

     input_data.context.append(user_msg)

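This hunk (together with the removal in the previous one, which relocates the `multipart` computation) replaces the unconditional `input_data.text` / `input_data.image` reads with `hasattr` guards, so input models lacking either field no longer raise `AttributeError`, and it threads an optional `transform_user_message` hook through the text path. A minimal sketch of the pattern, with stand-in types (the `SimpleNamespace` input and the helper name are illustrative, not part of the SDK):

```python
from types import SimpleNamespace
from typing import Callable, Optional

def extract_user_fields(input_data, transform_user_message: Optional[Callable[[str], str]] = None):
    """Illustrative helper: pull text/image out of an arbitrary input model."""
    text = ""
    if hasattr(input_data, "text"):  # tolerate input models without a text field
        text = transform_user_message(input_data.text) if transform_user_message else input_data.text

    image = None
    multipart = any(m.image for m in input_data.context)  # existing images force multipart
    if hasattr(input_data, "image"):  # tolerate input models without an image field
        image = input_data.image
        multipart = multipart or image is not None

    return text, image, multipart

# Example: a text-only input model with an uppercasing transform hook
inp = SimpleNamespace(text="hello", context=[])
print(extract_user_fields(inp, str.upper))  # ('HELLO', None, False)
```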
@@ -585,7 +594,7 @@ def stream_generate(
     output_cls: type[BaseLLMOutput] = LLMOutput,
 ) -> Generator[BaseLLMOutput, None, None]:
     """Stream generate from LLaMA.cpp model with timing and usage tracking."""
-
+
     # Create queues for communication between threads
     response_queue = Queue()
     error_queue = Queue()
@@ -617,8 +626,6 @@ def stream_generate(
             completion = model.create_chat_completion(**completion_kwargs)

             for chunk in completion:
-                if verbose:
-                    print(chunk)
                 response_queue.put(("chunk", chunk))
                 # Update keep-alive timestamp
                 keep_alive_queue.put(("alive", time.time()))
@@ -627,7 +634,9 @@ def stream_generate(
                 response_queue.put(("done", None))

         except Exception as e:
-            error_queue.put(e)
+            # Preserve the full exception with traceback
+            import sys
+            error_queue.put((e, sys.exc_info()[2]))
             response_queue.put(("error", str(e)))

     with timing_context() as timing:
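A bare `error_queue.put(e)` loses the point of failure: by the time the consumer re-raises, the traceback it shows is the consumer's, not the worker's. Shipping `(e, sys.exc_info()[2])` and re-raising with `with_traceback` keeps the worker's frames. A self-contained sketch of the pattern:

```python
import sys
import threading
from queue import Queue

error_queue: Queue = Queue()

def worker() -> None:
    try:
        1 / 0  # stand-in for model.create_chat_completion(...)
    except Exception as e:
        # sys.exc_info()[2] is the traceback of the exception being handled
        error_queue.put((e, sys.exc_info()[2]))

t = threading.Thread(target=worker)
t.start()
t.join()

if not error_queue.empty():
    exc, tb = error_queue.get()
    raise exc.with_traceback(tb)  # re-raises with the worker's frames preserved
```

In Python 3 the traceback is also reachable as `e.__traceback__`, so passing it explicitly through the queue is belt-and-braces rather than strictly required.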
@@ -657,14 +666,22 @@ def stream_generate(
                 raise RuntimeError(f"Model failed to initialize within {init_timeout} seconds")

             while True:
-                # Check for errors
+                # Check for errors - now with proper exception chaining
                 if not error_queue.empty():
-                    raise error_queue.get()
+                    exc, tb = error_queue.get()
+                    if isinstance(exc, Exception):
+                        raise exc.with_traceback(tb)
+                    else:
+                        raise RuntimeError(f"Unknown error in worker thread: {exc}")

                 # Check keep-alive
-                while not keep_alive_queue.empty():
-                    _, timestamp = keep_alive_queue.get_nowait()
-                    last_activity = timestamp
+                try:
+                    while not keep_alive_queue.empty():
+                        _, timestamp = keep_alive_queue.get_nowait()
+                        last_activity = timestamp
+                except Empty:
+                    # Ignore empty queue - this is expected
+                    pass

                 # Check for timeout
                 if time.time() - last_activity > chunk_timeout:
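The new `try/except Empty` wrapper covers the gap between `empty()` and `get_nowait()`: they are two separate operations, so with more than one consumer the queue can be drained in between and `get_nowait()` raises `queue.Empty`. With this generator as the sole consumer the race cannot actually fire, so the guard is cheap insurance rather than a bug fix. A sketch:

```python
from queue import Empty, Queue

keep_alive_queue: Queue = Queue()
keep_alive_queue.put(("alive", 1700000000.0))
last_activity = 0.0

try:
    while not keep_alive_queue.empty():
        # get_nowait() can still raise Empty if another consumer wins the race
        _, timestamp = keep_alive_queue.get_nowait()
        last_activity = timestamp
except Empty:
    pass  # benign: keep the last timestamp we saw
```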
@@ -677,12 +694,17 @@ def stream_generate(
                     continue

                 if msg_type == "error":
+                    # If we get an error message but no exception in error_queue,
+                    # create a new error
                     raise RuntimeError(f"Generation error: {data}")
                 elif msg_type == "done":
                     break

                 chunk = data

+                if verbose:
+                    print(chunk)
+
                 # Mark first token time
                 if not timing.first_token_time:
                     timing.mark_first_token()
@@ -700,12 +722,17 @@ def stream_generate(
                     break

             # Wait for generation thread to finish
-            generation_thread.join(timeout=5.0) # Increased timeout to 5 seconds
             if generation_thread.is_alive():
-                # Thread didn't finish - this shouldn't happen normally
-                # but we handle it gracefully
-                raise RuntimeError("Generation thread failed to finish")
+                generation_thread.join(timeout=5.0) # Increased timeout to 5 seconds
+                if generation_thread.is_alive():
+                    # Thread didn't finish - this shouldn't happen normally
+                    raise RuntimeError("Generation thread failed to finish")

         except Exception as e:
-            # Ensure any error is properly propagated
-            raise e
+            # Check if there's a thread error we should chain with
+            if not error_queue.empty():
+                thread_exc, thread_tb = error_queue.get()
+                if isinstance(thread_exc, Exception):
+                    raise e from thread_exc
+            # If no thread error, raise the original exception
+            raise
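The rewritten handler does two things worth separating: it chains the consumer-side failure to the worker's recorded exception with `raise ... from ...` (so both tracebacks print, joined by "The above exception was the direct cause of the following exception"), and it prefers a bare `raise` over `raise e`, which re-raises the active exception without growing its traceback at the re-raise site. A sketch, assuming a worker that records `(exception, traceback)` tuples as in the hunks above (the function name is illustrative):

```python
from queue import Queue
from typing import Callable

def drain_stream(work: Callable[[], None], error_queue: Queue) -> None:
    """Illustrative consumer: re-raise local failures, chained to worker failures."""
    try:
        work()
    except Exception as e:
        # If the worker thread recorded its own failure, chain it as the cause.
        if not error_queue.empty():
            thread_exc, _tb = error_queue.get()
            if isinstance(thread_exc, Exception):
                raise e from thread_exc  # sets e.__cause__ = thread_exc
        # Bare raise re-raises e with its original traceback intact.
        raise
```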
inferencesh-{0.2.31 → 0.2.33}.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: inferencesh
-Version: 0.2.31
+Version: 0.2.33
 Summary: inference.sh Python SDK
 Author: Inference Shell Inc.
 Author-email: "Inference Shell Inc." <hello@inference.sh>
inferencesh-{0.2.31 → 0.2.33}.dist-info/RECORD RENAMED
@@ -2,13 +2,13 @@ inferencesh/__init__.py,sha256=WdADtOhfa3HDOunoE9HLFCTFlXRykYstBIH1FpyWvj8,613
 inferencesh/models/__init__.py,sha256=FDwcdtT6c4hbRitymjmN-hZMlQa8RbKSftkZZyjtUXA,536
 inferencesh/models/base.py,sha256=4gZQRi8J7y9U6PrGD9pRIehd1MJVJAqGakPQDs2AKFM,3251
 inferencesh/models/file.py,sha256=5xnpypcRahM1YcEjj64rv9g2gTimxrZb41YT4r440hU,7393
-inferencesh/models/llm.py,sha256=Yj7BGtAlBJBgvEawZQeGb4AB2WvHWY5DG8wEMIXiPoo,27047
+inferencesh/models/llm.py,sha256=KTy9XAXQS_2p4HThyofSl4Nu8F87sA4IPJqmP233VA8,28304
 inferencesh/utils/__init__.py,sha256=-xiD6uo2XzcrPAWFb_fUbaimmnW4KFKc-8IvBzaxNd4,148
 inferencesh/utils/download.py,sha256=7n5twvoNYDcFnKJyefImaj2YfzRI7vddQw4usZbj38c,1521
 inferencesh/utils/storage.py,sha256=E4J8emd4eFKdmdDgAqzz3TpaaDd3n0l8gYlMHuY8yIU,519
-inferencesh-0.2.31.dist-info/licenses/LICENSE,sha256=OsgqEWIh2el_QMj0y8O1A5Q5Dl-dxqqYbFE6fszuR4s,1086
-inferencesh-0.2.31.dist-info/METADATA,sha256=cXGDMenDbFRfRfCugNmRlTzoFBw8kC-UuVEKFLMWqZI,2757
-inferencesh-0.2.31.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-inferencesh-0.2.31.dist-info/entry_points.txt,sha256=6IC-fyozAqW3ljsMLGCXxJ0_ui2Jb-2fLHtoH1RTnEE,45
-inferencesh-0.2.31.dist-info/top_level.txt,sha256=TSMHg3T1ThMl1HGAWmzBClwOYH1ump5neof9BfHIwaA,12
-inferencesh-0.2.31.dist-info/RECORD,,
+inferencesh-0.2.33.dist-info/licenses/LICENSE,sha256=OsgqEWIh2el_QMj0y8O1A5Q5Dl-dxqqYbFE6fszuR4s,1086
+inferencesh-0.2.33.dist-info/METADATA,sha256=Thb63zVHWjix6fCnfOEn4FFwnWnZ_XXCUWGrwpnkeTk,2757
+inferencesh-0.2.33.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+inferencesh-0.2.33.dist-info/entry_points.txt,sha256=6IC-fyozAqW3ljsMLGCXxJ0_ui2Jb-2fLHtoH1RTnEE,45
+inferencesh-0.2.33.dist-info/top_level.txt,sha256=TSMHg3T1ThMl1HGAWmzBClwOYH1ump5neof9BfHIwaA,12
+inferencesh-0.2.33.dist-info/RECORD,,
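For readers checking the RECORD changes above: each row is `path,sha256=<digest>,size`, where the digest is the urlsafe-base64, padding-stripped SHA-256 of the file, per the wheel spec. A minimal sketch for verifying an unpacked wheel against its RECORD (the paths in the usage comment are illustrative):

```python
import base64
import csv
import hashlib
from pathlib import Path

def verify_record(unpacked_dir: str, record_file: str) -> list[str]:
    """Return paths whose on-disk hash does not match their RECORD entry."""
    mismatches = []
    with open(record_file, newline="") as f:
        for path, hash_spec, _size in csv.reader(f):
            if not hash_spec:  # RECORD lists itself with an empty hash
                continue
            algo, _, expected = hash_spec.partition("=")
            digest = hashlib.new(algo, (Path(unpacked_dir) / path).read_bytes()).digest()
            if base64.urlsafe_b64encode(digest).rstrip(b"=").decode() != expected:
                mismatches.append(path)
    return mismatches

# e.g. verify_record("inferencesh-0.2.33",
#                    "inferencesh-0.2.33/inferencesh-0.2.33.dist-info/RECORD")
```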