inferencesh 0.2.30.tar.gz → 0.2.32.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of inferencesh might be problematic.

Files changed (21)
  1. {inferencesh-0.2.30/src/inferencesh.egg-info → inferencesh-0.2.32}/PKG-INFO +1 -1
  2. {inferencesh-0.2.30 → inferencesh-0.2.32}/pyproject.toml +1 -1
  3. {inferencesh-0.2.30 → inferencesh-0.2.32}/src/inferencesh/models/llm.py +65 -23
  4. {inferencesh-0.2.30 → inferencesh-0.2.32/src/inferencesh.egg-info}/PKG-INFO +1 -1
  5. {inferencesh-0.2.30 → inferencesh-0.2.32}/LICENSE +0 -0
  6. {inferencesh-0.2.30 → inferencesh-0.2.32}/README.md +0 -0
  7. {inferencesh-0.2.30 → inferencesh-0.2.32}/setup.cfg +0 -0
  8. {inferencesh-0.2.30 → inferencesh-0.2.32}/setup.py +0 -0
  9. {inferencesh-0.2.30 → inferencesh-0.2.32}/src/inferencesh/__init__.py +0 -0
  10. {inferencesh-0.2.30 → inferencesh-0.2.32}/src/inferencesh/models/__init__.py +0 -0
  11. {inferencesh-0.2.30 → inferencesh-0.2.32}/src/inferencesh/models/base.py +0 -0
  12. {inferencesh-0.2.30 → inferencesh-0.2.32}/src/inferencesh/models/file.py +0 -0
  13. {inferencesh-0.2.30 → inferencesh-0.2.32}/src/inferencesh/utils/__init__.py +0 -0
  14. {inferencesh-0.2.30 → inferencesh-0.2.32}/src/inferencesh/utils/download.py +0 -0
  15. {inferencesh-0.2.30 → inferencesh-0.2.32}/src/inferencesh/utils/storage.py +0 -0
  16. {inferencesh-0.2.30 → inferencesh-0.2.32}/src/inferencesh.egg-info/SOURCES.txt +0 -0
  17. {inferencesh-0.2.30 → inferencesh-0.2.32}/src/inferencesh.egg-info/dependency_links.txt +0 -0
  18. {inferencesh-0.2.30 → inferencesh-0.2.32}/src/inferencesh.egg-info/entry_points.txt +0 -0
  19. {inferencesh-0.2.30 → inferencesh-0.2.32}/src/inferencesh.egg-info/requires.txt +0 -0
  20. {inferencesh-0.2.30 → inferencesh-0.2.32}/src/inferencesh.egg-info/top_level.txt +0 -0
  21. {inferencesh-0.2.30 → inferencesh-0.2.32}/tests/test_sdk.py +0 -0
{inferencesh-0.2.30/src/inferencesh.egg-info → inferencesh-0.2.32}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: inferencesh
-Version: 0.2.30
+Version: 0.2.32
 Summary: inference.sh Python SDK
 Author: Inference Shell Inc.
 Author-email: "Inference Shell Inc." <hello@inference.sh>
{inferencesh-0.2.30 → inferencesh-0.2.32}/pyproject.toml

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "inferencesh"
-version = "0.2.30"
+version = "0.2.32"
 description = "inference.sh Python SDK"
 authors = [
     {name = "Inference Shell Inc.", email = "hello@inference.sh"},
{inferencesh-0.2.30 → inferencesh-0.2.32}/src/inferencesh/models/llm.py

@@ -232,18 +232,42 @@ def build_messages(
     multipart = any(m.image for m in input_data.context) or input_data.image is not None
     messages = [{"role": "system", "content": input_data.system_prompt}] if input_data.system_prompt is not None and input_data.system_prompt != "" else []
 
+    def merge_messages(messages: List[ContextMessage]) -> ContextMessage:
+        text = "\n\n".join(msg.text for msg in messages if msg.text)
+        images = [msg.image for msg in messages if msg.image]
+        image = images[0] if images else None  # TODO: handle multiple images
+        return ContextMessage(role=messages[0].role, text=text, image=image)
+
+    user_input_text = ""
+    if hasattr(input_data, "text"):
+        user_input_text = transform_user_message(input_data.text) if transform_user_message else input_data.text
+    user_input_image = None
+    if hasattr(input_data, "image"):
+        user_input_image = input_data.image
+    user_msg = ContextMessage(role=ContextMessageRole.USER, text=user_input_text, image=user_input_image)
+
+    input_data.context.append(user_msg)
+
+    current_role = None
+    current_messages = []
+
     for msg in input_data.context:
+        if msg.role == current_role or current_role is None:
+            current_messages.append(msg)
+            current_role = msg.role
+        else:
+            messages.append({
+                "role": current_role,
+                "content": render_message(merge_messages(current_messages), allow_multipart=multipart)
+            })
+            current_messages = [msg]
+            current_role = msg.role
+    if len(current_messages) > 0:
         messages.append({
-            "role": msg.role,
-            "content": render_message(msg, allow_multipart=multipart)
+            "role": current_role,
+            "content": render_message(merge_messages(current_messages), allow_multipart=multipart)
         })
 
-    user_msg = ContextMessage(role=ContextMessageRole.USER, text=input_data.text, image=input_data.image)
-    messages.append({
-        "role": "user",
-        "content": render_message(user_msg, allow_multipart=multipart)
-    })
-
     return messages
 
 
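The rewritten build_messages folds the user's new input into the context list and then collapses consecutive same-role messages into a single chat turn, joining their text with blank lines. A minimal runnable sketch of that grouping pattern, using a simplified stand-in for ContextMessage rather than the SDK's real types (all names here are illustrative):

    from dataclasses import dataclass
    from typing import List, Optional

    @dataclass
    class Msg:
        # Simplified stand-in for ContextMessage (illustrative only)
        role: str
        text: str = ""
        image: Optional[str] = None

    def merge_consecutive(context: List[Msg]) -> List[dict]:
        """Collapse adjacent same-role messages into single chat turns."""
        out: List[dict] = []
        run: List[Msg] = []
        for msg in context:
            if run and msg.role != run[0].role:
                # Role changed: flush the accumulated run as one turn
                out.append({"role": run[0].role,
                            "content": "\n\n".join(m.text for m in run if m.text)})
                run = []
            run.append(msg)
        if run:  # flush the trailing run
            out.append({"role": run[0].role,
                        "content": "\n\n".join(m.text for m in run if m.text)})
        return out

    history = [Msg("user", "hi"), Msg("user", "still there?"), Msg("assistant", "yes")]
    print(merge_consecutive(history))
    # [{'role': 'user', 'content': 'hi\n\nstill there?'},
    #  {'role': 'assistant', 'content': 'yes'}]

This matters for chat-template backends that reject or mishandle two consecutive messages with the same role.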
@@ -567,7 +591,7 @@ def stream_generate(
     output_cls: type[BaseLLMOutput] = LLMOutput,
 ) -> Generator[BaseLLMOutput, None, None]:
     """Stream generate from LLaMA.cpp model with timing and usage tracking."""
-
+    
     # Create queues for communication between threads
     response_queue = Queue()
     error_queue = Queue()
@@ -599,8 +623,6 @@ def stream_generate(
            completion = model.create_chat_completion(**completion_kwargs)
 
            for chunk in completion:
-                if verbose:
-                    print(chunk)
                response_queue.put(("chunk", chunk))
                # Update keep-alive timestamp
                keep_alive_queue.put(("alive", time.time()))
@@ -609,7 +631,9 @@ def stream_generate(
            response_queue.put(("done", None))
 
        except Exception as e:
-            error_queue.put(e)
+            # Preserve the full exception with traceback
+            import sys
+            error_queue.put((e, sys.exc_info()[2]))
            response_queue.put(("error", str(e)))
 
    with timing_context() as timing:
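Putting the bare exception object on the queue (the old behavior) discards the worker thread's traceback, so a later re-raise points at the consumer instead of the failing line. Shipping the (exception, traceback) pair and re-raising with with_traceback() preserves the original frames. A self-contained sketch of the pattern (not the SDK's code):

    import sys
    import threading
    from queue import Queue

    errors: Queue = Queue()

    def worker() -> None:
        try:
            raise ValueError("boom inside worker")
        except Exception as e:
            # Ship the traceback object alongside the exception
            errors.put((e, sys.exc_info()[2]))

    t = threading.Thread(target=worker)
    t.start()
    t.join()

    if not errors.empty():
        exc, tb = errors.get()
        # Re-raise in the main thread with the worker's original frames attached
        raise exc.with_traceback(tb)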
@@ -639,14 +663,22 @@ def stream_generate(
                raise RuntimeError(f"Model failed to initialize within {init_timeout} seconds")
 
            while True:
-                # Check for errors
+                # Check for errors - now with proper exception chaining
                if not error_queue.empty():
-                    raise error_queue.get()
+                    exc, tb = error_queue.get()
+                    if isinstance(exc, Exception):
+                        raise exc.with_traceback(tb)
+                    else:
+                        raise RuntimeError(f"Unknown error in worker thread: {exc}")
 
                # Check keep-alive
-                while not keep_alive_queue.empty():
-                    _, timestamp = keep_alive_queue.get_nowait()
-                    last_activity = timestamp
+                try:
+                    while not keep_alive_queue.empty():
+                        _, timestamp = keep_alive_queue.get_nowait()
+                        last_activity = timestamp
+                except Empty:
+                    # Ignore empty queue - this is expected
+                    pass
 
                # Check for timeout
                if time.time() - last_activity > chunk_timeout:
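Queue.empty() and get_nowait() are two separate calls, so get_nowait() can still raise queue.Empty if the queue is drained between them; wrapping the drain loop in try/except makes the race benign. A standalone sketch of the same drain-and-timestamp pattern (variable names are illustrative):

    import time
    from queue import Empty, Queue

    keep_alive: Queue = Queue()
    keep_alive.put(("alive", time.time()))  # pretend the worker just checked in

    last_activity = time.time()
    chunk_timeout = 30.0

    # Drain every pending timestamp without blocking; treat a lost race
    # between empty() and get_nowait() as harmless.
    try:
        while not keep_alive.empty():
            _, timestamp = keep_alive.get_nowait()
            last_activity = timestamp
    except Empty:
        pass

    if time.time() - last_activity > chunk_timeout:
        raise TimeoutError(f"No generation activity for {chunk_timeout}s")
    print("worker alive at", last_activity)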
@@ -659,12 +691,17 @@ def stream_generate(
                    continue
 
                if msg_type == "error":
+                    # If we get an error message but no exception in error_queue,
+                    # create a new error
                    raise RuntimeError(f"Generation error: {data}")
                elif msg_type == "done":
                    break
 
                chunk = data
 
+                if verbose:
+                    print(chunk)
+
                # Mark first token time
                if not timing.first_token_time:
                    timing.mark_first_token()
@@ -682,12 +719,17 @@ def stream_generate(
                    break
 
            # Wait for generation thread to finish
-            generation_thread.join(timeout=5.0)  # Increased timeout to 5 seconds
            if generation_thread.is_alive():
-                # Thread didn't finish - this shouldn't happen normally
-                # but we handle it gracefully
-                raise RuntimeError("Generation thread failed to finish")
+                generation_thread.join(timeout=5.0)  # Increased timeout to 5 seconds
+                if generation_thread.is_alive():
+                    # Thread didn't finish - this shouldn't happen normally
+                    raise RuntimeError("Generation thread failed to finish")
 
        except Exception as e:
-            # Ensure any error is properly propagated
-            raise e
+            # Check if there's a thread error we should chain with
+            if not error_queue.empty():
+                thread_exc, thread_tb = error_queue.get()
+                if isinstance(thread_exc, Exception):
+                    raise e from thread_exc
+            # If no thread error, raise the original exception
+            raise
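The new except block uses raise ... from ... so a consumer-side failure carries the worker's error as its __cause__, and both appear in the reported traceback. A compact illustration of that chaining with stand-in exceptions (not the SDK's code):

    from queue import Queue

    error_queue: Queue = Queue()
    error_queue.put((ValueError("original worker error"), None))  # as the worker would

    def consume() -> None:
        try:
            raise RuntimeError("consumer-side failure")
        except Exception as e:
            if not error_queue.empty():
                thread_exc, _ = error_queue.get()
                if isinstance(thread_exc, Exception):
                    # __cause__ links the worker's error into the traceback
                    raise e from thread_exc
            raise

    try:
        consume()
    except RuntimeError as e:
        print("cause:", e.__cause__)  # -> original worker error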
{inferencesh-0.2.30 → inferencesh-0.2.32/src/inferencesh.egg-info}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: inferencesh
-Version: 0.2.30
+Version: 0.2.32
 Summary: inference.sh Python SDK
 Author: Inference Shell Inc.
 Author-email: "Inference Shell Inc." <hello@inference.sh>