inferencesh 0.2.31__tar.gz → 0.2.33__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of inferencesh might be problematic.
- {inferencesh-0.2.31/src/inferencesh.egg-info → inferencesh-0.2.33}/PKG-INFO +1 -1
- {inferencesh-0.2.31 → inferencesh-0.2.33}/pyproject.toml +1 -1
- {inferencesh-0.2.31 → inferencesh-0.2.33}/src/inferencesh/models/llm.py +44 -17
- {inferencesh-0.2.31 → inferencesh-0.2.33/src/inferencesh.egg-info}/PKG-INFO +1 -1
- {inferencesh-0.2.31 → inferencesh-0.2.33}/LICENSE +0 -0
- {inferencesh-0.2.31 → inferencesh-0.2.33}/README.md +0 -0
- {inferencesh-0.2.31 → inferencesh-0.2.33}/setup.cfg +0 -0
- {inferencesh-0.2.31 → inferencesh-0.2.33}/setup.py +0 -0
- {inferencesh-0.2.31 → inferencesh-0.2.33}/src/inferencesh/__init__.py +0 -0
- {inferencesh-0.2.31 → inferencesh-0.2.33}/src/inferencesh/models/__init__.py +0 -0
- {inferencesh-0.2.31 → inferencesh-0.2.33}/src/inferencesh/models/base.py +0 -0
- {inferencesh-0.2.31 → inferencesh-0.2.33}/src/inferencesh/models/file.py +0 -0
- {inferencesh-0.2.31 → inferencesh-0.2.33}/src/inferencesh/utils/__init__.py +0 -0
- {inferencesh-0.2.31 → inferencesh-0.2.33}/src/inferencesh/utils/download.py +0 -0
- {inferencesh-0.2.31 → inferencesh-0.2.33}/src/inferencesh/utils/storage.py +0 -0
- {inferencesh-0.2.31 → inferencesh-0.2.33}/src/inferencesh.egg-info/SOURCES.txt +0 -0
- {inferencesh-0.2.31 → inferencesh-0.2.33}/src/inferencesh.egg-info/dependency_links.txt +0 -0
- {inferencesh-0.2.31 → inferencesh-0.2.33}/src/inferencesh.egg-info/entry_points.txt +0 -0
- {inferencesh-0.2.31 → inferencesh-0.2.33}/src/inferencesh.egg-info/requires.txt +0 -0
- {inferencesh-0.2.31 → inferencesh-0.2.33}/src/inferencesh.egg-info/top_level.txt +0 -0
- {inferencesh-0.2.31 → inferencesh-0.2.33}/tests/test_sdk.py +0 -0
{inferencesh-0.2.31 → inferencesh-0.2.33}/src/inferencesh/models/llm.py

@@ -229,7 +229,6 @@ def build_messages(
             return parts[0]["text"]
         raise ValueError("Image content requires multipart support")
 
-    multipart = any(m.image for m in input_data.context) or input_data.image is not None
     messages = [{"role": "system", "content": input_data.system_prompt}] if input_data.system_prompt is not None and input_data.system_prompt != "" else []
 
     def merge_messages(messages: List[ContextMessage]) -> ContextMessage:
@@ -238,7 +237,17 @@ def build_messages(
         image = images[0] if images else None  # TODO: handle multiple images
         return ContextMessage(role=messages[0].role, text=text, image=image)
 
-
+    user_input_text = ""
+    if hasattr(input_data, "text"):
+        user_input_text = transform_user_message(input_data.text) if transform_user_message else input_data.text
+
+    user_input_image = None
+    multipart = any(m.image for m in input_data.context)
+    if hasattr(input_data, "image"):
+        user_input_image = input_data.image
+        multipart = multipart or input_data.image is not None
+
+    user_msg = ContextMessage(role=ContextMessageRole.USER, text=user_input_text, image=user_input_image)
 
     input_data.context.append(user_msg)
 
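The rewritten user-message construction guards both input_data.text and input_data.image with hasattr, so input models that lack an image (or even a text) field no longer raise AttributeError, and multipart is only true when the context or the new message actually carries an image. A minimal runnable sketch of the same pattern, using simplified stand-in types (Msg and DemoInput below are illustrations, not SDK classes):

from dataclasses import dataclass, field
from typing import List, Optional

@dataclass
class Msg:  # stand-in for ContextMessage
    role: str
    text: str = ""
    image: Optional[str] = None

@dataclass
class DemoInput:  # stand-in for an input model that has no image field
    text: str
    context: List[Msg] = field(default_factory=list)

def append_user_message(input_data, transform=None):
    # hasattr() guards: a missing attribute no longer raises AttributeError
    text = ""
    if hasattr(input_data, "text"):
        text = transform(input_data.text) if transform else input_data.text

    image = None
    multipart = any(m.image for m in input_data.context)
    if hasattr(input_data, "image"):
        image = input_data.image
        multipart = multipart or input_data.image is not None

    input_data.context.append(Msg(role="user", text=text, image=image))
    return multipart

inp = DemoInput(text="hello")
print(append_user_message(inp))  # False: text-only input stays single-part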
@@ -585,7 +594,7 @@ def stream_generate(
     output_cls: type[BaseLLMOutput] = LLMOutput,
 ) -> Generator[BaseLLMOutput, None, None]:
     """Stream generate from LLaMA.cpp model with timing and usage tracking."""
-
+
     # Create queues for communication between threads
     response_queue = Queue()
     error_queue = Queue()
@@ -617,8 +626,6 @@ def stream_generate(
             completion = model.create_chat_completion(**completion_kwargs)
 
             for chunk in completion:
-                if verbose:
-                    print(chunk)
                 response_queue.put(("chunk", chunk))
                 # Update keep-alive timestamp
                 keep_alive_queue.put(("alive", time.time()))
@@ -627,7 +634,9 @@ def stream_generate(
             response_queue.put(("done", None))
 
         except Exception as e:
-
+            # Preserve the full exception with traceback
+            import sys
+            error_queue.put((e, sys.exc_info()[2]))
             response_queue.put(("error", str(e)))
 
     with timing_context() as timing:
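This hunk stores the worker's exception together with its traceback object instead of only a string, so the consumer can re-raise it later with the original stack intact. A self-contained sketch of the pattern (not the SDK code itself):

import sys
import threading
from queue import Queue

error_queue = Queue()

def worker():
    try:
        1 / 0  # stand-in for model.create_chat_completion(...)
    except Exception as e:
        # sys.exc_info()[2] is the live traceback of the current except block
        error_queue.put((e, sys.exc_info()[2]))

t = threading.Thread(target=worker)
t.start()
t.join()

if not error_queue.empty():
    exc, tb = error_queue.get()
    raise exc.with_traceback(tb)  # traceback points at the worker's failing line

Note that in Python 3 the traceback also travels on e.__traceback__, so the explicit sys.exc_info()[2] is belt-and-braces rather than strictly required.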
@@ -657,14 +666,22 @@ def stream_generate(
                 raise RuntimeError(f"Model failed to initialize within {init_timeout} seconds")
 
             while True:
-                # Check for errors
+                # Check for errors - now with proper exception chaining
                 if not error_queue.empty():
-
+                    exc, tb = error_queue.get()
+                    if isinstance(exc, Exception):
+                        raise exc.with_traceback(tb)
+                    else:
+                        raise RuntimeError(f"Unknown error in worker thread: {exc}")
 
                 # Check keep-alive
-
-
-
+                try:
+                    while not keep_alive_queue.empty():
+                        _, timestamp = keep_alive_queue.get_nowait()
+                        last_activity = timestamp
+                except Empty:
+                    # Ignore empty queue - this is expected
+                    pass
 
                 # Check for timeout
                 if time.time() - last_activity > chunk_timeout:
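The keep-alive check now drains every queued timestamp without blocking and keeps only the newest one; the Empty handler covers the race where the queue empties between the empty() check and get_nowait(). A standalone sketch of the same loop (assuming from queue import Empty, Queue, as the diff's except Empty: implies the module already imports):

import time
from queue import Empty, Queue

keep_alive_queue = Queue()
keep_alive_queue.put(("alive", time.time()))

last_activity = time.time()
try:
    while not keep_alive_queue.empty():
        _, timestamp = keep_alive_queue.get_nowait()
        last_activity = timestamp  # keep only the most recent heartbeat
except Empty:
    pass  # expected: the queue emptied between the check and the get

chunk_timeout = 30.0
if time.time() - last_activity > chunk_timeout:
    print("no chunk within timeout")  # the real code aborts the stream here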
@@ -677,12 +694,17 @@ def stream_generate(
                     continue
 
                 if msg_type == "error":
+                    # If we get an error message but no exception in error_queue,
+                    # create a new error
                     raise RuntimeError(f"Generation error: {data}")
                 elif msg_type == "done":
                     break
 
                 chunk = data
 
+                if verbose:
+                    print(chunk)
+
                 # Mark first token time
                 if not timing.first_token_time:
                     timing.mark_first_token()
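With this hunk, verbose chunk printing happens in the consuming thread rather than inside the generation thread (see the removal in the -617,8 hunk above), so slow stdout can no longer stall the producer between chunks. A stripped-down sketch of the consumer dispatch loop:

from queue import Queue

response_queue = Queue()
response_queue.put(("chunk", {"choices": [{"delta": {"content": "hi"}}]}))
response_queue.put(("done", None))

verbose = True
while True:
    msg_type, data = response_queue.get()
    if msg_type == "error":
        raise RuntimeError(f"Generation error: {data}")
    elif msg_type == "done":
        break
    chunk = data
    if verbose:
        print(chunk)  # printing now costs the consumer, not the producer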
@@ -700,12 +722,17 @@ def stream_generate(
                     break
 
             # Wait for generation thread to finish
-            generation_thread.join(timeout=5.0)  # Increased timeout to 5 seconds
             if generation_thread.is_alive():
-                #
-
-
+                generation_thread.join(timeout=5.0)  # Increased timeout to 5 seconds
+                if generation_thread.is_alive():
+                    # Thread didn't finish - this shouldn't happen normally
+                    raise RuntimeError("Generation thread failed to finish")
 
         except Exception as e:
-            #
-
+            # Check if there's a thread error we should chain with
+            if not error_queue.empty():
+                thread_exc, thread_tb = error_queue.get()
+                if isinstance(thread_exc, Exception):
+                    raise e from thread_exc
+            # If no thread error, raise the original exception
+            raise
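The final hunk chains the consumer-side exception to the worker's exception via raise e from thread_exc, so the reported traceback shows both failures. A small sketch of the resulting behavior:

from queue import Queue

error_queue = Queue()
error_queue.put((ValueError("worker failed"), None))  # as the worker would enqueue

try:
    try:
        raise RuntimeError("consumer failed")
    except Exception as e:
        if not error_queue.empty():
            thread_exc, thread_tb = error_queue.get()
            if isinstance(thread_exc, Exception):
                raise e from thread_exc  # sets e.__cause__
        raise
except RuntimeError as err:
    print(repr(err.__cause__))  # ValueError('worker failed')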