inferencesh 0.2.31__py3-none-any.whl → 0.2.32__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of inferencesh might be problematic.
- inferencesh/models/llm.py +40 -16
- {inferencesh-0.2.31.dist-info → inferencesh-0.2.32.dist-info}/METADATA +1 -1
- {inferencesh-0.2.31.dist-info → inferencesh-0.2.32.dist-info}/RECORD +7 -7
- {inferencesh-0.2.31.dist-info → inferencesh-0.2.32.dist-info}/WHEEL +0 -0
- {inferencesh-0.2.31.dist-info → inferencesh-0.2.32.dist-info}/entry_points.txt +0 -0
- {inferencesh-0.2.31.dist-info → inferencesh-0.2.32.dist-info}/licenses/LICENSE +0 -0
- {inferencesh-0.2.31.dist-info → inferencesh-0.2.32.dist-info}/top_level.txt +0 -0
inferencesh/models/llm.py
CHANGED
@@ -238,7 +238,13 @@ def build_messages(
         image = images[0] if images else None  # TODO: handle multiple images
         return ContextMessage(role=messages[0].role, text=text, image=image)
 
-
+    user_input_text = ""
+    if hasattr(input_data, "text"):
+        user_input_text = transform_user_message(input_data.text) if transform_user_message else input_data.text
+    user_input_image = None
+    if hasattr(input_data, "image"):
+        user_input_image = input_data.image
+    user_msg = ContextMessage(role=ContextMessageRole.USER, text=user_input_text, image=user_input_image)
 
     input_data.context.append(user_msg)
 
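The added block builds the user turn defensively: `hasattr` guards inputs that may lack `text` or `image`, and the optional `transform_user_message` hook runs on the raw text before the message is appended to the context. A minimal sketch of the same pattern, using stand-in types rather than the package's actual `ContextMessage` and input models:

```python
from dataclasses import dataclass, field
from typing import Callable, List, Optional

@dataclass
class Msg:  # stand-in for ContextMessage
    role: str
    text: str = ""
    image: Optional[str] = None

@dataclass
class Inp:  # stand-in for the LLM input model (has no .image attribute)
    text: str = ""
    context: List[Msg] = field(default_factory=list)

def append_user_turn(input_data, transform: Optional[Callable[[str], str]] = None) -> None:
    # getattr with a default is equivalent to the diff's hasattr guards.
    text = getattr(input_data, "text", "")
    text = transform(text) if transform else text
    image = getattr(input_data, "image", None)
    input_data.context.append(Msg(role="user", text=text, image=image))

inp = Inp(text="hello")
append_user_turn(inp, transform=str.upper)
print(inp.context[0])  # Msg(role='user', text='HELLO', image=None)
```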
@@ -585,7 +591,7 @@ def stream_generate(
     output_cls: type[BaseLLMOutput] = LLMOutput,
 ) -> Generator[BaseLLMOutput, None, None]:
     """Stream generate from LLaMA.cpp model with timing and usage tracking."""
-
+
     # Create queues for communication between threads
     response_queue = Queue()
     error_queue = Queue()
@@ -617,8 +623,6 @@ def stream_generate(
             completion = model.create_chat_completion(**completion_kwargs)
 
             for chunk in completion:
-                if verbose:
-                    print(chunk)
                 response_queue.put(("chunk", chunk))
                 # Update keep-alive timestamp
                 keep_alive_queue.put(("alive", time.time()))
@@ -627,7 +631,9 @@ def stream_generate(
             response_queue.put(("done", None))
 
         except Exception as e:
-
+            # Preserve the full exception with traceback
+            import sys
+            error_queue.put((e, sys.exc_info()[2]))
             response_queue.put(("error", str(e)))
 
     with timing_context() as timing:
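The worker now puts the exception object together with its traceback on `error_queue`, rather than only the flattened string sent over `response_queue`; the consumer re-raises it in the next hunk. A self-contained sketch of that round trip using only the standard library (names here are illustrative, not the package's):

```python
import sys
import threading
from queue import Queue

error_queue: Queue = Queue()

def worker() -> None:
    try:
        raise ValueError("boom inside worker")
    except Exception as e:
        # Ship the exception object and its traceback to the consumer
        # instead of flattening it to str(e).
        error_queue.put((e, sys.exc_info()[2]))

t = threading.Thread(target=worker)
t.start()
t.join()

exc, tb = error_queue.get()
try:
    # Re-raising with the original traceback keeps the worker's
    # stack frames visible in the reported error.
    raise exc.with_traceback(tb)
except ValueError as e:
    print("caught:", e)
```

On Python 3 the same traceback is also reachable as `e.__traceback__`, so shipping `sys.exc_info()[2]` and reading the attribute are equivalent here.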
@@ -657,14 +663,22 @@ def stream_generate(
             raise RuntimeError(f"Model failed to initialize within {init_timeout} seconds")
 
         while True:
-            # Check for errors
+            # Check for errors - now with proper exception chaining
             if not error_queue.empty():
-
+                exc, tb = error_queue.get()
+                if isinstance(exc, Exception):
+                    raise exc.with_traceback(tb)
+                else:
+                    raise RuntimeError(f"Unknown error in worker thread: {exc}")
 
             # Check keep-alive
-
-
-
+            try:
+                while not keep_alive_queue.empty():
+                    _, timestamp = keep_alive_queue.get_nowait()
+                    last_activity = timestamp
+            except Empty:
+                # Ignore empty queue - this is expected
+                pass
 
             # Check for timeout
             if time.time() - last_activity > chunk_timeout:
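The keep-alive block drains every queued heartbeat without blocking and keeps only the newest timestamp; the `except Empty` is defensive, covering a race where another consumer empties the queue between the `empty()` check and `get_nowait()`. A standalone sketch of the drain-and-timeout pattern (the `chunk_timeout` value is illustrative):

```python
import time
from queue import Queue, Empty

keep_alive_queue: Queue = Queue()
last_activity = time.time()

# Simulate a worker posting several heartbeats.
for _ in range(3):
    keep_alive_queue.put(("alive", time.time()))

try:
    # Drain all pending heartbeats; the most recent one wins.
    while not keep_alive_queue.empty():
        _, timestamp = keep_alive_queue.get_nowait()
        last_activity = timestamp
except Empty:
    pass  # another consumer raced us; nothing left to read

chunk_timeout = 30.0  # illustrative value
if time.time() - last_activity > chunk_timeout:
    raise TimeoutError("no chunks received within chunk_timeout")
```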
@@ -677,12 +691,17 @@ def stream_generate(
                 continue
 
             if msg_type == "error":
+                # If we get an error message but no exception in error_queue,
+                # create a new error
                 raise RuntimeError(f"Generation error: {data}")
             elif msg_type == "done":
                 break
 
             chunk = data
 
+            if verbose:
+                print(chunk)
+
             # Mark first token time
             if not timing.first_token_time:
                 timing.mark_first_token()
@@ -700,12 +719,17 @@ def stream_generate(
                 break
 
             # Wait for generation thread to finish
-            generation_thread.join(timeout=5.0)  # Increased timeout to 5 seconds
             if generation_thread.is_alive():
-                #
-
-
+                generation_thread.join(timeout=5.0)  # Increased timeout to 5 seconds
+                if generation_thread.is_alive():
+                    # Thread didn't finish - this shouldn't happen normally
+                    raise RuntimeError("Generation thread failed to finish")
 
         except Exception as e:
-            #
-
+            # Check if there's a thread error we should chain with
+            if not error_queue.empty():
+                thread_exc, thread_tb = error_queue.get()
+                if isinstance(thread_exc, Exception):
+                    raise e from thread_exc
+            # If no thread error, raise the original exception
+            raise
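`raise e from thread_exc` sets `__cause__` on the consumer-side exception, so the reported traceback shows both failures. A small illustration with generic exceptions in place of the package's types:

```python
try:
    try:
        raise TimeoutError("consumer timed out waiting for chunks")
    except Exception as e:
        # Mirror `raise e from thread_exc`: the worker's error becomes
        # the __cause__ of the consumer-side exception.
        raise e from RuntimeError("worker blew up")
except TimeoutError as caught:
    print("error:", caught)
    print("cause:", caught.__cause__)  # cause: worker blew up
```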
{inferencesh-0.2.31.dist-info → inferencesh-0.2.32.dist-info}/RECORD
CHANGED
@@ -2,13 +2,13 @@ inferencesh/__init__.py,sha256=WdADtOhfa3HDOunoE9HLFCTFlXRykYstBIH1FpyWvj8,613
 inferencesh/models/__init__.py,sha256=FDwcdtT6c4hbRitymjmN-hZMlQa8RbKSftkZZyjtUXA,536
 inferencesh/models/base.py,sha256=4gZQRi8J7y9U6PrGD9pRIehd1MJVJAqGakPQDs2AKFM,3251
 inferencesh/models/file.py,sha256=5xnpypcRahM1YcEjj64rv9g2gTimxrZb41YT4r440hU,7393
-inferencesh/models/llm.py,sha256=
+inferencesh/models/llm.py,sha256=nN0gGcVRB0YS3yQcKi-rPy1Fx3B_blLhS-obWxFbhCE,28264
 inferencesh/utils/__init__.py,sha256=-xiD6uo2XzcrPAWFb_fUbaimmnW4KFKc-8IvBzaxNd4,148
 inferencesh/utils/download.py,sha256=7n5twvoNYDcFnKJyefImaj2YfzRI7vddQw4usZbj38c,1521
 inferencesh/utils/storage.py,sha256=E4J8emd4eFKdmdDgAqzz3TpaaDd3n0l8gYlMHuY8yIU,519
-inferencesh-0.2.
-inferencesh-0.2.
-inferencesh-0.2.
-inferencesh-0.2.
-inferencesh-0.2.
-inferencesh-0.2.
+inferencesh-0.2.32.dist-info/licenses/LICENSE,sha256=OsgqEWIh2el_QMj0y8O1A5Q5Dl-dxqqYbFE6fszuR4s,1086
+inferencesh-0.2.32.dist-info/METADATA,sha256=YQmwDhvu8aMtp-QNoka9aAjUqIcltH8pWpz-LIz_uT4,2757
+inferencesh-0.2.32.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+inferencesh-0.2.32.dist-info/entry_points.txt,sha256=6IC-fyozAqW3ljsMLGCXxJ0_ui2Jb-2fLHtoH1RTnEE,45
+inferencesh-0.2.32.dist-info/top_level.txt,sha256=TSMHg3T1ThMl1HGAWmzBClwOYH1ump5neof9BfHIwaA,12
+inferencesh-0.2.32.dist-info/RECORD,,
File without changes
File without changes
File without changes
File without changes