inferencesh 0.2.30__py3-none-any.whl → 0.2.32__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of inferencesh might be problematic.
- inferencesh/models/llm.py +65 -23
- {inferencesh-0.2.30.dist-info → inferencesh-0.2.32.dist-info}/METADATA +1 -1
- {inferencesh-0.2.30.dist-info → inferencesh-0.2.32.dist-info}/RECORD +7 -7
- {inferencesh-0.2.30.dist-info → inferencesh-0.2.32.dist-info}/WHEEL +0 -0
- {inferencesh-0.2.30.dist-info → inferencesh-0.2.32.dist-info}/entry_points.txt +0 -0
- {inferencesh-0.2.30.dist-info → inferencesh-0.2.32.dist-info}/licenses/LICENSE +0 -0
- {inferencesh-0.2.30.dist-info → inferencesh-0.2.32.dist-info}/top_level.txt +0 -0
inferencesh/models/llm.py
CHANGED
@@ -232,18 +232,42 @@ def build_messages(
     multipart = any(m.image for m in input_data.context) or input_data.image is not None
     messages = [{"role": "system", "content": input_data.system_prompt}] if input_data.system_prompt is not None and input_data.system_prompt != "" else []
 
+    def merge_messages(messages: List[ContextMessage]) -> ContextMessage:
+        text = "\n\n".join(msg.text for msg in messages if msg.text)
+        images = [msg.image for msg in messages if msg.image]
+        image = images[0] if images else None  # TODO: handle multiple images
+        return ContextMessage(role=messages[0].role, text=text, image=image)
+
+    user_input_text = ""
+    if hasattr(input_data, "text"):
+        user_input_text = transform_user_message(input_data.text) if transform_user_message else input_data.text
+    user_input_image = None
+    if hasattr(input_data, "image"):
+        user_input_image = input_data.image
+    user_msg = ContextMessage(role=ContextMessageRole.USER, text=user_input_text, image=user_input_image)
+
+    input_data.context.append(user_msg)
+
+    current_role = None
+    current_messages = []
+
     for msg in input_data.context:
+        if msg.role == current_role or current_role is None:
+            current_messages.append(msg)
+            current_role = msg.role
+        else:
+            messages.append({
+                "role": current_role,
+                "content": render_message(merge_messages(current_messages), allow_multipart=multipart)
+            })
+            current_messages = [msg]
+            current_role = msg.role
+    if len(current_messages) > 0:
         messages.append({
-            "role":
-            "content": render_message(
+            "role": current_role,
+            "content": render_message(merge_messages(current_messages), allow_multipart=multipart)
         })
 
-    user_msg = ContextMessage(role=ContextMessageRole.USER, text=input_data.text, image=input_data.image)
-    messages.append({
-        "role": "user",
-        "content": render_message(user_msg, allow_multipart=multipart)
-    })
-
     return messages
 
 
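Note: the net effect of this hunk is that the user's input is appended to the context and consecutive context messages sharing a role are collapsed into one chat message before rendering. A minimal standalone sketch of that grouping pass, using simplified stand-ins for ContextMessage and render_message rather than the package's real types:

    from dataclasses import dataclass
    from typing import List, Optional

    @dataclass
    class Msg:  # simplified stand-in for ContextMessage
        role: str
        text: str
        image: Optional[str] = None

    def merge(group: List[Msg]) -> Msg:
        # Mirror merge_messages: join texts, keep only the first image
        text = "\n\n".join(m.text for m in group if m.text)
        images = [m.image for m in group if m.image]
        return Msg(group[0].role, text, images[0] if images else None)

    def group_by_role(context: List[Msg]) -> List[dict]:
        out: List[dict] = []
        current: List[Msg] = []
        role: Optional[str] = None
        for m in context:
            if role is None or m.role == role:
                current.append(m)
                role = m.role
            else:
                out.append({"role": role, "content": merge(current).text})
                current, role = [m], m.role
        if current:
            out.append({"role": role, "content": merge(current).text})
        return out

    ctx = [Msg("user", "hi"), Msg("user", "are you there?"), Msg("assistant", "yes")]
    print(group_by_role(ctx))
    # [{'role': 'user', 'content': 'hi\n\nare you there?'}, {'role': 'assistant', 'content': 'yes'}]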
@@ -567,7 +591,7 @@ def stream_generate(
     output_cls: type[BaseLLMOutput] = LLMOutput,
 ) -> Generator[BaseLLMOutput, None, None]:
     """Stream generate from LLaMA.cpp model with timing and usage tracking."""
-
+
     # Create queues for communication between threads
     response_queue = Queue()
     error_queue = Queue()
@@ -599,8 +623,6 @@ def stream_generate(
             completion = model.create_chat_completion(**completion_kwargs)
 
             for chunk in completion:
-                if verbose:
-                    print(chunk)
                 response_queue.put(("chunk", chunk))
                 # Update keep-alive timestamp
                 keep_alive_queue.put(("alive", time.time()))
@@ -609,7 +631,9 @@ def stream_generate(
             response_queue.put(("done", None))
 
         except Exception as e:
-
+            # Preserve the full exception with traceback
+            import sys
+            error_queue.put((e, sys.exc_info()[2]))
             response_queue.put(("error", str(e)))
 
     with timing_context() as timing:
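Note: the worker now ships the live exception object together with its traceback through error_queue, so the consumer thread can re-raise it with the worker's frames intact. A minimal sketch of the cross-thread pattern, independent of this package:

    import sys
    import threading
    from queue import Queue

    error_queue: Queue = Queue()

    def worker() -> None:
        try:
            raise ValueError("boom inside worker")  # placeholder failure
        except Exception as e:
            # Ship the exception and its traceback to the consumer thread
            error_queue.put((e, sys.exc_info()[2]))

    t = threading.Thread(target=worker)
    t.start()
    t.join()

    if not error_queue.empty():
        exc, tb = error_queue.get()
        # Re-raising with the original traceback keeps the worker's frames visible
        raise exc.with_traceback(tb)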
@@ -639,14 +663,22 @@ def stream_generate(
             raise RuntimeError(f"Model failed to initialize within {init_timeout} seconds")
 
         while True:
-            # Check for errors
+            # Check for errors - now with proper exception chaining
             if not error_queue.empty():
-
+                exc, tb = error_queue.get()
+                if isinstance(exc, Exception):
+                    raise exc.with_traceback(tb)
+                else:
+                    raise RuntimeError(f"Unknown error in worker thread: {exc}")
 
             # Check keep-alive
-
-
-
+            try:
+                while not keep_alive_queue.empty():
+                    _, timestamp = keep_alive_queue.get_nowait()
+                    last_activity = timestamp
+            except Empty:
+                # Ignore empty queue - this is expected
+                pass
 
             # Check for timeout
             if time.time() - last_activity > chunk_timeout:
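Note: empty() followed by get_nowait() can still race with another consumer, which is why the Empty guard stays even inside a while-not-empty loop. A self-contained sketch of the drain-and-timeout idiom (chunk_timeout here is a hypothetical value, not the package's default):

    import time
    from queue import Queue, Empty

    keep_alive_queue: Queue = Queue()
    last_activity = time.time()
    chunk_timeout = 30.0  # hypothetical timeout in seconds

    def drain_keep_alive() -> None:
        """Consume all pending heartbeats without blocking."""
        global last_activity
        try:
            while not keep_alive_queue.empty():
                _, timestamp = keep_alive_queue.get_nowait()
                last_activity = timestamp
        except Empty:
            pass  # another consumer emptied the queue between checks

    keep_alive_queue.put(("alive", time.time()))
    drain_keep_alive()
    if time.time() - last_activity > chunk_timeout:
        raise TimeoutError(f"No activity for {chunk_timeout} seconds")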
@@ -659,12 +691,17 @@ def stream_generate(
                 continue
 
             if msg_type == "error":
+                # If we get an error message but no exception in error_queue,
+                # create a new error
                 raise RuntimeError(f"Generation error: {data}")
             elif msg_type == "done":
                 break
 
             chunk = data
 
+            if verbose:
+                print(chunk)
+
             # Mark first token time
             if not timing.first_token_time:
                 timing.mark_first_token()
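Note: response_queue carries typed tuples - ("chunk", data) for tokens, ("done", None) to finish, ("error", message) to abort - and verbose printing now happens on the consumer side, keeping stdout access in one thread. A compact sketch of this message protocol, with a fake token source standing in for the completion iterator:

    import threading
    from queue import Queue

    response_queue: Queue = Queue()

    def producer() -> None:
        try:
            for token in ["Hello", ", ", "world"]:  # stand-in for the model's stream
                response_queue.put(("chunk", token))
            response_queue.put(("done", None))
        except Exception as e:
            response_queue.put(("error", str(e)))

    threading.Thread(target=producer).start()

    verbose = True
    while True:
        msg_type, data = response_queue.get()
        if msg_type == "error":
            raise RuntimeError(f"Generation error: {data}")
        if msg_type == "done":
            break
        if verbose:
            print(data)  # printing stays in the consumer thread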
@@ -682,12 +719,17 @@ def stream_generate(
                     break
 
             # Wait for generation thread to finish
-            generation_thread.join(timeout=5.0)  # Increased timeout to 5 seconds
             if generation_thread.is_alive():
-                #
-
-
+                generation_thread.join(timeout=5.0)  # Increased timeout to 5 seconds
+                if generation_thread.is_alive():
+                    # Thread didn't finish - this shouldn't happen normally
+                    raise RuntimeError("Generation thread failed to finish")
 
         except Exception as e:
-            #
-
+            # Check if there's a thread error we should chain with
+            if not error_queue.empty():
+                thread_exc, thread_tb = error_queue.get()
+                if isinstance(thread_exc, Exception):
+                    raise e from thread_exc
+            # If no thread error, raise the original exception
+            raise
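Note: raise e from thread_exc records the worker's failure as __cause__, so the final traceback reports both errors. A minimal sketch of this chaining, with placeholder failures in both threads:

    import threading
    from queue import Queue

    error_queue: Queue = Queue()

    def worker() -> None:
        try:
            raise ValueError("worker failed")  # placeholder failure
        except Exception as exc:
            error_queue.put((exc, exc.__traceback__))

    t = threading.Thread(target=worker)
    t.start()
    t.join()

    try:
        raise RuntimeError("consumer failed while streaming")  # placeholder failure
    except Exception as e:
        if not error_queue.empty():
            thread_exc, thread_tb = error_queue.get()
            if isinstance(thread_exc, Exception):
                # __cause__ links the two; both tracebacks are shown
                raise e from thread_exc
        raise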
{inferencesh-0.2.30.dist-info → inferencesh-0.2.32.dist-info}/RECORD
CHANGED
@@ -2,13 +2,13 @@ inferencesh/__init__.py,sha256=WdADtOhfa3HDOunoE9HLFCTFlXRykYstBIH1FpyWvj8,613
 inferencesh/models/__init__.py,sha256=FDwcdtT6c4hbRitymjmN-hZMlQa8RbKSftkZZyjtUXA,536
 inferencesh/models/base.py,sha256=4gZQRi8J7y9U6PrGD9pRIehd1MJVJAqGakPQDs2AKFM,3251
 inferencesh/models/file.py,sha256=5xnpypcRahM1YcEjj64rv9g2gTimxrZb41YT4r440hU,7393
-inferencesh/models/llm.py,sha256=
+inferencesh/models/llm.py,sha256=nN0gGcVRB0YS3yQcKi-rPy1Fx3B_blLhS-obWxFbhCE,28264
 inferencesh/utils/__init__.py,sha256=-xiD6uo2XzcrPAWFb_fUbaimmnW4KFKc-8IvBzaxNd4,148
 inferencesh/utils/download.py,sha256=7n5twvoNYDcFnKJyefImaj2YfzRI7vddQw4usZbj38c,1521
 inferencesh/utils/storage.py,sha256=E4J8emd4eFKdmdDgAqzz3TpaaDd3n0l8gYlMHuY8yIU,519
-inferencesh-0.2.
-inferencesh-0.2.
-inferencesh-0.2.
-inferencesh-0.2.
-inferencesh-0.2.
-inferencesh-0.2.
+inferencesh-0.2.32.dist-info/licenses/LICENSE,sha256=OsgqEWIh2el_QMj0y8O1A5Q5Dl-dxqqYbFE6fszuR4s,1086
+inferencesh-0.2.32.dist-info/METADATA,sha256=YQmwDhvu8aMtp-QNoka9aAjUqIcltH8pWpz-LIz_uT4,2757
+inferencesh-0.2.32.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+inferencesh-0.2.32.dist-info/entry_points.txt,sha256=6IC-fyozAqW3ljsMLGCXxJ0_ui2Jb-2fLHtoH1RTnEE,45
+inferencesh-0.2.32.dist-info/top_level.txt,sha256=TSMHg3T1ThMl1HGAWmzBClwOYH1ump5neof9BfHIwaA,12
+inferencesh-0.2.32.dist-info/RECORD,,
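Note: each RECORD line has the form path,sha256=<digest>,<size>, where the digest is the unpadded urlsafe base64 encoding of the file's SHA-256, following the wheel RECORD convention. A short sketch that reproduces an entry for any local file:

    import base64
    import hashlib
    from pathlib import Path

    def record_entry(path: str) -> str:
        """Build a RECORD line: path,sha256=<urlsafe b64, no padding>,size."""
        data = Path(path).read_bytes()
        digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest()).rstrip(b"=")
        return f"{path},sha256={digest.decode()},{len(data)}"

    # Hypothetical usage against a local checkout:
    # print(record_entry("inferencesh/models/llm.py"))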
The remaining files (WHEEL, entry_points.txt, licenses/LICENSE, top_level.txt) are unchanged between 0.2.30 and 0.2.32.