inferencesh 0.2.30.tar.gz → 0.2.32.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of inferencesh might be problematic.

Files changed (21)
  1. {inferencesh-0.2.30/src/inferencesh.egg-info → inferencesh-0.2.32}/PKG-INFO +1 -1
  2. {inferencesh-0.2.30 → inferencesh-0.2.32}/pyproject.toml +1 -1
  3. {inferencesh-0.2.30 → inferencesh-0.2.32}/src/inferencesh/models/llm.py +65 -23
  4. {inferencesh-0.2.30 → inferencesh-0.2.32/src/inferencesh.egg-info}/PKG-INFO +1 -1
  5. {inferencesh-0.2.30 → inferencesh-0.2.32}/LICENSE +0 -0
  6. {inferencesh-0.2.30 → inferencesh-0.2.32}/README.md +0 -0
  7. {inferencesh-0.2.30 → inferencesh-0.2.32}/setup.cfg +0 -0
  8. {inferencesh-0.2.30 → inferencesh-0.2.32}/setup.py +0 -0
  9. {inferencesh-0.2.30 → inferencesh-0.2.32}/src/inferencesh/__init__.py +0 -0
  10. {inferencesh-0.2.30 → inferencesh-0.2.32}/src/inferencesh/models/__init__.py +0 -0
  11. {inferencesh-0.2.30 → inferencesh-0.2.32}/src/inferencesh/models/base.py +0 -0
  12. {inferencesh-0.2.30 → inferencesh-0.2.32}/src/inferencesh/models/file.py +0 -0
  13. {inferencesh-0.2.30 → inferencesh-0.2.32}/src/inferencesh/utils/__init__.py +0 -0
  14. {inferencesh-0.2.30 → inferencesh-0.2.32}/src/inferencesh/utils/download.py +0 -0
  15. {inferencesh-0.2.30 → inferencesh-0.2.32}/src/inferencesh/utils/storage.py +0 -0
  16. {inferencesh-0.2.30 → inferencesh-0.2.32}/src/inferencesh.egg-info/SOURCES.txt +0 -0
  17. {inferencesh-0.2.30 → inferencesh-0.2.32}/src/inferencesh.egg-info/dependency_links.txt +0 -0
  18. {inferencesh-0.2.30 → inferencesh-0.2.32}/src/inferencesh.egg-info/entry_points.txt +0 -0
  19. {inferencesh-0.2.30 → inferencesh-0.2.32}/src/inferencesh.egg-info/requires.txt +0 -0
  20. {inferencesh-0.2.30 → inferencesh-0.2.32}/src/inferencesh.egg-info/top_level.txt +0 -0
  21. {inferencesh-0.2.30 → inferencesh-0.2.32}/tests/test_sdk.py +0 -0
{inferencesh-0.2.30/src/inferencesh.egg-info → inferencesh-0.2.32}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: inferencesh
-Version: 0.2.30
+Version: 0.2.32
 Summary: inference.sh Python SDK
 Author: Inference Shell Inc.
 Author-email: "Inference Shell Inc." <hello@inference.sh>
{inferencesh-0.2.30 → inferencesh-0.2.32}/pyproject.toml

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "inferencesh"
-version = "0.2.30"
+version = "0.2.32"
 description = "inference.sh Python SDK"
 authors = [
     {name = "Inference Shell Inc.", email = "hello@inference.sh"},
{inferencesh-0.2.30 → inferencesh-0.2.32}/src/inferencesh/models/llm.py

@@ -232,18 +232,42 @@ def build_messages(
     multipart = any(m.image for m in input_data.context) or input_data.image is not None
     messages = [{"role": "system", "content": input_data.system_prompt}] if input_data.system_prompt is not None and input_data.system_prompt != "" else []
 
+    def merge_messages(messages: List[ContextMessage]) -> ContextMessage:
+        text = "\n\n".join(msg.text for msg in messages if msg.text)
+        images = [msg.image for msg in messages if msg.image]
+        image = images[0] if images else None  # TODO: handle multiple images
+        return ContextMessage(role=messages[0].role, text=text, image=image)
+
+    user_input_text = ""
+    if hasattr(input_data, "text"):
+        user_input_text = transform_user_message(input_data.text) if transform_user_message else input_data.text
+    user_input_image = None
+    if hasattr(input_data, "image"):
+        user_input_image = input_data.image
+    user_msg = ContextMessage(role=ContextMessageRole.USER, text=user_input_text, image=user_input_image)
+
+    input_data.context.append(user_msg)
+
+    current_role = None
+    current_messages = []
+
     for msg in input_data.context:
+        if msg.role == current_role or current_role is None:
+            current_messages.append(msg)
+            current_role = msg.role
+        else:
+            messages.append({
+                "role": current_role,
+                "content": render_message(merge_messages(current_messages), allow_multipart=multipart)
+            })
+            current_messages = [msg]
+            current_role = msg.role
+    if len(current_messages) > 0:
         messages.append({
-            "role": msg.role,
-            "content": render_message(msg, allow_multipart=multipart)
+            "role": current_role,
+            "content": render_message(merge_messages(current_messages), allow_multipart=multipart)
         })
 
-    user_msg = ContextMessage(role=ContextMessageRole.USER, text=input_data.text, image=input_data.image)
-    messages.append({
-        "role": "user",
-        "content": render_message(user_msg, allow_multipart=multipart)
-    })
-
     return messages
 
 
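The rewritten build_messages folds the user's new input into the context list and then collapses consecutive same-role messages into a single chat turn, joining their text with blank lines. A minimal runnable sketch of that grouping pattern, using a simplified stand-in for ContextMessage rather than the SDK's real types (all names here are illustrative):

    from dataclasses import dataclass
    from typing import List, Optional

    @dataclass
    class Msg:
        # Simplified stand-in for ContextMessage (illustrative only)
        role: str
        text: str = ""
        image: Optional[str] = None

    def merge_consecutive(context: List[Msg]) -> List[dict]:
        """Collapse adjacent same-role messages into single chat turns."""
        out: List[dict] = []
        run: List[Msg] = []
        for msg in context:
            if run and msg.role != run[0].role:
                # Role changed: flush the accumulated run as one turn
                out.append({"role": run[0].role,
                            "content": "\n\n".join(m.text for m in run if m.text)})
                run = []
            run.append(msg)
        if run:  # flush the trailing run
            out.append({"role": run[0].role,
                        "content": "\n\n".join(m.text for m in run if m.text)})
        return out

    history = [Msg("user", "hi"), Msg("user", "still there?"), Msg("assistant", "yes")]
    print(merge_consecutive(history))
    # [{'role': 'user', 'content': 'hi\n\nstill there?'},
    #  {'role': 'assistant', 'content': 'yes'}]

This matters for chat-template backends that reject or mishandle two consecutive messages with the same role.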
@@ -567,7 +591,7 @@ def stream_generate(
     output_cls: type[BaseLLMOutput] = LLMOutput,
 ) -> Generator[BaseLLMOutput, None, None]:
     """Stream generate from LLaMA.cpp model with timing and usage tracking."""
-
+    
     # Create queues for communication between threads
     response_queue = Queue()
     error_queue = Queue()
@@ -599,8 +623,6 @@ def stream_generate(
            completion = model.create_chat_completion(**completion_kwargs)
 
            for chunk in completion:
-                if verbose:
-                    print(chunk)
                response_queue.put(("chunk", chunk))
                # Update keep-alive timestamp
                keep_alive_queue.put(("alive", time.time()))
@@ -609,7 +631,9 @@ def stream_generate(
            response_queue.put(("done", None))
 
        except Exception as e:
-            error_queue.put(e)
+            # Preserve the full exception with traceback
+            import sys
+            error_queue.put((e, sys.exc_info()[2]))
            response_queue.put(("error", str(e)))
 
    with timing_context() as timing:
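Putting the bare exception object on the queue (the old behavior) discards the worker thread's traceback, so a later re-raise points at the consumer instead of the failing line. Shipping the (exception, traceback) pair and re-raising with with_traceback() preserves the original frames. A self-contained sketch of the pattern (not the SDK's code):

    import sys
    import threading
    from queue import Queue

    errors: Queue = Queue()

    def worker() -> None:
        try:
            raise ValueError("boom inside worker")
        except Exception as e:
            # Ship the traceback object alongside the exception
            errors.put((e, sys.exc_info()[2]))

    t = threading.Thread(target=worker)
    t.start()
    t.join()

    if not errors.empty():
        exc, tb = errors.get()
        # Re-raise in the main thread with the worker's original frames attached
        raise exc.with_traceback(tb)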
@@ -639,14 +663,22 @@ def stream_generate(
                raise RuntimeError(f"Model failed to initialize within {init_timeout} seconds")
 
            while True:
-                # Check for errors
+                # Check for errors - now with proper exception chaining
                if not error_queue.empty():
-                    raise error_queue.get()
+                    exc, tb = error_queue.get()
+                    if isinstance(exc, Exception):
+                        raise exc.with_traceback(tb)
+                    else:
+                        raise RuntimeError(f"Unknown error in worker thread: {exc}")
 
                # Check keep-alive
-                while not keep_alive_queue.empty():
-                    _, timestamp = keep_alive_queue.get_nowait()
-                    last_activity = timestamp
+                try:
+                    while not keep_alive_queue.empty():
+                        _, timestamp = keep_alive_queue.get_nowait()
+                        last_activity = timestamp
+                except Empty:
+                    # Ignore empty queue - this is expected
+                    pass
 
                # Check for timeout
                if time.time() - last_activity > chunk_timeout:
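Queue.empty() and get_nowait() are two separate calls, so get_nowait() can still raise queue.Empty if the queue is drained between them; wrapping the drain loop in try/except makes the race benign. A standalone sketch of the same drain-and-timestamp pattern (variable names are illustrative):

    import time
    from queue import Empty, Queue

    keep_alive: Queue = Queue()
    keep_alive.put(("alive", time.time()))  # pretend the worker just checked in

    last_activity = time.time()
    chunk_timeout = 30.0

    # Drain every pending timestamp without blocking; treat a lost race
    # between empty() and get_nowait() as harmless.
    try:
        while not keep_alive.empty():
            _, timestamp = keep_alive.get_nowait()
            last_activity = timestamp
    except Empty:
        pass

    if time.time() - last_activity > chunk_timeout:
        raise TimeoutError(f"No generation activity for {chunk_timeout}s")
    print("worker alive at", last_activity)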
@@ -659,12 +691,17 @@ def stream_generate(
                    continue
 
                if msg_type == "error":
+                    # If we get an error message but no exception in error_queue,
+                    # create a new error
                    raise RuntimeError(f"Generation error: {data}")
                elif msg_type == "done":
                    break
 
                chunk = data
 
+                if verbose:
+                    print(chunk)
+
                # Mark first token time
                if not timing.first_token_time:
                    timing.mark_first_token()
@@ -682,12 +719,17 @@ def stream_generate(
                    break
 
            # Wait for generation thread to finish
-            generation_thread.join(timeout=5.0)  # Increased timeout to 5 seconds
            if generation_thread.is_alive():
-                # Thread didn't finish - this shouldn't happen normally
-                # but we handle it gracefully
-                raise RuntimeError("Generation thread failed to finish")
+                generation_thread.join(timeout=5.0)  # Increased timeout to 5 seconds
+                if generation_thread.is_alive():
+                    # Thread didn't finish - this shouldn't happen normally
+                    raise RuntimeError("Generation thread failed to finish")
 
        except Exception as e:
-            # Ensure any error is properly propagated
-            raise e
+            # Check if there's a thread error we should chain with
+            if not error_queue.empty():
+                thread_exc, thread_tb = error_queue.get()
+                if isinstance(thread_exc, Exception):
+                    raise e from thread_exc
+            # If no thread error, raise the original exception
+            raise
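The new except block uses raise ... from ... so a consumer-side failure carries the worker's error as its __cause__, and both appear in the reported traceback. A compact illustration of that chaining with stand-in exceptions (not the SDK's code):

    from queue import Queue

    error_queue: Queue = Queue()
    error_queue.put((ValueError("original worker error"), None))  # as the worker would

    def consume() -> None:
        try:
            raise RuntimeError("consumer-side failure")
        except Exception as e:
            if not error_queue.empty():
                thread_exc, _ = error_queue.get()
                if isinstance(thread_exc, Exception):
                    # __cause__ links the worker's error into the traceback
                    raise e from thread_exc
            raise

    try:
        consume()
    except RuntimeError as e:
        print("cause:", e.__cause__)  # -> original worker error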
{inferencesh-0.2.30 → inferencesh-0.2.32/src/inferencesh.egg-info}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: inferencesh
-Version: 0.2.30
+Version: 0.2.32
 Summary: inference.sh Python SDK
 Author: Inference Shell Inc.
 Author-email: "Inference Shell Inc." <hello@inference.sh>