inferencesh 0.2.31__py3-none-any.whl → 0.4.29__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inferencesh/__init__.py +5 -0
- inferencesh/client.py +1081 -0
- inferencesh/models/base.py +81 -3
- inferencesh/models/file.py +120 -21
- inferencesh/models/llm.py +251 -77
- inferencesh/utils/download.py +15 -7
- inferencesh-0.4.29.dist-info/METADATA +196 -0
- inferencesh-0.4.29.dist-info/RECORD +15 -0
- inferencesh-0.2.31.dist-info/METADATA +0 -105
- inferencesh-0.2.31.dist-info/RECORD +0 -14
- {inferencesh-0.2.31.dist-info → inferencesh-0.4.29.dist-info}/WHEEL +0 -0
- {inferencesh-0.2.31.dist-info → inferencesh-0.4.29.dist-info}/entry_points.txt +0 -0
- {inferencesh-0.2.31.dist-info → inferencesh-0.4.29.dist-info}/licenses/LICENSE +0 -0
- {inferencesh-0.2.31.dist-info → inferencesh-0.4.29.dist-info}/top_level.txt +0 -0
inferencesh/models/llm.py
CHANGED
@@ -6,6 +6,7 @@ from threading import Thread
 import time
 from contextlib import contextmanager
 import base64
+import json

 from .base import BaseAppInput, BaseAppOutput
 from .file import File
@@ -14,40 +15,48 @@ class ContextMessageRole(str, Enum):
     USER = "user"
     ASSISTANT = "assistant"
     SYSTEM = "system"
+    TOOL = "tool"


 class Message(BaseAppInput):
     role: ContextMessageRole
     content: str

-
 class ContextMessage(BaseAppInput):
     role: ContextMessageRole = Field(
-        description="
+        description="the role of the message. user, assistant, or system",
     )
     text: str = Field(
-        description="
+        description="the text content of the message"
     )
     image: Optional[File] = Field(
-        description="
+        description="the image file of the message",
+        default=None
+    )
+    images: Optional[List[File]] = Field(
+        description="the images of the message",
+        default=None
+    )
+    tool_calls: Optional[List[Dict[str, Any]]] = Field(
+        description="the tool calls of the message",
+        default=None
+    )
+    tool_call_id: Optional[str] = Field(
+        description="the tool call id for tool role messages",
         default=None
     )

 class BaseLLMInput(BaseAppInput):
     """Base class with common LLM fields."""
     system_prompt: str = Field(
-        description="
-        default="
+        description="the system prompt to use for the model",
+        default="you are a helpful assistant that can answer questions and help with tasks.",
         examples=[
-            "
-            "You are a certified medical professional who can provide accurate health information.",
-            "You are a certified financial advisor who can give sound investment guidance.",
-            "You are a certified cybersecurity expert who can explain security best practices.",
-            "You are a certified environmental scientist who can discuss climate and sustainability.",
+            "you are a helpful assistant that can answer questions and help with tasks.",
        ]
    )
    context: List[ContextMessage] = Field(
-        description="
+        description="the context to use for the model",
        default=[],
        examples=[
            [
@@ -56,38 +65,50 @@ class BaseLLMInput(BaseAppInput):
            ]
        ]
    )
+    role: ContextMessageRole = Field(
+        description="the role of the input text",
+        default=ContextMessageRole.USER
+    )
    text: str = Field(
-        description="
+        description="the input text to use for the model",
        examples=[
-            "
-            "What is the weather like today?",
-            "Can you help me write a poem about spring?",
-            "Explain quantum computing in simple terms"
+            "write a haiku about artificial general intelligence"
        ]
    )
-    temperature: float = Field(default=0.7)
-    top_p: float = Field(default=0.95)
-    max_tokens: int = Field(default=4096)
+    temperature: float = Field(default=0.7, ge=0.0, le=1.0)
+    top_p: float = Field(default=0.95, ge=0.0, le=1.0)
    context_size: int = Field(default=4096)

 class ImageCapabilityMixin(BaseModel):
     """Mixin for models that support image inputs."""
     image: Optional[File] = Field(
-        description="
-        default=None
+        description="the image to use for the model",
+        default=None,
+        contentMediaType="image/*",
+    )
+
+class MultipleImageCapabilityMixin(BaseModel):
+    """Mixin for models that support image inputs."""
+    images: Optional[List[File]] = Field(
+        description="the images to use for the model",
+        default=None,
     )

 class ReasoningCapabilityMixin(BaseModel):
     """Mixin for models that support reasoning."""
     reasoning: bool = Field(
-        description="
+        description="enable step-by-step reasoning",
         default=False
     )

 class ToolsCapabilityMixin(BaseModel):
     """Mixin for models that support tool/function calling."""
     tools: Optional[List[Dict[str, Any]]] = Field(
-        description="
+        description="tool definitions for function calling",
+        default=None
+    )
+    tool_call_id: Optional[str] = Field(
+        description="the tool call id for tool role messages",
         default=None
     )

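The tool-aware fields added to `ContextMessage` above make a full tool-call round trip representable in the conversation context. A hedged sketch of how they might be populated — the call id and function payload are hypothetical; only the field and role names come from the diff:

```python
# Hypothetical usage of the new ContextMessage tool fields (0.4.x).
from inferencesh.models.llm import ContextMessage, ContextMessageRole

# Assistant turn that requested a tool invocation.
call = ContextMessage(
    role=ContextMessageRole.ASSISTANT,
    text="",
    tool_calls=[{
        "id": "call_1",  # hypothetical call id
        "type": "function",
        "function": {"name": "get_weather", "arguments": {"city": "Paris"}},
    }],
)

# Tool turn carrying the result, linked back via tool_call_id.
result = ContextMessage(
    role=ContextMessageRole.TOOL,
    text='{"temp_c": 21}',
    tool_call_id="call_1",
)
```

Note that `build_messages` (further down) JSON-encodes dict-valued `function.arguments` before sending, so passing a dict here is consistent with the diff.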
@@ -112,26 +133,26 @@ class LLMUsage(BaseAppOutput):

 class BaseLLMOutput(BaseAppOutput):
     """Base class for LLM outputs with common fields."""
-    response: str = Field(description="
+    response: str = Field(description="the generated text response")

 class LLMUsageMixin(BaseModel):
     """Mixin for models that provide token usage statistics."""
     usage: Optional[LLMUsage] = Field(
-        description="
+        description="token usage statistics",
         default=None
     )

 class ReasoningMixin(BaseModel):
     """Mixin for models that support reasoning."""
     reasoning: Optional[str] = Field(
-        description="
+        description="the reasoning output of the model",
         default=None
     )

 class ToolCallsMixin(BaseModel):
     """Mixin for models that support tool calls."""
     tool_calls: Optional[List[Dict[str, Any]]] = Field(
-        description="
+        description="tool calls for function calling",
         default=None
     )

@@ -217,28 +238,75 @@ def build_messages(
         text = transform_user_message(msg.text) if transform_user_message and msg.role == ContextMessageRole.USER else msg.text
         if text:
             parts.append({"type": "text", "text": text})
+        else:
+            parts.append({"type": "text", "text": ""})
         if msg.image:
             if msg.image.path:
                 image_data_uri = image_to_base64_data_uri(msg.image.path)
                 parts.append({"type": "image_url", "image_url": {"url": image_data_uri}})
             elif msg.image.uri:
                 parts.append({"type": "image_url", "image_url": {"url": msg.image.uri}})
+        if msg.images:
+            for image in msg.images:
+                if image.path:
+                    image_data_uri = image_to_base64_data_uri(image.path)
+                    parts.append({"type": "image_url", "image_url": {"url": image_data_uri}})
+                elif image.uri:
+                    parts.append({"type": "image_url", "image_url": {"url": image.uri}})
         if allow_multipart:
             return parts
         if len(parts) == 1 and parts[0]["type"] == "text":
             return parts[0]["text"]
-
+        if len(parts) > 1:
+            if parts.any(lambda x: x["type"] == "image_url"):
+                raise ValueError("Image content requires multipart support")
+            return parts
+        raise ValueError("Invalid message content")

-    multipart = any(m.image for m in input_data.context) or input_data.image is not None
     messages = [{"role": "system", "content": input_data.system_prompt}] if input_data.system_prompt is not None and input_data.system_prompt != "" else []

     def merge_messages(messages: List[ContextMessage]) -> ContextMessage:
         text = "\n\n".join(msg.text for msg in messages if msg.text)
-        images = [
-
-
+        images = []
+        # Collect single images
+        for msg in messages:
+            if msg.image:
+                images.append(msg.image)
+        # Collect multiple images (flatten the list)
+        for msg in messages:
+            if msg.images:
+                images.extend(msg.images)
+        # Set image to single File if there's exactly one, otherwise None
+        image = images[0] if len(images) == 1 else None
+        # Set images to the list if there are multiple, otherwise None
+        images_list = images if len(images) > 1 else None
+        return ContextMessage(role=messages[0].role, text=text, image=image, images=images_list)
+
+    def merge_tool_calls(messages: List[ContextMessage]) -> List[Dict[str, Any]]:
+        tool_calls = []
+        for msg in messages:
+            if msg.tool_calls:
+                tool_calls.extend(msg.tool_calls)
+        return tool_calls
+
+    user_input_text = ""
+    if hasattr(input_data, "text"):
+        user_input_text = transform_user_message(input_data.text) if transform_user_message else input_data.text
+
+    user_input_image = None
+    multipart = any(m.image for m in input_data.context)
+    if hasattr(input_data, "image"):
+        user_input_image = input_data.image
+        multipart = multipart or input_data.image is not None
+
+    user_input_images = None
+    if hasattr(input_data, "images"):
+        user_input_images = input_data.images
+        multipart = multipart or input_data.images is not None

-
+    input_role = input_data.role if hasattr(input_data, "role") else ContextMessageRole.USER
+    input_tool_call_id = input_data.tool_call_id if hasattr(input_data, "tool_call_id") else None
+    user_msg = ContextMessage(role=input_role, text=user_input_text, image=user_input_image, images=user_input_images, tool_call_id=input_tool_call_id)

     input_data.context.append(user_msg)

@@ -250,21 +318,104 @@ def build_messages(
             current_messages.append(msg)
             current_role = msg.role
         else:
-
-
-
-
+            # Convert role enum to string for OpenAI API compatibility
+            role_str = current_role.value if hasattr(current_role, "value") else current_role
+            msg_dict = {
+                "role": role_str,
+                "content": render_message(merge_messages(current_messages), allow_multipart=multipart),
+            }
+
+            # Only add tool_calls if not empty
+            tool_calls = merge_tool_calls(current_messages)
+            if tool_calls:
+                # Ensure arguments are JSON strings (OpenAI API requirement)
+                for tc in tool_calls:
+                    if "function" in tc and "arguments" in tc["function"]:
+                        if isinstance(tc["function"]["arguments"], dict):
+                            tc["function"]["arguments"] = json.dumps(tc["function"]["arguments"])
+                msg_dict["tool_calls"] = tool_calls
+
+            # Add tool_call_id for tool role messages (required by OpenAI API)
+            if role_str == "tool":
+                if current_messages and current_messages[0].tool_call_id:
+                    msg_dict["tool_call_id"] = current_messages[0].tool_call_id
+                else:
+                    # If not provided, use empty string to satisfy schema
+                    msg_dict["tool_call_id"] = ""
+
+            messages.append(msg_dict)
             current_messages = [msg]
             current_role = msg.role
+
     if len(current_messages) > 0:
-
-
-
-
+        # Convert role enum to string for OpenAI API compatibility
+        role_str = current_role.value if hasattr(current_role, "value") else current_role
+        msg_dict = {
+            "role": role_str,
+            "content": render_message(merge_messages(current_messages), allow_multipart=multipart),
+        }
+
+        # Only add tool_calls if not empty
+        tool_calls = merge_tool_calls(current_messages)
+        if tool_calls:
+            # Ensure arguments are JSON strings (OpenAI API requirement)
+            for tc in tool_calls:
+                if "function" in tc and "arguments" in tc["function"]:
+                    if isinstance(tc["function"]["arguments"], dict):
+                        tc["function"]["arguments"] = json.dumps(tc["function"]["arguments"])
+            msg_dict["tool_calls"] = tool_calls
+
+        # Add tool_call_id for tool role messages (required by OpenAI API)
+        if role_str == "tool":
+            if current_messages and current_messages[0].tool_call_id:
+                msg_dict["tool_call_id"] = current_messages[0].tool_call_id
+            else:
+                # If not provided, use empty string to satisfy schema
+                msg_dict["tool_call_id"] = ""
+
+        messages.append(msg_dict)

     return messages


+def build_tools(tools: Optional[List[Dict[str, Any]]]) -> Optional[List[Dict[str, Any]]]:
+    """Build tools in OpenAI API format.
+
+    Ensures tools are properly formatted:
+    - Wrapped in {"type": "function", "function": {...}}
+    - Parameters is never None (OpenAI API requirement)
+    """
+    if not tools:
+        return None
+
+    result = []
+    for tool in tools:
+        # Extract function definition
+        if "type" in tool and "function" in tool:
+            func_def = tool["function"].copy()
+        else:
+            func_def = tool.copy()
+
+        # Ensure parameters is not None (OpenAI API requirement)
+        if func_def.get("parameters") is None:
+            func_def["parameters"] = {"type": "object", "properties": {}}
+        # Also ensure properties within parameters is not None
+        elif func_def["parameters"].get("properties") is None:
+            func_def["parameters"]["properties"] = {}
+        else:
+            # Remove properties with null values (OpenAI API doesn't accept them)
+            properties = func_def["parameters"].get("properties", {})
+            if properties:
+                func_def["parameters"]["properties"] = {
+                    k: v for k, v in properties.items() if v is not None
+                }
+
+        # Wrap in OpenAI format
+        result.append({"type": "function", "function": func_def})
+
+    return result
+
+
 class StreamResponse:
     """Holds a single chunk of streamed response."""
     def __init__(self):
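A hedged usage sketch for the `build_tools` helper added above: the tool definition is made up, but the normalization — wrapping in `{"type": "function", ...}` and substituting an empty object schema for `parameters=None` — follows the function body in the diff:

```python
from inferencesh.models.llm import build_tools

# A bare function definition: no {"type": "function"} wrapper, parameters=None.
tools = build_tools([
    {"name": "get_time", "description": "Current time", "parameters": None},
])

# Per the normalization rules above, this yields:
# [{"type": "function",
#   "function": {"name": "get_time", "description": "Current time",
#                "parameters": {"type": "object", "properties": {}}}}]
print(tools)
```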
@@ -462,26 +613,27 @@ class ResponseTransformer:
             text: Cleaned text to process for reasoning
         """
         # Default implementation for <think> style reasoning
-
+        # Check for tags in the complete buffer
+        if "<think>" in self.state.buffer and not self.state.state_changes["reasoning_started"]:
             self.state.state_changes["reasoning_started"] = True
             if self.timing:
                 self.timing.start_reasoning()

-
-
-
-
-
-
-
-
-
-        if
-            self.state.
-            self.
-
-
-
+        # Extract content and handle end of reasoning
+        parts = self.state.buffer.split("<think>", 1)
+        if len(parts) > 1:
+            reasoning_text = parts[1]
+            end_parts = reasoning_text.split("</think>", 1)
+            self.state.reasoning = end_parts[0].strip()
+            self.state.response = end_parts[1].strip() if len(end_parts) > 1 else ""
+
+        # Check for end tag in complete buffer
+        if "</think>" in self.state.buffer and not self.state.state_changes["reasoning_ended"]:
+            self.state.state_changes["reasoning_ended"] = True
+            if self.timing:
+                # Estimate token count from character count (rough approximation)
+                token_count = len(self.state.reasoning) // 4
+                self.timing.end_reasoning(token_count)
         else:
             self.state.response = self.state.buffer

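The rewritten handler above scans the accumulated buffer for `<think>` tags rather than the removed incremental logic. A standalone illustration of the same split, with the `ResponseTransformer` state and timing machinery omitted:

```python
# Mirrors the <think> extraction in the diff, applied to a plain string.
buffer = "<think>User wants a haiku about AGI.</think>Circuits dream of dawn"

parts = buffer.split("<think>", 1)
if len(parts) > 1:
    end_parts = parts[1].split("</think>", 1)
    reasoning = end_parts[0].strip()
    response = end_parts[1].strip() if len(end_parts) > 1 else ""
    # Rough token estimate used for timing: one token per ~4 characters.
    token_count = len(reasoning) // 4

print(reasoning)  # "User wants a haiku about AGI."
print(response)   # "Circuits dream of dawn"
```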
@@ -579,13 +731,13 @@ def stream_generate(
     tool_choice: Optional[Dict[str, Any]] = None,
     temperature: float = 0.7,
     top_p: float = 0.95,
-    max_tokens: int = 4096,
     stop: Optional[List[str]] = None,
     verbose: bool = False,
     output_cls: type[BaseLLMOutput] = LLMOutput,
+    kwargs: Optional[Dict[str, Any]] = None,
 ) -> Generator[BaseLLMOutput, None, None]:
     """Stream generate from LLaMA.cpp model with timing and usage tracking."""
-
+
     # Create queues for communication between threads
     response_queue = Queue()
     error_queue = Queue()
@@ -603,9 +755,10 @@ def stream_generate(
         "stream": True,
         "temperature": temperature,
         "top_p": top_p,
-        "
-        "stop": stop
+        "stop": stop,
     }
+    if kwargs:
+        completion_kwargs.update(kwargs)
     if tools is not None:
         completion_kwargs["tools"] = tools
     if tool_choice is not None:
@@ -617,8 +770,6 @@ def stream_generate(
             completion = model.create_chat_completion(**completion_kwargs)

             for chunk in completion:
-                if verbose:
-                    print(chunk)
                 response_queue.put(("chunk", chunk))
                 # Update keep-alive timestamp
                 keep_alive_queue.put(("alive", time.time()))
@@ -627,7 +778,9 @@ def stream_generate(
             response_queue.put(("done", None))

         except Exception as e:
-
+            # Preserve the full exception with traceback
+            import sys
+            error_queue.put((e, sys.exc_info()[2]))
             response_queue.put(("error", str(e)))

     with timing_context() as timing:
@@ -645,6 +798,7 @@ def stream_generate(
         last_activity = time.time()
         init_timeout = 30.0  # 30 seconds for initial response
         chunk_timeout = 10.0  # 10 seconds between chunks
+        chunks_begun = False

         try:
             # Wait for initial setup
@@ -657,17 +811,25 @@ def stream_generate(
                 raise RuntimeError(f"Model failed to initialize within {init_timeout} seconds")

             while True:
-                # Check for errors
+                # Check for errors - now with proper exception chaining
                 if not error_queue.empty():
-
+                    exc, tb = error_queue.get()
+                    if isinstance(exc, Exception):
+                        raise exc.with_traceback(tb)
+                    else:
+                        raise RuntimeError(f"Unknown error in worker thread: {exc}")

                 # Check keep-alive
-
-
-
+                try:
+                    while not keep_alive_queue.empty():
+                        _, timestamp = keep_alive_queue.get_nowait()
+                        last_activity = timestamp
+                except Empty:
+                    # Ignore empty queue - this is expected
+                    pass

                 # Check for timeout
-                if time.time() - last_activity > chunk_timeout:
+                if chunks_begun and time.time() - last_activity > chunk_timeout:
                     raise RuntimeError(f"No response from model for {chunk_timeout} seconds")

                 # Get next chunk
@@ -677,16 +839,23 @@ def stream_generate(
                     continue

                 if msg_type == "error":
+                    # If we get an error message but no exception in error_queue,
+                    # create a new error
                     raise RuntimeError(f"Generation error: {data}")
                 elif msg_type == "done":
                     break

                 chunk = data

+                if verbose:
+                    print(chunk)
+
                 # Mark first token time
                 if not timing.first_token_time:
                     timing.mark_first_token()

+                chunks_begun = True
+
                 # Update response state from chunk
                 response.update_from_chunk(chunk, timing)

@@ -700,12 +869,17 @@ def stream_generate(
                     break

             # Wait for generation thread to finish
-            generation_thread.join(timeout=5.0)  # Increased timeout to 5 seconds
             if generation_thread.is_alive():
-                #
-
-
+                generation_thread.join(timeout=5.0)  # Increased timeout to 5 seconds
+                if generation_thread.is_alive():
+                    # Thread didn't finish - this shouldn't happen normally
+                    raise RuntimeError("Generation thread failed to finish")

         except Exception as e:
-            #
-
+            # Check if there's a thread error we should chain with
+            if not error_queue.empty():
+                thread_exc, thread_tb = error_queue.get()
+                if isinstance(thread_exc, Exception):
+                    raise e from thread_exc
+            # If no thread error, raise the original exception
+            raise
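With `max_tokens` dropped from the `stream_generate` signature and a `kwargs` dict now merged into `completion_kwargs`, per-call llama.cpp sampling options presumably travel through `kwargs`. A hedged sketch — the leading `model` and `messages` arguments are assumptions, since the start of the signature is not shown in this diff:

```python
from inferencesh.models.llm import stream_generate

def generate_text(model, messages):
    """Hypothetical call shape; only the keyword parameters appear in the diff."""
    last = None
    for last in stream_generate(
        model,                       # assumed: a llama_cpp model handle
        messages,                    # assumed: e.g. output of build_messages(...)
        temperature=0.7,
        top_p=0.95,
        kwargs={"max_tokens": 512},  # forwarded via completion_kwargs.update(kwargs)
    ):
        pass  # each yielded BaseLLMOutput carries the accumulated state
    return last.response  # the response field defined on BaseLLMOutput above
```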
inferencesh/utils/download.py
CHANGED
@@ -24,16 +24,24 @@ def download(url: str, directory: Union[str, Path, StorageDir]) -> str:
     dir_path = Path(directory)
     dir_path.mkdir(exist_ok=True)

-    #
-
-    hash_dir = dir_path / url_hash
-    hash_dir.mkdir(exist_ok=True)
+    # Parse URL components
+    parsed_url = urllib.parse.urlparse(url)

-    #
-
+    # Create hash from URL path and query parameters for uniqueness
+    url_components = parsed_url.netloc + parsed_url.path
+    if parsed_url.query:
+        url_components += '?' + parsed_url.query
+    url_hash = hashlib.sha256(url_components.encode()).hexdigest()[:12]
+
+    # Keep original filename or use a default
+    filename = os.path.basename(parsed_url.path)
     if not filename:
         filename = 'download'
-
+
+    # Create hash directory and store file
+    hash_dir = dir_path / url_hash
+    hash_dir.mkdir(exist_ok=True)
+
     output_path = hash_dir / filename

     # If file exists in directory and it's not a temp directory, return it
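The reworked `download` above derives its cache directory from a 12-hex-character SHA-256 prefix over the URL's host, path, and query, keeping the original filename inside it. A self-contained sketch that mirrors the hashing scheme from the diff to predict where a file lands:

```python
import hashlib
import os
import urllib.parse
from pathlib import Path

def cache_path(url: str, directory: str) -> Path:
    """Mirror of the hashing scheme in the new download() (a sketch, not the API)."""
    parsed = urllib.parse.urlparse(url)
    components = parsed.netloc + parsed.path
    if parsed.query:
        components += '?' + parsed.query
    url_hash = hashlib.sha256(components.encode()).hexdigest()[:12]
    filename = os.path.basename(parsed.path) or 'download'
    return Path(directory) / url_hash / filename

print(cache_path("https://example.com/models/weights.bin?rev=2", "/tmp/cache"))
# -> /tmp/cache/<12-hex-chars>/weights.bin  (hash value illustrative)
```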
|