PyPI - livellm - Versions diffs - 1.5.5__py3-none-any.whl → 1.7.1__py3-none-any.whl - Mend

livellm 1.5.5__py3-none-any.whl → 1.7.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

livellm/livellm.py +258 -98
livellm/models/__init__.py +5 -1
livellm/models/agent/__init__.py +5 -1
livellm/models/agent/agent.py +15 -4
livellm/models/agent/output_schema.py +120 -0
livellm/models/transcription.py +2 -0
livellm/transcripton.py +61 -19
{livellm-1.5.5.dist-info → livellm-1.7.1.dist-info}/METADATA +299 -33
{livellm-1.5.5.dist-info → livellm-1.7.1.dist-info}/RECORD +11 -10
{livellm-1.5.5.dist-info → livellm-1.7.1.dist-info}/WHEEL +0 -0
{livellm-1.5.5.dist-info → livellm-1.7.1.dist-info}/licenses/LICENSE +0 -0

livellm/models/__init__.py CHANGED Viewed

@@ -1,8 +1,9 @@
 from .common import BaseRequest, ProviderKind, Settings, SuccessResponse
 from .fallback import AgentFallbackRequest, AudioFallbackRequest, TranscribeFallbackRequest, FallbackStrategy
-from .agent.agent import AgentRequest, AgentResponse, AgentResponseUsage
+from .agent.agent import AgentRequest, AgentResponse, AgentResponseUsage, ContextOverflowStrategy
 from .agent.chat import Message, MessageRole, TextMessage, BinaryMessage, ToolCallMessage, ToolReturnMessage
 from .agent.tools import Tool, ToolInput, ToolKind, WebSearchInput, MCPStreamableServerInput
+from .agent.output_schema import OutputSchema, PropertyDef
 from .audio.speak import SpeakMimeType, SpeakRequest, SpeakStreamResponse
 from .audio.transcribe import TranscribeRequest, TranscribeResponse, File
 from .transcription import TranscriptionInitWsRequest, TranscriptionAudioChunkWsRequest, TranscriptionWsResponse
@@ -23,6 +24,7 @@ __all__ = [
     "AgentRequest",
     "AgentResponse",
     "AgentResponseUsage",
+    "ContextOverflowStrategy",
     "Message",
     "MessageRole",
     "TextMessage",
@@ -34,6 +36,8 @@ __all__ = [
     "ToolKind",
     "WebSearchInput",
     "MCPStreamableServerInput",
+    "OutputSchema",
+    "PropertyDef",
     # Audio
     "SpeakMimeType",
     "SpeakRequest",

livellm/models/agent/__init__.py CHANGED Viewed

@@ -1,12 +1,14 @@
-from .agent import AgentRequest, AgentResponse, AgentResponseUsage
+from .agent import AgentRequest, AgentResponse, AgentResponseUsage, ContextOverflowStrategy
 from .chat import Message, MessageRole, TextMessage, BinaryMessage, ToolCallMessage, ToolReturnMessage
 from .tools import Tool, ToolInput, ToolKind, WebSearchInput, MCPStreamableServerInput
+from .output_schema import OutputSchema, PropertyDef
 __all__ = [
     "AgentRequest",
     "AgentResponse",
     "AgentResponseUsage",
+    "ContextOverflowStrategy",
     "Message",
     "MessageRole",
     "TextMessage",
@@ -18,4 +20,6 @@ __all__ = [
     "ToolKind",
     "WebSearchInput",
     "MCPStreamableServerInput",
+    "OutputSchema",
+    "PropertyDef",
 ]

livellm/models/agent/agent.py CHANGED Viewed

@@ -1,24 +1,35 @@
 # models for full run: AgentRequest, AgentResponse
-from pydantic import BaseModel, Field, field_validator
-from typing import Optional, List, Union
+from pydantic import BaseModel, Field
+from typing import Optional, List, Union, Any, Dict
+from enum import Enum
 from .chat import TextMessage, BinaryMessage, ToolCallMessage, ToolReturnMessage
 from .tools import WebSearchInput, MCPStreamableServerInput
+from .output_schema import OutputSchema, PropertyDef
 from ..common import BaseRequest
+class ContextOverflowStrategy(str, Enum):
+    """Strategy for handling context overflow when text exceeds context_limit."""
+    TRUNCATE = "truncate"  # Take beginning, middle, and end portions
+    RECYCLE = "recycle"    # Iteratively process chunks, merging results
 class AgentRequest(BaseRequest):
     model: str = Field(..., description="The model to use")
     messages: List[Union[TextMessage, BinaryMessage, ToolCallMessage, ToolReturnMessage]] = Field(..., description="The messages to use")
     tools: List[Union[WebSearchInput, MCPStreamableServerInput]] = Field(default_factory=list, description="The tools to use")
     gen_config: Optional[dict] = Field(default=None, description="The configuration for the generation")
     include_history: bool = Field(default=False, description="Whether to include full conversation history in the response")
+    output_schema: Optional[Union[OutputSchema, Dict[str, Any]]] = Field(default=None, description="JSON schema for structured output. Can be an OutputSchema, a dict representing a JSON schema, or will be converted from a Pydantic BaseModel.")
+    context_limit: int = Field(default=0, description="Maximum context size in tokens. If <= 0, context overflow handling is disabled.")
+    context_overflow_strategy: ContextOverflowStrategy = Field(default=ContextOverflowStrategy.TRUNCATE, description="Strategy for handling context overflow: 'truncate' or 'recycle'")
 class AgentResponseUsage(BaseModel):
     input_tokens: int = Field(..., description="The number of input tokens used")
     output_tokens: int = Field(..., description="The number of output tokens used")
 class AgentResponse(BaseModel):
-    output: str = Field(..., description="The output of the response")
+    output: str = Field(..., description="The output of the response (JSON string when using output_schema)")
     usage: AgentResponseUsage = Field(..., description="The usage of the response")
-    history: Optional[List[Union[TextMessage, BinaryMessage, ToolCallMessage, ToolReturnMessage]]] = Field(default=None, description="Full conversation history including tool calls and returns (only included when include_history=true)")
+    history: Optional[List[Union[TextMessage, BinaryMessage, ToolCallMessage, ToolReturnMessage]]] = Field(default=None, description="Full conversation history including tool calls and returns (only included when include_history=true)")

livellm/models/agent/output_schema.py ADDED Viewed

@@ -0,0 +1,120 @@
+"""Output schema models for structured output support."""
+from pydantic import BaseModel, ConfigDict, Field
+from typing import Optional, List, Dict, Any, Union
+class PropertyDef(BaseModel):
+    """Definition of a property in the output schema."""
+    model_config = ConfigDict(extra="allow")
+    type: Union[str, List[str]] = Field(..., description="Property type: string, integer, number, boolean, array, object, null")
+    description: Optional[str] = Field(default=None, description="Description of the property")
+    enum: Optional[List[Any]] = Field(default=None, description="Allowed values for the property")
+    default: Optional[Any] = Field(default=None, description="Default value")
+    # String constraints
+    minLength: Optional[int] = Field(default=None, description="Minimum string length")
+    maxLength: Optional[int] = Field(default=None, description="Maximum string length")
+    pattern: Optional[str] = Field(default=None, description="Regex pattern for string validation")
+    # Number constraints
+    minimum: Optional[float] = Field(default=None, description="Minimum number value")
+    maximum: Optional[float] = Field(default=None, description="Maximum number value")
+    exclusiveMinimum: Optional[float] = Field(default=None, description="Exclusive minimum number value")
+    exclusiveMaximum: Optional[float] = Field(default=None, description="Exclusive maximum number value")
+    # Array constraints
+    items: Optional[Union["PropertyDef", Dict[str, Any]]] = Field(default=None, description="Schema for array items")
+    minItems: Optional[int] = Field(default=None, description="Minimum array length")
+    maxItems: Optional[int] = Field(default=None, description="Maximum array length")
+    uniqueItems: Optional[bool] = Field(default=None, description="Whether array items must be unique")
+    # Object constraints
+    properties: Optional[Dict[str, Union["PropertyDef", Dict[str, Any]]]] = Field(default=None, description="Nested object properties")
+    required: Optional[List[str]] = Field(default=None, description="Required properties for nested objects")
+    additionalProperties: Optional[Union[bool, "PropertyDef", Dict[str, Any]]] = Field(default=None, description="Schema for additional properties")
+class OutputSchema(BaseModel):
+    """
+    Schema definition for structured output.
+    This model represents a JSON Schema that the AI model must follow when generating responses.
+    When provided, the agent will return a JSON string matching the specified schema.
+    Example:
+        schema = OutputSchema(
+            title="Person",
+            description="A person's information",
+            properties={
+                "name": PropertyDef(type="string", description="The person's name"),
+                "age": PropertyDef(type="integer", minimum=0, maximum=150),
+            },
+            required=["name", "age"]
+        )
+    """
+    model_config = ConfigDict(extra="allow")
+    title: str = Field(..., description="Name of the schema, used as the output tool name")
+    description: Optional[str] = Field(default=None, description="Description to help the model understand what to output")
+    properties: Dict[str, Union[PropertyDef, Dict[str, Any]]] = Field(..., description="Dictionary of property definitions")
+    required: Optional[List[str]] = Field(default=None, description="List of required property names")
+    additionalProperties: Optional[Union[bool, PropertyDef, Dict[str, Any]]] = Field(default=None, description="Whether extra properties are allowed")
+    @classmethod
+    def from_pydantic(cls, model: type[BaseModel]) -> "OutputSchema":
+        """
+        Create an OutputSchema from a Pydantic BaseModel class.
+        Args:
+            model: A Pydantic BaseModel class to convert to OutputSchema.
+        Returns:
+            An OutputSchema instance representing the model's schema.
+        Example:
+            class Person(BaseModel):
+                name: str
+                age: int
+            schema = OutputSchema.from_pydantic(Person)
+        """
+        json_schema = model.model_json_schema()
+        # Extract the main properties
+        title = json_schema.get("title", model.__name__)
+        description = json_schema.get("description")
+        properties = json_schema.get("properties", {})
+        required = json_schema.get("required")
+        # Handle $defs for nested models (Pydantic generates these for complex models)
+        defs = json_schema.get("$defs", {})
+        if defs:
+            # Inline the definitions into properties
+            properties = cls._resolve_refs(properties, defs)
+        return cls(
+            title=title,
+            description=description,
+            properties=properties,
+            required=required,
+        )
+    @classmethod
+    def _resolve_refs(cls, obj: Any, defs: Dict[str, Any]) -> Any:
+        """Recursively resolve $ref references in the schema."""
+        if isinstance(obj, dict):
+            if "$ref" in obj:
+                ref_path = obj["$ref"]
+                # Extract the definition name from "#/$defs/ModelName"
+                if ref_path.startswith("#/$defs/"):
+                    def_name = ref_path[len("#/$defs/"):]
+                    if def_name in defs:
+                        # Return the resolved definition (also resolve any nested refs)
+                        return cls._resolve_refs(defs[def_name], defs)
+                return obj
+            else:
+                return {k: cls._resolve_refs(v, defs) for k, v in obj.items()}
+        elif isinstance(obj, list):
+            return [cls._resolve_refs(item, defs) for item in obj]
+        else:
+            return obj

livellm/models/transcription.py CHANGED Viewed

@@ -2,6 +2,7 @@ from pydantic import BaseModel, Field, field_validator
 from livellm.models.audio.speak import SpeakMimeType
 from typing import Optional
 import base64
+from datetime import datetime
 class TranscriptionInitWsRequest(BaseModel):
     provider_uid: str = Field(..., description="The provider uid")
@@ -33,3 +34,4 @@ class TranscriptionAudioChunkWsRequest(BaseModel):
 class TranscriptionWsResponse(BaseModel):
     transcription: str = Field(..., description="The transcription")
+    received_at: datetime = Field(default_factory=datetime.now, description="The datetime when the transcription was received")

livellm/transcripton.py CHANGED Viewed

@@ -47,7 +47,7 @@ class TranscriptionWsClient:
         self,
         request: TranscriptionInitWsRequest,
         source: AsyncIterator[TranscriptionAudioChunkWsRequest]
-    ) -> AsyncIterator[TranscriptionWsResponse]:
+    ) -> AsyncIterator[list[TranscriptionWsResponse]]:
         """
         Start a transcription session.
@@ -56,7 +56,10 @@ class TranscriptionWsClient:
             source: An async iterator that yields audio chunks to transcribe.
         Returns:
-            An async iterator of transcription session responses.
+            An async iterator that yields lists of transcription responses.
+            Each list contains all responses that accumulated since the last yield,
+            ordered from oldest to newest (last element is the most recent).
+            This prevents slow processing from stalling the entire loop.
         Example:
             ```python
@@ -66,8 +69,14 @@ class TranscriptionWsClient:
                         yield TranscriptionAudioChunkWsRequest(audio=chunk)
             async with TranscriptionWsClient(url) as client:
-                async for response in client.start_session(init_request, audio_source()):
-                    print(response.transcription)
+                async for responses in client.start_session(init_request, audio_source()):
+                    # responses is a list, newest transcription is last
+                    latest = responses[-1]
+                    print(f"Latest: {latest.transcription}")
+                    # Process all transcriptions if needed
+                    for resp in responses:
+                        print(resp.transcription)
             ```
         """
         # Send initialization request as JSON
@@ -79,6 +88,10 @@ class TranscriptionWsClient:
         if not init_response.success:
             raise Exception(f"Failed to start transcription session: {init_response.error}")
+        # Queue to collect incoming transcription responses
+        response_queue: asyncio.Queue[TranscriptionWsResponse | None] = asyncio.Queue()
+        receiver_done = False
         # Start sending audio chunks in background
         async def send_chunks():
             try:
@@ -93,23 +106,52 @@ class TranscriptionWsClient:
                 await self.websocket.close()
                 raise e
+        # Receive transcription responses in background
+        async def receive_responses():
+            nonlocal receiver_done
+            try:
+                while True:
+                    try:
+                        response_data = await self.websocket.recv()
+                        transcription_response = TranscriptionWsResponse(**json.loads(response_data))
+                        await response_queue.put(transcription_response)
+                    except websockets.ConnectionClosed:
+                        break
+            finally:
+                receiver_done = True
+                await response_queue.put(None)  # Signal end of stream
         send_task = asyncio.create_task(send_chunks())
+        receive_task = asyncio.create_task(receive_responses())
-        # Receive transcription responses
         try:
-            while True:
-                try:
-                    response_data = await self.websocket.recv()
-                    transcription_response = TranscriptionWsResponse(**json.loads(response_data))
-                    yield transcription_response
-                except websockets.ConnectionClosed:
-                    # Connection closed, stop receiving
+            while True:
+                # Wait for at least one response
+                first_response = await response_queue.get()
+                if first_response is None:
+                    # End of stream
                     break
+                # Collect all additional responses that have accumulated (non-blocking)
+                responses = [first_response]
+                while True:
+                    try:
+                        additional = response_queue.get_nowait()
+                        if additional is None:
+                            # End of stream, yield what we have and exit
+                            yield responses
+                            return
+                        responses.append(additional)
+                    except asyncio.QueueEmpty:
+                        break
+                yield responses
         finally:
-            # Cancel the send task if still running
-            if not send_task.done():
-                send_task.cancel()
-                try:
-                    await send_task
-                except asyncio.CancelledError:
-                    pass
+            # Cancel tasks if still running
+            for task in [send_task, receive_task]:
+                if not task.done():
+                    task.cancel()
+                    try:
+                        await task
+                    except asyncio.CancelledError:
+                        pass

livellm 1.5.5__py3-none-any.whl → 1.7.1__py3-none-any.whl

livellm 1.5.5__py3-none-any.whl → 1.7.1__py3-none-any.whl