livellm 1.5.4__tar.gz → 1.6.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {livellm-1.5.4 → livellm-1.6.1}/.gitignore +2 -1
- {livellm-1.5.4 → livellm-1.6.1}/PKG-INFO +170 -22
- {livellm-1.5.4 → livellm-1.6.1}/README.md +169 -17
- {livellm-1.5.4 → livellm-1.6.1}/livellm/livellm.py +95 -36
- {livellm-1.5.4 → livellm-1.6.1}/livellm/models/__init__.py +3 -0
- {livellm-1.5.4 → livellm-1.6.1}/livellm/models/agent/__init__.py +3 -0
- {livellm-1.5.4 → livellm-1.6.1}/livellm/models/agent/agent.py +6 -4
- livellm-1.6.1/livellm/models/agent/output_schema.py +120 -0
- {livellm-1.5.4 → livellm-1.6.1}/livellm/models/transcription.py +2 -0
- {livellm-1.5.4 → livellm-1.6.1}/livellm/transcripton.py +61 -19
- {livellm-1.5.4 → livellm-1.6.1}/pyproject.toml +3 -3
- {livellm-1.5.4 → livellm-1.6.1}/LICENSE +0 -0
- {livellm-1.5.4 → livellm-1.6.1}/livellm/__init__.py +0 -0
- {livellm-1.5.4 → livellm-1.6.1}/livellm/models/agent/chat.py +0 -0
- {livellm-1.5.4 → livellm-1.6.1}/livellm/models/agent/tools.py +0 -0
- {livellm-1.5.4 → livellm-1.6.1}/livellm/models/audio/__init__.py +0 -0
- {livellm-1.5.4 → livellm-1.6.1}/livellm/models/audio/speak.py +0 -0
- {livellm-1.5.4 → livellm-1.6.1}/livellm/models/audio/transcribe.py +0 -0
- {livellm-1.5.4 → livellm-1.6.1}/livellm/models/common.py +0 -0
- {livellm-1.5.4 → livellm-1.6.1}/livellm/models/fallback.py +0 -0
- {livellm-1.5.4 → livellm-1.6.1}/livellm/models/ws.py +0 -0
- {livellm-1.5.4 → livellm-1.6.1}/livellm/py.typed +0 -0
--- livellm-1.5.4/PKG-INFO
+++ livellm-1.6.1/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: livellm
-Version: 1.5.4
+Version: 1.6.1
 Summary: Python client for the LiveLLM Server
 Project-URL: Homepage, https://github.com/qalby-tech/livellm-client-py
 Project-URL: Repository, https://github.com/qalby-tech/livellm-client-py
@@ -19,10 +19,6 @@ Requires-Dist: httpx>=0.27.0
 Requires-Dist: pydantic>=2.0.0
 Requires-Dist: sounddevice>=0.5.3
 Requires-Dist: websockets>=15.0.1
-Provides-Extra: testing
-Requires-Dist: pytest-asyncio>=0.21.0; extra == 'testing'
-Requires-Dist: pytest-cov>=4.1.0; extra == 'testing'
-Requires-Dist: pytest>=8.4.2; extra == 'testing'
 Description-Content-Type: text/markdown
 
 # LiveLLM Python Client
@@ -39,6 +35,7 @@ Python client library for the LiveLLM Server - a unified proxy for AI agent, aud
 - 🎯 **Multi-provider** - OpenAI, Google, Anthropic, Groq, ElevenLabs
 - 🔄 **Streaming** - Real-time streaming for agent and audio
 - 🛠️ **Flexible API** - Use request objects or keyword arguments
+- 📋 **Structured Output** - Get validated JSON responses with schema support (Pydantic, OutputSchema, or dict)
 - 🎙️ **Audio services** - Text-to-speech and transcription
 - 🎤 **Real-Time Transcription** - WebSocket-based live audio transcription with bidirectional streaming
 - ⚡ **Fallback strategies** - Sequential and parallel handling
@@ -302,6 +299,146 @@ if response.history:
 - Auditing and logging complete conversations
 - Building conversational UIs with full context visibility
 
+#### Agent with Structured Output
+
+Get structured JSON responses from the agent by providing an output schema. The agent will return a JSON string matching your schema in the `output` field.
+
+**Three ways to define a schema:**
+
+**1. Using Pydantic BaseModel (Recommended)**
+```python
+import json
+from pydantic import BaseModel
+from livellm.models import TextMessage
+
+class Person(BaseModel):
+    name: str
+    age: int
+    occupation: str
+
+response = await client.agent_run(
+    provider_uid="openai",
+    model="gpt-4",
+    messages=[TextMessage(role="user", content="Extract info: John is a 28-year-old engineer")],
+    output_schema=Person  # Pass the BaseModel class directly
+)
+
+# response.output is a JSON string: '{"name": "John", "age": 28, "occupation": "engineer"}'
+print(type(response.output))  # <class 'str'>
+
+# Parse the JSON string yourself if needed
+data = json.loads(response.output)
+print(f"Name: {data['name']}")
+print(f"Age: {data['age']}")
+print(f"Occupation: {data['occupation']}")
+
+# Or validate with your Pydantic model
+person = Person.model_validate_json(response.output)
+print(f"Name: {person.name}")
+```
+
+**2. Using OutputSchema**
+```python
+from livellm.models import OutputSchema, PropertyDef, TextMessage
+
+schema = OutputSchema(
+    title="Person",
+    description="A person's information",
+    properties={
+        "name": PropertyDef(type="string", description="The person's name"),
+        "age": PropertyDef(type="integer", minimum=0, maximum=150, description="Age in years"),
+        "email": PropertyDef(type="string", pattern="^[^@]+@[^@]+\\.[^@]+$", description="Email address"),
+    },
+    required=["name", "age", "email"]
+)
+
+response = await client.agent_run(
+    provider_uid="openai",
+    model="gpt-4",
+    messages=[TextMessage(role="user", content="Tell me about a person")],
+    output_schema=schema
+)
+```
+
+**3. Using a dictionary (JSON Schema)**
+```python
+schema_dict = {
+    "title": "Person",
+    "type": "object",
+    "properties": {
+        "name": {"type": "string", "description": "The person's name"},
+        "age": {"type": "integer", "minimum": 0, "maximum": 150},
+        "email": {"type": "string", "pattern": "^[^@]+@[^@]+\\.[^@]+$"}
+    },
+    "required": ["name", "age", "email"]
+}
+
+response = await client.agent_run(
+    provider_uid="openai",
+    model="gpt-4",
+    messages=[TextMessage(role="user", content="Extract person info")],
+    output_schema=schema_dict
+)
+```
+
+**Complex nested schemas:**
+```python
+from pydantic import BaseModel
+from typing import List, Optional
+
+class Address(BaseModel):
+    street: str
+    city: str
+    zip_code: str
+
+class Person(BaseModel):
+    name: str
+    age: int
+    addresses: List[Address]
+    phone: Optional[str] = None
+
+response = await client.agent_run(
+    provider_uid="openai",
+    model="gpt-4",
+    messages=[TextMessage(role="user", content="Extract person with addresses")],
+    output_schema=Person  # Nested models are automatically resolved
+)
+```
+
+**With streaming:**
+```python
+from pydantic import BaseModel
+
+class Summary(BaseModel):
+    title: str
+    key_points: List[str]
+    word_count: int
+
+stream = client.agent_run_stream(
+    provider_uid="openai",
+    model="gpt-4",
+    messages=[TextMessage(role="user", content="Summarize this article")],
+    output_schema=Summary
+)
+
+async for chunk in stream:
+    print(chunk.output, end="", flush=True)
+
+# After streaming completes, parse the full JSON output
+full_output = "".join([chunk.output async for chunk in stream])
+data = json.loads(full_output)
+```
+
+**Response fields:**
+- `output` - The JSON string response matching your schema
+
+**Use cases:**
+- Data extraction and parsing
+- API response formatting
+- Structured data generation
+- Type-safe responses
+- Integration with type-checked code
+
 ### Audio Services
 
 #### Text-to-Speech
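
One caveat on the streaming example added above: it iterates `stream` twice (once in the `async for` loop and again in the list comprehension), which a single async generator will not support. A minimal corrected sketch that collects chunks while printing; the same fix applies to the identical example in the README.md hunk further down:

```python
chunks = []
async for chunk in stream:
    print(chunk.output, end="", flush=True)
    chunks.append(chunk.output)

# Parse once the stream is exhausted
data = json.loads("".join(chunks))
```
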
@@ -411,11 +548,17 @@ async def transcribe_live_direct():
     )
 
     # Stream audio and receive transcriptions
-
-
-
-
-
+    # Each iteration yields a list of responses (oldest to newest)
+    async for responses in client.start_session(init_request, audio_source()):
+        # Get the latest transcription (last element)
+        latest = responses[-1]
+        print(f"Latest transcription: {latest.transcription}")
+
+        # Process all accumulated transcriptions if needed
+        if len(responses) > 1:
+            print(f"  (received {len(responses)} chunks)")
+            for resp in responses:
+                print(f"  - {resp.transcription}")
 
 asyncio.run(transcribe_live_direct())
 ```
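
The live-transcription examples above and below call an `audio_source()` generator that these hunks never define. A minimal sketch of what it could look like; the file name and chunk size are assumptions (raw 16-bit PCM, roughly 100 ms per chunk at 16 kHz mono):

```python
from livellm.models import TranscriptionAudioChunkWsRequest

async def audio_source():
    # speech.pcm is a hypothetical raw PCM capture
    with open("speech.pcm", "rb") as f:
        while chunk := f.read(3200):
            yield TranscriptionAudioChunkWsRequest(audio=chunk)
```
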
@@ -453,25 +596,25 @@ async def transcribe_and_chat():
         gen_config={},
     )
 
-    # Listen for transcriptions and, for each
-
-
+    # Listen for transcriptions and, for each batch, run an agent request
+    # Each iteration yields a list of responses - newest is last
+    async for responses in t_client.start_session(init_request, audio_source()):
+        # Use the latest transcription for the agent
+        latest = responses[-1]
+        print("User said:", latest.transcription)
 
         # You can call agent_run (or speak, etc.) while the transcription stream is active
+        # Even if this is slow, transcriptions accumulate and won't stall the loop
         agent_response = await realtime.agent_run(
             provider_uid="openai",
             model="gpt-4",
             messages=[
-                TextMessage(role="user", content=
+                TextMessage(role="user", content=latest.transcription),
             ],
             temperature=0.7,
         )
         print("Agent:", agent_response.output)
 
-        if resp.is_end:
-            print("Transcription session complete")
-            break
-
 asyncio.run(transcribe_and_chat())
 ```
@@ -586,7 +729,7 @@ response = await client.ping()
 **Real-Time Transcription (TranscriptionWsClient)**
 - `connect()` - Establish WebSocket connection
 - `disconnect()` - Close WebSocket connection
-- `start_session(init_request, audio_source)` - Start bidirectional streaming transcription
+- `start_session(init_request, audio_source)` - Start bidirectional streaming transcription; yields `list[TranscriptionWsResponse]` (accumulated responses, newest last)
 - `async with client:` - Auto connection management (recommended)
 
 **Cleanup**
@@ -607,7 +750,7 @@ response = await client.ping()
 - `MessageRole` - `USER` | `MODEL` | `SYSTEM` | `TOOL_CALL` | `TOOL_RETURN` (or use strings)
 
 **Requests**
-- `AgentRequest(provider_uid, model, messages, tools?, gen_config?, include_history?)` - Set `include_history=True` to get full conversation
+- `AgentRequest(provider_uid, model, messages, tools?, gen_config?, include_history?, output_schema?)` - Set `include_history=True` to get full conversation. Set `output_schema` for structured JSON output.
 - `SpeakRequest(provider_uid, model, text, voice, mime_type, sample_rate, gen_config?)`
 - `TranscribeRequest(provider_uid, file, model, language?, gen_config?)`
 - `TranscriptionInitWsRequest(provider_uid, model, language?, input_sample_rate?, input_audio_format?, gen_config?)`
@@ -617,15 +760,20 @@ response = await client.ping()
 - `WebSearchInput(kind=ToolKind.WEB_SEARCH, search_context_size)`
 - `MCPStreamableServerInput(kind=ToolKind.MCP_STREAMABLE_SERVER, url, prefix?, timeout?)`
 
+**Structured Output**
+- `OutputSchema(title, description?, properties, required?, additionalProperties?)` - JSON Schema for structured output
+- `PropertyDef(type, description?, enum?, default?, minLength?, maxLength?, pattern?, minimum?, maximum?, items?, ...)` - Property definition with validation constraints
+- `OutputSchema.from_pydantic(model)` - Convert a Pydantic BaseModel class to OutputSchema
+
 **Fallback**
 - `AgentFallbackRequest(strategy, requests, timeout_per_request?)`
 - `AudioFallbackRequest(strategy, requests, timeout_per_request?)`
 - `FallbackStrategy` - `SEQUENTIAL` | `PARALLEL`
 
 **Responses**
-- `AgentResponse(output, usage{input_tokens, output_tokens}, history?)` - `history` included when `include_history=True`
+- `AgentResponse(output, usage{input_tokens, output_tokens}, history?)` - `history` included when `include_history=True`. `output` is a JSON string when `output_schema` is provided.
 - `TranscribeResponse(text, language)`
-- `TranscriptionWsResponse(transcription,
+- `TranscriptionWsResponse(transcription, received_at)` - Real-time transcription result; yielded as `list[TranscriptionWsResponse]` with newest last
 
 ## Error Handling
 
--- livellm-1.5.4/README.md
+++ livellm-1.6.1/README.md
@@ -12,6 +12,7 @@ Python client library for the LiveLLM Server - a unified proxy for AI agent, aud
 - 🎯 **Multi-provider** - OpenAI, Google, Anthropic, Groq, ElevenLabs
 - 🔄 **Streaming** - Real-time streaming for agent and audio
 - 🛠️ **Flexible API** - Use request objects or keyword arguments
+- 📋 **Structured Output** - Get validated JSON responses with schema support (Pydantic, OutputSchema, or dict)
 - 🎙️ **Audio services** - Text-to-speech and transcription
 - 🎤 **Real-Time Transcription** - WebSocket-based live audio transcription with bidirectional streaming
 - ⚡ **Fallback strategies** - Sequential and parallel handling
@@ -275,6 +276,146 @@ if response.history:
 - Auditing and logging complete conversations
 - Building conversational UIs with full context visibility
 
+#### Agent with Structured Output
+
+Get structured JSON responses from the agent by providing an output schema. The agent will return a JSON string matching your schema in the `output` field.
+
+**Three ways to define a schema:**
+
+**1. Using Pydantic BaseModel (Recommended)**
+```python
+import json
+from pydantic import BaseModel
+from livellm.models import TextMessage
+
+class Person(BaseModel):
+    name: str
+    age: int
+    occupation: str
+
+response = await client.agent_run(
+    provider_uid="openai",
+    model="gpt-4",
+    messages=[TextMessage(role="user", content="Extract info: John is a 28-year-old engineer")],
+    output_schema=Person  # Pass the BaseModel class directly
+)
+
+# response.output is a JSON string: '{"name": "John", "age": 28, "occupation": "engineer"}'
+print(type(response.output))  # <class 'str'>
+
+# Parse the JSON string yourself if needed
+data = json.loads(response.output)
+print(f"Name: {data['name']}")
+print(f"Age: {data['age']}")
+print(f"Occupation: {data['occupation']}")
+
+# Or validate with your Pydantic model
+person = Person.model_validate_json(response.output)
+print(f"Name: {person.name}")
+```
+
+**2. Using OutputSchema**
+```python
+from livellm.models import OutputSchema, PropertyDef, TextMessage
+
+schema = OutputSchema(
+    title="Person",
+    description="A person's information",
+    properties={
+        "name": PropertyDef(type="string", description="The person's name"),
+        "age": PropertyDef(type="integer", minimum=0, maximum=150, description="Age in years"),
+        "email": PropertyDef(type="string", pattern="^[^@]+@[^@]+\\.[^@]+$", description="Email address"),
+    },
+    required=["name", "age", "email"]
+)
+
+response = await client.agent_run(
+    provider_uid="openai",
+    model="gpt-4",
+    messages=[TextMessage(role="user", content="Tell me about a person")],
+    output_schema=schema
+)
+```
+
+**3. Using a dictionary (JSON Schema)**
+```python
+schema_dict = {
+    "title": "Person",
+    "type": "object",
+    "properties": {
+        "name": {"type": "string", "description": "The person's name"},
+        "age": {"type": "integer", "minimum": 0, "maximum": 150},
+        "email": {"type": "string", "pattern": "^[^@]+@[^@]+\\.[^@]+$"}
+    },
+    "required": ["name", "age", "email"]
+}
+
+response = await client.agent_run(
+    provider_uid="openai",
+    model="gpt-4",
+    messages=[TextMessage(role="user", content="Extract person info")],
+    output_schema=schema_dict
+)
+```
+
+**Complex nested schemas:**
+```python
+from pydantic import BaseModel
+from typing import List, Optional
+
+class Address(BaseModel):
+    street: str
+    city: str
+    zip_code: str
+
+class Person(BaseModel):
+    name: str
+    age: int
+    addresses: List[Address]
+    phone: Optional[str] = None
+
+response = await client.agent_run(
+    provider_uid="openai",
+    model="gpt-4",
+    messages=[TextMessage(role="user", content="Extract person with addresses")],
+    output_schema=Person  # Nested models are automatically resolved
+)
+```
+
+**With streaming:**
+```python
+from pydantic import BaseModel
+
+class Summary(BaseModel):
+    title: str
+    key_points: List[str]
+    word_count: int
+
+stream = client.agent_run_stream(
+    provider_uid="openai",
+    model="gpt-4",
+    messages=[TextMessage(role="user", content="Summarize this article")],
+    output_schema=Summary
+)
+
+async for chunk in stream:
+    print(chunk.output, end="", flush=True)
+
+# After streaming completes, parse the full JSON output
+full_output = "".join([chunk.output async for chunk in stream])
+data = json.loads(full_output)
+```
+
+**Response fields:**
+- `output` - The JSON string response matching your schema
+
+**Use cases:**
+- Data extraction and parsing
+- API response formatting
+- Structured data generation
+- Type-safe responses
+- Integration with type-checked code
+
 ### Audio Services
 
 #### Text-to-Speech
@@ -384,11 +525,17 @@ async def transcribe_live_direct():
     )
 
     # Stream audio and receive transcriptions
-
-
-
-
-
+    # Each iteration yields a list of responses (oldest to newest)
+    async for responses in client.start_session(init_request, audio_source()):
+        # Get the latest transcription (last element)
+        latest = responses[-1]
+        print(f"Latest transcription: {latest.transcription}")
+
+        # Process all accumulated transcriptions if needed
+        if len(responses) > 1:
+            print(f"  (received {len(responses)} chunks)")
+            for resp in responses:
+                print(f"  - {resp.transcription}")
 
 asyncio.run(transcribe_live_direct())
 ```
@@ -426,25 +573,25 @@ async def transcribe_and_chat():
         gen_config={},
     )
 
-    # Listen for transcriptions and, for each
-
-
+    # Listen for transcriptions and, for each batch, run an agent request
+    # Each iteration yields a list of responses - newest is last
+    async for responses in t_client.start_session(init_request, audio_source()):
+        # Use the latest transcription for the agent
+        latest = responses[-1]
+        print("User said:", latest.transcription)
 
         # You can call agent_run (or speak, etc.) while the transcription stream is active
+        # Even if this is slow, transcriptions accumulate and won't stall the loop
        agent_response = await realtime.agent_run(
             provider_uid="openai",
             model="gpt-4",
             messages=[
-                TextMessage(role="user", content=
+                TextMessage(role="user", content=latest.transcription),
             ],
             temperature=0.7,
         )
         print("Agent:", agent_response.output)
 
-        if resp.is_end:
-            print("Transcription session complete")
-            break
-
 asyncio.run(transcribe_and_chat())
 ```
@@ -559,7 +706,7 @@ response = await client.ping()
 **Real-Time Transcription (TranscriptionWsClient)**
 - `connect()` - Establish WebSocket connection
 - `disconnect()` - Close WebSocket connection
-- `start_session(init_request, audio_source)` - Start bidirectional streaming transcription
+- `start_session(init_request, audio_source)` - Start bidirectional streaming transcription; yields `list[TranscriptionWsResponse]` (accumulated responses, newest last)
 - `async with client:` - Auto connection management (recommended)
 
 **Cleanup**
@@ -580,7 +727,7 @@ response = await client.ping()
 - `MessageRole` - `USER` | `MODEL` | `SYSTEM` | `TOOL_CALL` | `TOOL_RETURN` (or use strings)
 
 **Requests**
-- `AgentRequest(provider_uid, model, messages, tools?, gen_config?, include_history?)` - Set `include_history=True` to get full conversation
+- `AgentRequest(provider_uid, model, messages, tools?, gen_config?, include_history?, output_schema?)` - Set `include_history=True` to get full conversation. Set `output_schema` for structured JSON output.
 - `SpeakRequest(provider_uid, model, text, voice, mime_type, sample_rate, gen_config?)`
 - `TranscribeRequest(provider_uid, file, model, language?, gen_config?)`
 - `TranscriptionInitWsRequest(provider_uid, model, language?, input_sample_rate?, input_audio_format?, gen_config?)`
@@ -590,15 +737,20 @@ response = await client.ping()
 - `WebSearchInput(kind=ToolKind.WEB_SEARCH, search_context_size)`
 - `MCPStreamableServerInput(kind=ToolKind.MCP_STREAMABLE_SERVER, url, prefix?, timeout?)`
 
+**Structured Output**
+- `OutputSchema(title, description?, properties, required?, additionalProperties?)` - JSON Schema for structured output
+- `PropertyDef(type, description?, enum?, default?, minLength?, maxLength?, pattern?, minimum?, maximum?, items?, ...)` - Property definition with validation constraints
+- `OutputSchema.from_pydantic(model)` - Convert a Pydantic BaseModel class to OutputSchema
+
 **Fallback**
 - `AgentFallbackRequest(strategy, requests, timeout_per_request?)`
 - `AudioFallbackRequest(strategy, requests, timeout_per_request?)`
 - `FallbackStrategy` - `SEQUENTIAL` | `PARALLEL`
 
 **Responses**
-- `AgentResponse(output, usage{input_tokens, output_tokens}, history?)` - `history` included when `include_history=True`
+- `AgentResponse(output, usage{input_tokens, output_tokens}, history?)` - `history` included when `include_history=True`. `output` is a JSON string when `output_schema` is provided.
 - `TranscribeResponse(text, language)`
-- `TranscriptionWsResponse(transcription,
+- `TranscriptionWsResponse(transcription, received_at)` - Real-time transcription result; yielded as `list[TranscriptionWsResponse]` with newest last
 
 ## Error Handling
 
--- livellm-1.5.4/livellm/livellm.py
+++ livellm-1.6.1/livellm/livellm.py
@@ -3,9 +3,10 @@ import asyncio
 import httpx
 import json
 import warnings
-from typing import List, Optional, AsyncIterator, Union, overload, Dict
+from typing import List, Optional, AsyncIterator, Union, overload, Dict, Any, Type
 from .models.common import Settings, SuccessResponse
 from .models.agent.agent import AgentRequest, AgentResponse
+from .models.agent.output_schema import OutputSchema
 from .models.audio.speak import SpeakRequest, EncodedSpeakResponse
 from .models.audio.transcribe import TranscribeRequest, TranscribeResponse, File
 from .models.fallback import AgentFallbackRequest, AudioFallbackRequest, TranscribeFallbackRequest
@@ -15,10 +16,19 @@ from .transcripton import TranscriptionWsClient
 from uuid import uuid4
 import logging
 from abc import ABC, abstractmethod
+from importlib.metadata import version, PackageNotFoundError
+from pydantic import BaseModel
 
 
 logger = logging.getLogger(__name__)
 
+try:
+    __version__ = version("livellm")
+except PackageNotFoundError:
+    __version__ = "unknown"
+
+DEFAULT_USER_AGENT = f"livellm-python/{__version__}"
+
 class BaseLivellmClient(ABC):
 
     @overload
@@ -37,6 +47,7 @@ class BaseLivellmClient(ABC):
         messages: list,
         tools: Optional[list] = None,
         include_history: bool = False,
+        output_schema: Optional[Union[OutputSchema, Dict[str, Any], Type[BaseModel]]] = None,
         **kwargs
     ) -> AgentResponse:
         ...
@@ -55,6 +66,7 @@ class BaseLivellmClient(ABC):
         messages: Optional[list] = None,
         tools: Optional[list] = None,
         include_history: bool = False,
+        output_schema: Optional[Union[OutputSchema, Dict[str, Any], Type[BaseModel]]] = None,
         **kwargs
     ) -> AgentResponse:
         """
@@ -72,7 +84,8 @@ class BaseLivellmClient(ABC):
                 model="gpt-4",
                 messages=[TextMessage(...)],
                 tools=[],
-                include_history=False
+                include_history=False,
+                output_schema=MyPydanticModel  # or OutputSchema(...) or dict
             )
 
         Args:
@@ -83,9 +96,14 @@ class BaseLivellmClient(ABC):
             tools: Optional list of tools
             gen_config: Optional generation configuration
             include_history: Whether to include full conversation history in the response
+            output_schema: Optional schema for structured output. Can be:
+                - An OutputSchema instance
+                - A dict representing a JSON schema
+                - A Pydantic BaseModel class (will be converted to OutputSchema)
 
         Returns:
-            AgentResponse with the agent's output
+            AgentResponse with the agent's output. If output_schema was provided,
+            the output will be a JSON string matching the schema.
         """
         # Check if first argument is a request object
         if request is not None:
@@ -102,16 +120,39 @@ class BaseLivellmClient(ABC):
                 "Alternatively, pass an AgentRequest object as the first positional argument."
             )
 
+        # Convert output_schema if it's a Pydantic BaseModel class
+        resolved_schema = self._resolve_output_schema(output_schema)
+
         agent_request = AgentRequest(
             provider_uid=provider_uid,
             model=model,
             messages=messages,
             tools=tools or [],
             gen_config=kwargs or None,
-            include_history=include_history
+            include_history=include_history,
+            output_schema=resolved_schema
         )
         return await self.handle_agent_run(agent_request)
 
+    def _resolve_output_schema(
+        self,
+        output_schema: Optional[Union[OutputSchema, Dict[str, Any], Type[BaseModel]]]
+    ) -> Optional[Union[OutputSchema, Dict[str, Any]]]:
+        """
+        Resolve the output_schema parameter to an OutputSchema or dict.
+
+        If a Pydantic BaseModel class is provided, convert it to OutputSchema.
+        """
+        if output_schema is None:
+            return None
+
+        # Check if it's a class (not an instance) that's a subclass of BaseModel
+        if isinstance(output_schema, type) and issubclass(output_schema, BaseModel):
+            return OutputSchema.from_pydantic(output_schema)
+
+        # Already an OutputSchema or dict, return as-is
+        return output_schema
+
     @overload
     def agent_run_stream(
         self,
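
An illustrative check of the resolver's behavior under the three accepted forms; this is a sketch only (it pokes a private helper), assuming `client` is any constructed client and `Person` is the Pydantic model from the README examples:

```python
from pydantic import BaseModel
from livellm.models import OutputSchema

class Person(BaseModel):
    name: str
    age: int

# A BaseModel class is converted; OutputSchema instances and dicts pass through
assert isinstance(client._resolve_output_schema(Person), OutputSchema)
raw = {"title": "Person", "type": "object", "properties": {}}
assert client._resolve_output_schema(raw) is raw
assert client._resolve_output_schema(None) is None
```
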
@@ -128,6 +169,7 @@ class BaseLivellmClient(ABC):
         messages: list,
         tools: Optional[list] = None,
         include_history: bool = False,
+        output_schema: Optional[Union[OutputSchema, Dict[str, Any], Type[BaseModel]]] = None,
         **kwargs
     ) -> AsyncIterator[AgentResponse]:
         ...
@@ -146,6 +188,7 @@ class BaseLivellmClient(ABC):
         messages: Optional[list] = None,
         tools: Optional[list] = None,
         include_history: bool = False,
+        output_schema: Optional[Union[OutputSchema, Dict[str, Any], Type[BaseModel]]] = None,
         **kwargs
     ) -> AsyncIterator[AgentResponse]:
         """
@@ -165,7 +208,8 @@ class BaseLivellmClient(ABC):
                 model="gpt-4",
                 messages=[TextMessage(...)],
                 tools=[],
-                include_history=False
+                include_history=False,
+                output_schema=MyPydanticModel  # or OutputSchema(...) or dict
             ):
                 ...
 
@@ -177,9 +221,14 @@ class BaseLivellmClient(ABC):
             tools: Optional list of tools
             gen_config: Optional generation configuration
             include_history: Whether to include full conversation history in the response
+            output_schema: Optional schema for structured output. Can be:
+                - An OutputSchema instance
+                - A dict representing a JSON schema
+                - A Pydantic BaseModel class (will be converted to OutputSchema)
 
         Returns:
-            AsyncIterator of AgentResponse chunks
+            AsyncIterator of AgentResponse chunks. If output_schema was provided,
+            the output will be a JSON string matching the schema.
         """
         # Check if first argument is a request object
         if request is not None:
@@ -196,13 +245,17 @@ class BaseLivellmClient(ABC):
                 "Alternatively, pass an AgentRequest object as the first positional argument."
             )
 
+        # Convert output_schema if it's a Pydantic BaseModel class
+        resolved_schema = self._resolve_output_schema(output_schema)
+
         agent_request = AgentRequest(
             provider_uid=provider_uid,
             model=model,
             messages=messages,
             tools=tools or [],
             gen_config=kwargs or None,
-            include_history=include_history
+            include_history=include_history,
+            output_schema=resolved_schema
         )
         stream = self.handle_agent_run_stream(agent_request)
 
@@ -505,7 +558,8 @@ class LivellmWsClient(BaseLivellmClient):
 
     def __init__(
         self,
-        base_url: str,
+        base_url: str,
+        user_agent: Optional[str] = None,
         timeout: Optional[float] = None,
         max_size: Optional[int] = None,
         max_buffer_size: Optional[int] = None
@@ -523,6 +577,7 @@ class LivellmWsClient(BaseLivellmClient):
         self._ws_root_base_url = ws_url
         self.base_url = f"{ws_url}/livellm/ws"
         self.timeout = timeout
+        self.user_agent = user_agent or DEFAULT_USER_AGENT
         self.websocket = None
         self.sessions: Dict[str, asyncio.Queue] = {}
         self.max_buffer_size = max_buffer_size or 0  # None means unlimited buffer size
@@ -541,7 +596,8 @@ class LivellmWsClient(BaseLivellmClient):
             self.base_url,
             open_timeout=self.timeout,
             close_timeout=self.timeout,
-            max_size=self.max_size
+            max_size=self.max_size,
+            additional_headers={"User-Agent": self.user_agent}
         )
         self.__listen_for_responses_task = asyncio.create_task(self.listen_for_responses())
 
@@ -680,7 +736,8 @@ class LivellmClient(BaseLivellmClient):
 
     def __init__(
         self,
-        base_url: str,
+        base_url: str,
+        user_agent: Optional[str] = None,
         timeout: Optional[float] = None,
         configs: Optional[List[Settings]] = None
     ):
@@ -689,11 +746,13 @@ class LivellmClient(BaseLivellmClient):
         # HTTP API base URL for this client
         self.base_url = f"{self._root_base_url}/livellm"
         self.timeout = timeout
+        self.user_agent = user_agent or DEFAULT_USER_AGENT
         self.client = httpx.AsyncClient(base_url=self.base_url, timeout=self.timeout) \
             if self.timeout else httpx.AsyncClient(base_url=self.base_url)
         self.settings = []
         self.headers = {
             "Content-Type": "application/json",
+            "User-Agent": self.user_agent,
         }
         # Lazily-created realtime (WebSocket) client
         self._realtime = None
@@ -713,7 +772,7 @@ class LivellmClient(BaseLivellmClient):
         """
         if self._realtime is None:
             # Pass the same root base URL; LivellmWsClient will handle ws/wss conversion.
-            self._realtime = LivellmWsClient(self._root_base_url, timeout=self.timeout)
+            self._realtime = LivellmWsClient(self._root_base_url, user_agent=self.user_agent, timeout=self.timeout)
         return self._realtime
 
     def update_configs_post_init(self, configs: List[Settings]) -> SuccessResponse:
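
Taken together, these hunks thread one `user_agent` string through both transports. A short usage sketch; the URL and agent string are placeholders:

```python
client = LivellmClient("https://api.example.com", user_agent="my-app/2.0")
# HTTP requests now send "User-Agent: my-app/2.0" via self.headers, and the
# lazily created realtime WebSocket client reuses the same string, so both
# transports identify themselves consistently. Omitting user_agent falls back
# to DEFAULT_USER_AGENT, e.g. "livellm-python/1.6.1".
```
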
@@ -858,32 +917,32 @@ class LivellmClient(BaseLivellmClient):
         if self._realtime is not None:
             await self._realtime.disconnect()
 
-    def __del__(self):
-        """
-        Destructor to clean up resources when the client is garbage collected.
-        This will close the HTTP client and attempt to delete configs if cleanup wasn't called.
-        Note: It's recommended to use the async context manager or call cleanup() explicitly.
-        """
-        # Warn user if cleanup wasn't called
-        if self.settings:
-            warnings.warn(
-                "LivellmClient is being garbage collected without explicit cleanup. "
-                "Provider configs may not be deleted from the server. "
-                "Consider using 'async with' or calling 'await client.cleanup()' explicitly.",
-                ResourceWarning,
-                stacklevel=2
-            )
+    # def __del__(self):
+    #     """
+    #     Destructor to clean up resources when the client is garbage collected.
+    #     This will close the HTTP client and attempt to delete configs if cleanup wasn't called.
+    #     Note: It's recommended to use the async context manager or call cleanup() explicitly.
+    #     """
+    #     # Warn user if cleanup wasn't called
+    #     if self.settings:
+    #         warnings.warn(
+    #             "LivellmClient is being garbage collected without explicit cleanup. "
+    #             "Provider configs may not be deleted from the server. "
+    #             "Consider using 'async with' or calling 'await client.cleanup()' explicitly.",
+    #             ResourceWarning,
+    #             stacklevel=2
+    #         )
 
-        # Close the httpx client synchronously
-        # httpx.AsyncClient stores a sync Transport that needs cleanup
-        try:
-            with httpx.Client(base_url=self.base_url) as client:
-                for config in self.settings:
-                    config: Settings = config
-                    client.delete(f"providers/config/{config.uid}", headers=self.headers)
-        except Exception:
-            # Silently fail - we're in a destructor
-            pass
+    #     # Close the httpx client synchronously
+    #     # httpx.AsyncClient stores a sync Transport that needs cleanup
+    #     try:
+    #         with httpx.Client(base_url=self.base_url) as client:
+    #             for config in self.settings:
+    #                 config: Settings = config
+    #                 client.delete(f"providers/config/{config.uid}", headers=self.headers)
+    #     except Exception:
+    #         # Silently fail - we're in a destructor
+    #         pass
 
     # Implement abstract methods from BaseLivellmClient
 
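
With `__del__` commented out, nothing deletes provider configs implicitly anymore, so cleanup has to be explicit. A sketch of the two patterns the old warning itself recommended (the URL is a placeholder):

```python
# Preferred: the async context manager handles cleanup automatically
async with LivellmClient("https://api.example.com") as client:
    ...

# Or clean up explicitly
client = LivellmClient("https://api.example.com")
try:
    ...
finally:
    await client.cleanup()
```
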
--- livellm-1.5.4/livellm/models/__init__.py
+++ livellm-1.6.1/livellm/models/__init__.py
@@ -3,6 +3,7 @@ from .fallback import AgentFallbackRequest, AudioFallbackRequest, TranscribeFall
 from .agent.agent import AgentRequest, AgentResponse, AgentResponseUsage
 from .agent.chat import Message, MessageRole, TextMessage, BinaryMessage, ToolCallMessage, ToolReturnMessage
 from .agent.tools import Tool, ToolInput, ToolKind, WebSearchInput, MCPStreamableServerInput
+from .agent.output_schema import OutputSchema, PropertyDef
 from .audio.speak import SpeakMimeType, SpeakRequest, SpeakStreamResponse
 from .audio.transcribe import TranscribeRequest, TranscribeResponse, File
 from .transcription import TranscriptionInitWsRequest, TranscriptionAudioChunkWsRequest, TranscriptionWsResponse
@@ -34,6 +35,8 @@ __all__ = [
     "ToolKind",
     "WebSearchInput",
     "MCPStreamableServerInput",
+    "OutputSchema",
+    "PropertyDef",
     # Audio
     "SpeakMimeType",
     "SpeakRequest",
--- livellm-1.5.4/livellm/models/agent/__init__.py
+++ livellm-1.6.1/livellm/models/agent/__init__.py
@@ -1,6 +1,7 @@
 from .agent import AgentRequest, AgentResponse, AgentResponseUsage
 from .chat import Message, MessageRole, TextMessage, BinaryMessage, ToolCallMessage, ToolReturnMessage
 from .tools import Tool, ToolInput, ToolKind, WebSearchInput, MCPStreamableServerInput
+from .output_schema import OutputSchema, PropertyDef
 
 
 __all__ = [
@@ -18,4 +19,6 @@ __all__ = [
     "ToolKind",
     "WebSearchInput",
     "MCPStreamableServerInput",
+    "OutputSchema",
+    "PropertyDef",
 ]
--- livellm-1.5.4/livellm/models/agent/agent.py
+++ livellm-1.6.1/livellm/models/agent/agent.py
@@ -1,9 +1,10 @@
 # models for full run: AgentRequest, AgentResponse
 
-from pydantic import BaseModel, Field
-from typing import Optional, List, Union
+from pydantic import BaseModel, Field
+from typing import Optional, List, Union, Any, Dict
 from .chat import TextMessage, BinaryMessage, ToolCallMessage, ToolReturnMessage
 from .tools import WebSearchInput, MCPStreamableServerInput
+from .output_schema import OutputSchema, PropertyDef
 from ..common import BaseRequest
 
 
@@ -13,12 +14,13 @@ class AgentRequest(BaseRequest):
     tools: List[Union[WebSearchInput, MCPStreamableServerInput]] = Field(default_factory=list, description="The tools to use")
     gen_config: Optional[dict] = Field(default=None, description="The configuration for the generation")
     include_history: bool = Field(default=False, description="Whether to include full conversation history in the response")
+    output_schema: Optional[Union[OutputSchema, Dict[str, Any]]] = Field(default=None, description="JSON schema for structured output. Can be an OutputSchema, a dict representing a JSON schema, or will be converted from a Pydantic BaseModel.")
 
 class AgentResponseUsage(BaseModel):
     input_tokens: int = Field(..., description="The number of input tokens used")
     output_tokens: int = Field(..., description="The number of output tokens used")
 
 class AgentResponse(BaseModel):
-    output: str = Field(..., description="The output of the response")
+    output: str = Field(..., description="The output of the response (JSON string when using output_schema)")
     usage: AgentResponseUsage = Field(..., description="The usage of the response")
-    history: Optional[List[Union[TextMessage, BinaryMessage, ToolCallMessage, ToolReturnMessage]]] = Field(default=None, description="Full conversation history including tool calls and returns (only included when include_history=true)")
+    history: Optional[List[Union[TextMessage, BinaryMessage, ToolCallMessage, ToolReturnMessage]]] = Field(default=None, description="Full conversation history including tool calls and returns (only included when include_history=true)")
--- /dev/null
+++ livellm-1.6.1/livellm/models/agent/output_schema.py
@@ -0,0 +1,120 @@
+"""Output schema models for structured output support."""
+
+from pydantic import BaseModel, ConfigDict, Field
+from typing import Optional, List, Dict, Any, Union
+
+
+class PropertyDef(BaseModel):
+    """Definition of a property in the output schema."""
+    model_config = ConfigDict(extra="allow")
+
+    type: Union[str, List[str]] = Field(..., description="Property type: string, integer, number, boolean, array, object, null")
+    description: Optional[str] = Field(default=None, description="Description of the property")
+    enum: Optional[List[Any]] = Field(default=None, description="Allowed values for the property")
+    default: Optional[Any] = Field(default=None, description="Default value")
+    # String constraints
+    minLength: Optional[int] = Field(default=None, description="Minimum string length")
+    maxLength: Optional[int] = Field(default=None, description="Maximum string length")
+    pattern: Optional[str] = Field(default=None, description="Regex pattern for string validation")
+    # Number constraints
+    minimum: Optional[float] = Field(default=None, description="Minimum number value")
+    maximum: Optional[float] = Field(default=None, description="Maximum number value")
+    exclusiveMinimum: Optional[float] = Field(default=None, description="Exclusive minimum number value")
+    exclusiveMaximum: Optional[float] = Field(default=None, description="Exclusive maximum number value")
+    # Array constraints
+    items: Optional[Union["PropertyDef", Dict[str, Any]]] = Field(default=None, description="Schema for array items")
+    minItems: Optional[int] = Field(default=None, description="Minimum array length")
+    maxItems: Optional[int] = Field(default=None, description="Maximum array length")
+    uniqueItems: Optional[bool] = Field(default=None, description="Whether array items must be unique")
+    # Object constraints
+    properties: Optional[Dict[str, Union["PropertyDef", Dict[str, Any]]]] = Field(default=None, description="Nested object properties")
+    required: Optional[List[str]] = Field(default=None, description="Required properties for nested objects")
+    additionalProperties: Optional[Union[bool, "PropertyDef", Dict[str, Any]]] = Field(default=None, description="Schema for additional properties")
+
+
+class OutputSchema(BaseModel):
+    """
+    Schema definition for structured output.
+
+    This model represents a JSON Schema that the AI model must follow when generating responses.
+    When provided, the agent will return a JSON string matching the specified schema.
+
+    Example:
+        schema = OutputSchema(
+            title="Person",
+            description="A person's information",
+            properties={
+                "name": PropertyDef(type="string", description="The person's name"),
+                "age": PropertyDef(type="integer", minimum=0, maximum=150),
+            },
+            required=["name", "age"]
+        )
+    """
+    model_config = ConfigDict(extra="allow")
+
+    title: str = Field(..., description="Name of the schema, used as the output tool name")
+    description: Optional[str] = Field(default=None, description="Description to help the model understand what to output")
+    properties: Dict[str, Union[PropertyDef, Dict[str, Any]]] = Field(..., description="Dictionary of property definitions")
+    required: Optional[List[str]] = Field(default=None, description="List of required property names")
+    additionalProperties: Optional[Union[bool, PropertyDef, Dict[str, Any]]] = Field(default=None, description="Whether extra properties are allowed")
+
+    @classmethod
+    def from_pydantic(cls, model: type[BaseModel]) -> "OutputSchema":
+        """
+        Create an OutputSchema from a Pydantic BaseModel class.
+
+        Args:
+            model: A Pydantic BaseModel class to convert to OutputSchema.
+
+        Returns:
+            An OutputSchema instance representing the model's schema.
+
+        Example:
+            class Person(BaseModel):
+                name: str
+                age: int
+
+            schema = OutputSchema.from_pydantic(Person)
+        """
+        json_schema = model.model_json_schema()
+
+        # Extract the main properties
+        title = json_schema.get("title", model.__name__)
+        description = json_schema.get("description")
+        properties = json_schema.get("properties", {})
+        required = json_schema.get("required")
+
+        # Handle $defs for nested models (Pydantic generates these for complex models)
+        defs = json_schema.get("$defs", {})
+        if defs:
+            # Inline the definitions into properties
+            properties = cls._resolve_refs(properties, defs)
+
+        return cls(
+            title=title,
+            description=description,
+            properties=properties,
+            required=required,
+        )
+
+    @classmethod
+    def _resolve_refs(cls, obj: Any, defs: Dict[str, Any]) -> Any:
+        """Recursively resolve $ref references in the schema."""
+        if isinstance(obj, dict):
+            if "$ref" in obj:
+                ref_path = obj["$ref"]
+                # Extract the definition name from "#/$defs/ModelName"
+                if ref_path.startswith("#/$defs/"):
+                    def_name = ref_path[len("#/$defs/"):]
+                    if def_name in defs:
+                        # Return the resolved definition (also resolve any nested refs)
+                        return cls._resolve_refs(defs[def_name], defs)
+                return obj
+            else:
+                return {k: cls._resolve_refs(v, defs) for k, v in obj.items()}
+        elif isinstance(obj, list):
+            return [cls._resolve_refs(item, defs) for item in obj]
+        else:
+            return obj
+
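
A small sanity check of `from_pydantic` with a nested model, showing the `$defs` that Pydantic emits for nested models getting inlined:

```python
from typing import List
from pydantic import BaseModel
from livellm.models import OutputSchema

class Address(BaseModel):
    street: str
    city: str

class Person(BaseModel):
    name: str
    addresses: List[Address]

schema = OutputSchema.from_pydantic(Person)
print(schema.title)     # "Person"
print(schema.required)  # ["name", "addresses"]
assert "$ref" not in str(schema.model_dump())  # Address fields were inlined
```
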
--- livellm-1.5.4/livellm/models/transcription.py
+++ livellm-1.6.1/livellm/models/transcription.py
@@ -2,6 +2,7 @@ from pydantic import BaseModel, Field, field_validator
 from livellm.models.audio.speak import SpeakMimeType
 from typing import Optional
 import base64
+from datetime import datetime
 
 class TranscriptionInitWsRequest(BaseModel):
     provider_uid: str = Field(..., description="The provider uid")
@@ -33,3 +34,4 @@ class TranscriptionAudioChunkWsRequest(BaseModel):
 
 class TranscriptionWsResponse(BaseModel):
     transcription: str = Field(..., description="The transcription")
+    received_at: datetime = Field(default_factory=datetime.now, description="The datetime when the transcription was received")
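
Since `received_at` defaults to the arrival time on the client, batches are easy to order or age-filter. A sketch assuming `responses` is one batch yielded by `start_session`:

```python
from datetime import datetime

latest = max(responses, key=lambda r: r.received_at)
lag_seconds = (datetime.now() - latest.received_at).total_seconds()
print(f"{latest.transcription!r} received {lag_seconds:.2f}s ago")
```
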
--- livellm-1.5.4/livellm/transcripton.py
+++ livellm-1.6.1/livellm/transcripton.py
@@ -47,7 +47,7 @@ class TranscriptionWsClient:
         self,
         request: TranscriptionInitWsRequest,
         source: AsyncIterator[TranscriptionAudioChunkWsRequest]
-    ) -> AsyncIterator[TranscriptionWsResponse]:
+    ) -> AsyncIterator[list[TranscriptionWsResponse]]:
         """
         Start a transcription session.
 
@@ -56,7 +56,10 @@ class TranscriptionWsClient:
             source: An async iterator that yields audio chunks to transcribe.
 
         Returns:
-            An async iterator of transcription
+            An async iterator that yields lists of transcription responses.
+            Each list contains all responses that accumulated since the last yield,
+            ordered from oldest to newest (last element is the most recent).
+            This prevents slow processing from stalling the entire loop.
 
         Example:
             ```python
@@ -66,8 +69,14 @@ class TranscriptionWsClient:
                     yield TranscriptionAudioChunkWsRequest(audio=chunk)
 
             async with TranscriptionWsClient(url) as client:
-                async for
-
+                async for responses in client.start_session(init_request, audio_source()):
+                    # responses is a list, newest transcription is last
+                    latest = responses[-1]
+                    print(f"Latest: {latest.transcription}")
+
+                    # Process all transcriptions if needed
+                    for resp in responses:
+                        print(resp.transcription)
             ```
         """
         # Send initialization request as JSON
@@ -79,6 +88,10 @@ class TranscriptionWsClient:
         if not init_response.success:
             raise Exception(f"Failed to start transcription session: {init_response.error}")
 
+        # Queue to collect incoming transcription responses
+        response_queue: asyncio.Queue[TranscriptionWsResponse | None] = asyncio.Queue()
+        receiver_done = False
+
         # Start sending audio chunks in background
         async def send_chunks():
             try:
@@ -93,23 +106,52 @@ class TranscriptionWsClient:
                 await self.websocket.close()
                 raise e
 
+        # Receive transcription responses in background
+        async def receive_responses():
+            nonlocal receiver_done
+            try:
+                while True:
+                    try:
+                        response_data = await self.websocket.recv()
+                        transcription_response = TranscriptionWsResponse(**json.loads(response_data))
+                        await response_queue.put(transcription_response)
+                    except websockets.ConnectionClosed:
+                        break
+            finally:
+                receiver_done = True
+                await response_queue.put(None)  # Signal end of stream
+
         send_task = asyncio.create_task(send_chunks())
+        receive_task = asyncio.create_task(receive_responses())
 
-        # Receive transcription responses
         try:
-            while True:
-
-
-
-
-            except websockets.ConnectionClosed:
-                # Connection closed, stop receiving
+            while True:
+                # Wait for at least one response
+                first_response = await response_queue.get()
+                if first_response is None:
+                    # End of stream
                     break
+
+                # Collect all additional responses that have accumulated (non-blocking)
+                responses = [first_response]
+                while True:
+                    try:
+                        additional = response_queue.get_nowait()
+                        if additional is None:
+                            # End of stream, yield what we have and exit
+                            yield responses
+                            return
+                        responses.append(additional)
+                    except asyncio.QueueEmpty:
+                        break
+
+                yield responses
         finally:
-            # Cancel
-
-
-
-
-
-
+            # Cancel tasks if still running
+            for task in [send_task, receive_task]:
+                if not task.done():
+                    task.cancel()
+                    try:
+                        await task
+                    except asyncio.CancelledError:
+                        pass
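
The consumer loop above uses a wait-then-drain pattern: block until one item arrives, then take everything already queued without blocking, so one slow consumer iteration never stalls the producer. The same pattern in isolation, with a `None` sentinel marking end of stream:

```python
import asyncio

async def batches(queue: asyncio.Queue):
    """Yield lists of queued items; each list is everything available at once."""
    while True:
        first = await queue.get()  # block until at least one item arrives
        if first is None:          # sentinel: producer is done
            return
        batch = [first]
        while True:                # drain whatever else has accumulated
            try:
                item = queue.get_nowait()
                if item is None:
                    yield batch
                    return
                batch.append(item)
            except asyncio.QueueEmpty:
                break
        yield batch
```
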
--- livellm-1.5.4/pyproject.toml
+++ livellm-1.6.1/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "livellm"
-version = "1.5.4"
+version = "1.6.1"
 description = "Python client for the LiveLLM Server"
 readme = "README.md"
 requires-python = ">=3.10"
@@ -24,8 +24,8 @@ classifiers = [
     "Typing :: Typed",
 ]
 
-[project.optional-dependencies]
-testing = [
+[dependency-groups]
+dev = [
     "pytest>=8.4.2",
     "pytest-asyncio>=0.21.0",
     "pytest-cov>=4.1.0"