livellm 1.5.4__py3-none-any.whl → 1.6.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
livellm/livellm.py CHANGED
@@ -3,9 +3,10 @@ import asyncio
  import httpx
  import json
  import warnings
- from typing import List, Optional, AsyncIterator, Union, overload, Dict
+ from typing import List, Optional, AsyncIterator, Union, overload, Dict, Any, Type
  from .models.common import Settings, SuccessResponse
  from .models.agent.agent import AgentRequest, AgentResponse
+ from .models.agent.output_schema import OutputSchema
  from .models.audio.speak import SpeakRequest, EncodedSpeakResponse
  from .models.audio.transcribe import TranscribeRequest, TranscribeResponse, File
  from .models.fallback import AgentFallbackRequest, AudioFallbackRequest, TranscribeFallbackRequest
@@ -15,10 +16,19 @@ from .transcripton import TranscriptionWsClient
  from uuid import uuid4
  import logging
  from abc import ABC, abstractmethod
+ from importlib.metadata import version, PackageNotFoundError
+ from pydantic import BaseModel


  logger = logging.getLogger(__name__)

+ try:
+     __version__ = version("livellm")
+ except PackageNotFoundError:
+     __version__ = "unknown"
+
+ DEFAULT_USER_AGENT = f"livellm-python/{__version__}"
+
  class BaseLivellmClient(ABC):

      @overload
@@ -37,6 +47,7 @@ class BaseLivellmClient(ABC):
          messages: list,
          tools: Optional[list] = None,
          include_history: bool = False,
+         output_schema: Optional[Union[OutputSchema, Dict[str, Any], Type[BaseModel]]] = None,
          **kwargs
      ) -> AgentResponse:
          ...
@@ -55,6 +66,7 @@ class BaseLivellmClient(ABC):
          messages: Optional[list] = None,
          tools: Optional[list] = None,
          include_history: bool = False,
+         output_schema: Optional[Union[OutputSchema, Dict[str, Any], Type[BaseModel]]] = None,
          **kwargs
      ) -> AgentResponse:
          """
@@ -72,7 +84,8 @@ class BaseLivellmClient(ABC):
              model="gpt-4",
              messages=[TextMessage(...)],
              tools=[],
-             include_history=False
+             include_history=False,
+             output_schema=MyPydanticModel  # or OutputSchema(...) or dict
          )

          Args:
@@ -83,9 +96,14 @@ class BaseLivellmClient(ABC):
              tools: Optional list of tools
              gen_config: Optional generation configuration
              include_history: Whether to include full conversation history in the response
+             output_schema: Optional schema for structured output. Can be:
+                 - An OutputSchema instance
+                 - A dict representing a JSON schema
+                 - A Pydantic BaseModel class (will be converted to OutputSchema)

          Returns:
-             AgentResponse with the agent's output
+             AgentResponse with the agent's output. If output_schema was provided,
+             the output will be a JSON string matching the schema.
          """
          # Check if first argument is a request object
          if request is not None:
@@ -102,16 +120,39 @@ class BaseLivellmClient(ABC):
                  "Alternatively, pass an AgentRequest object as the first positional argument."
              )

+         # Convert output_schema if it's a Pydantic BaseModel class
+         resolved_schema = self._resolve_output_schema(output_schema)
+
          agent_request = AgentRequest(
              provider_uid=provider_uid,
              model=model,
              messages=messages,
              tools=tools or [],
              gen_config=kwargs or None,
-             include_history=include_history
+             include_history=include_history,
+             output_schema=resolved_schema
          )
          return await self.handle_agent_run(agent_request)

+     def _resolve_output_schema(
+         self,
+         output_schema: Optional[Union[OutputSchema, Dict[str, Any], Type[BaseModel]]]
+     ) -> Optional[Union[OutputSchema, Dict[str, Any]]]:
+         """
+         Resolve the output_schema parameter to an OutputSchema or dict.
+
+         If a Pydantic BaseModel class is provided, convert it to OutputSchema.
+         """
+         if output_schema is None:
+             return None
+
+         # Check if it's a class (not an instance) that's a subclass of BaseModel
+         if isinstance(output_schema, type) and issubclass(output_schema, BaseModel):
+             return OutputSchema.from_pydantic(output_schema)
+
+         # Already an OutputSchema or dict, return as-is
+         return output_schema
+
      @overload
      def agent_run_stream(
          self,
@@ -128,6 +169,7 @@ class BaseLivellmClient(ABC):
          messages: list,
          tools: Optional[list] = None,
          include_history: bool = False,
+         output_schema: Optional[Union[OutputSchema, Dict[str, Any], Type[BaseModel]]] = None,
          **kwargs
      ) -> AsyncIterator[AgentResponse]:
          ...
@@ -146,6 +188,7 @@ class BaseLivellmClient(ABC):
          messages: Optional[list] = None,
          tools: Optional[list] = None,
          include_history: bool = False,
+         output_schema: Optional[Union[OutputSchema, Dict[str, Any], Type[BaseModel]]] = None,
          **kwargs
      ) -> AsyncIterator[AgentResponse]:
          """
@@ -165,7 +208,8 @@ class BaseLivellmClient(ABC):
              model="gpt-4",
              messages=[TextMessage(...)],
              tools=[],
-             include_history=False
+             include_history=False,
+             output_schema=MyPydanticModel  # or OutputSchema(...) or dict
          ):
              ...

@@ -177,9 +221,14 @@
              tools: Optional list of tools
              gen_config: Optional generation configuration
              include_history: Whether to include full conversation history in the response
+             output_schema: Optional schema for structured output. Can be:
+                 - An OutputSchema instance
+                 - A dict representing a JSON schema
+                 - A Pydantic BaseModel class (will be converted to OutputSchema)

          Returns:
-             AsyncIterator of AgentResponse chunks
+             AsyncIterator of AgentResponse chunks. If output_schema was provided,
+             the output will be a JSON string matching the schema.
          """
          # Check if first argument is a request object
          if request is not None:
@@ -196,13 +245,17 @@
                  "Alternatively, pass an AgentRequest object as the first positional argument."
              )

+         # Convert output_schema if it's a Pydantic BaseModel class
+         resolved_schema = self._resolve_output_schema(output_schema)
+
          agent_request = AgentRequest(
              provider_uid=provider_uid,
              model=model,
              messages=messages,
              tools=tools or [],
              gen_config=kwargs or None,
-             include_history=include_history
+             include_history=include_history,
+             output_schema=resolved_schema
          )
          stream = self.handle_agent_run_stream(agent_request)

@@ -505,7 +558,8 @@ class LivellmWsClient(BaseLivellmClient):

      def __init__(
          self,
-         base_url: str,
+         base_url: str,
+         user_agent: Optional[str] = None,
          timeout: Optional[float] = None,
          max_size: Optional[int] = None,
          max_buffer_size: Optional[int] = None
@@ -523,6 +577,7 @@
          self._ws_root_base_url = ws_url
          self.base_url = f"{ws_url}/livellm/ws"
          self.timeout = timeout
+         self.user_agent = user_agent or DEFAULT_USER_AGENT
          self.websocket = None
          self.sessions: Dict[str, asyncio.Queue] = {}
          self.max_buffer_size = max_buffer_size or 0  # None means unlimited buffer size
@@ -541,7 +596,8 @@
              self.base_url,
              open_timeout=self.timeout,
              close_timeout=self.timeout,
-             max_size=self.max_size
+             max_size=self.max_size,
+             additional_headers={"User-Agent": self.user_agent}
          )
          self.__listen_for_responses_task = asyncio.create_task(self.listen_for_responses())

@@ -680,7 +736,8 @@ class LivellmClient(BaseLivellmClient):

      def __init__(
          self,
-         base_url: str,
+         base_url: str,
+         user_agent: Optional[str] = None,
          timeout: Optional[float] = None,
          configs: Optional[List[Settings]] = None
      ):
@@ -689,11 +746,13 @@
          # HTTP API base URL for this client
          self.base_url = f"{self._root_base_url}/livellm"
          self.timeout = timeout
+         self.user_agent = user_agent or DEFAULT_USER_AGENT
          self.client = httpx.AsyncClient(base_url=self.base_url, timeout=self.timeout) \
              if self.timeout else httpx.AsyncClient(base_url=self.base_url)
          self.settings = []
          self.headers = {
              "Content-Type": "application/json",
+             "User-Agent": self.user_agent,
          }
          # Lazily-created realtime (WebSocket) client
          self._realtime = None
@@ -713,7 +772,7 @@
          """
          if self._realtime is None:
              # Pass the same root base URL; LivellmWsClient will handle ws/wss conversion.
-             self._realtime = LivellmWsClient(self._root_base_url, timeout=self.timeout)
+             self._realtime = LivellmWsClient(self._root_base_url, user_agent=self.user_agent, timeout=self.timeout)
          return self._realtime

      def update_configs_post_init(self, configs: List[Settings]) -> SuccessResponse:
@@ -858,32 +917,32 @@ class LivellmClient(BaseLivellmClient):
          if self._realtime is not None:
              await self._realtime.disconnect()

-     def __del__(self):
-         """
-         Destructor to clean up resources when the client is garbage collected.
-         This will close the HTTP client and attempt to delete configs if cleanup wasn't called.
-         Note: It's recommended to use the async context manager or call cleanup() explicitly.
-         """
-         # Warn user if cleanup wasn't called
-         if self.settings:
-             warnings.warn(
-                 "LivellmClient is being garbage collected without explicit cleanup. "
-                 "Provider configs may not be deleted from the server. "
-                 "Consider using 'async with' or calling 'await client.cleanup()' explicitly.",
-                 ResourceWarning,
-                 stacklevel=2
-             )
+     # def __del__(self):
+     #     """
+     #     Destructor to clean up resources when the client is garbage collected.
+     #     This will close the HTTP client and attempt to delete configs if cleanup wasn't called.
+     #     Note: It's recommended to use the async context manager or call cleanup() explicitly.
+     #     """
+     #     # Warn user if cleanup wasn't called
+     #     if self.settings:
+     #         warnings.warn(
+     #             "LivellmClient is being garbage collected without explicit cleanup. "
+     #             "Provider configs may not be deleted from the server. "
+     #             "Consider using 'async with' or calling 'await client.cleanup()' explicitly.",
+     #             ResourceWarning,
+     #             stacklevel=2
+     #         )

-         # Close the httpx client synchronously
-         # httpx.AsyncClient stores a sync Transport that needs cleanup
-         try:
-             with httpx.Client(base_url=self.base_url) as client:
-                 for config in self.settings:
-                     config: Settings = config
-                     client.delete("providers/config/{config.uid}", headers=self.headers)
-         except Exception:
-             # Silently fail - we're in a destructor
-             pass
+     #     # Close the httpx client synchronously
+     #     # httpx.AsyncClient stores a sync Transport that needs cleanup
+     #     try:
+     #         with httpx.Client(base_url=self.base_url) as client:
+     #             for config in self.settings:
+     #                 config: Settings = config
+     #                 client.delete(f"providers/config/{config.uid}", headers=self.headers)
+     #     except Exception:
+     #         # Silently fail - we're in a destructor
+     #         pass

      # Implement abstract methods from BaseLivellmClient

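The hunks above thread `output_schema` through both `agent_run` entry points and stamp a default `User-Agent` on HTTP and WebSocket connections. As a minimal sketch of how the pieces fit together — assuming `LivellmClient` is importable from the package root as the README suggests, and treating the base URL, provider uid, and model below as placeholders:

```python
import asyncio
from pydantic import BaseModel
from livellm import LivellmClient  # assumed root export
from livellm.models import TextMessage

class City(BaseModel):
    name: str
    country: str

async def main():
    # user_agent overrides DEFAULT_USER_AGENT ("livellm-python/<version>")
    client = LivellmClient("http://localhost:8000", user_agent="my-app/1.0")
    response = await client.agent_run(
        provider_uid="openai",
        model="gpt-4",
        messages=[TextMessage(role="user", content="Name one city and its country.")],
        output_schema=City,  # a class, so _resolve_output_schema calls OutputSchema.from_pydantic
    )
    # With output_schema set, response.output is a JSON string matching the schema
    print(City.model_validate_json(response.output))

asyncio.run(main())
```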
livellm/models/__init__.py CHANGED
@@ -3,6 +3,7 @@ from .fallback import AgentFallbackRequest, AudioFallbackRequest, TranscribeFall
  from .agent.agent import AgentRequest, AgentResponse, AgentResponseUsage
  from .agent.chat import Message, MessageRole, TextMessage, BinaryMessage, ToolCallMessage, ToolReturnMessage
  from .agent.tools import Tool, ToolInput, ToolKind, WebSearchInput, MCPStreamableServerInput
+ from .agent.output_schema import OutputSchema, PropertyDef
  from .audio.speak import SpeakMimeType, SpeakRequest, SpeakStreamResponse
  from .audio.transcribe import TranscribeRequest, TranscribeResponse, File
  from .transcription import TranscriptionInitWsRequest, TranscriptionAudioChunkWsRequest, TranscriptionWsResponse
@@ -34,6 +35,8 @@ __all__ = [
      "ToolKind",
      "WebSearchInput",
      "MCPStreamableServerInput",
+     "OutputSchema",
+     "PropertyDef",
      # Audio
      "SpeakMimeType",
      "SpeakRequest",
livellm/models/agent/__init__.py CHANGED
@@ -1,6 +1,7 @@
  from .agent import AgentRequest, AgentResponse, AgentResponseUsage
  from .chat import Message, MessageRole, TextMessage, BinaryMessage, ToolCallMessage, ToolReturnMessage
  from .tools import Tool, ToolInput, ToolKind, WebSearchInput, MCPStreamableServerInput
+ from .output_schema import OutputSchema, PropertyDef


  __all__ = [
@@ -18,4 +19,6 @@ __all__ = [
      "ToolKind",
      "WebSearchInput",
      "MCPStreamableServerInput",
+     "OutputSchema",
+     "PropertyDef",
  ]
livellm/models/agent/agent.py CHANGED
@@ -1,9 +1,10 @@
  # models for full run: AgentRequest, AgentResponse

- from pydantic import BaseModel, Field, field_validator
- from typing import Optional, List, Union
+ from pydantic import BaseModel, Field
+ from typing import Optional, List, Union, Any, Dict
  from .chat import TextMessage, BinaryMessage, ToolCallMessage, ToolReturnMessage
  from .tools import WebSearchInput, MCPStreamableServerInput
+ from .output_schema import OutputSchema, PropertyDef
  from ..common import BaseRequest

@@ -13,12 +14,13 @@ class AgentRequest(BaseRequest):
      tools: List[Union[WebSearchInput, MCPStreamableServerInput]] = Field(default_factory=list, description="The tools to use")
      gen_config: Optional[dict] = Field(default=None, description="The configuration for the generation")
      include_history: bool = Field(default=False, description="Whether to include full conversation history in the response")
+     output_schema: Optional[Union[OutputSchema, Dict[str, Any]]] = Field(default=None, description="JSON schema for structured output. Can be an OutputSchema, a dict representing a JSON schema, or will be converted from a Pydantic BaseModel.")

  class AgentResponseUsage(BaseModel):
      input_tokens: int = Field(..., description="The number of input tokens used")
      output_tokens: int = Field(..., description="The number of output tokens used")

  class AgentResponse(BaseModel):
-     output: str = Field(..., description="The output of the response")
+     output: str = Field(..., description="The output of the response (JSON string when using output_schema)")
      usage: AgentResponseUsage = Field(..., description="The usage of the response")
-     history: Optional[List[Union[TextMessage, BinaryMessage, ToolCallMessage, ToolReturnMessage]]] = Field(default=None, description="Full conversation history including tool calls and returns (only included when include_history=true)")
+     history: Optional[List[Union[TextMessage, BinaryMessage, ToolCallMessage, ToolReturnMessage]]] = Field(default=None, description="Full conversation history including tool calls and returns (only included when include_history=true)")
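Because `AgentRequest.output_schema` is typed `Optional[Union[OutputSchema, Dict[str, Any]]]`, a raw JSON-schema dict passes Pydantic validation untouched. A small sketch of that behavior — the field values are illustrative, and `provider_uid`/`model` are assumed to be supplied by `BaseRequest`:

```python
from livellm.models.agent.agent import AgentRequest
from livellm.models.agent.chat import TextMessage

request = AgentRequest(
    provider_uid="openai",
    model="gpt-4",
    messages=[TextMessage(role="user", content="Extract the person's name")],
    output_schema={
        "title": "Person",
        "type": "object",
        "properties": {"name": {"type": "string"}},
        "required": ["name"],
    },
)
# The dict is kept as-is, so it serializes to the wire format unchanged
assert request.model_dump()["output_schema"]["title"] == "Person"
```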
livellm/models/agent/output_schema.py ADDED
@@ -0,0 +1,120 @@
+ """Output schema models for structured output support."""
+
+ from pydantic import BaseModel, ConfigDict, Field
+ from typing import Optional, List, Dict, Any, Union
+
+
+ class PropertyDef(BaseModel):
+     """Definition of a property in the output schema."""
+     model_config = ConfigDict(extra="allow")
+
+     type: Union[str, List[str]] = Field(..., description="Property type: string, integer, number, boolean, array, object, null")
+     description: Optional[str] = Field(default=None, description="Description of the property")
+     enum: Optional[List[Any]] = Field(default=None, description="Allowed values for the property")
+     default: Optional[Any] = Field(default=None, description="Default value")
+     # String constraints
+     minLength: Optional[int] = Field(default=None, description="Minimum string length")
+     maxLength: Optional[int] = Field(default=None, description="Maximum string length")
+     pattern: Optional[str] = Field(default=None, description="Regex pattern for string validation")
+     # Number constraints
+     minimum: Optional[float] = Field(default=None, description="Minimum number value")
+     maximum: Optional[float] = Field(default=None, description="Maximum number value")
+     exclusiveMinimum: Optional[float] = Field(default=None, description="Exclusive minimum number value")
+     exclusiveMaximum: Optional[float] = Field(default=None, description="Exclusive maximum number value")
+     # Array constraints
+     items: Optional[Union["PropertyDef", Dict[str, Any]]] = Field(default=None, description="Schema for array items")
+     minItems: Optional[int] = Field(default=None, description="Minimum array length")
+     maxItems: Optional[int] = Field(default=None, description="Maximum array length")
+     uniqueItems: Optional[bool] = Field(default=None, description="Whether array items must be unique")
+     # Object constraints
+     properties: Optional[Dict[str, Union["PropertyDef", Dict[str, Any]]]] = Field(default=None, description="Nested object properties")
+     required: Optional[List[str]] = Field(default=None, description="Required properties for nested objects")
+     additionalProperties: Optional[Union[bool, "PropertyDef", Dict[str, Any]]] = Field(default=None, description="Schema for additional properties")
+
+
+ class OutputSchema(BaseModel):
+     """
+     Schema definition for structured output.
+
+     This model represents a JSON Schema that the AI model must follow when generating responses.
+     When provided, the agent will return a JSON string matching the specified schema.
+
+     Example:
+         schema = OutputSchema(
+             title="Person",
+             description="A person's information",
+             properties={
+                 "name": PropertyDef(type="string", description="The person's name"),
+                 "age": PropertyDef(type="integer", minimum=0, maximum=150),
+             },
+             required=["name", "age"]
+         )
+     """
+     model_config = ConfigDict(extra="allow")
+
+     title: str = Field(..., description="Name of the schema, used as the output tool name")
+     description: Optional[str] = Field(default=None, description="Description to help the model understand what to output")
+     properties: Dict[str, Union[PropertyDef, Dict[str, Any]]] = Field(..., description="Dictionary of property definitions")
+     required: Optional[List[str]] = Field(default=None, description="List of required property names")
+     additionalProperties: Optional[Union[bool, PropertyDef, Dict[str, Any]]] = Field(default=None, description="Whether extra properties are allowed")
+
+     @classmethod
+     def from_pydantic(cls, model: type[BaseModel]) -> "OutputSchema":
+         """
+         Create an OutputSchema from a Pydantic BaseModel class.
+
+         Args:
+             model: A Pydantic BaseModel class to convert to OutputSchema.
+
+         Returns:
+             An OutputSchema instance representing the model's schema.
+
+         Example:
+             class Person(BaseModel):
+                 name: str
+                 age: int
+
+             schema = OutputSchema.from_pydantic(Person)
+         """
+         json_schema = model.model_json_schema()
+
+         # Extract the main properties
+         title = json_schema.get("title", model.__name__)
+         description = json_schema.get("description")
+         properties = json_schema.get("properties", {})
+         required = json_schema.get("required")
+
+         # Handle $defs for nested models (Pydantic generates these for complex models)
+         defs = json_schema.get("$defs", {})
+         if defs:
+             # Inline the definitions into properties
+             properties = cls._resolve_refs(properties, defs)
+
+         return cls(
+             title=title,
+             description=description,
+             properties=properties,
+             required=required,
+         )
+
+     @classmethod
+     def _resolve_refs(cls, obj: Any, defs: Dict[str, Any]) -> Any:
+         """Recursively resolve $ref references in the schema."""
+         if isinstance(obj, dict):
+             if "$ref" in obj:
+                 ref_path = obj["$ref"]
+                 # Extract the definition name from "#/$defs/ModelName"
+                 if ref_path.startswith("#/$defs/"):
+                     def_name = ref_path[len("#/$defs/"):]
+                     if def_name in defs:
+                         # Return the resolved definition (also resolve any nested refs)
+                         return cls._resolve_refs(defs[def_name], defs)
+                 return obj
+             else:
+                 return {k: cls._resolve_refs(v, defs) for k, v in obj.items()}
+         elif isinstance(obj, list):
+             return [cls._resolve_refs(item, defs) for item in obj]
+         else:
+             return obj
+
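The `$defs` handling above is what makes nested Pydantic models work: `model_json_schema()` emits `{"$ref": "#/$defs/Address"}` for submodels, and `_resolve_refs` inlines the referenced definitions. A short sketch of that behavior, using hypothetical `Person`/`Address` models:

```python
from typing import List
from pydantic import BaseModel
from livellm.models.agent.output_schema import OutputSchema

class Address(BaseModel):
    street: str
    city: str

class Person(BaseModel):
    name: str
    addresses: List[Address]

schema = OutputSchema.from_pydantic(Person)
print(schema.title)     # "Person"
print(schema.required)  # ["name", "addresses"]
# The nested Address definition was inlined, so no "$ref" survives
assert "$ref" not in schema.model_dump_json()
```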
livellm/models/transcription.py CHANGED
@@ -2,6 +2,7 @@ from pydantic import BaseModel, Field, field_validator
  from livellm.models.audio.speak import SpeakMimeType
  from typing import Optional
  import base64
+ from datetime import datetime

  class TranscriptionInitWsRequest(BaseModel):
      provider_uid: str = Field(..., description="The provider uid")
@@ -33,3 +34,4 @@ class TranscriptionAudioChunkWsRequest(BaseModel):

  class TranscriptionWsResponse(BaseModel):
      transcription: str = Field(..., description="The transcription")
+     received_at: datetime = Field(default_factory=datetime.now, description="The datetime when the transcription was received")
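Since `received_at` is stamped client-side via `default_factory=datetime.now` when the response model is constructed, consumers can approximate inter-chunk arrival gaps without server cooperation. A minimal sketch (the transcription strings are fabricated; in practice the models are parsed off the WebSocket):

```python
from livellm.models.transcription import TranscriptionWsResponse

first = TranscriptionWsResponse(transcription="hello")
second = TranscriptionWsResponse(transcription="hello world")

# received_at defaults to local construction time, so the delta approximates
# how far apart the two responses arrived
gap = (second.received_at - first.received_at).total_seconds()
print(f"gap between chunks: {gap:.6f}s")
```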
livellm/transcripton.py CHANGED
@@ -47,7 +47,7 @@ class TranscriptionWsClient:
          self,
          request: TranscriptionInitWsRequest,
          source: AsyncIterator[TranscriptionAudioChunkWsRequest]
-     ) -> AsyncIterator[TranscriptionWsResponse]:
+     ) -> AsyncIterator[list[TranscriptionWsResponse]]:
          """
          Start a transcription session.

@@ -56,7 +56,10 @@
              source: An async iterator that yields audio chunks to transcribe.

          Returns:
-             An async iterator of transcription session responses.
+             An async iterator that yields lists of transcription responses.
+             Each list contains all responses that accumulated since the last yield,
+             ordered from oldest to newest (last element is the most recent).
+             This prevents slow processing from stalling the entire loop.

          Example:
              ```python
@@ -66,8 +69,14 @@
                      yield TranscriptionAudioChunkWsRequest(audio=chunk)

              async with TranscriptionWsClient(url) as client:
-                 async for response in client.start_session(init_request, audio_source()):
-                     print(response.transcription)
+                 async for responses in client.start_session(init_request, audio_source()):
+                     # responses is a list, newest transcription is last
+                     latest = responses[-1]
+                     print(f"Latest: {latest.transcription}")
+
+                     # Process all transcriptions if needed
+                     for resp in responses:
+                         print(resp.transcription)
              ```
          """
          # Send initialization request as JSON
@@ -79,6 +88,10 @@
          if not init_response.success:
              raise Exception(f"Failed to start transcription session: {init_response.error}")

+         # Queue to collect incoming transcription responses
+         response_queue: asyncio.Queue[TranscriptionWsResponse | None] = asyncio.Queue()
+         receiver_done = False
+
          # Start sending audio chunks in background
          async def send_chunks():
              try:
@@ -93,23 +106,52 @@
                  await self.websocket.close()
                  raise e

+         # Receive transcription responses in background
+         async def receive_responses():
+             nonlocal receiver_done
+             try:
+                 while True:
+                     try:
+                         response_data = await self.websocket.recv()
+                         transcription_response = TranscriptionWsResponse(**json.loads(response_data))
+                         await response_queue.put(transcription_response)
+                     except websockets.ConnectionClosed:
+                         break
+             finally:
+                 receiver_done = True
+                 await response_queue.put(None)  # Signal end of stream
+
          send_task = asyncio.create_task(send_chunks())
+         receive_task = asyncio.create_task(receive_responses())

-         # Receive transcription responses
          try:
-             while True:
-                 try:
-                     response_data = await self.websocket.recv()
-                     transcription_response = TranscriptionWsResponse(**json.loads(response_data))
-                     yield transcription_response
-                 except websockets.ConnectionClosed:
-                     # Connection closed, stop receiving
+             while True:
+                 # Wait for at least one response
+                 first_response = await response_queue.get()
+                 if first_response is None:
+                     # End of stream
                      break
+
+                 # Collect all additional responses that have accumulated (non-blocking)
+                 responses = [first_response]
+                 while True:
+                     try:
+                         additional = response_queue.get_nowait()
+                         if additional is None:
+                             # End of stream, yield what we have and exit
+                             yield responses
+                             return
+                         responses.append(additional)
+                     except asyncio.QueueEmpty:
+                         break
+
+                 yield responses
          finally:
-             # Cancel the send task if still running
-             if not send_task.done():
-                 send_task.cancel()
-                 try:
-                     await send_task
-                 except asyncio.CancelledError:
-                     pass
+             # Cancel tasks if still running
+             for task in [send_task, receive_task]:
+                 if not task.done():
+                     task.cancel()
+                     try:
+                         await task
+                     except asyncio.CancelledError:
+                         pass
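The rework above decouples reading from yielding: `receive_responses` keeps draining the socket into `response_queue`, and each loop iteration hands the consumer everything that accumulated while it was busy. A consumer-side sketch, assuming a reachable server (the URL, provider uid, model, and audio bytes are all placeholders):

```python
import asyncio
from livellm.transcripton import TranscriptionWsClient
from livellm.models.transcription import (
    TranscriptionInitWsRequest,
    TranscriptionAudioChunkWsRequest,
)

async def main():
    init = TranscriptionInitWsRequest(provider_uid="openai", model="whisper-1")

    async def audio_source():
        for _ in range(10):
            yield TranscriptionAudioChunkWsRequest(audio=b"\x00" * 3200)  # fake PCM frames

    async with TranscriptionWsClient("ws://localhost:8000") as client:
        async for batch in client.start_session(init, audio_source()):
            # A slow body here no longer stalls the reader; the next iteration
            # simply returns every response queued in the meantime
            print(f"{len(batch)} new, latest: {batch[-1].transcription!r}")
            await asyncio.sleep(1.0)  # deliberately slow consumer

asyncio.run(main())
```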
livellm-1.5.4.dist-info/METADATA → livellm-1.6.1.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: livellm
- Version: 1.5.4
+ Version: 1.6.1
  Summary: Python client for the LiveLLM Server
  Project-URL: Homepage, https://github.com/qalby-tech/livellm-client-py
  Project-URL: Repository, https://github.com/qalby-tech/livellm-client-py
@@ -19,10 +19,6 @@ Requires-Dist: httpx>=0.27.0
  Requires-Dist: pydantic>=2.0.0
  Requires-Dist: sounddevice>=0.5.3
  Requires-Dist: websockets>=15.0.1
- Provides-Extra: testing
- Requires-Dist: pytest-asyncio>=0.21.0; extra == 'testing'
- Requires-Dist: pytest-cov>=4.1.0; extra == 'testing'
- Requires-Dist: pytest>=8.4.2; extra == 'testing'
  Description-Content-Type: text/markdown

  # LiveLLM Python Client
@@ -39,6 +35,7 @@ Python client library for the LiveLLM Server - a unified proxy for AI agent, aud
  - 🎯 **Multi-provider** - OpenAI, Google, Anthropic, Groq, ElevenLabs
  - 🔄 **Streaming** - Real-time streaming for agent and audio
  - 🛠️ **Flexible API** - Use request objects or keyword arguments
+ - 📋 **Structured Output** - Get validated JSON responses with schema support (Pydantic, OutputSchema, or dict)
  - 🎙️ **Audio services** - Text-to-speech and transcription
  - 🎤 **Real-Time Transcription** - WebSocket-based live audio transcription with bidirectional streaming
  - ⚡ **Fallback strategies** - Sequential and parallel handling
@@ -302,6 +299,146 @@ if response.history:
  - Auditing and logging complete conversations
  - Building conversational UIs with full context visibility

+ #### Agent with Structured Output
+
+ Get structured JSON responses from the agent by providing an output schema. The agent will return a JSON string matching your schema in the `output` field.
+
+ **Three ways to define a schema:**
+
+ **1. Using Pydantic BaseModel (Recommended)**
+ ```python
+ import json
+ from pydantic import BaseModel
+ from livellm.models import TextMessage
+
+ class Person(BaseModel):
+     name: str
+     age: int
+     occupation: str
+
+ response = await client.agent_run(
+     provider_uid="openai",
+     model="gpt-4",
+     messages=[TextMessage(role="user", content="Extract info: John is a 28-year-old engineer")],
+     output_schema=Person  # Pass the BaseModel class directly
+ )
+
+ # response.output is a JSON string: '{"name": "John", "age": 28, "occupation": "engineer"}'
+ print(type(response.output))  # <class 'str'>
+
+ # Parse the JSON string yourself if needed
+ data = json.loads(response.output)
+ print(f"Name: {data['name']}")
+ print(f"Age: {data['age']}")
+ print(f"Occupation: {data['occupation']}")
+
+ # Or validate with your Pydantic model
+ person = Person.model_validate_json(response.output)
+ print(f"Name: {person.name}")
+ ```
+
+ **2. Using OutputSchema**
+ ```python
+ from livellm.models import OutputSchema, PropertyDef, TextMessage
+
+ schema = OutputSchema(
+     title="Person",
+     description="A person's information",
+     properties={
+         "name": PropertyDef(type="string", description="The person's name"),
+         "age": PropertyDef(type="integer", minimum=0, maximum=150, description="Age in years"),
+         "email": PropertyDef(type="string", pattern="^[^@]+@[^@]+\\.[^@]+$", description="Email address"),
+     },
+     required=["name", "age", "email"]
+ )
+
+ response = await client.agent_run(
+     provider_uid="openai",
+     model="gpt-4",
+     messages=[TextMessage(role="user", content="Tell me about a person")],
+     output_schema=schema
+ )
+ ```
+
+ **3. Using a dictionary (JSON Schema)**
+ ```python
+ schema_dict = {
+     "title": "Person",
+     "type": "object",
+     "properties": {
+         "name": {"type": "string", "description": "The person's name"},
+         "age": {"type": "integer", "minimum": 0, "maximum": 150},
+         "email": {"type": "string", "pattern": "^[^@]+@[^@]+\\.[^@]+$"}
+     },
+     "required": ["name", "age", "email"]
+ }
+
+ response = await client.agent_run(
+     provider_uid="openai",
+     model="gpt-4",
+     messages=[TextMessage(role="user", content="Extract person info")],
+     output_schema=schema_dict
+ )
+ ```
+
+ **Complex nested schemas:**
+ ```python
+ from pydantic import BaseModel
+ from typing import List, Optional
+
+ class Address(BaseModel):
+     street: str
+     city: str
+     zip_code: str
+
+ class Person(BaseModel):
+     name: str
+     age: int
+     addresses: List[Address]
+     phone: Optional[str] = None
+
+ response = await client.agent_run(
+     provider_uid="openai",
+     model="gpt-4",
+     messages=[TextMessage(role="user", content="Extract person with addresses")],
+     output_schema=Person  # Nested models are automatically resolved
+ )
+ ```
+
+ **With streaming:**
+ ```python
+ import json
+ from typing import List
+ from pydantic import BaseModel
+
+ class Summary(BaseModel):
+     title: str
+     key_points: List[str]
+     word_count: int
+
+ stream = client.agent_run_stream(
+     provider_uid="openai",
+     model="gpt-4",
+     messages=[TextMessage(role="user", content="Summarize this article")],
+     output_schema=Summary
+ )
+
+ # Accumulate chunks in a single pass (the async iterator can only be consumed once)
+ chunks = []
+ async for chunk in stream:
+     print(chunk.output, end="", flush=True)
+     chunks.append(chunk.output)
+
+ # After streaming completes, parse the full JSON output
+ full_output = "".join(chunks)
+ data = json.loads(full_output)
+ ```
+
+ **Response fields:**
+ - `output` - The JSON string response matching your schema
+
+ **Use cases:**
+ - Data extraction and parsing
+ - API response formatting
+ - Structured data generation
+ - Type-safe responses
+ - Integration with type-checked code
+
  ### Audio Services

  #### Text-to-Speech
@@ -411,11 +548,17 @@
      )

      # Stream audio and receive transcriptions
-     async for response in client.start_session(init_request, audio_source()):
-         print(f"Transcription: {response.transcription}")
-         if response.is_end:
-             print("Transcription complete!")
-             break
+     # Each iteration yields a list of responses (oldest to newest)
+     async for responses in client.start_session(init_request, audio_source()):
+         # Get the latest transcription (last element)
+         latest = responses[-1]
+         print(f"Latest transcription: {latest.transcription}")
+
+         # Process all accumulated transcriptions if needed
+         if len(responses) > 1:
+             print(f"  (received {len(responses)} chunks)")
+             for resp in responses:
+                 print(f"  - {resp.transcription}")

  asyncio.run(transcribe_live_direct())
  ```
@@ -453,25 +596,25 @@ async def transcribe_and_chat():
          gen_config={},
      )

-     # Listen for transcriptions and, for each chunk, run an agent request
-     async for resp in t_client.start_session(init_request, audio_source()):
-         print("User said:", resp.transcription)
+     # Listen for transcriptions and, for each batch, run an agent request
+     # Each iteration yields a list of responses - newest is last
+     async for responses in t_client.start_session(init_request, audio_source()):
+         # Use the latest transcription for the agent
+         latest = responses[-1]
+         print("User said:", latest.transcription)

          # You can call agent_run (or speak, etc.) while the transcription stream is active
+         # Even if this is slow, transcriptions accumulate and won't stall the loop
          agent_response = await realtime.agent_run(
              provider_uid="openai",
              model="gpt-4",
              messages=[
-                 TextMessage(role="user", content=resp.transcription),
+                 TextMessage(role="user", content=latest.transcription),
              ],
              temperature=0.7,
          )
          print("Agent:", agent_response.output)

-         if resp.is_end:
-             print("Transcription session complete")
-             break
-
  asyncio.run(transcribe_and_chat())
  ```

@@ -586,7 +729,7 @@ response = await client.ping()
  **Real-Time Transcription (TranscriptionWsClient)**
  - `connect()` - Establish WebSocket connection
  - `disconnect()` - Close WebSocket connection
- - `start_session(init_request, audio_source)` - Start bidirectional streaming transcription
+ - `start_session(init_request, audio_source)` - Start bidirectional streaming transcription; yields `list[TranscriptionWsResponse]` (accumulated responses, newest last)
  - `async with client:` - Auto connection management (recommended)

  **Cleanup**
@@ -607,7 +750,7 @@
  - `MessageRole` - `USER` | `MODEL` | `SYSTEM` | `TOOL_CALL` | `TOOL_RETURN` (or use strings)

  **Requests**
- - `AgentRequest(provider_uid, model, messages, tools?, gen_config?, include_history?)` - Set `include_history=True` to get full conversation
+ - `AgentRequest(provider_uid, model, messages, tools?, gen_config?, include_history?, output_schema?)` - Set `include_history=True` to get full conversation. Set `output_schema` for structured JSON output.
  - `SpeakRequest(provider_uid, model, text, voice, mime_type, sample_rate, gen_config?)`
  - `TranscribeRequest(provider_uid, file, model, language?, gen_config?)`
  - `TranscriptionInitWsRequest(provider_uid, model, language?, input_sample_rate?, input_audio_format?, gen_config?)`
@@ -617,15 +760,20 @@ response = await client.ping()
  - `WebSearchInput(kind=ToolKind.WEB_SEARCH, search_context_size)`
  - `MCPStreamableServerInput(kind=ToolKind.MCP_STREAMABLE_SERVER, url, prefix?, timeout?)`

+ **Structured Output**
+ - `OutputSchema(title, description?, properties, required?, additionalProperties?)` - JSON Schema for structured output
+ - `PropertyDef(type, description?, enum?, default?, minLength?, maxLength?, pattern?, minimum?, maximum?, items?, ...)` - Property definition with validation constraints
+ - `OutputSchema.from_pydantic(model)` - Convert a Pydantic BaseModel class to OutputSchema
+
  **Fallback**
  - `AgentFallbackRequest(strategy, requests, timeout_per_request?)`
  - `AudioFallbackRequest(strategy, requests, timeout_per_request?)`
  - `FallbackStrategy` - `SEQUENTIAL` | `PARALLEL`

  **Responses**
- - `AgentResponse(output, usage{input_tokens, output_tokens}, history?)` - `history` included when `include_history=True`
+ - `AgentResponse(output, usage{input_tokens, output_tokens}, history?)` - `history` included when `include_history=True`. `output` is a JSON string when `output_schema` is provided.
  - `TranscribeResponse(text, language)`
- - `TranscriptionWsResponse(transcription, is_end)` - Real-time transcription result
+ - `TranscriptionWsResponse(transcription, received_at)` - Real-time transcription result; yielded as `list[TranscriptionWsResponse]` with newest last

  ## Error Handling

livellm-1.5.4.dist-info/RECORD → livellm-1.6.1.dist-info/RECORD CHANGED
@@ -1,20 +1,21 @@
  livellm/__init__.py,sha256=p2Szx7PELGYi-PTnSNnRPGVbU438ZBTFXYAQoMToUfE,440
- livellm/livellm.py,sha256=d-1PlWKtMkw44575KZlhRr-c7p0B34W78F4PS_rhwUA,34598
+ livellm/livellm.py,sha256=JaiJhXEX5Q0taE-gV5pv8IfexfLBQALYzqxzg20XyNQ,37680
  livellm/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- livellm/transcripton.py,sha256=WtL2PEFEVQ3RP2WOyjXqQFziQWKJvzJbG3T5cqfJ5qc,4187
- livellm/models/__init__.py,sha256=4w8yDf_79kNwBfct-jHjAPNPxz4nFzMSWqlIo_7MFwc,1531
+ livellm/transcripton.py,sha256=oxUxagfPNQYPjyizcWW4Rjp7Doqh9at9mkOuxx3cuvo,6167
+ livellm/models/__init__.py,sha256=s43CX7lix5FUVNgUBTMTCDRIHyJso3KAi4IVGSoJyH0,1629
  livellm/models/common.py,sha256=nx0w7Th9xeHeQg6Ssxi7jEEh3aEcGyzGAP0uk9l072c,1772
  livellm/models/fallback.py,sha256=zGG_MjdbaTx0fqKZTEg3ullej-CJznPfwaon0jEvRvI,1170
- livellm/models/transcription.py,sha256=LQG5u7hgXuw6WyCxS-us0VGh9eky-f16JPpeT3jHULc,1608
+ livellm/models/transcription.py,sha256=sACAywecLfuQdFmwy0OYP-in8pe-caCrHw2hKNTR5XI,1765
  livellm/models/ws.py,sha256=OCoJwAjQLOz6ErTiTtb-qD22N4wSsEGvi_4JQqCHIPQ,1111
- livellm/models/agent/__init__.py,sha256=CvP3xzDAiBZ4giQZUBQsqAJd1rWO4cvHhqr2Oh346A0,560
- livellm/models/agent/agent.py,sha256=ICS2AAAhPp5a69P_RNMzuM630GsHz6h_3MV_YVV1Boc,1540
+ livellm/models/agent/__init__.py,sha256=8ES1vZB5mgAOM6btLvX5LkEhtUB2CJvQ9jILiWBxDb0,652
+ livellm/models/agent/agent.py,sha256=84bpFFHNuT-sXIzE-50vl_SLmzyUXuW2K9ow1n8bz5o,1871
  livellm/models/agent/chat.py,sha256=VxdHTbJELMffxJJUSTdhT4behFbVq5XNyBLeg75wpsU,1632
+ livellm/models/agent/output_schema.py,sha256=jWCWqIfRMgNUvbPVt8NrYqiyuNSrbzKypHFC3rHu-Qc,5763
  livellm/models/agent/tools.py,sha256=wVWfx6_jxL3IcmX_Nt_PonZ3RQLtpfqJnszHz32BQiU,1403
  livellm/models/audio/__init__.py,sha256=sz2NxCOfFGVvp-XQUsdgOR_TYBO1Wb-8LLXaZDEiAZk,282
  livellm/models/audio/speak.py,sha256=lDITZ7fiLRuDhA-LxCPQ6Yraxr33B6Lg7VyR4CkuGk8,1872
  livellm/models/audio/transcribe.py,sha256=Leji2lk5zfq4GE-fw-z2dZR8BuijzW8TJ12GHw_UZJY,2085
- livellm-1.5.4.dist-info/METADATA,sha256=cJmTbjUs7x8zihRJkGtY0k_F5MW0ZgMCmztfup98Z0Q,20664
- livellm-1.5.4.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
- livellm-1.5.4.dist-info/licenses/LICENSE,sha256=yapGO2C_00ymEx6TADdbU8Oyc1bWOrZY-fjP-agmFL4,1071
- livellm-1.5.4.dist-info/RECORD,,
+ livellm-1.6.1.dist-info/METADATA,sha256=tP3VdyY7R6nxjZEG0QfZGfwtqMifFkq_B9YA45lAJe0,25527
+ livellm-1.6.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+ livellm-1.6.1.dist-info/licenses/LICENSE,sha256=yapGO2C_00ymEx6TADdbU8Oyc1bWOrZY-fjP-agmFL4,1071
+ livellm-1.6.1.dist-info/RECORD,,