livellm 1.5.5__py3-none-any.whl → 1.7.1__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
livellm/livellm.py CHANGED
@@ -3,9 +3,10 @@ import asyncio
  import httpx
  import json
  import warnings
- from typing import List, Optional, AsyncIterator, Union, overload, Dict
+ from typing import List, Optional, AsyncIterator, Union, overload, Dict, Any, Type
  from .models.common import Settings, SuccessResponse
  from .models.agent.agent import AgentRequest, AgentResponse
+ from .models.agent.output_schema import OutputSchema
  from .models.audio.speak import SpeakRequest, EncodedSpeakResponse
  from .models.audio.transcribe import TranscribeRequest, TranscribeResponse, File
  from .models.fallback import AgentFallbackRequest, AudioFallbackRequest, TranscribeFallbackRequest
@@ -15,16 +16,30 @@ from .transcripton import TranscriptionWsClient
  from uuid import uuid4
  import logging
  from abc import ABC, abstractmethod
+ from importlib.metadata import version, PackageNotFoundError
+ from pydantic import BaseModel


  logger = logging.getLogger(__name__)

+ try:
+     __version__ = version("livellm")
+ except PackageNotFoundError:
+     __version__ = "unknown"
+
+ DEFAULT_USER_AGENT = f"livellm-python/{__version__}"
+

  class BaseLivellmClient(ABC):

+     # Default timeout (set by subclasses)
+     timeout: Optional[float] = None
+
      @overload
      async def agent_run(
          self,
          request: Union[AgentRequest, AgentFallbackRequest],
+         *,
+         timeout: Optional[float] = None,
      ) -> AgentResponse:
          ...

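
Note: the default User-Agent is now derived from the installed distribution via importlib.metadata (e.g. livellm-python/1.7.1), with a fallback when package metadata is missing. A minimal sketch of overriding it at construction time; the constructor changes appear later in this diff, and the import path is an assumption:

    # Assumes LivellmClient is importable from the `livellm` package; when
    # user_agent is omitted, DEFAULT_USER_AGENT ("livellm-python/<version>")
    # is used instead.
    from livellm import LivellmClient

    client = LivellmClient("https://api.example.com", user_agent="my-app/2.0")
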
@@ -37,13 +52,19 @@ class BaseLivellmClient(ABC):
          messages: list,
          tools: Optional[list] = None,
          include_history: bool = False,
+         output_schema: Optional[Union[OutputSchema, Dict[str, Any], Type[BaseModel]]] = None,
+         timeout: Optional[float] = None,
          **kwargs
      ) -> AgentResponse:
          ...


      @abstractmethod
-     async def handle_agent_run(self, request: Union[AgentRequest, AgentFallbackRequest]) -> AgentResponse:
+     async def handle_agent_run(
+         self,
+         request: Union[AgentRequest, AgentFallbackRequest],
+         timeout: Optional[float] = None
+     ) -> AgentResponse:
          ...

      async def agent_run(
@@ -55,6 +76,8 @@ class BaseLivellmClient(ABC):
          messages: Optional[list] = None,
          tools: Optional[list] = None,
          include_history: bool = False,
+         output_schema: Optional[Union[OutputSchema, Dict[str, Any], Type[BaseModel]]] = None,
+         timeout: Optional[float] = None,
          **kwargs
      ) -> AgentResponse:
          """
@@ -72,7 +95,8 @@ class BaseLivellmClient(ABC):
                  model="gpt-4",
                  messages=[TextMessage(...)],
                  tools=[],
-                 include_history=False
+                 include_history=False,
+                 output_schema=MyPydanticModel # or OutputSchema(...) or dict
              )

          Args:
@@ -83,9 +107,15 @@ class BaseLivellmClient(ABC):
              tools: Optional list of tools
              gen_config: Optional generation configuration
              include_history: Whether to include full conversation history in the response
+             output_schema: Optional schema for structured output. Can be:
+                 - An OutputSchema instance
+                 - A dict representing a JSON schema
+                 - A Pydantic BaseModel class (will be converted to OutputSchema)
+             timeout: Optional timeout in seconds (overrides default client timeout)

          Returns:
-             AgentResponse with the agent's output
+             AgentResponse with the agent's output. If output_schema was provided,
+             the output will be a JSON string matching the schema.
          """
          # Check if first argument is a request object
          if request is not None:
@@ -93,7 +123,7 @@ class BaseLivellmClient(ABC):
                raise TypeError(
                    f"First positional argument must be AgentRequest or AgentFallbackRequest, got {type(request)}"
                )
-             return await self.handle_agent_run(request)
+             return await self.handle_agent_run(request, timeout=timeout)

          # Otherwise, use keyword arguments
          if provider_uid is None or model is None or messages is None:
@@ -102,20 +132,45 @@ class BaseLivellmClient(ABC):
                  "Alternatively, pass an AgentRequest object as the first positional argument."
              )

+         # Convert output_schema if it's a Pydantic BaseModel class
+         resolved_schema = self._resolve_output_schema(output_schema)
+
          agent_request = AgentRequest(
              provider_uid=provider_uid,
              model=model,
              messages=messages,
              tools=tools or [],
              gen_config=kwargs or None,
-             include_history=include_history
+             include_history=include_history,
+             output_schema=resolved_schema
          )
-         return await self.handle_agent_run(agent_request)
+         return await self.handle_agent_run(agent_request, timeout=timeout)
+
+     def _resolve_output_schema(
+         self,
+         output_schema: Optional[Union[OutputSchema, Dict[str, Any], Type[BaseModel]]]
+     ) -> Optional[Union[OutputSchema, Dict[str, Any]]]:
+         """
+         Resolve the output_schema parameter to an OutputSchema or dict.
+
+         If a Pydantic BaseModel class is provided, convert it to OutputSchema.
+         """
+         if output_schema is None:
+             return None
+
+         # Check if it's a class (not an instance) that's a subclass of BaseModel
+         if isinstance(output_schema, type) and issubclass(output_schema, BaseModel):
+             return OutputSchema.from_pydantic(output_schema)
+
+         # Already an OutputSchema or dict, return as-is
+         return output_schema

      @overload
      def agent_run_stream(
          self,
          request: Union[AgentRequest, AgentFallbackRequest],
+         *,
+         timeout: Optional[float] = None,
      ) -> AsyncIterator[AgentResponse]:
          ...

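
Note: a hedged sketch of the new structured-output path. A Pydantic class passed as output_schema is converted via OutputSchema.from_pydantic, and, per the docstring above, the agent's output comes back as a JSON string matching the schema. The client/message construction and the response field name are illustrative assumptions, not confirmed by this diff:

    from pydantic import BaseModel

    class Verdict(BaseModel):
        label: str
        confidence: float

    # `client`, the provider uid, and the TextMessage usage are illustrative;
    # output_schema also accepts an OutputSchema instance or a JSON-schema dict.
    response = await client.agent_run(
        provider_uid="openai",
        model="gpt-4",
        messages=[TextMessage(...)],
        output_schema=Verdict,
    )
    # Assuming the response exposes the text as `.output` (a JSON string here):
    verdict = Verdict.model_validate_json(response.output)
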
@@ -128,13 +183,19 @@ class BaseLivellmClient(ABC):
          messages: list,
          tools: Optional[list] = None,
          include_history: bool = False,
+         output_schema: Optional[Union[OutputSchema, Dict[str, Any], Type[BaseModel]]] = None,
+         timeout: Optional[float] = None,
          **kwargs
      ) -> AsyncIterator[AgentResponse]:
          ...


      @abstractmethod
-     async def handle_agent_run_stream(self, request: Union[AgentRequest, AgentFallbackRequest]) -> AsyncIterator[AgentResponse]:
+     async def handle_agent_run_stream(
+         self,
+         request: Union[AgentRequest, AgentFallbackRequest],
+         timeout: Optional[float] = None
+     ) -> AsyncIterator[AgentResponse]:
          ...

      async def agent_run_stream(
@@ -146,6 +207,8 @@ class BaseLivellmClient(ABC):
          messages: Optional[list] = None,
          tools: Optional[list] = None,
          include_history: bool = False,
+         output_schema: Optional[Union[OutputSchema, Dict[str, Any], Type[BaseModel]]] = None,
+         timeout: Optional[float] = None,
          **kwargs
      ) -> AsyncIterator[AgentResponse]:
          """
@@ -165,7 +228,8 @@ class BaseLivellmClient(ABC):
                  model="gpt-4",
                  messages=[TextMessage(...)],
                  tools=[],
-                 include_history=False
+                 include_history=False,
+                 output_schema=MyPydanticModel # or OutputSchema(...) or dict
              ):
                  ...

@@ -177,9 +241,15 @@ class BaseLivellmClient(ABC):
              tools: Optional list of tools
              gen_config: Optional generation configuration
              include_history: Whether to include full conversation history in the response
+             output_schema: Optional schema for structured output. Can be:
+                 - An OutputSchema instance
+                 - A dict representing a JSON schema
+                 - A Pydantic BaseModel class (will be converted to OutputSchema)
+             timeout: Optional timeout in seconds (overrides default client timeout)

          Returns:
-             AsyncIterator of AgentResponse chunks
+             AsyncIterator of AgentResponse chunks. If output_schema was provided,
+             the output will be a JSON string matching the schema.
          """
          # Check if first argument is a request object
          if request is not None:
@@ -187,7 +257,7 @@ class BaseLivellmClient(ABC):
                raise TypeError(
                    f"First positional argument must be AgentRequest or AgentFallbackRequest, got {type(request)}"
                )
-             stream = self.handle_agent_run_stream(request)
+             stream = self.handle_agent_run_stream(request, timeout=timeout)
          else:
              # Otherwise, use keyword arguments
              if provider_uid is None or model is None or messages is None:
@@ -196,15 +266,19 @@ class BaseLivellmClient(ABC):
                      "Alternatively, pass an AgentRequest object as the first positional argument."
                  )

+             # Convert output_schema if it's a Pydantic BaseModel class
+             resolved_schema = self._resolve_output_schema(output_schema)
+
              agent_request = AgentRequest(
                  provider_uid=provider_uid,
                  model=model,
                  messages=messages,
                  tools=tools or [],
                  gen_config=kwargs or None,
-                 include_history=include_history
+                 include_history=include_history,
+                 output_schema=resolved_schema
              )
-             stream = self.handle_agent_run_stream(agent_request)
+             stream = self.handle_agent_run_stream(agent_request, timeout=timeout)

          async for chunk in stream:
              yield chunk
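
Note: the streaming variant takes the same two new keyword arguments. A sketch with the same illustrative names as above, bounding each step of the stream with a 30-second per-request timeout:

    # Illustrative: the per-request timeout overrides the client default
    # for this call only; output_schema is optional here as well.
    async for chunk in client.agent_run_stream(
        provider_uid="openai",
        model="gpt-4",
        messages=[TextMessage(...)],
        timeout=30.0,
    ):
        print(chunk)
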
@@ -213,6 +287,8 @@
      async def speak(
          self,
          request: Union[SpeakRequest, AudioFallbackRequest],
+         *,
+         timeout: Optional[float] = None,
      ) -> bytes:
          ...

@@ -227,13 +303,18 @@
          mime_type: str,
          sample_rate: int,
          chunk_size: int = 20,
+         timeout: Optional[float] = None,
          **kwargs
      ) -> bytes:
          ...


      @abstractmethod
-     async def handle_speak(self, request: Union[SpeakRequest, AudioFallbackRequest]) -> bytes:
+     async def handle_speak(
+         self,
+         request: Union[SpeakRequest, AudioFallbackRequest],
+         timeout: Optional[float] = None
+     ) -> bytes:
          ...

      async def speak(
@@ -247,6 +328,7 @@
          mime_type: Optional[str] = None,
          sample_rate: Optional[int] = None,
          chunk_size: int = 20,
+         timeout: Optional[float] = None,
          **kwargs
      ) -> bytes:
          """
@@ -277,6 +359,7 @@
              mime_type: The MIME type of the output audio
              sample_rate: The sample rate of the output audio
              chunk_size: Chunk size in milliseconds (default: 20ms)
+             timeout: Optional timeout in seconds (overrides default client timeout)
              gen_config: Optional generation configuration

          Returns:
@@ -288,7 +371,7 @@
                raise TypeError(
                    f"First positional argument must be SpeakRequest or AudioFallbackRequest, got {type(request)}"
                )
-             return await self.handle_speak(request)
+             return await self.handle_speak(request, timeout=timeout)

          # Otherwise, use keyword arguments
          if provider_uid is None or model is None or text is None or voice is None or mime_type is None or sample_rate is None:
@@ -307,12 +390,14 @@
              chunk_size=chunk_size,
              gen_config=kwargs or None
          )
-         return await self.handle_speak(speak_request)
+         return await self.handle_speak(speak_request, timeout=timeout)

      @overload
      def speak_stream(
          self,
          request: Union[SpeakRequest, AudioFallbackRequest],
+         *,
+         timeout: Optional[float] = None,
      ) -> AsyncIterator[bytes]:
          ...

@@ -327,13 +412,18 @@
          mime_type: str,
          sample_rate: int,
          chunk_size: int = 20,
+         timeout: Optional[float] = None,
          **kwargs
      ) -> AsyncIterator[bytes]:
          ...


      @abstractmethod
-     async def handle_speak_stream(self, request: Union[SpeakRequest, AudioFallbackRequest]) -> AsyncIterator[bytes]:
+     async def handle_speak_stream(
+         self,
+         request: Union[SpeakRequest, AudioFallbackRequest],
+         timeout: Optional[float] = None
+     ) -> AsyncIterator[bytes]:
          ...

      async def speak_stream(
@@ -347,6 +437,7 @@
          mime_type: Optional[str] = None,
          sample_rate: Optional[int] = None,
          chunk_size: int = 20,
+         timeout: Optional[float] = None,
          **kwargs
      ) -> AsyncIterator[bytes]:
          """
@@ -380,6 +471,7 @@
              mime_type: The MIME type of the output audio
              sample_rate: The sample rate of the output audio
              chunk_size: Chunk size in milliseconds (default: 20ms)
+             timeout: Optional timeout in seconds (overrides default client timeout)
              gen_config: Optional generation configuration

          Returns:
@@ -391,7 +483,7 @@
                raise TypeError(
                    f"First positional argument must be SpeakRequest or AudioFallbackRequest, got {type(request)}"
                )
-             speak_stream = self.handle_speak_stream(request)
+             speak_stream = self.handle_speak_stream(request, timeout=timeout)
          else:
              # Otherwise, use keyword arguments
              if provider_uid is None or model is None or text is None or voice is None or mime_type is None or sample_rate is None:
@@ -410,7 +502,7 @@
                  chunk_size=chunk_size,
                  gen_config=kwargs or None
              )
-             speak_stream = self.handle_speak_stream(speak_request)
+             speak_stream = self.handle_speak_stream(speak_request, timeout=timeout)
          async for chunk in speak_stream:
              yield chunk

@@ -418,6 +510,8 @@
      async def transcribe(
          self,
          request: Union[TranscribeRequest, TranscribeFallbackRequest],
+         *,
+         timeout: Optional[float] = None,
      ) -> TranscribeResponse:
          ...

@@ -429,13 +523,18 @@
          file: File,
          model: str,
          language: Optional[str] = None,
+         timeout: Optional[float] = None,
          **kwargs
      ) -> TranscribeResponse:
          ...


      @abstractmethod
-     async def handle_transcribe(self, request: Union[TranscribeRequest, TranscribeFallbackRequest]) -> TranscribeResponse:
+     async def handle_transcribe(
+         self,
+         request: Union[TranscribeRequest, TranscribeFallbackRequest],
+         timeout: Optional[float] = None
+     ) -> TranscribeResponse:
          ...

      async def transcribe(
@@ -446,6 +545,7 @@
          file: Optional[File] = None,
          model: Optional[str] = None,
          language: Optional[str] = None,
+         timeout: Optional[float] = None,
          **kwargs
      ) -> TranscribeResponse:
          """
@@ -469,6 +569,7 @@
              file: The audio file as a tuple (filename, content, content_type)
              model: The model to use for transcription
              language: Optional language code
+             timeout: Optional timeout in seconds (overrides default client timeout)
              gen_config: Optional generation configuration

          Returns:
@@ -481,7 +582,7 @@
                    f"First positional argument must be TranscribeRequest or TranscribeFallbackRequest, got {type(request)}"
                )
              # JSON-based request
-             return await self.handle_transcribe(request)
+             return await self.handle_transcribe(request, timeout=timeout)

          # Otherwise, use keyword arguments with multipart form-data request
          if provider_uid is None or file is None or model is None:
@@ -497,7 +598,7 @@
              language=language,
              gen_config=kwargs or None
          )
-         return await self.handle_transcribe(transcribe_request)
+         return await self.handle_transcribe(transcribe_request, timeout=timeout)


  class LivellmWsClient(BaseLivellmClient):
@@ -505,7 +606,8 @@ class LivellmWsClient(BaseLivellmClient):

      def __init__(
          self,
-         base_url: str,
+         base_url: str,
+         user_agent: Optional[str] = None,
          timeout: Optional[float] = None,
          max_size: Optional[int] = None,
          max_buffer_size: Optional[int] = None
@@ -523,6 +625,7 @@ class LivellmWsClient(BaseLivellmClient):
          self._ws_root_base_url = ws_url
          self.base_url = f"{ws_url}/livellm/ws"
          self.timeout = timeout
+         self.user_agent = user_agent or DEFAULT_USER_AGENT
          self.websocket = None
          self.sessions: Dict[str, asyncio.Queue] = {}
          self.max_buffer_size = max_buffer_size or 0 # None means unlimited buffer size
@@ -541,7 +644,8 @@
              self.base_url,
              open_timeout=self.timeout,
              close_timeout=self.timeout,
-             max_size=self.max_size
+             max_size=self.max_size,
+             additional_headers={"User-Agent": self.user_agent}
          )
          self.__listen_for_responses_task = asyncio.create_task(self.listen_for_responses())

@@ -572,7 +676,11 @@
              self.__listen_for_responses_task = None
          self.sessions.clear()

-     async def get_response(self, action: WsAction, payload: dict) -> dict:
+     def _get_effective_timeout(self, timeout: Optional[float]) -> Optional[float]:
+         """Get effective timeout: per-request timeout overrides default."""
+         return timeout if timeout is not None else self.timeout
+
+     async def get_response(self, action: WsAction, payload: dict, timeout: Optional[float] = None) -> dict:
          """Send a request and wait for response."""
          if self.websocket is None:
              await self.connect()
@@ -582,7 +690,17 @@
          q = await self.get_or_update_session(session_id)
          await self.websocket.send(json.dumps(request.model_dump()))

-         response: WsResponse = await q.get()
+         effective_timeout = self._get_effective_timeout(timeout)
+
+         try:
+             if effective_timeout:
+                 response: WsResponse = await asyncio.wait_for(q.get(), timeout=effective_timeout)
+             else:
+                 response: WsResponse = await q.get()
+         except asyncio.TimeoutError:
+             self.sessions.pop(session_id, None)
+             raise TimeoutError(f"Request timed out after {effective_timeout} seconds")
+
          self.sessions.pop(session_id)
          if response.status == WsStatus.ERROR:
              raise Exception(f"WebSocket failed: {response.error}")
@@ -591,7 +709,7 @@
          else:
              raise Exception(f"WebSocket failed with unknown status: {response}")

-     async def get_response_stream(self, action: WsAction, payload: dict) -> AsyncIterator[dict]:
+     async def get_response_stream(self, action: WsAction, payload: dict, timeout: Optional[float] = None) -> AsyncIterator[dict]:
          """Send a request and stream responses."""
          if self.websocket is None:
              await self.connect()
@@ -601,8 +719,17 @@
          q = await self.get_or_update_session(session_id)
          await self.websocket.send(json.dumps(request.model_dump()))

+         effective_timeout = self._get_effective_timeout(timeout)
+
          while True:
-             response: WsResponse = await q.get()
+             try:
+                 if effective_timeout:
+                     response: WsResponse = await asyncio.wait_for(q.get(), timeout=effective_timeout)
+                 else:
+                     response: WsResponse = await q.get()
+             except asyncio.TimeoutError:
+                 self.sessions.pop(session_id, None)
+                 raise TimeoutError(f"Request timed out after {effective_timeout} seconds")

              if response.status == WsStatus.STREAMING:
                  yield response.data
@@ -618,37 +745,60 @@

      # Implement abstract methods from BaseLivellmClient

-     async def handle_agent_run(self, request: Union[AgentRequest, AgentFallbackRequest]) -> AgentResponse:
+     async def handle_agent_run(
+         self,
+         request: Union[AgentRequest, AgentFallbackRequest],
+         timeout: Optional[float] = None
+     ) -> AgentResponse:
          """Handle agent run via WebSocket."""
          response = await self.get_response(
              WsAction.AGENT_RUN,
-             request.model_dump()
+             request.model_dump(),
+             timeout=timeout
          )
          return AgentResponse(**response)

-     async def handle_agent_run_stream(self, request: Union[AgentRequest, AgentFallbackRequest]) -> AsyncIterator[AgentResponse]:
+     async def handle_agent_run_stream(
+         self,
+         request: Union[AgentRequest, AgentFallbackRequest],
+         timeout: Optional[float] = None
+     ) -> AsyncIterator[AgentResponse]:
          """Handle streaming agent run via WebSocket."""
-         async for response in self.get_response_stream(WsAction.AGENT_RUN_STREAM, request.model_dump()):
+         async for response in self.get_response_stream(WsAction.AGENT_RUN_STREAM, request.model_dump(), timeout=timeout):
              yield AgentResponse(**response)

-     async def handle_speak(self, request: Union[SpeakRequest, AudioFallbackRequest]) -> bytes:
+     async def handle_speak(
+         self,
+         request: Union[SpeakRequest, AudioFallbackRequest],
+         timeout: Optional[float] = None
+     ) -> bytes:
          """Handle speak request via WebSocket."""
          response = await self.get_response(
              WsAction.AUDIO_SPEAK,
-             request.model_dump()
+             request.model_dump(),
+             timeout=timeout
          )
          return EncodedSpeakResponse(**response).audio

-     async def handle_speak_stream(self, request: Union[SpeakRequest, AudioFallbackRequest]) -> AsyncIterator[bytes]:
+     async def handle_speak_stream(
+         self,
+         request: Union[SpeakRequest, AudioFallbackRequest],
+         timeout: Optional[float] = None
+     ) -> AsyncIterator[bytes]:
          """Handle streaming speak request via WebSocket."""
-         async for response in self.get_response_stream(WsAction.AUDIO_SPEAK_STREAM, request.model_dump()):
+         async for response in self.get_response_stream(WsAction.AUDIO_SPEAK_STREAM, request.model_dump(), timeout=timeout):
              yield EncodedSpeakResponse(**response).audio

-     async def handle_transcribe(self, request: Union[TranscribeRequest, TranscribeFallbackRequest]) -> TranscribeResponse:
+     async def handle_transcribe(
+         self,
+         request: Union[TranscribeRequest, TranscribeFallbackRequest],
+         timeout: Optional[float] = None
+     ) -> TranscribeResponse:
          """Handle transcribe request via WebSocket."""
          response = await self.get_response(
              WsAction.AUDIO_TRANSCRIBE,
-             request.model_dump()
+             request.model_dump(),
+             timeout=timeout
          )
          return TranscribeResponse(**response)

@@ -680,7 +830,8 @@ class LivellmClient(BaseLivellmClient):

      def __init__(
          self,
-         base_url: str,
+         base_url: str,
+         user_agent: Optional[str] = None,
          timeout: Optional[float] = None,
          configs: Optional[List[Settings]] = None
      ):
@@ -689,17 +840,23 @@
          # HTTP API base URL for this client
          self.base_url = f"{self._root_base_url}/livellm"
          self.timeout = timeout
-         self.client = httpx.AsyncClient(base_url=self.base_url, timeout=self.timeout) \
-             if self.timeout else httpx.AsyncClient(base_url=self.base_url)
+         self.user_agent = user_agent or DEFAULT_USER_AGENT
+         # Create client without timeout - we'll pass timeout per-request
+         self.client = httpx.AsyncClient(base_url=self.base_url)
          self.settings = []
          self.headers = {
              "Content-Type": "application/json",
+             "User-Agent": self.user_agent,
          }
          # Lazily-created realtime (WebSocket) client
          self._realtime = None
          if configs:
              self.update_configs_post_init(configs)

+     def _get_effective_timeout(self, timeout: Optional[float]) -> Optional[float]:
+         """Get effective timeout: per-request timeout overrides default."""
+         return timeout if timeout is not None else self.timeout
+
      @property
      def realtime(self) -> LivellmWsClient:
          """
@@ -713,7 +870,7 @@
          """
          if self._realtime is None:
              # Pass the same root base URL; LivellmWsClient will handle ws/wss conversion.
-             self._realtime = LivellmWsClient(self._root_base_url, timeout=self.timeout)
+             self._realtime = LivellmWsClient(self._root_base_url, user_agent=self.user_agent, timeout=self.timeout)
          return self._realtime

      def update_configs_post_init(self, configs: List[Settings]) -> SuccessResponse:
@@ -730,15 +887,17 @@
          return SuccessResponse(success=True, message="Configs updated successfully")


-     async def delete(self, endpoint: str) -> dict:
+     async def delete(self, endpoint: str, timeout: Optional[float] = None) -> dict:
          """
          Delete a resource from the given endpoint and return the response.
          Args:
              endpoint: The endpoint to delete from.
+             timeout: Optional timeout override.
          Returns:
              The response from the endpoint.
          """
-         response = await self.client.delete(endpoint, headers=self.headers)
+         effective_timeout = self._get_effective_timeout(timeout)
+         response = await self.client.delete(endpoint, headers=self.headers, timeout=effective_timeout)
          response.raise_for_status()
          return response.json()

@@ -746,7 +905,8 @@
          self,
          files: dict,
          data: dict,
-         endpoint: str
+         endpoint: str,
+         timeout: Optional[float] = None
      ) -> dict:
          """
          Post a multipart request to the given endpoint and return the response.
@@ -754,27 +914,32 @@
              files: The files to send in the request.
              data: The data to send in the request.
              endpoint: The endpoint to post to.
+             timeout: Optional timeout override.
          Returns:
              The response from the endpoint.
          """
+         effective_timeout = self._get_effective_timeout(timeout)
          # Don't pass Content-Type header for multipart - httpx will set it automatically
-         response = await self.client.post(endpoint, files=files, data=data)
+         response = await self.client.post(endpoint, files=files, data=data, timeout=effective_timeout)
          response.raise_for_status()
          return response.json()


      async def get(
          self,
-         endpoint: str
+         endpoint: str,
+         timeout: Optional[float] = None
      ) -> dict:
          """
          Get a request from the given endpoint and return the response.
          Args:
              endpoint: The endpoint to get from.
+             timeout: Optional timeout override.
          Returns:
              The response from the endpoint.
          """
-         response = await self.client.get(endpoint, headers=self.headers)
+         effective_timeout = self._get_effective_timeout(timeout)
+         response = await self.client.get(endpoint, headers=self.headers, timeout=effective_timeout)
          response.raise_for_status()
          return response.json()

@@ -783,7 +948,8 @@
          json_data: dict,
          endpoint: str,
          expect_stream: bool = False,
-         expect_json: bool = True
+         expect_json: bool = True,
+         timeout: Optional[float] = None
      ) -> Union[dict, bytes, AsyncIterator[Union[dict, bytes]]]:
          """
          Post a request to the given endpoint and return the response.
@@ -795,12 +961,14 @@
              endpoint: The endpoint to post to.
              expect_stream: Whether to expect a stream response.
              expect_json: Whether to expect a JSON response.
+             timeout: Optional timeout override.
          Returns:
              The response from the endpoint.
          Raises:
              Exception: If the response is not 200 or 201.
          """
-         response = await self.client.post(endpoint, json=json_data, headers=self.headers)
+         effective_timeout = self._get_effective_timeout(timeout)
+         response = await self.client.post(endpoint, json=json_data, headers=self.headers, timeout=effective_timeout)
          if response.status_code not in [200, 201]:
              error_response = await response.aread()
              error_response = error_response.decode("utf-8")
@@ -823,26 +991,26 @@
          else:
              return response.content

-     async def ping(self) -> SuccessResponse:
-         result = await self.get("ping")
+     async def ping(self, timeout: Optional[float] = None) -> SuccessResponse:
+         result = await self.get("ping", timeout=timeout)
          return SuccessResponse(**result)

-     async def update_config(self, config: Settings) -> SuccessResponse:
-         result = await self.post(config.model_dump(), "providers/config", expect_json=True)
+     async def update_config(self, config: Settings, timeout: Optional[float] = None) -> SuccessResponse:
+         result = await self.post(config.model_dump(), "providers/config", expect_json=True, timeout=timeout)
          self.settings.append(config)
          return SuccessResponse(**result)

-     async def update_configs(self, configs: List[Settings]) -> SuccessResponse:
+     async def update_configs(self, configs: List[Settings], timeout: Optional[float] = None) -> SuccessResponse:
          for config in configs:
-             await self.update_config(config)
+             await self.update_config(config, timeout=timeout)
          return SuccessResponse(success=True, message="Configs updated successfully")

-     async def get_configs(self) -> List[Settings]:
-         result = await self.get("providers/configs")
+     async def get_configs(self, timeout: Optional[float] = None) -> List[Settings]:
+         result = await self.get("providers/configs", timeout=timeout)
          return [Settings(**config) for config in result]

-     async def delete_config(self, config_uid: str) -> SuccessResponse:
-         result = await self.delete(f"providers/config/{config_uid}")
+     async def delete_config(self, config_uid: str, timeout: Optional[float] = None) -> SuccessResponse:
+         result = await self.delete(f"providers/config/{config_uid}", timeout=timeout)
          return SuccessResponse(**result)

      async def cleanup(self):
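
Note: every config helper now forwards the override, so individual control-plane calls can be bounded without touching the client default. A sketch with illustrative values:

    # Illustrative: bound the health check and config fetch independently;
    # omitting timeout falls back to the client-level default.
    await client.ping(timeout=5.0)
    configs = await client.get_configs(timeout=10.0)
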
@@ -857,59 +1025,51 @@ class LivellmClient(BaseLivellmClient):
          # Also close any realtime WebSocket client if it was created
          if self._realtime is not None:
              await self._realtime.disconnect()
-
-     def __del__(self):
-         """
-         Destructor to clean up resources when the client is garbage collected.
-         This will close the HTTP client and attempt to delete configs if cleanup wasn't called.
-         Note: It's recommended to use the async context manager or call cleanup() explicitly.
-         """
-         # Warn user if cleanup wasn't called
-         if self.settings:
-             warnings.warn(
-                 "LivellmClient is being garbage collected without explicit cleanup. "
-                 "Provider configs may not be deleted from the server. "
-                 "Consider using 'async with' or calling 'await client.cleanup()' explicitly.",
-                 ResourceWarning,
-                 stacklevel=2
-             )
-
-         # Close the httpx client synchronously
-         # httpx.AsyncClient stores a sync Transport that needs cleanup
-         try:
-             with httpx.Client(base_url=self.base_url) as client:
-                 for config in self.settings:
-                     config: Settings = config
-                     client.delete(f"providers/config/{config.uid}", headers=self.headers)
-         except Exception:
-             # Silently fail - we're in a destructor
-             pass

      # Implement abstract methods from BaseLivellmClient

-     async def handle_agent_run(self, request: Union[AgentRequest, AgentFallbackRequest]) -> AgentResponse:
+     async def handle_agent_run(
+         self,
+         request: Union[AgentRequest, AgentFallbackRequest],
+         timeout: Optional[float] = None
+     ) -> AgentResponse:
          """Handle agent run via HTTP."""
-         result = await self.post(request.model_dump(), "agent/run", expect_json=True)
+         result = await self.post(request.model_dump(), "agent/run", expect_json=True, timeout=timeout)
          return AgentResponse(**result)

-     async def handle_agent_run_stream(self, request: Union[AgentRequest, AgentFallbackRequest]) -> AsyncIterator[AgentResponse]:
+     async def handle_agent_run_stream(
+         self,
+         request: Union[AgentRequest, AgentFallbackRequest],
+         timeout: Optional[float] = None
+     ) -> AsyncIterator[AgentResponse]:
          """Handle streaming agent run via HTTP."""
-         stream = await self.post(request.model_dump(), "agent/run_stream", expect_stream=True, expect_json=True)
+         stream = await self.post(request.model_dump(), "agent/run_stream", expect_stream=True, expect_json=True, timeout=timeout)
          async for chunk in stream:
              yield AgentResponse(**chunk)

-     async def handle_speak(self, request: Union[SpeakRequest, AudioFallbackRequest]) -> bytes:
+     async def handle_speak(
+         self,
+         request: Union[SpeakRequest, AudioFallbackRequest],
+         timeout: Optional[float] = None
+     ) -> bytes:
          """Handle speak request via HTTP."""
-         return await self.post(request.model_dump(), "audio/speak", expect_json=False)
+         return await self.post(request.model_dump(), "audio/speak", expect_json=False, timeout=timeout)

-     async def handle_speak_stream(self, request: Union[SpeakRequest, AudioFallbackRequest]) -> AsyncIterator[bytes]:
+     async def handle_speak_stream(
+         self,
+         request: Union[SpeakRequest, AudioFallbackRequest],
+         timeout: Optional[float] = None
+     ) -> AsyncIterator[bytes]:
          """Handle streaming speak request via HTTP."""
-         speak_stream = await self.post(request.model_dump(), "audio/speak_stream", expect_stream=True, expect_json=False)
+         speak_stream = await self.post(request.model_dump(), "audio/speak_stream", expect_stream=True, expect_json=False, timeout=timeout)
          async for chunk in speak_stream:
              yield chunk

-     async def handle_transcribe(self, request: Union[TranscribeRequest, TranscribeFallbackRequest]) -> TranscribeResponse:
+     async def handle_transcribe(
+         self,
+         request: Union[TranscribeRequest, TranscribeFallbackRequest],
+         timeout: Optional[float] = None
+     ) -> TranscribeResponse:
          """Handle transcribe request via HTTP."""
-         result = await self.post(request.model_dump(), "audio/transcribe_json", expect_json=True)
+         result = await self.post(request.model_dump(), "audio/transcribe_json", expect_json=True, timeout=timeout)
          return TranscribeResponse(**result)
-
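
Note: with the `__del__` destructor removed, nothing deletes server-side provider configs at garbage collection anymore; teardown is now the caller's job. A sketch of the explicit pattern, assuming cleanup() remains the supported teardown path:

    # Illustrative: try/finally guarantees cleanup() runs even if a call fails.
    client = LivellmClient("https://api.example.com")
    try:
        await client.ping()
    finally:
        await client.cleanup()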