livellm 1.6.1__py3-none-any.whl → 1.7.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- livellm/livellm.py +207 -90
- livellm/models/__init__.py +2 -1
- livellm/models/agent/__init__.py +2 -1
- livellm/models/agent/agent.py +9 -0
- {livellm-1.6.1.dist-info → livellm-1.7.2.dist-info}/METADATA +131 -13
- {livellm-1.6.1.dist-info → livellm-1.7.2.dist-info}/RECORD +8 -8
- {livellm-1.6.1.dist-info → livellm-1.7.2.dist-info}/WHEEL +0 -0
- {livellm-1.6.1.dist-info → livellm-1.7.2.dist-info}/licenses/LICENSE +0 -0
livellm/livellm.py
CHANGED
@@ -5,7 +5,7 @@ import json
 import warnings
 from typing import List, Optional, AsyncIterator, Union, overload, Dict, Any, Type
 from .models.common import Settings, SuccessResponse
-from .models.agent.agent import AgentRequest, AgentResponse
+from .models.agent.agent import AgentRequest, AgentResponse, ContextOverflowStrategy
 from .models.agent.output_schema import OutputSchema
 from .models.audio.speak import SpeakRequest, EncodedSpeakResponse
 from .models.audio.transcribe import TranscribeRequest, TranscribeResponse, File
@@ -31,10 +31,15 @@ DEFAULT_USER_AGENT = f"livellm-python/{__version__}"

 class BaseLivellmClient(ABC):

+    # Default timeout (set by subclasses)
+    timeout: Optional[float] = None
+
     @overload
     async def agent_run(
         self,
         request: Union[AgentRequest, AgentFallbackRequest],
+        *,
+        timeout: Optional[float] = None,
     ) -> AgentResponse:
         ...

@@ -48,13 +53,20 @@ class BaseLivellmClient(ABC):
         tools: Optional[list] = None,
         include_history: bool = False,
         output_schema: Optional[Union[OutputSchema, Dict[str, Any], Type[BaseModel]]] = None,
+        context_limit: int = 0,
+        context_overflow_strategy: ContextOverflowStrategy = ContextOverflowStrategy.TRUNCATE,
+        timeout: Optional[float] = None,
         **kwargs
     ) -> AgentResponse:
         ...


     @abstractmethod
-    async def handle_agent_run(self, request: Union[AgentRequest, AgentFallbackRequest]) -> AgentResponse:
+    async def handle_agent_run(
+        self,
+        request: Union[AgentRequest, AgentFallbackRequest],
+        timeout: Optional[float] = None
+    ) -> AgentResponse:
         ...

     async def agent_run(
@@ -67,6 +79,9 @@ class BaseLivellmClient(ABC):
         tools: Optional[list] = None,
         include_history: bool = False,
         output_schema: Optional[Union[OutputSchema, Dict[str, Any], Type[BaseModel]]] = None,
+        context_limit: int = 0,
+        context_overflow_strategy: ContextOverflowStrategy = ContextOverflowStrategy.TRUNCATE,
+        timeout: Optional[float] = None,
         **kwargs
     ) -> AgentResponse:
         """
@@ -100,6 +115,9 @@
                 - An OutputSchema instance
                 - A dict representing a JSON schema
                 - A Pydantic BaseModel class (will be converted to OutputSchema)
+            context_limit: Maximum context size in tokens. If <= 0, context overflow handling is disabled.
+            context_overflow_strategy: Strategy for handling context overflow: 'truncate' or 'recycle'
+            timeout: Optional timeout in seconds (overrides default client timeout)

         Returns:
             AgentResponse with the agent's output. If output_schema was provided,
@@ -111,7 +129,7 @@
                 raise TypeError(
                     f"First positional argument must be AgentRequest or AgentFallbackRequest, got {type(request)}"
                 )
-            return await self.handle_agent_run(request)
+            return await self.handle_agent_run(request, timeout=timeout)

         # Otherwise, use keyword arguments
         if provider_uid is None or model is None or messages is None:
@@ -130,9 +148,11 @@
            tools=tools or [],
            gen_config=kwargs or None,
            include_history=include_history,
-           output_schema=resolved_schema
+           output_schema=resolved_schema,
+           context_limit=context_limit,
+           context_overflow_strategy=context_overflow_strategy
        )
-       return await self.handle_agent_run(agent_request)
+       return await self.handle_agent_run(agent_request, timeout=timeout)

    def _resolve_output_schema(
        self,
@@ -157,6 +177,8 @@
     def agent_run_stream(
         self,
         request: Union[AgentRequest, AgentFallbackRequest],
+        *,
+        timeout: Optional[float] = None,
     ) -> AsyncIterator[AgentResponse]:
         ...

@@ -170,13 +192,20 @@
         tools: Optional[list] = None,
         include_history: bool = False,
         output_schema: Optional[Union[OutputSchema, Dict[str, Any], Type[BaseModel]]] = None,
+        context_limit: int = 0,
+        context_overflow_strategy: ContextOverflowStrategy = ContextOverflowStrategy.TRUNCATE,
+        timeout: Optional[float] = None,
         **kwargs
     ) -> AsyncIterator[AgentResponse]:
         ...


     @abstractmethod
-    async def handle_agent_run_stream(self, request: Union[AgentRequest, AgentFallbackRequest]) -> AsyncIterator[AgentResponse]:
+    async def handle_agent_run_stream(
+        self,
+        request: Union[AgentRequest, AgentFallbackRequest],
+        timeout: Optional[float] = None
+    ) -> AsyncIterator[AgentResponse]:
         ...

     async def agent_run_stream(
@@ -189,6 +218,9 @@
         tools: Optional[list] = None,
         include_history: bool = False,
         output_schema: Optional[Union[OutputSchema, Dict[str, Any], Type[BaseModel]]] = None,
+        context_limit: int = 0,
+        context_overflow_strategy: ContextOverflowStrategy = ContextOverflowStrategy.TRUNCATE,
+        timeout: Optional[float] = None,
         **kwargs
     ) -> AsyncIterator[AgentResponse]:
         """
@@ -225,6 +257,9 @@
                 - An OutputSchema instance
                 - A dict representing a JSON schema
                 - A Pydantic BaseModel class (will be converted to OutputSchema)
+            context_limit: Maximum context size in tokens. If <= 0, context overflow handling is disabled.
+            context_overflow_strategy: Strategy for handling context overflow: 'truncate' or 'recycle'
+            timeout: Optional timeout in seconds (overrides default client timeout)

         Returns:
             AsyncIterator of AgentResponse chunks. If output_schema was provided,
@@ -236,7 +271,7 @@
                 raise TypeError(
                     f"First positional argument must be AgentRequest or AgentFallbackRequest, got {type(request)}"
                 )
-            stream = self.handle_agent_run_stream(request)
+            stream = self.handle_agent_run_stream(request, timeout=timeout)
         else:
             # Otherwise, use keyword arguments
             if provider_uid is None or model is None or messages is None:
@@ -255,9 +290,11 @@
                tools=tools or [],
                gen_config=kwargs or None,
                include_history=include_history,
-               output_schema=resolved_schema
+               output_schema=resolved_schema,
+               context_limit=context_limit,
+               context_overflow_strategy=context_overflow_strategy
            )
-           stream = self.handle_agent_run_stream(agent_request)
+           stream = self.handle_agent_run_stream(agent_request, timeout=timeout)

         async for chunk in stream:
             yield chunk
@@ -266,6 +303,8 @@
     async def speak(
         self,
         request: Union[SpeakRequest, AudioFallbackRequest],
+        *,
+        timeout: Optional[float] = None,
     ) -> bytes:
         ...

@@ -280,13 +319,18 @@
         mime_type: str,
         sample_rate: int,
         chunk_size: int = 20,
+        timeout: Optional[float] = None,
         **kwargs
     ) -> bytes:
         ...


     @abstractmethod
-    async def handle_speak(self, request: Union[SpeakRequest, AudioFallbackRequest]) -> bytes:
+    async def handle_speak(
+        self,
+        request: Union[SpeakRequest, AudioFallbackRequest],
+        timeout: Optional[float] = None
+    ) -> bytes:
         ...

     async def speak(
@@ -300,6 +344,7 @@
         mime_type: Optional[str] = None,
         sample_rate: Optional[int] = None,
         chunk_size: int = 20,
+        timeout: Optional[float] = None,
         **kwargs
     ) -> bytes:
         """
@@ -330,6 +375,7 @@
             mime_type: The MIME type of the output audio
             sample_rate: The sample rate of the output audio
             chunk_size: Chunk size in milliseconds (default: 20ms)
+            timeout: Optional timeout in seconds (overrides default client timeout)
             gen_config: Optional generation configuration

         Returns:
@@ -341,7 +387,7 @@
                 raise TypeError(
                     f"First positional argument must be SpeakRequest or AudioFallbackRequest, got {type(request)}"
                 )
-            return await self.handle_speak(request)
+            return await self.handle_speak(request, timeout=timeout)

         # Otherwise, use keyword arguments
         if provider_uid is None or model is None or text is None or voice is None or mime_type is None or sample_rate is None:
@@ -360,12 +406,14 @@
            chunk_size=chunk_size,
            gen_config=kwargs or None
        )
-       return await self.handle_speak(speak_request)
+       return await self.handle_speak(speak_request, timeout=timeout)

     @overload
     def speak_stream(
         self,
         request: Union[SpeakRequest, AudioFallbackRequest],
+        *,
+        timeout: Optional[float] = None,
     ) -> AsyncIterator[bytes]:
         ...

@@ -380,13 +428,18 @@
         mime_type: str,
         sample_rate: int,
         chunk_size: int = 20,
+        timeout: Optional[float] = None,
         **kwargs
     ) -> AsyncIterator[bytes]:
         ...


     @abstractmethod
-    async def handle_speak_stream(self, request: Union[SpeakRequest, AudioFallbackRequest]) -> AsyncIterator[bytes]:
+    async def handle_speak_stream(
+        self,
+        request: Union[SpeakRequest, AudioFallbackRequest],
+        timeout: Optional[float] = None
+    ) -> AsyncIterator[bytes]:
         ...

     async def speak_stream(
@@ -400,6 +453,7 @@
         mime_type: Optional[str] = None,
         sample_rate: Optional[int] = None,
         chunk_size: int = 20,
+        timeout: Optional[float] = None,
         **kwargs
     ) -> AsyncIterator[bytes]:
         """
@@ -433,6 +487,7 @@
             mime_type: The MIME type of the output audio
             sample_rate: The sample rate of the output audio
             chunk_size: Chunk size in milliseconds (default: 20ms)
+            timeout: Optional timeout in seconds (overrides default client timeout)
             gen_config: Optional generation configuration

         Returns:
@@ -444,7 +499,7 @@
                 raise TypeError(
                     f"First positional argument must be SpeakRequest or AudioFallbackRequest, got {type(request)}"
                 )
-            speak_stream = self.handle_speak_stream(request)
+            speak_stream = self.handle_speak_stream(request, timeout=timeout)
         else:
             # Otherwise, use keyword arguments
             if provider_uid is None or model is None or text is None or voice is None or mime_type is None or sample_rate is None:
@@ -463,7 +518,7 @@
                chunk_size=chunk_size,
                gen_config=kwargs or None
            )
-           speak_stream = self.handle_speak_stream(speak_request)
+           speak_stream = self.handle_speak_stream(speak_request, timeout=timeout)
         async for chunk in speak_stream:
             yield chunk

@@ -471,6 +526,8 @@
     async def transcribe(
         self,
         request: Union[TranscribeRequest, TranscribeFallbackRequest],
+        *,
+        timeout: Optional[float] = None,
     ) -> TranscribeResponse:
         ...

@@ -482,13 +539,18 @@
         file: File,
         model: str,
         language: Optional[str] = None,
+        timeout: Optional[float] = None,
         **kwargs
     ) -> TranscribeResponse:
         ...


     @abstractmethod
-    async def handle_transcribe(self, request: Union[TranscribeRequest, TranscribeFallbackRequest]) -> TranscribeResponse:
+    async def handle_transcribe(
+        self,
+        request: Union[TranscribeRequest, TranscribeFallbackRequest],
+        timeout: Optional[float] = None
+    ) -> TranscribeResponse:
         ...

     async def transcribe(
@@ -499,6 +561,7 @@
         file: Optional[File] = None,
         model: Optional[str] = None,
         language: Optional[str] = None,
+        timeout: Optional[float] = None,
         **kwargs
     ) -> TranscribeResponse:
         """
@@ -522,6 +585,7 @@
             file: The audio file as a tuple (filename, content, content_type)
             model: The model to use for transcription
             language: Optional language code
+            timeout: Optional timeout in seconds (overrides default client timeout)
             gen_config: Optional generation configuration

         Returns:
@@ -534,7 +598,7 @@
                     f"First positional argument must be TranscribeRequest or TranscribeFallbackRequest, got {type(request)}"
                 )
             # JSON-based request
-            return await self.handle_transcribe(request)
+            return await self.handle_transcribe(request, timeout=timeout)

         # Otherwise, use keyword arguments with multipart form-data request
         if provider_uid is None or file is None or model is None:
@@ -550,7 +614,7 @@
            language=language,
            gen_config=kwargs or None
        )
-       return await self.handle_transcribe(transcribe_request)
+       return await self.handle_transcribe(transcribe_request, timeout=timeout)


 class LivellmWsClient(BaseLivellmClient):
@@ -628,7 +692,11 @@ class LivellmWsClient(BaseLivellmClient):
         self.__listen_for_responses_task = None
         self.sessions.clear()

-    async def get_response(self, action: WsAction, payload: dict) -> dict:
+    def _get_effective_timeout(self, timeout: Optional[float]) -> Optional[float]:
+        """Get effective timeout: per-request timeout overrides default."""
+        return timeout if timeout is not None else self.timeout
+
+    async def get_response(self, action: WsAction, payload: dict, timeout: Optional[float] = None) -> dict:
         """Send a request and wait for response."""
         if self.websocket is None:
             await self.connect()
@@ -638,7 +706,17 @@
         q = await self.get_or_update_session(session_id)
         await self.websocket.send(json.dumps(request.model_dump()))

-        response: WsResponse = await q.get()
+        effective_timeout = self._get_effective_timeout(timeout)
+
+        try:
+            if effective_timeout:
+                response: WsResponse = await asyncio.wait_for(q.get(), timeout=effective_timeout)
+            else:
+                response: WsResponse = await q.get()
+        except asyncio.TimeoutError:
+            self.sessions.pop(session_id, None)
+            raise TimeoutError(f"Request timed out after {effective_timeout} seconds")
+
         self.sessions.pop(session_id)
         if response.status == WsStatus.ERROR:
             raise Exception(f"WebSocket failed: {response.error}")
@@ -647,7 +725,7 @@
         else:
             raise Exception(f"WebSocket failed with unknown status: {response}")

-    async def get_response_stream(self, action: WsAction, payload: dict) -> AsyncIterator[dict]:
+    async def get_response_stream(self, action: WsAction, payload: dict, timeout: Optional[float] = None) -> AsyncIterator[dict]:
         """Send a request and stream responses."""
         if self.websocket is None:
             await self.connect()
@@ -657,8 +735,17 @@
         q = await self.get_or_update_session(session_id)
         await self.websocket.send(json.dumps(request.model_dump()))

+        effective_timeout = self._get_effective_timeout(timeout)
+
         while True:
-            response: WsResponse = await q.get()
+            try:
+                if effective_timeout:
+                    response: WsResponse = await asyncio.wait_for(q.get(), timeout=effective_timeout)
+                else:
+                    response: WsResponse = await q.get()
+            except asyncio.TimeoutError:
+                self.sessions.pop(session_id, None)
+                raise TimeoutError(f"Request timed out after {effective_timeout} seconds")

             if response.status == WsStatus.STREAMING:
                 yield response.data
@@ -674,37 +761,60 @@

     # Implement abstract methods from BaseLivellmClient

-    async def handle_agent_run(self, request: Union[AgentRequest, AgentFallbackRequest]) -> AgentResponse:
+    async def handle_agent_run(
+        self,
+        request: Union[AgentRequest, AgentFallbackRequest],
+        timeout: Optional[float] = None
+    ) -> AgentResponse:
         """Handle agent run via WebSocket."""
         response = await self.get_response(
             WsAction.AGENT_RUN,
-            request.model_dump()
+            request.model_dump(),
+            timeout=timeout
         )
         return AgentResponse(**response)

-    async def handle_agent_run_stream(self, request: Union[AgentRequest, AgentFallbackRequest]) -> AsyncIterator[AgentResponse]:
+    async def handle_agent_run_stream(
+        self,
+        request: Union[AgentRequest, AgentFallbackRequest],
+        timeout: Optional[float] = None
+    ) -> AsyncIterator[AgentResponse]:
         """Handle streaming agent run via WebSocket."""
-        async for response in self.get_response_stream(WsAction.AGENT_RUN_STREAM, request.model_dump()):
+        async for response in self.get_response_stream(WsAction.AGENT_RUN_STREAM, request.model_dump(), timeout=timeout):
             yield AgentResponse(**response)

-    async def handle_speak(self, request: Union[SpeakRequest, AudioFallbackRequest]) -> bytes:
+    async def handle_speak(
+        self,
+        request: Union[SpeakRequest, AudioFallbackRequest],
+        timeout: Optional[float] = None
+    ) -> bytes:
         """Handle speak request via WebSocket."""
         response = await self.get_response(
             WsAction.AUDIO_SPEAK,
-            request.model_dump()
+            request.model_dump(),
+            timeout=timeout
         )
         return EncodedSpeakResponse(**response).audio

-    async def handle_speak_stream(self, request: Union[SpeakRequest, AudioFallbackRequest]) -> AsyncIterator[bytes]:
+    async def handle_speak_stream(
+        self,
+        request: Union[SpeakRequest, AudioFallbackRequest],
+        timeout: Optional[float] = None
+    ) -> AsyncIterator[bytes]:
         """Handle streaming speak request via WebSocket."""
-        async for response in self.get_response_stream(WsAction.AUDIO_SPEAK_STREAM, request.model_dump()):
+        async for response in self.get_response_stream(WsAction.AUDIO_SPEAK_STREAM, request.model_dump(), timeout=timeout):
             yield EncodedSpeakResponse(**response).audio

-    async def handle_transcribe(self, request: Union[TranscribeRequest, TranscribeFallbackRequest]) -> TranscribeResponse:
+    async def handle_transcribe(
+        self,
+        request: Union[TranscribeRequest, TranscribeFallbackRequest],
+        timeout: Optional[float] = None
+    ) -> TranscribeResponse:
         """Handle transcribe request via WebSocket."""
         response = await self.get_response(
             WsAction.AUDIO_TRANSCRIBE,
-            request.model_dump()
+            request.model_dump(),
+            timeout=timeout
         )
         return TranscribeResponse(**response)

@@ -747,8 +857,8 @@ class LivellmClient(BaseLivellmClient):
         self.base_url = f"{self._root_base_url}/livellm"
         self.timeout = timeout
         self.user_agent = user_agent or DEFAULT_USER_AGENT
-
-
+        # Create client without timeout - we'll pass timeout per-request
+        self.client = httpx.AsyncClient(base_url=self.base_url)
         self.settings = []
         self.headers = {
             "Content-Type": "application/json",
@@ -759,6 +869,10 @@
         if configs:
             self.update_configs_post_init(configs)

+    def _get_effective_timeout(self, timeout: Optional[float]) -> Optional[float]:
+        """Get effective timeout: per-request timeout overrides default."""
+        return timeout if timeout is not None else self.timeout
+
     @property
     def realtime(self) -> LivellmWsClient:
         """
@@ -789,15 +903,17 @@
         return SuccessResponse(success=True, message="Configs updated successfully")


-    async def delete(self, endpoint: str) -> dict:
+    async def delete(self, endpoint: str, timeout: Optional[float] = None) -> dict:
         """
         Delete a resource from the given endpoint and return the response.
         Args:
             endpoint: The endpoint to delete from.
+            timeout: Optional timeout override.
         Returns:
             The response from the endpoint.
         """
-        response = await self.client.delete(endpoint, headers=self.headers)
+        effective_timeout = self._get_effective_timeout(timeout)
+        response = await self.client.delete(endpoint, headers=self.headers, timeout=effective_timeout)
         response.raise_for_status()
         return response.json()

@@ -805,7 +921,8 @@
         self,
         files: dict,
         data: dict,
-        endpoint: str
+        endpoint: str,
+        timeout: Optional[float] = None
     ) -> dict:
         """
         Post a multipart request to the given endpoint and return the response.
@@ -813,27 +930,32 @@
             files: The files to send in the request.
             data: The data to send in the request.
             endpoint: The endpoint to post to.
+            timeout: Optional timeout override.
         Returns:
             The response from the endpoint.
         """
+        effective_timeout = self._get_effective_timeout(timeout)
         # Don't pass Content-Type header for multipart - httpx will set it automatically
-        response = await self.client.post(endpoint, files=files, data=data)
+        response = await self.client.post(endpoint, files=files, data=data, timeout=effective_timeout)
         response.raise_for_status()
         return response.json()


     async def get(
         self,
-        endpoint: str
+        endpoint: str,
+        timeout: Optional[float] = None
     ) -> dict:
         """
         Get a request from the given endpoint and return the response.
         Args:
             endpoint: The endpoint to get from.
+            timeout: Optional timeout override.
         Returns:
             The response from the endpoint.
         """
-        response = await self.client.get(endpoint, headers=self.headers)
+        effective_timeout = self._get_effective_timeout(timeout)
+        response = await self.client.get(endpoint, headers=self.headers, timeout=effective_timeout)
         response.raise_for_status()
         return response.json()

@@ -842,7 +964,8 @@
         json_data: dict,
         endpoint: str,
         expect_stream: bool = False,
-        expect_json: bool = True
+        expect_json: bool = True,
+        timeout: Optional[float] = None
     ) -> Union[dict, bytes, AsyncIterator[Union[dict, bytes]]]:
         """
         Post a request to the given endpoint and return the response.
@@ -854,12 +977,14 @@
             endpoint: The endpoint to post to.
             expect_stream: Whether to expect a stream response.
             expect_json: Whether to expect a JSON response.
+            timeout: Optional timeout override.
         Returns:
             The response from the endpoint.
         Raises:
             Exception: If the response is not 200 or 201.
         """
-        response = await self.client.post(endpoint, json=json_data, headers=self.headers)
+        effective_timeout = self._get_effective_timeout(timeout)
+        response = await self.client.post(endpoint, json=json_data, headers=self.headers, timeout=effective_timeout)
         if response.status_code not in [200, 201]:
             error_response = await response.aread()
             error_response = error_response.decode("utf-8")
@@ -882,26 +1007,26 @@
         else:
             return response.content

-    async def ping(self) -> SuccessResponse:
-        result = await self.get("ping")
+    async def ping(self, timeout: Optional[float] = None) -> SuccessResponse:
+        result = await self.get("ping", timeout=timeout)
         return SuccessResponse(**result)

-    async def update_config(self, config: Settings) -> SuccessResponse:
-        result = await self.post(config.model_dump(), "providers/config", expect_json=True)
+    async def update_config(self, config: Settings, timeout: Optional[float] = None) -> SuccessResponse:
+        result = await self.post(config.model_dump(), "providers/config", expect_json=True, timeout=timeout)
         self.settings.append(config)
         return SuccessResponse(**result)

-    async def update_configs(self, configs: List[Settings]) -> SuccessResponse:
+    async def update_configs(self, configs: List[Settings], timeout: Optional[float] = None) -> SuccessResponse:
         for config in configs:
-            await self.update_config(config)
+            await self.update_config(config, timeout=timeout)
         return SuccessResponse(success=True, message="Configs updated successfully")

-    async def get_configs(self) -> List[Settings]:
-        result = await self.get("providers/configs")
+    async def get_configs(self, timeout: Optional[float] = None) -> List[Settings]:
+        result = await self.get("providers/configs", timeout=timeout)
         return [Settings(**config) for config in result]

-    async def delete_config(self, config_uid: str) -> SuccessResponse:
-        result = await self.delete(f"providers/config/{config_uid}")
+    async def delete_config(self, config_uid: str, timeout: Optional[float] = None) -> SuccessResponse:
+        result = await self.delete(f"providers/config/{config_uid}", timeout=timeout)
         return SuccessResponse(**result)

     async def cleanup(self):
@@ -916,59 +1041,51 @@
         # Also close any realtime WebSocket client if it was created
         if self._realtime is not None:
             await self._realtime.disconnect()
-
-    # def __del__(self):
-    #     """
-    #     Destructor to clean up resources when the client is garbage collected.
-    #     This will close the HTTP client and attempt to delete configs if cleanup wasn't called.
-    #     Note: It's recommended to use the async context manager or call cleanup() explicitly.
-    #     """
-    #     # Warn user if cleanup wasn't called
-    #     if self.settings:
-    #         warnings.warn(
-    #             "LivellmClient is being garbage collected without explicit cleanup. "
-    #             "Provider configs may not be deleted from the server. "
-    #             "Consider using 'async with' or calling 'await client.cleanup()' explicitly.",
-    #             ResourceWarning,
-    #             stacklevel=2
-    #         )
-
-    #     # Close the httpx client synchronously
-    #     # httpx.AsyncClient stores a sync Transport that needs cleanup
-    #     try:
-    #         with httpx.Client(base_url=self.base_url) as client:
-    #             for config in self.settings:
-    #                 config: Settings = config
-    #                 client.delete(f"providers/config/{config.uid}", headers=self.headers)
-    #     except Exception:
-    #         # Silently fail - we're in a destructor
-    #         pass

     # Implement abstract methods from BaseLivellmClient

-    async def handle_agent_run(self, request: Union[AgentRequest, AgentFallbackRequest]) -> AgentResponse:
+    async def handle_agent_run(
+        self,
+        request: Union[AgentRequest, AgentFallbackRequest],
+        timeout: Optional[float] = None
+    ) -> AgentResponse:
         """Handle agent run via HTTP."""
-        result = await self.post(request.model_dump(), "agent/run", expect_json=True)
+        result = await self.post(request.model_dump(), "agent/run", expect_json=True, timeout=timeout)
         return AgentResponse(**result)

-    async def handle_agent_run_stream(self, request: Union[AgentRequest, AgentFallbackRequest]) -> AsyncIterator[AgentResponse]:
+    async def handle_agent_run_stream(
+        self,
+        request: Union[AgentRequest, AgentFallbackRequest],
+        timeout: Optional[float] = None
+    ) -> AsyncIterator[AgentResponse]:
         """Handle streaming agent run via HTTP."""
-        stream = await self.post(request.model_dump(), "agent/run_stream", expect_stream=True, expect_json=True)
+        stream = await self.post(request.model_dump(), "agent/run_stream", expect_stream=True, expect_json=True, timeout=timeout)
         async for chunk in stream:
             yield AgentResponse(**chunk)

-    async def handle_speak(self, request: Union[SpeakRequest, AudioFallbackRequest]) -> bytes:
+    async def handle_speak(
+        self,
+        request: Union[SpeakRequest, AudioFallbackRequest],
+        timeout: Optional[float] = None
+    ) -> bytes:
         """Handle speak request via HTTP."""
-        return await self.post(request.model_dump(), "audio/speak", expect_json=False)
+        return await self.post(request.model_dump(), "audio/speak", expect_json=False, timeout=timeout)

-    async def handle_speak_stream(self, request: Union[SpeakRequest, AudioFallbackRequest]) -> AsyncIterator[bytes]:
+    async def handle_speak_stream(
+        self,
+        request: Union[SpeakRequest, AudioFallbackRequest],
+        timeout: Optional[float] = None
+    ) -> AsyncIterator[bytes]:
         """Handle streaming speak request via HTTP."""
-        speak_stream = await self.post(request.model_dump(), "audio/speak_stream", expect_stream=True, expect_json=False)
+        speak_stream = await self.post(request.model_dump(), "audio/speak_stream", expect_stream=True, expect_json=False, timeout=timeout)
         async for chunk in speak_stream:
             yield chunk

-    async def handle_transcribe(self, request: Union[TranscribeRequest, TranscribeFallbackRequest]) -> TranscribeResponse:
+    async def handle_transcribe(
+        self,
+        request: Union[TranscribeRequest, TranscribeFallbackRequest],
+        timeout: Optional[float] = None
+    ) -> TranscribeResponse:
         """Handle transcribe request via HTTP."""
-        result = await self.post(request.model_dump(), "audio/transcribe_json", expect_json=True)
+        result = await self.post(request.model_dump(), "audio/transcribe_json", expect_json=True, timeout=timeout)
         return TranscribeResponse(**result)
-
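Taken together, these livellm.py changes give every public client method an optional `timeout` keyword that falls back to the client-level default via `_get_effective_timeout`. A minimal sketch of the calling pattern, assuming a server at a placeholder URL and a provider registered under the placeholder UID "openai" (neither value comes from this diff; the import path follows the package README):

```python
import asyncio

from livellm import LivellmClient          # import path as used in the package README
from livellm.models import TextMessage


async def main():
    # Client-level default: 30 seconds for every request unless overridden.
    client = LivellmClient(base_url="http://localhost:8000", timeout=30.0)

    # Health check with a tighter 5-second override for just this call.
    await client.ping(timeout=5.0)

    # Long-running agent call with a 120-second override; omitting `timeout`
    # would fall back to the 30-second default set above.
    response = await client.agent_run(
        provider_uid="openai",              # placeholder provider UID
        model="gpt-4",                      # placeholder model name
        messages=[TextMessage(role="user", content="Hello")],
        timeout=120.0,
    )
    print(response.output)

    await client.cleanup()


asyncio.run(main())
```

On the WebSocket client the same keyword is enforced with `asyncio.wait_for`, and an expired wait surfaces as a `TimeoutError`; on the HTTP client the value is forwarded to httpx on each request, which is why the underlying `AsyncClient` is now created without a default timeout.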
livellm/models/__init__.py
CHANGED
@@ -1,6 +1,6 @@
 from .common import BaseRequest, ProviderKind, Settings, SuccessResponse
 from .fallback import AgentFallbackRequest, AudioFallbackRequest, TranscribeFallbackRequest, FallbackStrategy
-from .agent.agent import AgentRequest, AgentResponse, AgentResponseUsage
+from .agent.agent import AgentRequest, AgentResponse, AgentResponseUsage, ContextOverflowStrategy
 from .agent.chat import Message, MessageRole, TextMessage, BinaryMessage, ToolCallMessage, ToolReturnMessage
 from .agent.tools import Tool, ToolInput, ToolKind, WebSearchInput, MCPStreamableServerInput
 from .agent.output_schema import OutputSchema, PropertyDef
@@ -24,6 +24,7 @@ __all__ = [
     "AgentRequest",
     "AgentResponse",
     "AgentResponseUsage",
+    "ContextOverflowStrategy",
     "Message",
     "MessageRole",
     "TextMessage",
livellm/models/agent/__init__.py
CHANGED
@@ -1,4 +1,4 @@
-from .agent import AgentRequest, AgentResponse, AgentResponseUsage
+from .agent import AgentRequest, AgentResponse, AgentResponseUsage, ContextOverflowStrategy
 from .chat import Message, MessageRole, TextMessage, BinaryMessage, ToolCallMessage, ToolReturnMessage
 from .tools import Tool, ToolInput, ToolKind, WebSearchInput, MCPStreamableServerInput
 from .output_schema import OutputSchema, PropertyDef
@@ -8,6 +8,7 @@ __all__ = [
     "AgentRequest",
     "AgentResponse",
     "AgentResponseUsage",
+    "ContextOverflowStrategy",
     "Message",
     "MessageRole",
     "TextMessage",
livellm/models/agent/agent.py
CHANGED
@@ -2,12 +2,19 @@

 from pydantic import BaseModel, Field
 from typing import Optional, List, Union, Any, Dict
+from enum import Enum
 from .chat import TextMessage, BinaryMessage, ToolCallMessage, ToolReturnMessage
 from .tools import WebSearchInput, MCPStreamableServerInput
 from .output_schema import OutputSchema, PropertyDef
 from ..common import BaseRequest


+class ContextOverflowStrategy(str, Enum):
+    """Strategy for handling context overflow when text exceeds context_limit."""
+    TRUNCATE = "truncate"  # Take beginning, middle, and end portions
+    RECYCLE = "recycle"  # Iteratively process chunks, merging results
+
+
 class AgentRequest(BaseRequest):
     model: str = Field(..., description="The model to use")
     messages: List[Union[TextMessage, BinaryMessage, ToolCallMessage, ToolReturnMessage]] = Field(..., description="The messages to use")
@@ -15,6 +22,8 @@ class AgentRequest(BaseRequest):
     gen_config: Optional[dict] = Field(default=None, description="The configuration for the generation")
     include_history: bool = Field(default=False, description="Whether to include full conversation history in the response")
     output_schema: Optional[Union[OutputSchema, Dict[str, Any]]] = Field(default=None, description="JSON schema for structured output. Can be an OutputSchema, a dict representing a JSON schema, or will be converted from a Pydantic BaseModel.")
+    context_limit: int = Field(default=0, description="Maximum context size in tokens. If <= 0, context overflow handling is disabled.")
+    context_overflow_strategy: ContextOverflowStrategy = Field(default=ContextOverflowStrategy.TRUNCATE, description="Strategy for handling context overflow: 'truncate' or 'recycle'")

 class AgentResponseUsage(BaseModel):
     input_tokens: int = Field(..., description="The number of input tokens used")
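Since `context_limit` and `context_overflow_strategy` are now plain fields on `AgentRequest`, they can also be set when building a request object rather than passed as keyword arguments. A small sketch, assuming a provider registered under the placeholder UID "openai" and a placeholder document string:

```python
from livellm.models import AgentRequest, ContextOverflowStrategy, TextMessage

very_long_document = "..."  # placeholder: text that may exceed the model's context window

request = AgentRequest(
    provider_uid="openai",   # placeholder provider UID
    model="gpt-4",           # placeholder model name
    messages=[
        TextMessage(role="system", content="Summarize the document."),
        TextMessage(role="user", content=very_long_document),
    ],
    context_limit=4000,      # <= 0 (the default) disables overflow handling
    context_overflow_strategy=ContextOverflowStrategy.TRUNCATE,
)

# The request object is then passed positionally, optionally with a per-request timeout:
# response = await client.agent_run(request, timeout=120.0)
```

As noted in the README changes below, the RECYCLE strategy additionally requires an `output_schema` so that per-chunk results can be merged into a single JSON output.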
{livellm-1.6.1.dist-info → livellm-1.7.2.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: livellm
-Version: 1.6.1
+Version: 1.7.2
 Summary: Python client for the LiveLLM Server
 Project-URL: Homepage, https://github.com/qalby-tech/livellm-client-py
 Project-URL: Repository, https://github.com/qalby-tech/livellm-client-py
@@ -36,6 +36,8 @@ Python client library for the LiveLLM Server - a unified proxy for AI agent, aud
 - 🔄 **Streaming** - Real-time streaming for agent and audio
 - 🛠️ **Flexible API** - Use request objects or keyword arguments
 - 📋 **Structured Output** - Get validated JSON responses with schema support (Pydantic, OutputSchema, or dict)
+- 📏 **Context Overflow Management** - Automatic handling of large texts with truncate/recycle strategies
+- ⏱️ **Per-Request Timeout** - Override default timeout for individual requests
 - 🎙️ **Audio services** - Text-to-speech and transcription
 - 🎤 **Real-Time Transcription** - WebSocket-based live audio transcription with bidirectional streaming
 - ⚡ **Fallback strategies** - Sequential and parallel handling
@@ -95,10 +97,10 @@ from livellm.models import Settings, ProviderKind
 # Basic
 client = LivellmClient(base_url="http://localhost:8000")

-# With timeout and pre-configured providers
+# With default timeout and pre-configured providers
 client = LivellmClient(
     base_url="http://localhost:8000",
-    timeout=30.0,
+    timeout=30.0,  # Default timeout for all requests
     configs=[
         Settings(
             uid="openai",
@@ -116,6 +118,50 @@ client = LivellmClient(
 )
 ```

+### Per-Request Timeout Override
+
+The timeout provided in `__init__` is the default, but you can override it for individual requests:
+
+```python
+# Client with 30s default timeout
+client = LivellmClient(base_url="http://localhost:8000", timeout=30.0)
+
+# Uses default 30s timeout
+response = await client.agent_run(
+    provider_uid="openai",
+    model="gpt-4",
+    messages=[TextMessage(role="user", content="Hello")]
+)
+
+# Override with 120s timeout for this specific request
+response = await client.agent_run(
+    provider_uid="openai",
+    model="gpt-4",
+    messages=[TextMessage(role="user", content="Write a long essay...")],
+    timeout=120.0  # Override for this request only
+)
+
+# Works with streaming too
+async for chunk in client.agent_run_stream(
+    provider_uid="openai",
+    model="gpt-4",
+    messages=[TextMessage(role="user", content="Tell me a story")],
+    timeout=300.0  # 5 minutes for streaming
+):
+    print(chunk.output, end="")
+
+# Works with all methods: speak(), speak_stream(), transcribe(), etc.
+audio = await client.speak(
+    provider_uid="openai",
+    model="tts-1",
+    text="Hello world",
+    voice="alloy",
+    mime_type=SpeakMimeType.MP3,
+    sample_rate=24000,
+    timeout=60.0
+)
+```
+
 ### Supported Providers

 `OPENAI` • `GOOGLE` • `ANTHROPIC` • `GROQ` • `ELEVENLABS`
@@ -439,6 +485,73 @@ data = json.loads(full_output)
 - Type-safe responses
 - Integration with type-checked code

+#### Context Overflow Management
+
+Handle large texts that exceed model context windows with automatic truncation or iterative processing:
+
+```python
+from livellm.models import TextMessage, ContextOverflowStrategy, OutputSchema, PropertyDef
+
+# TRUNCATE strategy (default): Preserves beginning, middle, and end
+# Works with both streaming and non-streaming
+response = await client.agent_run(
+    provider_uid="openai",
+    model="gpt-4",
+    messages=[
+        TextMessage(role="system", content="Summarize the document."),
+        TextMessage(role="user", content=very_long_document)
+    ],
+    context_limit=4000,  # Max tokens
+    context_overflow_strategy=ContextOverflowStrategy.TRUNCATE
+)
+
+# RECYCLE strategy: Iteratively processes chunks and merges results
+# Useful for extraction tasks - processes entire document
+# Requires output_schema for JSON merging
+output_schema = OutputSchema(
+    title="ExtractedInfo",
+    properties={
+        "topics": PropertyDef(type="array", items={"type": "string"}),
+        "key_figures": PropertyDef(type="array", items={"type": "string"})
+    },
+    required=["topics", "key_figures"]
+)
+
+response = await client.agent_run(
+    provider_uid="openai",
+    model="gpt-4",
+    messages=[
+        TextMessage(role="system", content="Extract all topics and key figures."),
+        TextMessage(role="user", content=very_long_document)
+    ],
+    context_limit=3000,
+    context_overflow_strategy=ContextOverflowStrategy.RECYCLE,
+    output_schema=output_schema
+)
+
+# Parse the merged results
+import json
+result = json.loads(response.output)
+print(f"Topics: {result['topics']}")
+print(f"Key figures: {result['key_figures']}")
+```
+
+**Strategy comparison:**
+
+| Strategy | How it works | Best for | Streaming |
+|----------|--------------|----------|-----------|
+| `TRUNCATE` | Takes beginning, middle, end portions | Summarization, Q&A | ✅ Yes |
+| `RECYCLE` | Processes chunks iteratively, merges JSON | Full document extraction | ❌ No |
+
+**Parameters:**
+- `context_limit` (int, default: 0) - Maximum tokens. If ≤ 0, overflow handling is disabled
+- `context_overflow_strategy` (ContextOverflowStrategy, default: TRUNCATE) - Strategy to use
+
+**Notes:**
+- System prompts are always preserved (never truncated)
+- Token counting includes a 20% safety buffer
+- RECYCLE requires `output_schema` for JSON merging
+
 ### Audio Services

 #### Text-to-Speech
@@ -711,20 +824,22 @@ response = await client.ping()

 ### Client Methods

+All methods accept an optional `timeout` parameter to override the default client timeout.
+
 **Configuration**
-- `ping()` - Health check
-- `update_config(config)` / `update_configs(configs)` - Add/update providers
-- `get_configs()` - List all configurations
-- `delete_config(uid)` - Remove provider
+- `ping(timeout?)` - Health check
+- `update_config(config, timeout?)` / `update_configs(configs, timeout?)` - Add/update providers
+- `get_configs(timeout?)` - List all configurations
+- `delete_config(uid, timeout?)` - Remove provider

 **Agent**
-- `agent_run(request | **kwargs)` - Run agent (blocking)
-- `agent_run_stream(request | **kwargs)` - Run agent (streaming)
+- `agent_run(request | **kwargs, timeout?)` - Run agent (blocking)
+- `agent_run_stream(request | **kwargs, timeout?)` - Run agent (streaming)

 **Audio**
-- `speak(request | **kwargs)` - Text-to-speech (blocking)
-- `speak_stream(request | **kwargs)` - Text-to-speech (streaming)
-- `transcribe(request | **kwargs)` - Speech-to-text
+- `speak(request | **kwargs, timeout?)` - Text-to-speech (blocking)
+- `speak_stream(request | **kwargs, timeout?)` - Text-to-speech (streaming)
+- `transcribe(request | **kwargs, timeout?)` - Speech-to-text

 **Real-Time Transcription (TranscriptionWsClient)**
 - `connect()` - Establish WebSocket connection
@@ -750,12 +865,15 @@ response = await client.ping()
 - `MessageRole` - `USER` | `MODEL` | `SYSTEM` | `TOOL_CALL` | `TOOL_RETURN` (or use strings)

 **Requests**
-- `AgentRequest(provider_uid, model, messages, tools?, gen_config?, include_history?, output_schema?)` - Set `include_history=True` to get full conversation. Set `output_schema` for structured JSON output.
+- `AgentRequest(provider_uid, model, messages, tools?, gen_config?, include_history?, output_schema?, context_limit?, context_overflow_strategy?)` - Set `include_history=True` to get full conversation. Set `output_schema` for structured JSON output. Set `context_limit` and `context_overflow_strategy` for handling large texts.
 - `SpeakRequest(provider_uid, model, text, voice, mime_type, sample_rate, gen_config?)`
 - `TranscribeRequest(provider_uid, file, model, language?, gen_config?)`
 - `TranscriptionInitWsRequest(provider_uid, model, language?, input_sample_rate?, input_audio_format?, gen_config?)`
 - `TranscriptionAudioChunkWsRequest(audio)` - Audio chunk for streaming

+**Context Overflow**
+- `ContextOverflowStrategy` - `TRUNCATE` | `RECYCLE`
+
 **Tools**
 - `WebSearchInput(kind=ToolKind.WEB_SEARCH, search_context_size)`
 - `MCPStreamableServerInput(kind=ToolKind.MCP_STREAMABLE_SERVER, url, prefix?, timeout?)`
{livellm-1.6.1.dist-info → livellm-1.7.2.dist-info}/RECORD
CHANGED
@@ -1,21 +1,21 @@
 livellm/__init__.py,sha256=p2Szx7PELGYi-PTnSNnRPGVbU438ZBTFXYAQoMToUfE,440
-livellm/livellm.py,sha256=
+livellm/livellm.py,sha256=rb1ZGX2CiBDfEAqD-VQnD5JHqwxmM3q__ywpTW_eJvQ,42403
 livellm/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 livellm/transcripton.py,sha256=oxUxagfPNQYPjyizcWW4Rjp7Doqh9at9mkOuxx3cuvo,6167
-livellm/models/__init__.py,sha256=
+livellm/models/__init__.py,sha256=WSg3GFnGRdmI9DKjCYgZItNQFEAhd3zvBjCkdUPpFqI,1685
 livellm/models/common.py,sha256=nx0w7Th9xeHeQg6Ssxi7jEEh3aEcGyzGAP0uk9l072c,1772
 livellm/models/fallback.py,sha256=zGG_MjdbaTx0fqKZTEg3ullej-CJznPfwaon0jEvRvI,1170
 livellm/models/transcription.py,sha256=sACAywecLfuQdFmwy0OYP-in8pe-caCrHw2hKNTR5XI,1765
 livellm/models/ws.py,sha256=OCoJwAjQLOz6ErTiTtb-qD22N4wSsEGvi_4JQqCHIPQ,1111
-livellm/models/agent/__init__.py,sha256=
-livellm/models/agent/agent.py,sha256=
+livellm/models/agent/__init__.py,sha256=ivWS8ILFAvoPYB26To43kiTpzXKXisY0DKfb_AZSVvU,708
+livellm/models/agent/agent.py,sha256=1u70ge282pshcQiygFHbgui1RNuTh4fUue66u6gTnDI,2483
 livellm/models/agent/chat.py,sha256=VxdHTbJELMffxJJUSTdhT4behFbVq5XNyBLeg75wpsU,1632
 livellm/models/agent/output_schema.py,sha256=jWCWqIfRMgNUvbPVt8NrYqiyuNSrbzKypHFC3rHu-Qc,5763
 livellm/models/agent/tools.py,sha256=wVWfx6_jxL3IcmX_Nt_PonZ3RQLtpfqJnszHz32BQiU,1403
 livellm/models/audio/__init__.py,sha256=sz2NxCOfFGVvp-XQUsdgOR_TYBO1Wb-8LLXaZDEiAZk,282
 livellm/models/audio/speak.py,sha256=lDITZ7fiLRuDhA-LxCPQ6Yraxr33B6Lg7VyR4CkuGk8,1872
 livellm/models/audio/transcribe.py,sha256=Leji2lk5zfq4GE-fw-z2dZR8BuijzW8TJ12GHw_UZJY,2085
-livellm-1.
-livellm-1.
-livellm-1.
-livellm-1.
+livellm-1.7.2.dist-info/METADATA,sha256=tYn64Ak9wNGfxDRW5xouUXodY_xWevRhu3jiIuVwk-c,29709
+livellm-1.7.2.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+livellm-1.7.2.dist-info/licenses/LICENSE,sha256=yapGO2C_00ymEx6TADdbU8Oyc1bWOrZY-fjP-agmFL4,1071
+livellm-1.7.2.dist-info/RECORD,,
{livellm-1.6.1.dist-info → livellm-1.7.2.dist-info}/WHEEL
File without changes
{livellm-1.6.1.dist-info → livellm-1.7.2.dist-info}/licenses/LICENSE
File without changes