livellm 1.6.1__tar.gz → 1.7.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: livellm
- Version: 1.6.1
+ Version: 1.7.1
  Summary: Python client for the LiveLLM Server
  Project-URL: Homepage, https://github.com/qalby-tech/livellm-client-py
  Project-URL: Repository, https://github.com/qalby-tech/livellm-client-py
@@ -36,6 +36,8 @@ Python client library for the LiveLLM Server - a unified proxy for AI agent, aud
  - 🔄 **Streaming** - Real-time streaming for agent and audio
  - 🛠️ **Flexible API** - Use request objects or keyword arguments
  - 📋 **Structured Output** - Get validated JSON responses with schema support (Pydantic, OutputSchema, or dict)
+ - 📏 **Context Overflow Management** - Automatic handling of large texts with truncate/recycle strategies
+ - ⏱️ **Per-Request Timeout** - Override default timeout for individual requests
  - 🎙️ **Audio services** - Text-to-speech and transcription
  - 🎤 **Real-Time Transcription** - WebSocket-based live audio transcription with bidirectional streaming
  - ⚡ **Fallback strategies** - Sequential and parallel handling
@@ -95,10 +97,10 @@ from livellm.models import Settings, ProviderKind
  # Basic
  client = LivellmClient(base_url="http://localhost:8000")

- # With timeout and pre-configured providers
+ # With default timeout and pre-configured providers
  client = LivellmClient(
  base_url="http://localhost:8000",
- timeout=30.0,
+ timeout=30.0, # Default timeout for all requests
  configs=[
  Settings(
  uid="openai",
@@ -116,6 +118,50 @@ client = LivellmClient(
  )
  ```

+ ### Per-Request Timeout Override
+
+ The timeout provided in `__init__` is the default, but you can override it for individual requests:
+
+ ```python
+ # Client with 30s default timeout
+ client = LivellmClient(base_url="http://localhost:8000", timeout=30.0)
+
+ # Uses default 30s timeout
+ response = await client.agent_run(
+ provider_uid="openai",
+ model="gpt-4",
+ messages=[TextMessage(role="user", content="Hello")]
+ )
+
+ # Override with 120s timeout for this specific request
+ response = await client.agent_run(
+ provider_uid="openai",
+ model="gpt-4",
+ messages=[TextMessage(role="user", content="Write a long essay...")],
+ timeout=120.0 # Override for this request only
+ )
+
+ # Works with streaming too
+ async for chunk in client.agent_run_stream(
+ provider_uid="openai",
+ model="gpt-4",
+ messages=[TextMessage(role="user", content="Tell me a story")],
+ timeout=300.0 # 5 minutes for streaming
+ ):
+ print(chunk.output, end="")
+
+ # Works with all methods: speak(), speak_stream(), transcribe(), etc.
+ audio = await client.speak(
+ provider_uid="openai",
+ model="tts-1",
+ text="Hello world",
+ voice="alloy",
+ mime_type=SpeakMimeType.MP3,
+ sample_rate=24000,
+ timeout=60.0
+ )
+ ```
+
  ### Supported Providers

  `OPENAI` • `GOOGLE` • `ANTHROPIC` • `GROQ` • `ELEVENLABS`
@@ -439,6 +485,73 @@ data = json.loads(full_output)
  - Type-safe responses
  - Integration with type-checked code

+ #### Context Overflow Management
+
+ Handle large texts that exceed model context windows with automatic truncation or iterative processing:
+
+ ```python
+ from livellm.models import TextMessage, ContextOverflowStrategy, OutputSchema, PropertyDef
+
+ # TRUNCATE strategy (default): Preserves beginning, middle, and end
+ # Works with both streaming and non-streaming
+ response = await client.agent_run(
+ provider_uid="openai",
+ model="gpt-4",
+ messages=[
+ TextMessage(role="system", content="Summarize the document."),
+ TextMessage(role="user", content=very_long_document)
+ ],
+ context_limit=4000, # Max tokens
+ context_overflow_strategy=ContextOverflowStrategy.TRUNCATE
+ )
+
+ # RECYCLE strategy: Iteratively processes chunks and merges results
+ # Useful for extraction tasks - processes entire document
+ # Requires output_schema for JSON merging
+ output_schema = OutputSchema(
+ title="ExtractedInfo",
+ properties={
+ "topics": PropertyDef(type="array", items={"type": "string"}),
+ "key_figures": PropertyDef(type="array", items={"type": "string"})
+ },
+ required=["topics", "key_figures"]
+ )
+
+ response = await client.agent_run(
+ provider_uid="openai",
+ model="gpt-4",
+ messages=[
+ TextMessage(role="system", content="Extract all topics and key figures."),
+ TextMessage(role="user", content=very_long_document)
+ ],
+ context_limit=3000,
+ context_overflow_strategy=ContextOverflowStrategy.RECYCLE,
+ output_schema=output_schema
+ )
+
+ # Parse the merged results
+ import json
+ result = json.loads(response.output)
+ print(f"Topics: {result['topics']}")
+ print(f"Key figures: {result['key_figures']}")
+ ```
+
+ **Strategy comparison:**
+
+ | Strategy | How it works | Best for | Streaming |
+ |----------|--------------|----------|-----------|
+ | `TRUNCATE` | Takes beginning, middle, end portions | Summarization, Q&A | ✅ Yes |
+ | `RECYCLE` | Processes chunks iteratively, merges JSON | Full document extraction | ❌ No |
+
+ **Parameters:**
+ - `context_limit` (int, default: 0) - Maximum tokens. If ≤ 0, overflow handling is disabled
+ - `context_overflow_strategy` (ContextOverflowStrategy, default: TRUNCATE) - Strategy to use
+
+ **Notes:**
+ - System prompts are always preserved (never truncated)
+ - Token counting includes a 20% safety buffer
+ - RECYCLE requires `output_schema` for JSON merging
+
  ### Audio Services

  #### Text-to-Speech
@@ -711,20 +824,22 @@ response = await client.ping()

  ### Client Methods

+ All methods accept an optional `timeout` parameter to override the default client timeout.
+
  **Configuration**
- - `ping()` - Health check
- - `update_config(config)` / `update_configs(configs)` - Add/update providers
- - `get_configs()` - List all configurations
- - `delete_config(uid)` - Remove provider
+ - `ping(timeout?)` - Health check
+ - `update_config(config, timeout?)` / `update_configs(configs, timeout?)` - Add/update providers
+ - `get_configs(timeout?)` - List all configurations
+ - `delete_config(uid, timeout?)` - Remove provider

  **Agent**
- - `agent_run(request | **kwargs)` - Run agent (blocking)
- - `agent_run_stream(request | **kwargs)` - Run agent (streaming)
+ - `agent_run(request | **kwargs, timeout?)` - Run agent (blocking)
+ - `agent_run_stream(request | **kwargs, timeout?)` - Run agent (streaming)

  **Audio**
- - `speak(request | **kwargs)` - Text-to-speech (blocking)
- - `speak_stream(request | **kwargs)` - Text-to-speech (streaming)
- - `transcribe(request | **kwargs)` - Speech-to-text
+ - `speak(request | **kwargs, timeout?)` - Text-to-speech (blocking)
+ - `speak_stream(request | **kwargs, timeout?)` - Text-to-speech (streaming)
+ - `transcribe(request | **kwargs, timeout?)` - Speech-to-text

  **Real-Time Transcription (TranscriptionWsClient)**
  - `connect()` - Establish WebSocket connection
@@ -750,12 +865,15 @@ response = await client.ping()
  - `MessageRole` - `USER` | `MODEL` | `SYSTEM` | `TOOL_CALL` | `TOOL_RETURN` (or use strings)

  **Requests**
- - `AgentRequest(provider_uid, model, messages, tools?, gen_config?, include_history?, output_schema?)` - Set `include_history=True` to get full conversation. Set `output_schema` for structured JSON output.
+ - `AgentRequest(provider_uid, model, messages, tools?, gen_config?, include_history?, output_schema?, context_limit?, context_overflow_strategy?)` - Set `include_history=True` to get full conversation. Set `output_schema` for structured JSON output. Set `context_limit` and `context_overflow_strategy` for handling large texts.
  - `SpeakRequest(provider_uid, model, text, voice, mime_type, sample_rate, gen_config?)`
  - `TranscribeRequest(provider_uid, file, model, language?, gen_config?)`
  - `TranscriptionInitWsRequest(provider_uid, model, language?, input_sample_rate?, input_audio_format?, gen_config?)`
  - `TranscriptionAudioChunkWsRequest(audio)` - Audio chunk for streaming

+ **Context Overflow**
+ - `ContextOverflowStrategy` - `TRUNCATE` | `RECYCLE`
+
  **Tools**
  - `WebSearchInput(kind=ToolKind.WEB_SEARCH, search_context_size)`
  - `MCPStreamableServerInput(kind=ToolKind.MCP_STREAMABLE_SERVER, url, prefix?, timeout?)`
@@ -13,6 +13,8 @@ Python client library for the LiveLLM Server - a unified proxy for AI agent, aud
  - 🔄 **Streaming** - Real-time streaming for agent and audio
  - 🛠️ **Flexible API** - Use request objects or keyword arguments
  - 📋 **Structured Output** - Get validated JSON responses with schema support (Pydantic, OutputSchema, or dict)
+ - 📏 **Context Overflow Management** - Automatic handling of large texts with truncate/recycle strategies
+ - ⏱️ **Per-Request Timeout** - Override default timeout for individual requests
  - 🎙️ **Audio services** - Text-to-speech and transcription
  - 🎤 **Real-Time Transcription** - WebSocket-based live audio transcription with bidirectional streaming
  - ⚡ **Fallback strategies** - Sequential and parallel handling
@@ -72,10 +74,10 @@ from livellm.models import Settings, ProviderKind
  # Basic
  client = LivellmClient(base_url="http://localhost:8000")

- # With timeout and pre-configured providers
+ # With default timeout and pre-configured providers
  client = LivellmClient(
  base_url="http://localhost:8000",
- timeout=30.0,
+ timeout=30.0, # Default timeout for all requests
  configs=[
  Settings(
  uid="openai",
@@ -93,6 +95,50 @@ client = LivellmClient(
  )
  ```

+ ### Per-Request Timeout Override
+
+ The timeout provided in `__init__` is the default, but you can override it for individual requests:
+
+ ```python
+ # Client with 30s default timeout
+ client = LivellmClient(base_url="http://localhost:8000", timeout=30.0)
+
+ # Uses default 30s timeout
+ response = await client.agent_run(
+ provider_uid="openai",
+ model="gpt-4",
+ messages=[TextMessage(role="user", content="Hello")]
+ )
+
+ # Override with 120s timeout for this specific request
+ response = await client.agent_run(
+ provider_uid="openai",
+ model="gpt-4",
+ messages=[TextMessage(role="user", content="Write a long essay...")],
+ timeout=120.0 # Override for this request only
+ )
+
+ # Works with streaming too
+ async for chunk in client.agent_run_stream(
+ provider_uid="openai",
+ model="gpt-4",
+ messages=[TextMessage(role="user", content="Tell me a story")],
+ timeout=300.0 # 5 minutes for streaming
+ ):
+ print(chunk.output, end="")
+
+ # Works with all methods: speak(), speak_stream(), transcribe(), etc.
+ audio = await client.speak(
+ provider_uid="openai",
+ model="tts-1",
+ text="Hello world",
+ voice="alloy",
+ mime_type=SpeakMimeType.MP3,
+ sample_rate=24000,
+ timeout=60.0
+ )
+ ```
+
  ### Supported Providers

  `OPENAI` • `GOOGLE` • `ANTHROPIC` • `GROQ` • `ELEVENLABS`
@@ -416,6 +462,73 @@ data = json.loads(full_output)
  - Type-safe responses
  - Integration with type-checked code

+ #### Context Overflow Management
+
+ Handle large texts that exceed model context windows with automatic truncation or iterative processing:
+
+ ```python
+ from livellm.models import TextMessage, ContextOverflowStrategy, OutputSchema, PropertyDef
+
+ # TRUNCATE strategy (default): Preserves beginning, middle, and end
+ # Works with both streaming and non-streaming
+ response = await client.agent_run(
+ provider_uid="openai",
+ model="gpt-4",
+ messages=[
+ TextMessage(role="system", content="Summarize the document."),
+ TextMessage(role="user", content=very_long_document)
+ ],
+ context_limit=4000, # Max tokens
+ context_overflow_strategy=ContextOverflowStrategy.TRUNCATE
+ )
+
+ # RECYCLE strategy: Iteratively processes chunks and merges results
+ # Useful for extraction tasks - processes entire document
+ # Requires output_schema for JSON merging
+ output_schema = OutputSchema(
+ title="ExtractedInfo",
+ properties={
+ "topics": PropertyDef(type="array", items={"type": "string"}),
+ "key_figures": PropertyDef(type="array", items={"type": "string"})
+ },
+ required=["topics", "key_figures"]
+ )
+
+ response = await client.agent_run(
+ provider_uid="openai",
+ model="gpt-4",
+ messages=[
+ TextMessage(role="system", content="Extract all topics and key figures."),
+ TextMessage(role="user", content=very_long_document)
+ ],
+ context_limit=3000,
+ context_overflow_strategy=ContextOverflowStrategy.RECYCLE,
+ output_schema=output_schema
+ )
+
+ # Parse the merged results
+ import json
+ result = json.loads(response.output)
+ print(f"Topics: {result['topics']}")
+ print(f"Key figures: {result['key_figures']}")
+ ```
+
+ **Strategy comparison:**
+
+ | Strategy | How it works | Best for | Streaming |
+ |----------|--------------|----------|-----------|
+ | `TRUNCATE` | Takes beginning, middle, end portions | Summarization, Q&A | ✅ Yes |
+ | `RECYCLE` | Processes chunks iteratively, merges JSON | Full document extraction | ❌ No |
+
+ **Parameters:**
+ - `context_limit` (int, default: 0) - Maximum tokens. If ≤ 0, overflow handling is disabled
+ - `context_overflow_strategy` (ContextOverflowStrategy, default: TRUNCATE) - Strategy to use
+
+ **Notes:**
+ - System prompts are always preserved (never truncated)
+ - Token counting includes a 20% safety buffer
+ - RECYCLE requires `output_schema` for JSON merging
+
  ### Audio Services

  #### Text-to-Speech
@@ -688,20 +801,22 @@ response = await client.ping()

  ### Client Methods

+ All methods accept an optional `timeout` parameter to override the default client timeout.
+
  **Configuration**
- - `ping()` - Health check
- - `update_config(config)` / `update_configs(configs)` - Add/update providers
- - `get_configs()` - List all configurations
- - `delete_config(uid)` - Remove provider
+ - `ping(timeout?)` - Health check
+ - `update_config(config, timeout?)` / `update_configs(configs, timeout?)` - Add/update providers
+ - `get_configs(timeout?)` - List all configurations
+ - `delete_config(uid, timeout?)` - Remove provider

  **Agent**
- - `agent_run(request | **kwargs)` - Run agent (blocking)
- - `agent_run_stream(request | **kwargs)` - Run agent (streaming)
+ - `agent_run(request | **kwargs, timeout?)` - Run agent (blocking)
+ - `agent_run_stream(request | **kwargs, timeout?)` - Run agent (streaming)

  **Audio**
- - `speak(request | **kwargs)` - Text-to-speech (blocking)
- - `speak_stream(request | **kwargs)` - Text-to-speech (streaming)
- - `transcribe(request | **kwargs)` - Speech-to-text
+ - `speak(request | **kwargs, timeout?)` - Text-to-speech (blocking)
+ - `speak_stream(request | **kwargs, timeout?)` - Text-to-speech (streaming)
+ - `transcribe(request | **kwargs, timeout?)` - Speech-to-text

  **Real-Time Transcription (TranscriptionWsClient)**
  - `connect()` - Establish WebSocket connection
@@ -727,12 +842,15 @@ response = await client.ping()
  - `MessageRole` - `USER` | `MODEL` | `SYSTEM` | `TOOL_CALL` | `TOOL_RETURN` (or use strings)

  **Requests**
- - `AgentRequest(provider_uid, model, messages, tools?, gen_config?, include_history?, output_schema?)` - Set `include_history=True` to get full conversation. Set `output_schema` for structured JSON output.
+ - `AgentRequest(provider_uid, model, messages, tools?, gen_config?, include_history?, output_schema?, context_limit?, context_overflow_strategy?)` - Set `include_history=True` to get full conversation. Set `output_schema` for structured JSON output. Set `context_limit` and `context_overflow_strategy` for handling large texts.
  - `SpeakRequest(provider_uid, model, text, voice, mime_type, sample_rate, gen_config?)`
  - `TranscribeRequest(provider_uid, file, model, language?, gen_config?)`
  - `TranscriptionInitWsRequest(provider_uid, model, language?, input_sample_rate?, input_audio_format?, gen_config?)`
  - `TranscriptionAudioChunkWsRequest(audio)` - Audio chunk for streaming

+ **Context Overflow**
+ - `ContextOverflowStrategy` - `TRUNCATE` | `RECYCLE`
+
  **Tools**
  - `WebSearchInput(kind=ToolKind.WEB_SEARCH, search_context_size)`
  - `MCPStreamableServerInput(kind=ToolKind.MCP_STREAMABLE_SERVER, url, prefix?, timeout?)`
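Before the source changes below, a brief sketch of how the timeout-aware configuration helpers listed above might be called. This is illustrative only: the import path for `LivellmClient` and the server URL are assumptions, not taken from this diff.

```python
import asyncio
from livellm import LivellmClient  # assumed import path; adjust to your install

async def main() -> None:
    client = LivellmClient(base_url="http://localhost:8000", timeout=30.0)
    try:
        await client.ping(timeout=5.0)                    # quick health check
        configs = await client.get_configs(timeout=10.0)  # allow a bit longer here
        print(f"{len(configs)} provider config(s) registered")
    finally:
        await client.cleanup()

asyncio.run(main())
```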
@@ -31,10 +31,15 @@ DEFAULT_USER_AGENT = f"livellm-python/{__version__}"

  class BaseLivellmClient(ABC):

+ # Default timeout (set by subclasses)
+ timeout: Optional[float] = None
+
  @overload
  async def agent_run(
  self,
  request: Union[AgentRequest, AgentFallbackRequest],
+ *,
+ timeout: Optional[float] = None,
  ) -> AgentResponse:
  ...

@@ -48,13 +53,18 @@ class BaseLivellmClient(ABC):
  tools: Optional[list] = None,
  include_history: bool = False,
  output_schema: Optional[Union[OutputSchema, Dict[str, Any], Type[BaseModel]]] = None,
+ timeout: Optional[float] = None,
  **kwargs
  ) -> AgentResponse:
  ...


  @abstractmethod
- async def handle_agent_run(self, request: Union[AgentRequest, AgentFallbackRequest]) -> AgentResponse:
+ async def handle_agent_run(
+ self,
+ request: Union[AgentRequest, AgentFallbackRequest],
+ timeout: Optional[float] = None
+ ) -> AgentResponse:
  ...

  async def agent_run(
@@ -67,6 +77,7 @@ class BaseLivellmClient(ABC):
  tools: Optional[list] = None,
  include_history: bool = False,
  output_schema: Optional[Union[OutputSchema, Dict[str, Any], Type[BaseModel]]] = None,
+ timeout: Optional[float] = None,
  **kwargs
  ) -> AgentResponse:
  """
@@ -100,6 +111,7 @@ class BaseLivellmClient(ABC):
  - An OutputSchema instance
  - A dict representing a JSON schema
  - A Pydantic BaseModel class (will be converted to OutputSchema)
+ timeout: Optional timeout in seconds (overrides default client timeout)

  Returns:
  AgentResponse with the agent's output. If output_schema was provided,
@@ -111,7 +123,7 @@ class BaseLivellmClient(ABC):
  raise TypeError(
  f"First positional argument must be AgentRequest or AgentFallbackRequest, got {type(request)}"
  )
- return await self.handle_agent_run(request)
+ return await self.handle_agent_run(request, timeout=timeout)

  # Otherwise, use keyword arguments
  if provider_uid is None or model is None or messages is None:
@@ -132,7 +144,7 @@ class BaseLivellmClient(ABC):
  include_history=include_history,
  output_schema=resolved_schema
  )
- return await self.handle_agent_run(agent_request)
+ return await self.handle_agent_run(agent_request, timeout=timeout)

  def _resolve_output_schema(
  self,
@@ -157,6 +169,8 @@ class BaseLivellmClient(ABC):
  def agent_run_stream(
  self,
  request: Union[AgentRequest, AgentFallbackRequest],
+ *,
+ timeout: Optional[float] = None,
  ) -> AsyncIterator[AgentResponse]:
  ...

@@ -170,13 +184,18 @@ class BaseLivellmClient(ABC):
  tools: Optional[list] = None,
  include_history: bool = False,
  output_schema: Optional[Union[OutputSchema, Dict[str, Any], Type[BaseModel]]] = None,
+ timeout: Optional[float] = None,
  **kwargs
  ) -> AsyncIterator[AgentResponse]:
  ...


  @abstractmethod
- async def handle_agent_run_stream(self, request: Union[AgentRequest, AgentFallbackRequest]) -> AsyncIterator[AgentResponse]:
+ async def handle_agent_run_stream(
+ self,
+ request: Union[AgentRequest, AgentFallbackRequest],
+ timeout: Optional[float] = None
+ ) -> AsyncIterator[AgentResponse]:
  ...

  async def agent_run_stream(
@@ -189,6 +208,7 @@ class BaseLivellmClient(ABC):
  tools: Optional[list] = None,
  include_history: bool = False,
  output_schema: Optional[Union[OutputSchema, Dict[str, Any], Type[BaseModel]]] = None,
+ timeout: Optional[float] = None,
  **kwargs
  ) -> AsyncIterator[AgentResponse]:
  """
@@ -225,6 +245,7 @@ class BaseLivellmClient(ABC):
  - An OutputSchema instance
  - A dict representing a JSON schema
  - A Pydantic BaseModel class (will be converted to OutputSchema)
+ timeout: Optional timeout in seconds (overrides default client timeout)

  Returns:
  AsyncIterator of AgentResponse chunks. If output_schema was provided,
@@ -236,7 +257,7 @@ class BaseLivellmClient(ABC):
  raise TypeError(
  f"First positional argument must be AgentRequest or AgentFallbackRequest, got {type(request)}"
  )
- stream = self.handle_agent_run_stream(request)
+ stream = self.handle_agent_run_stream(request, timeout=timeout)
  else:
  # Otherwise, use keyword arguments
  if provider_uid is None or model is None or messages is None:
@@ -257,7 +278,7 @@ class BaseLivellmClient(ABC):
  include_history=include_history,
  output_schema=resolved_schema
  )
- stream = self.handle_agent_run_stream(agent_request)
+ stream = self.handle_agent_run_stream(agent_request, timeout=timeout)

  async for chunk in stream:
  yield chunk
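The overloads in the hunks above define two calling conventions for `agent_run` / `agent_run_stream`: a positional request object with `timeout` as a keyword-only argument, or plain keyword arguments from which the client builds the request. A short sketch of both styles; the client is assumed to be constructed as in the Quick start, and the uid, model, and prompt are placeholders:

```python
from livellm.models import AgentRequest, TextMessage

async def run_both_styles(client) -> None:
    # Style 1: pass a prebuilt request object; timeout is keyword-only here.
    request = AgentRequest(
        provider_uid="openai",
        model="gpt-4",
        messages=[TextMessage(role="user", content="Hello")],
    )
    await client.agent_run(request, timeout=45.0)

    # Style 2: keyword arguments; the client assembles the AgentRequest itself.
    await client.agent_run(
        provider_uid="openai",
        model="gpt-4",
        messages=[TextMessage(role="user", content="Hello")],
        timeout=45.0,
    )
```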
@@ -266,6 +287,8 @@ class BaseLivellmClient(ABC):
  async def speak(
  self,
  request: Union[SpeakRequest, AudioFallbackRequest],
+ *,
+ timeout: Optional[float] = None,
  ) -> bytes:
  ...

@@ -280,13 +303,18 @@ class BaseLivellmClient(ABC):
  mime_type: str,
  sample_rate: int,
  chunk_size: int = 20,
+ timeout: Optional[float] = None,
  **kwargs
  ) -> bytes:
  ...


  @abstractmethod
- async def handle_speak(self, request: Union[SpeakRequest, AudioFallbackRequest]) -> bytes:
+ async def handle_speak(
+ self,
+ request: Union[SpeakRequest, AudioFallbackRequest],
+ timeout: Optional[float] = None
+ ) -> bytes:
  ...

  async def speak(
@@ -300,6 +328,7 @@ class BaseLivellmClient(ABC):
  mime_type: Optional[str] = None,
  sample_rate: Optional[int] = None,
  chunk_size: int = 20,
+ timeout: Optional[float] = None,
  **kwargs
  ) -> bytes:
  """
@@ -330,6 +359,7 @@ class BaseLivellmClient(ABC):
  mime_type: The MIME type of the output audio
  sample_rate: The sample rate of the output audio
  chunk_size: Chunk size in milliseconds (default: 20ms)
+ timeout: Optional timeout in seconds (overrides default client timeout)
  gen_config: Optional generation configuration

  Returns:
@@ -341,7 +371,7 @@ class BaseLivellmClient(ABC):
  raise TypeError(
  f"First positional argument must be SpeakRequest or AudioFallbackRequest, got {type(request)}"
  )
- return await self.handle_speak(request)
+ return await self.handle_speak(request, timeout=timeout)

  # Otherwise, use keyword arguments
  if provider_uid is None or model is None or text is None or voice is None or mime_type is None or sample_rate is None:
@@ -360,12 +390,14 @@ class BaseLivellmClient(ABC):
  chunk_size=chunk_size,
  gen_config=kwargs or None
  )
- return await self.handle_speak(speak_request)
+ return await self.handle_speak(speak_request, timeout=timeout)

  @overload
  def speak_stream(
  self,
  request: Union[SpeakRequest, AudioFallbackRequest],
+ *,
+ timeout: Optional[float] = None,
  ) -> AsyncIterator[bytes]:
  ...

@@ -380,13 +412,18 @@ class BaseLivellmClient(ABC):
  mime_type: str,
  sample_rate: int,
  chunk_size: int = 20,
+ timeout: Optional[float] = None,
  **kwargs
  ) -> AsyncIterator[bytes]:
  ...


  @abstractmethod
- async def handle_speak_stream(self, request: Union[SpeakRequest, AudioFallbackRequest]) -> AsyncIterator[bytes]:
+ async def handle_speak_stream(
+ self,
+ request: Union[SpeakRequest, AudioFallbackRequest],
+ timeout: Optional[float] = None
+ ) -> AsyncIterator[bytes]:
  ...

  async def speak_stream(
@@ -400,6 +437,7 @@ class BaseLivellmClient(ABC):
  mime_type: Optional[str] = None,
  sample_rate: Optional[int] = None,
  chunk_size: int = 20,
+ timeout: Optional[float] = None,
  **kwargs
  ) -> AsyncIterator[bytes]:
  """
@@ -433,6 +471,7 @@ class BaseLivellmClient(ABC):
  mime_type: The MIME type of the output audio
  sample_rate: The sample rate of the output audio
  chunk_size: Chunk size in milliseconds (default: 20ms)
+ timeout: Optional timeout in seconds (overrides default client timeout)
  gen_config: Optional generation configuration

  Returns:
@@ -444,7 +483,7 @@ class BaseLivellmClient(ABC):
  raise TypeError(
  f"First positional argument must be SpeakRequest or AudioFallbackRequest, got {type(request)}"
  )
- speak_stream = self.handle_speak_stream(request)
+ speak_stream = self.handle_speak_stream(request, timeout=timeout)
  else:
  # Otherwise, use keyword arguments
  if provider_uid is None or model is None or text is None or voice is None or mime_type is None or sample_rate is None:
@@ -463,7 +502,7 @@ class BaseLivellmClient(ABC):
  chunk_size=chunk_size,
  gen_config=kwargs or None
  )
- speak_stream = self.handle_speak_stream(speak_request)
+ speak_stream = self.handle_speak_stream(speak_request, timeout=timeout)
  async for chunk in speak_stream:
  yield chunk

@@ -471,6 +510,8 @@ class BaseLivellmClient(ABC):
  async def transcribe(
  self,
  request: Union[TranscribeRequest, TranscribeFallbackRequest],
+ *,
+ timeout: Optional[float] = None,
  ) -> TranscribeResponse:
  ...

@@ -482,13 +523,18 @@ class BaseLivellmClient(ABC):
  file: File,
  model: str,
  language: Optional[str] = None,
+ timeout: Optional[float] = None,
  **kwargs
  ) -> TranscribeResponse:
  ...


  @abstractmethod
- async def handle_transcribe(self, request: Union[TranscribeRequest, TranscribeFallbackRequest]) -> TranscribeResponse:
+ async def handle_transcribe(
+ self,
+ request: Union[TranscribeRequest, TranscribeFallbackRequest],
+ timeout: Optional[float] = None
+ ) -> TranscribeResponse:
  ...

  async def transcribe(
@@ -499,6 +545,7 @@ class BaseLivellmClient(ABC):
  file: Optional[File] = None,
  model: Optional[str] = None,
  language: Optional[str] = None,
+ timeout: Optional[float] = None,
  **kwargs
  ) -> TranscribeResponse:
  """
@@ -522,6 +569,7 @@ class BaseLivellmClient(ABC):
  file: The audio file as a tuple (filename, content, content_type)
  model: The model to use for transcription
  language: Optional language code
+ timeout: Optional timeout in seconds (overrides default client timeout)
  gen_config: Optional generation configuration

  Returns:
@@ -534,7 +582,7 @@ class BaseLivellmClient(ABC):
  f"First positional argument must be TranscribeRequest or TranscribeFallbackRequest, got {type(request)}"
  )
  # JSON-based request
- return await self.handle_transcribe(request)
+ return await self.handle_transcribe(request, timeout=timeout)

  # Otherwise, use keyword arguments with multipart form-data request
  if provider_uid is None or file is None or model is None:
@@ -550,7 +598,7 @@ class BaseLivellmClient(ABC):
  language=language,
  gen_config=kwargs or None
  )
- return await self.handle_transcribe(transcribe_request)
+ return await self.handle_transcribe(transcribe_request, timeout=timeout)


  class LivellmWsClient(BaseLivellmClient):
@@ -628,7 +676,11 @@ class LivellmWsClient(BaseLivellmClient):
  self.__listen_for_responses_task = None
  self.sessions.clear()

- async def get_response(self, action: WsAction, payload: dict) -> dict:
+ def _get_effective_timeout(self, timeout: Optional[float]) -> Optional[float]:
+ """Get effective timeout: per-request timeout overrides default."""
+ return timeout if timeout is not None else self.timeout
+
+ async def get_response(self, action: WsAction, payload: dict, timeout: Optional[float] = None) -> dict:
  """Send a request and wait for response."""
  if self.websocket is None:
  await self.connect()
@@ -638,7 +690,17 @@ class LivellmWsClient(BaseLivellmClient):
  q = await self.get_or_update_session(session_id)
  await self.websocket.send(json.dumps(request.model_dump()))

- response: WsResponse = await q.get()
+ effective_timeout = self._get_effective_timeout(timeout)
+
+ try:
+ if effective_timeout:
+ response: WsResponse = await asyncio.wait_for(q.get(), timeout=effective_timeout)
+ else:
+ response: WsResponse = await q.get()
+ except asyncio.TimeoutError:
+ self.sessions.pop(session_id, None)
+ raise TimeoutError(f"Request timed out after {effective_timeout} seconds")
+
  self.sessions.pop(session_id)
  if response.status == WsStatus.ERROR:
  raise Exception(f"WebSocket failed: {response.error}")
@@ -647,7 +709,7 @@ class LivellmWsClient(BaseLivellmClient):
  else:
  raise Exception(f"WebSocket failed with unknown status: {response}")

- async def get_response_stream(self, action: WsAction, payload: dict) -> AsyncIterator[dict]:
+ async def get_response_stream(self, action: WsAction, payload: dict, timeout: Optional[float] = None) -> AsyncIterator[dict]:
  """Send a request and stream responses."""
  if self.websocket is None:
  await self.connect()
@@ -657,8 +719,17 @@ class LivellmWsClient(BaseLivellmClient):
  q = await self.get_or_update_session(session_id)
  await self.websocket.send(json.dumps(request.model_dump()))

+ effective_timeout = self._get_effective_timeout(timeout)
+
  while True:
- response: WsResponse = await q.get()
+ try:
+ if effective_timeout:
+ response: WsResponse = await asyncio.wait_for(q.get(), timeout=effective_timeout)
+ else:
+ response: WsResponse = await q.get()
+ except asyncio.TimeoutError:
+ self.sessions.pop(session_id, None)
+ raise TimeoutError(f"Request timed out after {effective_timeout} seconds")

  if response.status == WsStatus.STREAMING:
  yield response.data
@@ -674,37 +745,60 @@ class LivellmWsClient(BaseLivellmClient):

  # Implement abstract methods from BaseLivellmClient

- async def handle_agent_run(self, request: Union[AgentRequest, AgentFallbackRequest]) -> AgentResponse:
+ async def handle_agent_run(
+ self,
+ request: Union[AgentRequest, AgentFallbackRequest],
+ timeout: Optional[float] = None
+ ) -> AgentResponse:
  """Handle agent run via WebSocket."""
  response = await self.get_response(
  WsAction.AGENT_RUN,
- request.model_dump()
+ request.model_dump(),
+ timeout=timeout
  )
  return AgentResponse(**response)

- async def handle_agent_run_stream(self, request: Union[AgentRequest, AgentFallbackRequest]) -> AsyncIterator[AgentResponse]:
+ async def handle_agent_run_stream(
+ self,
+ request: Union[AgentRequest, AgentFallbackRequest],
+ timeout: Optional[float] = None
+ ) -> AsyncIterator[AgentResponse]:
  """Handle streaming agent run via WebSocket."""
- async for response in self.get_response_stream(WsAction.AGENT_RUN_STREAM, request.model_dump()):
+ async for response in self.get_response_stream(WsAction.AGENT_RUN_STREAM, request.model_dump(), timeout=timeout):
  yield AgentResponse(**response)

- async def handle_speak(self, request: Union[SpeakRequest, AudioFallbackRequest]) -> bytes:
+ async def handle_speak(
+ self,
+ request: Union[SpeakRequest, AudioFallbackRequest],
+ timeout: Optional[float] = None
+ ) -> bytes:
  """Handle speak request via WebSocket."""
  response = await self.get_response(
  WsAction.AUDIO_SPEAK,
- request.model_dump()
+ request.model_dump(),
+ timeout=timeout
  )
  return EncodedSpeakResponse(**response).audio

- async def handle_speak_stream(self, request: Union[SpeakRequest, AudioFallbackRequest]) -> AsyncIterator[bytes]:
+ async def handle_speak_stream(
+ self,
+ request: Union[SpeakRequest, AudioFallbackRequest],
+ timeout: Optional[float] = None
+ ) -> AsyncIterator[bytes]:
  """Handle streaming speak request via WebSocket."""
- async for response in self.get_response_stream(WsAction.AUDIO_SPEAK_STREAM, request.model_dump()):
+ async for response in self.get_response_stream(WsAction.AUDIO_SPEAK_STREAM, request.model_dump(), timeout=timeout):
  yield EncodedSpeakResponse(**response).audio

- async def handle_transcribe(self, request: Union[TranscribeRequest, TranscribeFallbackRequest]) -> TranscribeResponse:
+ async def handle_transcribe(
+ self,
+ request: Union[TranscribeRequest, TranscribeFallbackRequest],
+ timeout: Optional[float] = None
+ ) -> TranscribeResponse:
  """Handle transcribe request via WebSocket."""
  response = await self.get_response(
  WsAction.AUDIO_TRANSCRIBE,
- request.model_dump()
+ request.model_dump(),
+ timeout=timeout
  )
  return TranscribeResponse(**response)

@@ -747,8 +841,8 @@ class LivellmClient(BaseLivellmClient):
  self.base_url = f"{self._root_base_url}/livellm"
  self.timeout = timeout
  self.user_agent = user_agent or DEFAULT_USER_AGENT
- self.client = httpx.AsyncClient(base_url=self.base_url, timeout=self.timeout) \
- if self.timeout else httpx.AsyncClient(base_url=self.base_url)
+ # Create client without timeout - we'll pass timeout per-request
+ self.client = httpx.AsyncClient(base_url=self.base_url)
  self.settings = []
  self.headers = {
  "Content-Type": "application/json",
@@ -759,6 +853,10 @@ class LivellmClient(BaseLivellmClient):
  if configs:
  self.update_configs_post_init(configs)

+ def _get_effective_timeout(self, timeout: Optional[float]) -> Optional[float]:
+ """Get effective timeout: per-request timeout overrides default."""
+ return timeout if timeout is not None else self.timeout
+
  @property
  def realtime(self) -> LivellmWsClient:
  """
@@ -789,15 +887,17 @@ class LivellmClient(BaseLivellmClient):
  return SuccessResponse(success=True, message="Configs updated successfully")


- async def delete(self, endpoint: str) -> dict:
+ async def delete(self, endpoint: str, timeout: Optional[float] = None) -> dict:
  """
  Delete a resource from the given endpoint and return the response.
  Args:
  endpoint: The endpoint to delete from.
+ timeout: Optional timeout override.
  Returns:
  The response from the endpoint.
  """
- response = await self.client.delete(endpoint, headers=self.headers)
+ effective_timeout = self._get_effective_timeout(timeout)
+ response = await self.client.delete(endpoint, headers=self.headers, timeout=effective_timeout)
  response.raise_for_status()
  return response.json()

@@ -805,7 +905,8 @@ class LivellmClient(BaseLivellmClient):
  self,
  files: dict,
  data: dict,
- endpoint: str
+ endpoint: str,
+ timeout: Optional[float] = None
  ) -> dict:
  """
  Post a multipart request to the given endpoint and return the response.
@@ -813,27 +914,32 @@ class LivellmClient(BaseLivellmClient):
  files: The files to send in the request.
  data: The data to send in the request.
  endpoint: The endpoint to post to.
+ timeout: Optional timeout override.
  Returns:
  The response from the endpoint.
  """
+ effective_timeout = self._get_effective_timeout(timeout)
  # Don't pass Content-Type header for multipart - httpx will set it automatically
- response = await self.client.post(endpoint, files=files, data=data)
+ response = await self.client.post(endpoint, files=files, data=data, timeout=effective_timeout)
  response.raise_for_status()
  return response.json()


  async def get(
  self,
- endpoint: str
+ endpoint: str,
+ timeout: Optional[float] = None
  ) -> dict:
  """
  Get a request from the given endpoint and return the response.
  Args:
  endpoint: The endpoint to get from.
+ timeout: Optional timeout override.
  Returns:
  The response from the endpoint.
  """
- response = await self.client.get(endpoint, headers=self.headers)
+ effective_timeout = self._get_effective_timeout(timeout)
+ response = await self.client.get(endpoint, headers=self.headers, timeout=effective_timeout)
  response.raise_for_status()
  return response.json()

@@ -842,7 +948,8 @@ class LivellmClient(BaseLivellmClient):
  json_data: dict,
  endpoint: str,
  expect_stream: bool = False,
- expect_json: bool = True
+ expect_json: bool = True,
+ timeout: Optional[float] = None
  ) -> Union[dict, bytes, AsyncIterator[Union[dict, bytes]]]:
  """
  Post a request to the given endpoint and return the response.
@@ -854,12 +961,14 @@ class LivellmClient(BaseLivellmClient):
  endpoint: The endpoint to post to.
  expect_stream: Whether to expect a stream response.
  expect_json: Whether to expect a JSON response.
+ timeout: Optional timeout override.
  Returns:
  The response from the endpoint.
  Raises:
  Exception: If the response is not 200 or 201.
  """
- response = await self.client.post(endpoint, json=json_data, headers=self.headers)
+ effective_timeout = self._get_effective_timeout(timeout)
+ response = await self.client.post(endpoint, json=json_data, headers=self.headers, timeout=effective_timeout)
  if response.status_code not in [200, 201]:
  error_response = await response.aread()
  error_response = error_response.decode("utf-8")
@@ -882,26 +991,26 @@ class LivellmClient(BaseLivellmClient):
  else:
  return response.content

- async def ping(self) -> SuccessResponse:
- result = await self.get("ping")
+ async def ping(self, timeout: Optional[float] = None) -> SuccessResponse:
+ result = await self.get("ping", timeout=timeout)
  return SuccessResponse(**result)

- async def update_config(self, config: Settings) -> SuccessResponse:
- result = await self.post(config.model_dump(), "providers/config", expect_json=True)
+ async def update_config(self, config: Settings, timeout: Optional[float] = None) -> SuccessResponse:
+ result = await self.post(config.model_dump(), "providers/config", expect_json=True, timeout=timeout)
  self.settings.append(config)
  return SuccessResponse(**result)

- async def update_configs(self, configs: List[Settings]) -> SuccessResponse:
+ async def update_configs(self, configs: List[Settings], timeout: Optional[float] = None) -> SuccessResponse:
  for config in configs:
- await self.update_config(config)
+ await self.update_config(config, timeout=timeout)
  return SuccessResponse(success=True, message="Configs updated successfully")

- async def get_configs(self) -> List[Settings]:
- result = await self.get("providers/configs")
+ async def get_configs(self, timeout: Optional[float] = None) -> List[Settings]:
+ result = await self.get("providers/configs", timeout=timeout)
  return [Settings(**config) for config in result]

- async def delete_config(self, config_uid: str) -> SuccessResponse:
- result = await self.delete(f"providers/config/{config_uid}")
+ async def delete_config(self, config_uid: str, timeout: Optional[float] = None) -> SuccessResponse:
+ result = await self.delete(f"providers/config/{config_uid}", timeout=timeout)
  return SuccessResponse(**result)

  async def cleanup(self):
@@ -916,59 +1025,51 @@ class LivellmClient(BaseLivellmClient):
  # Also close any realtime WebSocket client if it was created
  if self._realtime is not None:
  await self._realtime.disconnect()
-
- # def __del__(self):
- # """
- # Destructor to clean up resources when the client is garbage collected.
- # This will close the HTTP client and attempt to delete configs if cleanup wasn't called.
- # Note: It's recommended to use the async context manager or call cleanup() explicitly.
- # """
- # # Warn user if cleanup wasn't called
- # if self.settings:
- # warnings.warn(
- # "LivellmClient is being garbage collected without explicit cleanup. "
- # "Provider configs may not be deleted from the server. "
- # "Consider using 'async with' or calling 'await client.cleanup()' explicitly.",
- # ResourceWarning,
- # stacklevel=2
- # )
-
- # # Close the httpx client synchronously
- # # httpx.AsyncClient stores a sync Transport that needs cleanup
- # try:
- # with httpx.Client(base_url=self.base_url) as client:
- # for config in self.settings:
- # config: Settings = config
- # client.delete(f"providers/config/{config.uid}", headers=self.headers)
- # except Exception:
- # # Silently fail - we're in a destructor
- # pass

  # Implement abstract methods from BaseLivellmClient

- async def handle_agent_run(self, request: Union[AgentRequest, AgentFallbackRequest]) -> AgentResponse:
+ async def handle_agent_run(
+ self,
+ request: Union[AgentRequest, AgentFallbackRequest],
+ timeout: Optional[float] = None
+ ) -> AgentResponse:
  """Handle agent run via HTTP."""
- result = await self.post(request.model_dump(), "agent/run", expect_json=True)
+ result = await self.post(request.model_dump(), "agent/run", expect_json=True, timeout=timeout)
  return AgentResponse(**result)

- async def handle_agent_run_stream(self, request: Union[AgentRequest, AgentFallbackRequest]) -> AsyncIterator[AgentResponse]:
+ async def handle_agent_run_stream(
+ self,
+ request: Union[AgentRequest, AgentFallbackRequest],
+ timeout: Optional[float] = None
+ ) -> AsyncIterator[AgentResponse]:
  """Handle streaming agent run via HTTP."""
- stream = await self.post(request.model_dump(), "agent/run_stream", expect_stream=True, expect_json=True)
+ stream = await self.post(request.model_dump(), "agent/run_stream", expect_stream=True, expect_json=True, timeout=timeout)
  async for chunk in stream:
  yield AgentResponse(**chunk)

- async def handle_speak(self, request: Union[SpeakRequest, AudioFallbackRequest]) -> bytes:
+ async def handle_speak(
+ self,
+ request: Union[SpeakRequest, AudioFallbackRequest],
+ timeout: Optional[float] = None
+ ) -> bytes:
  """Handle speak request via HTTP."""
- return await self.post(request.model_dump(), "audio/speak", expect_json=False)
+ return await self.post(request.model_dump(), "audio/speak", expect_json=False, timeout=timeout)

- async def handle_speak_stream(self, request: Union[SpeakRequest, AudioFallbackRequest]) -> AsyncIterator[bytes]:
+ async def handle_speak_stream(
+ self,
+ request: Union[SpeakRequest, AudioFallbackRequest],
+ timeout: Optional[float] = None
+ ) -> AsyncIterator[bytes]:
  """Handle streaming speak request via HTTP."""
- speak_stream = await self.post(request.model_dump(), "audio/speak_stream", expect_stream=True, expect_json=False)
+ speak_stream = await self.post(request.model_dump(), "audio/speak_stream", expect_stream=True, expect_json=False, timeout=timeout)
  async for chunk in speak_stream:
  yield chunk

- async def handle_transcribe(self, request: Union[TranscribeRequest, TranscribeFallbackRequest]) -> TranscribeResponse:
+ async def handle_transcribe(
+ self,
+ request: Union[TranscribeRequest, TranscribeFallbackRequest],
+ timeout: Optional[float] = None
+ ) -> TranscribeResponse:
  """Handle transcribe request via HTTP."""
- result = await self.post(request.model_dump(), "audio/transcribe_json", expect_json=True)
+ result = await self.post(request.model_dump(), "audio/transcribe_json", expect_json=True, timeout=timeout)
  return TranscribeResponse(**result)
-
@@ -1,6 +1,6 @@
  from .common import BaseRequest, ProviderKind, Settings, SuccessResponse
  from .fallback import AgentFallbackRequest, AudioFallbackRequest, TranscribeFallbackRequest, FallbackStrategy
- from .agent.agent import AgentRequest, AgentResponse, AgentResponseUsage
+ from .agent.agent import AgentRequest, AgentResponse, AgentResponseUsage, ContextOverflowStrategy
  from .agent.chat import Message, MessageRole, TextMessage, BinaryMessage, ToolCallMessage, ToolReturnMessage
  from .agent.tools import Tool, ToolInput, ToolKind, WebSearchInput, MCPStreamableServerInput
  from .agent.output_schema import OutputSchema, PropertyDef
@@ -24,6 +24,7 @@ __all__ = [
  "AgentRequest",
  "AgentResponse",
  "AgentResponseUsage",
+ "ContextOverflowStrategy",
  "Message",
  "MessageRole",
  "TextMessage",
@@ -1,4 +1,4 @@
- from .agent import AgentRequest, AgentResponse, AgentResponseUsage
+ from .agent import AgentRequest, AgentResponse, AgentResponseUsage, ContextOverflowStrategy
  from .chat import Message, MessageRole, TextMessage, BinaryMessage, ToolCallMessage, ToolReturnMessage
  from .tools import Tool, ToolInput, ToolKind, WebSearchInput, MCPStreamableServerInput
  from .output_schema import OutputSchema, PropertyDef
@@ -8,6 +8,7 @@ __all__ = [
  "AgentRequest",
  "AgentResponse",
  "AgentResponseUsage",
+ "ContextOverflowStrategy",
  "Message",
  "MessageRole",
  "TextMessage",
@@ -2,12 +2,19 @@

  from pydantic import BaseModel, Field
  from typing import Optional, List, Union, Any, Dict
+ from enum import Enum
  from .chat import TextMessage, BinaryMessage, ToolCallMessage, ToolReturnMessage
  from .tools import WebSearchInput, MCPStreamableServerInput
  from .output_schema import OutputSchema, PropertyDef
  from ..common import BaseRequest


+ class ContextOverflowStrategy(str, Enum):
+ """Strategy for handling context overflow when text exceeds context_limit."""
+ TRUNCATE = "truncate" # Take beginning, middle, and end portions
+ RECYCLE = "recycle" # Iteratively process chunks, merging results
+
+
  class AgentRequest(BaseRequest):
  model: str = Field(..., description="The model to use")
  messages: List[Union[TextMessage, BinaryMessage, ToolCallMessage, ToolReturnMessage]] = Field(..., description="The messages to use")
@@ -15,6 +22,8 @@ class AgentRequest(BaseRequest):
  gen_config: Optional[dict] = Field(default=None, description="The configuration for the generation")
  include_history: bool = Field(default=False, description="Whether to include full conversation history in the response")
  output_schema: Optional[Union[OutputSchema, Dict[str, Any]]] = Field(default=None, description="JSON schema for structured output. Can be an OutputSchema, a dict representing a JSON schema, or will be converted from a Pydantic BaseModel.")
+ context_limit: int = Field(default=0, description="Maximum context size in tokens. If <= 0, context overflow handling is disabled.")
+ context_overflow_strategy: ContextOverflowStrategy = Field(default=ContextOverflowStrategy.TRUNCATE, description="Strategy for handling context overflow: 'truncate' or 'recycle'")

  class AgentResponseUsage(BaseModel):
  input_tokens: int = Field(..., description="The number of input tokens used")
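To show how the two new `AgentRequest` fields fit together, here is a small sketch that constructs a request with overflow handling enabled. It assumes the `livellm` package is installed; the provider uid, model name, and message text are placeholders:

```python
from livellm.models import AgentRequest, TextMessage, ContextOverflowStrategy

# context_limit defaults to 0, which leaves overflow handling disabled,
# so it must be set to a positive token budget to opt in.
request = AgentRequest(
    provider_uid="openai",  # placeholder: uid of a provider configured on the server
    model="gpt-4",
    messages=[
        TextMessage(role="system", content="Summarize the document."),
        TextMessage(role="user", content="... very long document text ..."),
    ],
    context_limit=4000,
    context_overflow_strategy=ContextOverflowStrategy.TRUNCATE,  # or .RECYCLE (requires output_schema)
)

print(request.context_overflow_strategy.value)  # "truncate" - the str enum serializes to its value
```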
@@ -1,6 +1,6 @@
  [project]
  name = "livellm"
- version = "1.6.1"
+ version = "1.7.1"
  description = "Python client for the LiveLLM Server"
  readme = "README.md"
  requires-python = ">=3.10"