livellm 1.6.1__tar.gz → 1.7.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {livellm-1.6.1 → livellm-1.7.1}/PKG-INFO +131 -13
- {livellm-1.6.1 → livellm-1.7.1}/README.md +130 -12
- {livellm-1.6.1 → livellm-1.7.1}/livellm/livellm.py +188 -87
- {livellm-1.6.1 → livellm-1.7.1}/livellm/models/__init__.py +2 -1
- {livellm-1.6.1 → livellm-1.7.1}/livellm/models/agent/__init__.py +2 -1
- {livellm-1.6.1 → livellm-1.7.1}/livellm/models/agent/agent.py +9 -0
- {livellm-1.6.1 → livellm-1.7.1}/pyproject.toml +1 -1
- {livellm-1.6.1 → livellm-1.7.1}/.gitignore +0 -0
- {livellm-1.6.1 → livellm-1.7.1}/LICENSE +0 -0
- {livellm-1.6.1 → livellm-1.7.1}/livellm/__init__.py +0 -0
- {livellm-1.6.1 → livellm-1.7.1}/livellm/models/agent/chat.py +0 -0
- {livellm-1.6.1 → livellm-1.7.1}/livellm/models/agent/output_schema.py +0 -0
- {livellm-1.6.1 → livellm-1.7.1}/livellm/models/agent/tools.py +0 -0
- {livellm-1.6.1 → livellm-1.7.1}/livellm/models/audio/__init__.py +0 -0
- {livellm-1.6.1 → livellm-1.7.1}/livellm/models/audio/speak.py +0 -0
- {livellm-1.6.1 → livellm-1.7.1}/livellm/models/audio/transcribe.py +0 -0
- {livellm-1.6.1 → livellm-1.7.1}/livellm/models/common.py +0 -0
- {livellm-1.6.1 → livellm-1.7.1}/livellm/models/fallback.py +0 -0
- {livellm-1.6.1 → livellm-1.7.1}/livellm/models/transcription.py +0 -0
- {livellm-1.6.1 → livellm-1.7.1}/livellm/models/ws.py +0 -0
- {livellm-1.6.1 → livellm-1.7.1}/livellm/py.typed +0 -0
- {livellm-1.6.1 → livellm-1.7.1}/livellm/transcripton.py +0 -0
{livellm-1.6.1 → livellm-1.7.1}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: livellm
-Version: 1.6.1
+Version: 1.7.1
 Summary: Python client for the LiveLLM Server
 Project-URL: Homepage, https://github.com/qalby-tech/livellm-client-py
 Project-URL: Repository, https://github.com/qalby-tech/livellm-client-py
@@ -36,6 +36,8 @@ Python client library for the LiveLLM Server - a unified proxy for AI agent, aud
 - 🔄 **Streaming** - Real-time streaming for agent and audio
 - 🛠️ **Flexible API** - Use request objects or keyword arguments
 - 📋 **Structured Output** - Get validated JSON responses with schema support (Pydantic, OutputSchema, or dict)
+- 📏 **Context Overflow Management** - Automatic handling of large texts with truncate/recycle strategies
+- ⏱️ **Per-Request Timeout** - Override default timeout for individual requests
 - 🎙️ **Audio services** - Text-to-speech and transcription
 - 🎤 **Real-Time Transcription** - WebSocket-based live audio transcription with bidirectional streaming
 - ⚡ **Fallback strategies** - Sequential and parallel handling
@@ -95,10 +97,10 @@ from livellm.models import Settings, ProviderKind
 # Basic
 client = LivellmClient(base_url="http://localhost:8000")

-# With timeout and pre-configured providers
+# With default timeout and pre-configured providers
 client = LivellmClient(
     base_url="http://localhost:8000",
-    timeout=30.0,
+    timeout=30.0,  # Default timeout for all requests
     configs=[
         Settings(
             uid="openai",
@@ -116,6 +118,50 @@ client = LivellmClient(
 )
 ```

+### Per-Request Timeout Override
+
+The timeout provided in `__init__` is the default, but you can override it for individual requests:
+
+```python
+# Client with 30s default timeout
+client = LivellmClient(base_url="http://localhost:8000", timeout=30.0)
+
+# Uses default 30s timeout
+response = await client.agent_run(
+    provider_uid="openai",
+    model="gpt-4",
+    messages=[TextMessage(role="user", content="Hello")]
+)
+
+# Override with 120s timeout for this specific request
+response = await client.agent_run(
+    provider_uid="openai",
+    model="gpt-4",
+    messages=[TextMessage(role="user", content="Write a long essay...")],
+    timeout=120.0  # Override for this request only
+)
+
+# Works with streaming too
+async for chunk in client.agent_run_stream(
+    provider_uid="openai",
+    model="gpt-4",
+    messages=[TextMessage(role="user", content="Tell me a story")],
+    timeout=300.0  # 5 minutes for streaming
+):
+    print(chunk.output, end="")
+
+# Works with all methods: speak(), speak_stream(), transcribe(), etc.
+audio = await client.speak(
+    provider_uid="openai",
+    model="tts-1",
+    text="Hello world",
+    voice="alloy",
+    mime_type=SpeakMimeType.MP3,
+    sample_rate=24000,
+    timeout=60.0
+)
+```
+
 ### Supported Providers

 `OPENAI` • `GOOGLE` • `ANTHROPIC` • `GROQ` • `ELEVENLABS`
@@ -439,6 +485,73 @@ data = json.loads(full_output)
 - Type-safe responses
 - Integration with type-checked code

+#### Context Overflow Management
+
+Handle large texts that exceed model context windows with automatic truncation or iterative processing:
+
+```python
+from livellm.models import TextMessage, ContextOverflowStrategy, OutputSchema, PropertyDef
+
+# TRUNCATE strategy (default): Preserves beginning, middle, and end
+# Works with both streaming and non-streaming
+response = await client.agent_run(
+    provider_uid="openai",
+    model="gpt-4",
+    messages=[
+        TextMessage(role="system", content="Summarize the document."),
+        TextMessage(role="user", content=very_long_document)
+    ],
+    context_limit=4000,  # Max tokens
+    context_overflow_strategy=ContextOverflowStrategy.TRUNCATE
+)
+
+# RECYCLE strategy: Iteratively processes chunks and merges results
+# Useful for extraction tasks - processes entire document
+# Requires output_schema for JSON merging
+output_schema = OutputSchema(
+    title="ExtractedInfo",
+    properties={
+        "topics": PropertyDef(type="array", items={"type": "string"}),
+        "key_figures": PropertyDef(type="array", items={"type": "string"})
+    },
+    required=["topics", "key_figures"]
+)
+
+response = await client.agent_run(
+    provider_uid="openai",
+    model="gpt-4",
+    messages=[
+        TextMessage(role="system", content="Extract all topics and key figures."),
+        TextMessage(role="user", content=very_long_document)
+    ],
+    context_limit=3000,
+    context_overflow_strategy=ContextOverflowStrategy.RECYCLE,
+    output_schema=output_schema
+)
+
+# Parse the merged results
+import json
+result = json.loads(response.output)
+print(f"Topics: {result['topics']}")
+print(f"Key figures: {result['key_figures']}")
+```
+
+**Strategy comparison:**
+
+| Strategy | How it works | Best for | Streaming |
+|----------|--------------|----------|-----------|
+| `TRUNCATE` | Takes beginning, middle, end portions | Summarization, Q&A | ✅ Yes |
+| `RECYCLE` | Processes chunks iteratively, merges JSON | Full document extraction | ❌ No |
+
+**Parameters:**
+- `context_limit` (int, default: 0) - Maximum tokens. If ≤ 0, overflow handling is disabled
+- `context_overflow_strategy` (ContextOverflowStrategy, default: TRUNCATE) - Strategy to use
+
+**Notes:**
+- System prompts are always preserved (never truncated)
+- Token counting includes a 20% safety buffer
+- RECYCLE requires `output_schema` for JSON merging
+
 ### Audio Services

 #### Text-to-Speech
@@ -711,20 +824,22 @@ response = await client.ping()

 ### Client Methods

+All methods accept an optional `timeout` parameter to override the default client timeout.
+
 **Configuration**
-- `ping()` - Health check
-- `update_config(config)` / `update_configs(configs)` - Add/update providers
-- `get_configs()` - List all configurations
-- `delete_config(uid)` - Remove provider
+- `ping(timeout?)` - Health check
+- `update_config(config, timeout?)` / `update_configs(configs, timeout?)` - Add/update providers
+- `get_configs(timeout?)` - List all configurations
+- `delete_config(uid, timeout?)` - Remove provider

 **Agent**
-- `agent_run(request | **kwargs)` - Run agent (blocking)
-- `agent_run_stream(request | **kwargs)` - Run agent (streaming)
+- `agent_run(request | **kwargs, timeout?)` - Run agent (blocking)
+- `agent_run_stream(request | **kwargs, timeout?)` - Run agent (streaming)

 **Audio**
-- `speak(request | **kwargs)` - Text-to-speech (blocking)
-- `speak_stream(request | **kwargs)` - Text-to-speech (streaming)
-- `transcribe(request | **kwargs)` - Speech-to-text
+- `speak(request | **kwargs, timeout?)` - Text-to-speech (blocking)
+- `speak_stream(request | **kwargs, timeout?)` - Text-to-speech (streaming)
+- `transcribe(request | **kwargs, timeout?)` - Speech-to-text

 **Real-Time Transcription (TranscriptionWsClient)**
 - `connect()` - Establish WebSocket connection
@@ -750,12 +865,15 @@ response = await client.ping()
 - `MessageRole` - `USER` | `MODEL` | `SYSTEM` | `TOOL_CALL` | `TOOL_RETURN` (or use strings)

 **Requests**
-- `AgentRequest(provider_uid, model, messages, tools?, gen_config?, include_history?, output_schema?)` - Set `include_history=True` to get full conversation. Set `output_schema` for structured JSON output.
+- `AgentRequest(provider_uid, model, messages, tools?, gen_config?, include_history?, output_schema?, context_limit?, context_overflow_strategy?)` - Set `include_history=True` to get full conversation. Set `output_schema` for structured JSON output. Set `context_limit` and `context_overflow_strategy` for handling large texts.
 - `SpeakRequest(provider_uid, model, text, voice, mime_type, sample_rate, gen_config?)`
 - `TranscribeRequest(provider_uid, file, model, language?, gen_config?)`
 - `TranscriptionInitWsRequest(provider_uid, model, language?, input_sample_rate?, input_audio_format?, gen_config?)`
 - `TranscriptionAudioChunkWsRequest(audio)` - Audio chunk for streaming

+**Context Overflow**
+- `ContextOverflowStrategy` - `TRUNCATE` | `RECYCLE`
+
 **Tools**
 - `WebSearchInput(kind=ToolKind.WEB_SEARCH, search_context_size)`
 - `MCPStreamableServerInput(kind=ToolKind.MCP_STREAMABLE_SERVER, url, prefix?, timeout?)`
{livellm-1.6.1 → livellm-1.7.1}/README.md

@@ -13,6 +13,8 @@ Python client library for the LiveLLM Server - a unified proxy for AI agent, aud
 - 🔄 **Streaming** - Real-time streaming for agent and audio
 - 🛠️ **Flexible API** - Use request objects or keyword arguments
 - 📋 **Structured Output** - Get validated JSON responses with schema support (Pydantic, OutputSchema, or dict)
+- 📏 **Context Overflow Management** - Automatic handling of large texts with truncate/recycle strategies
+- ⏱️ **Per-Request Timeout** - Override default timeout for individual requests
 - 🎙️ **Audio services** - Text-to-speech and transcription
 - 🎤 **Real-Time Transcription** - WebSocket-based live audio transcription with bidirectional streaming
 - ⚡ **Fallback strategies** - Sequential and parallel handling
@@ -72,10 +74,10 @@ from livellm.models import Settings, ProviderKind
 # Basic
 client = LivellmClient(base_url="http://localhost:8000")

-# With timeout and pre-configured providers
+# With default timeout and pre-configured providers
 client = LivellmClient(
     base_url="http://localhost:8000",
-    timeout=30.0,
+    timeout=30.0,  # Default timeout for all requests
     configs=[
         Settings(
             uid="openai",
@@ -93,6 +95,50 @@ client = LivellmClient(
 )
 ```

+### Per-Request Timeout Override
+
+The timeout provided in `__init__` is the default, but you can override it for individual requests:
+
+```python
+# Client with 30s default timeout
+client = LivellmClient(base_url="http://localhost:8000", timeout=30.0)
+
+# Uses default 30s timeout
+response = await client.agent_run(
+    provider_uid="openai",
+    model="gpt-4",
+    messages=[TextMessage(role="user", content="Hello")]
+)
+
+# Override with 120s timeout for this specific request
+response = await client.agent_run(
+    provider_uid="openai",
+    model="gpt-4",
+    messages=[TextMessage(role="user", content="Write a long essay...")],
+    timeout=120.0  # Override for this request only
+)
+
+# Works with streaming too
+async for chunk in client.agent_run_stream(
+    provider_uid="openai",
+    model="gpt-4",
+    messages=[TextMessage(role="user", content="Tell me a story")],
+    timeout=300.0  # 5 minutes for streaming
+):
+    print(chunk.output, end="")
+
+# Works with all methods: speak(), speak_stream(), transcribe(), etc.
+audio = await client.speak(
+    provider_uid="openai",
+    model="tts-1",
+    text="Hello world",
+    voice="alloy",
+    mime_type=SpeakMimeType.MP3,
+    sample_rate=24000,
+    timeout=60.0
+)
+```
+
 ### Supported Providers

 `OPENAI` • `GOOGLE` • `ANTHROPIC` • `GROQ` • `ELEVENLABS`
@@ -416,6 +462,73 @@ data = json.loads(full_output)
 - Type-safe responses
 - Integration with type-checked code

+#### Context Overflow Management
+
+Handle large texts that exceed model context windows with automatic truncation or iterative processing:
+
+```python
+from livellm.models import TextMessage, ContextOverflowStrategy, OutputSchema, PropertyDef
+
+# TRUNCATE strategy (default): Preserves beginning, middle, and end
+# Works with both streaming and non-streaming
+response = await client.agent_run(
+    provider_uid="openai",
+    model="gpt-4",
+    messages=[
+        TextMessage(role="system", content="Summarize the document."),
+        TextMessage(role="user", content=very_long_document)
+    ],
+    context_limit=4000,  # Max tokens
+    context_overflow_strategy=ContextOverflowStrategy.TRUNCATE
+)
+
+# RECYCLE strategy: Iteratively processes chunks and merges results
+# Useful for extraction tasks - processes entire document
+# Requires output_schema for JSON merging
+output_schema = OutputSchema(
+    title="ExtractedInfo",
+    properties={
+        "topics": PropertyDef(type="array", items={"type": "string"}),
+        "key_figures": PropertyDef(type="array", items={"type": "string"})
+    },
+    required=["topics", "key_figures"]
+)
+
+response = await client.agent_run(
+    provider_uid="openai",
+    model="gpt-4",
+    messages=[
+        TextMessage(role="system", content="Extract all topics and key figures."),
+        TextMessage(role="user", content=very_long_document)
+    ],
+    context_limit=3000,
+    context_overflow_strategy=ContextOverflowStrategy.RECYCLE,
+    output_schema=output_schema
+)
+
+# Parse the merged results
+import json
+result = json.loads(response.output)
+print(f"Topics: {result['topics']}")
+print(f"Key figures: {result['key_figures']}")
+```
+
+**Strategy comparison:**
+
+| Strategy | How it works | Best for | Streaming |
+|----------|--------------|----------|-----------|
+| `TRUNCATE` | Takes beginning, middle, end portions | Summarization, Q&A | ✅ Yes |
+| `RECYCLE` | Processes chunks iteratively, merges JSON | Full document extraction | ❌ No |
+
+**Parameters:**
+- `context_limit` (int, default: 0) - Maximum tokens. If ≤ 0, overflow handling is disabled
+- `context_overflow_strategy` (ContextOverflowStrategy, default: TRUNCATE) - Strategy to use
+
+**Notes:**
+- System prompts are always preserved (never truncated)
+- Token counting includes a 20% safety buffer
+- RECYCLE requires `output_schema` for JSON merging
+
 ### Audio Services

 #### Text-to-Speech
@@ -688,20 +801,22 @@ response = await client.ping()

 ### Client Methods

+All methods accept an optional `timeout` parameter to override the default client timeout.
+
 **Configuration**
-- `ping()` - Health check
-- `update_config(config)` / `update_configs(configs)` - Add/update providers
-- `get_configs()` - List all configurations
-- `delete_config(uid)` - Remove provider
+- `ping(timeout?)` - Health check
+- `update_config(config, timeout?)` / `update_configs(configs, timeout?)` - Add/update providers
+- `get_configs(timeout?)` - List all configurations
+- `delete_config(uid, timeout?)` - Remove provider

 **Agent**
-- `agent_run(request | **kwargs)` - Run agent (blocking)
-- `agent_run_stream(request | **kwargs)` - Run agent (streaming)
+- `agent_run(request | **kwargs, timeout?)` - Run agent (blocking)
+- `agent_run_stream(request | **kwargs, timeout?)` - Run agent (streaming)

 **Audio**
-- `speak(request | **kwargs)` - Text-to-speech (blocking)
-- `speak_stream(request | **kwargs)` - Text-to-speech (streaming)
-- `transcribe(request | **kwargs)` - Speech-to-text
+- `speak(request | **kwargs, timeout?)` - Text-to-speech (blocking)
+- `speak_stream(request | **kwargs, timeout?)` - Text-to-speech (streaming)
+- `transcribe(request | **kwargs, timeout?)` - Speech-to-text

 **Real-Time Transcription (TranscriptionWsClient)**
 - `connect()` - Establish WebSocket connection
@@ -727,12 +842,15 @@ response = await client.ping()
 - `MessageRole` - `USER` | `MODEL` | `SYSTEM` | `TOOL_CALL` | `TOOL_RETURN` (or use strings)

 **Requests**
-- `AgentRequest(provider_uid, model, messages, tools?, gen_config?, include_history?, output_schema?)` - Set `include_history=True` to get full conversation. Set `output_schema` for structured JSON output.
+- `AgentRequest(provider_uid, model, messages, tools?, gen_config?, include_history?, output_schema?, context_limit?, context_overflow_strategy?)` - Set `include_history=True` to get full conversation. Set `output_schema` for structured JSON output. Set `context_limit` and `context_overflow_strategy` for handling large texts.
 - `SpeakRequest(provider_uid, model, text, voice, mime_type, sample_rate, gen_config?)`
 - `TranscribeRequest(provider_uid, file, model, language?, gen_config?)`
 - `TranscriptionInitWsRequest(provider_uid, model, language?, input_sample_rate?, input_audio_format?, gen_config?)`
 - `TranscriptionAudioChunkWsRequest(audio)` - Audio chunk for streaming

+**Context Overflow**
+- `ContextOverflowStrategy` - `TRUNCATE` | `RECYCLE`
+
 **Tools**
 - `WebSearchInput(kind=ToolKind.WEB_SEARCH, search_context_size)`
 - `MCPStreamableServerInput(kind=ToolKind.MCP_STREAMABLE_SERVER, url, prefix?, timeout?)`
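One practical note on the new per-request timeout before the client changes below: the two transports appear to surface an expired timeout differently. The WebSocket client raises a built-in `TimeoutError` (see the `livellm.py` hunks that follow), while the HTTP client forwards the value to httpx, so httpx's own timeout exceptions apply there. A hedged, caller-side sketch reusing the README's example arguments:

```python
import httpx  # HTTP transport used by LivellmClient

try:
    response = await client.agent_run(
        provider_uid="openai",
        model="gpt-4",
        messages=[TextMessage(role="user", content="Hello")],
        timeout=5.0,  # deliberately short, for illustration only
    )
except TimeoutError as exc:
    # Raised by the WebSocket (realtime) client when the per-request timeout expires
    print(f"realtime request timed out: {exc}")
except httpx.TimeoutException as exc:
    # Raised by httpx when the HTTP request exceeds the effective timeout
    print(f"HTTP request timed out: {exc}")
```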
{livellm-1.6.1 → livellm-1.7.1}/livellm/livellm.py

@@ -31,10 +31,15 @@ DEFAULT_USER_AGENT = f"livellm-python/{__version__}"

 class BaseLivellmClient(ABC):

+    # Default timeout (set by subclasses)
+    timeout: Optional[float] = None
+
     @overload
     async def agent_run(
         self,
         request: Union[AgentRequest, AgentFallbackRequest],
+        *,
+        timeout: Optional[float] = None,
     ) -> AgentResponse:
         ...

@@ -48,13 +53,18 @@ class BaseLivellmClient(ABC):
         tools: Optional[list] = None,
         include_history: bool = False,
         output_schema: Optional[Union[OutputSchema, Dict[str, Any], Type[BaseModel]]] = None,
+        timeout: Optional[float] = None,
         **kwargs
     ) -> AgentResponse:
         ...


     @abstractmethod
-    async def handle_agent_run(self, request: Union[AgentRequest, AgentFallbackRequest]) -> AgentResponse:
+    async def handle_agent_run(
+        self,
+        request: Union[AgentRequest, AgentFallbackRequest],
+        timeout: Optional[float] = None
+    ) -> AgentResponse:
         ...

     async def agent_run(
@@ -67,6 +77,7 @@ class BaseLivellmClient(ABC):
         tools: Optional[list] = None,
         include_history: bool = False,
         output_schema: Optional[Union[OutputSchema, Dict[str, Any], Type[BaseModel]]] = None,
+        timeout: Optional[float] = None,
         **kwargs
     ) -> AgentResponse:
         """
@@ -100,6 +111,7 @@ class BaseLivellmClient(ABC):
                 - An OutputSchema instance
                 - A dict representing a JSON schema
                 - A Pydantic BaseModel class (will be converted to OutputSchema)
+            timeout: Optional timeout in seconds (overrides default client timeout)

         Returns:
             AgentResponse with the agent's output. If output_schema was provided,
@@ -111,7 +123,7 @@ class BaseLivellmClient(ABC):
                 raise TypeError(
                     f"First positional argument must be AgentRequest or AgentFallbackRequest, got {type(request)}"
                 )
-            return await self.handle_agent_run(request)
+            return await self.handle_agent_run(request, timeout=timeout)

         # Otherwise, use keyword arguments
         if provider_uid is None or model is None or messages is None:
@@ -132,7 +144,7 @@ class BaseLivellmClient(ABC):
             include_history=include_history,
             output_schema=resolved_schema
         )
-        return await self.handle_agent_run(agent_request)
+        return await self.handle_agent_run(agent_request, timeout=timeout)

     def _resolve_output_schema(
         self,
@@ -157,6 +169,8 @@ class BaseLivellmClient(ABC):
     def agent_run_stream(
         self,
         request: Union[AgentRequest, AgentFallbackRequest],
+        *,
+        timeout: Optional[float] = None,
     ) -> AsyncIterator[AgentResponse]:
         ...

@@ -170,13 +184,18 @@ class BaseLivellmClient(ABC):
         tools: Optional[list] = None,
         include_history: bool = False,
         output_schema: Optional[Union[OutputSchema, Dict[str, Any], Type[BaseModel]]] = None,
+        timeout: Optional[float] = None,
         **kwargs
     ) -> AsyncIterator[AgentResponse]:
         ...


     @abstractmethod
-    async def handle_agent_run_stream(self, request: Union[AgentRequest, AgentFallbackRequest]) -> AsyncIterator[AgentResponse]:
+    async def handle_agent_run_stream(
+        self,
+        request: Union[AgentRequest, AgentFallbackRequest],
+        timeout: Optional[float] = None
+    ) -> AsyncIterator[AgentResponse]:
         ...

     async def agent_run_stream(
@@ -189,6 +208,7 @@ class BaseLivellmClient(ABC):
         tools: Optional[list] = None,
         include_history: bool = False,
         output_schema: Optional[Union[OutputSchema, Dict[str, Any], Type[BaseModel]]] = None,
+        timeout: Optional[float] = None,
         **kwargs
     ) -> AsyncIterator[AgentResponse]:
         """
@@ -225,6 +245,7 @@ class BaseLivellmClient(ABC):
                 - An OutputSchema instance
                 - A dict representing a JSON schema
                 - A Pydantic BaseModel class (will be converted to OutputSchema)
+            timeout: Optional timeout in seconds (overrides default client timeout)

         Returns:
             AsyncIterator of AgentResponse chunks. If output_schema was provided,
@@ -236,7 +257,7 @@ class BaseLivellmClient(ABC):
                 raise TypeError(
                     f"First positional argument must be AgentRequest or AgentFallbackRequest, got {type(request)}"
                 )
-            stream = self.handle_agent_run_stream(request)
+            stream = self.handle_agent_run_stream(request, timeout=timeout)
         else:
             # Otherwise, use keyword arguments
             if provider_uid is None or model is None or messages is None:
@@ -257,7 +278,7 @@ class BaseLivellmClient(ABC):
                 include_history=include_history,
                 output_schema=resolved_schema
             )
-            stream = self.handle_agent_run_stream(agent_request)
+            stream = self.handle_agent_run_stream(agent_request, timeout=timeout)

         async for chunk in stream:
             yield chunk
@@ -266,6 +287,8 @@ class BaseLivellmClient(ABC):
     async def speak(
         self,
         request: Union[SpeakRequest, AudioFallbackRequest],
+        *,
+        timeout: Optional[float] = None,
     ) -> bytes:
         ...

@@ -280,13 +303,18 @@ class BaseLivellmClient(ABC):
         mime_type: str,
         sample_rate: int,
         chunk_size: int = 20,
+        timeout: Optional[float] = None,
         **kwargs
     ) -> bytes:
         ...


     @abstractmethod
-    async def handle_speak(self, request: Union[SpeakRequest, AudioFallbackRequest]) -> bytes:
+    async def handle_speak(
+        self,
+        request: Union[SpeakRequest, AudioFallbackRequest],
+        timeout: Optional[float] = None
+    ) -> bytes:
         ...

     async def speak(
@@ -300,6 +328,7 @@ class BaseLivellmClient(ABC):
         mime_type: Optional[str] = None,
         sample_rate: Optional[int] = None,
         chunk_size: int = 20,
+        timeout: Optional[float] = None,
         **kwargs
     ) -> bytes:
         """
@@ -330,6 +359,7 @@ class BaseLivellmClient(ABC):
             mime_type: The MIME type of the output audio
             sample_rate: The sample rate of the output audio
             chunk_size: Chunk size in milliseconds (default: 20ms)
+            timeout: Optional timeout in seconds (overrides default client timeout)
             gen_config: Optional generation configuration

         Returns:
@@ -341,7 +371,7 @@ class BaseLivellmClient(ABC):
                 raise TypeError(
                     f"First positional argument must be SpeakRequest or AudioFallbackRequest, got {type(request)}"
                 )
-            return await self.handle_speak(request)
+            return await self.handle_speak(request, timeout=timeout)

         # Otherwise, use keyword arguments
         if provider_uid is None or model is None or text is None or voice is None or mime_type is None or sample_rate is None:
@@ -360,12 +390,14 @@ class BaseLivellmClient(ABC):
             chunk_size=chunk_size,
             gen_config=kwargs or None
         )
-        return await self.handle_speak(speak_request)
+        return await self.handle_speak(speak_request, timeout=timeout)

     @overload
     def speak_stream(
         self,
         request: Union[SpeakRequest, AudioFallbackRequest],
+        *,
+        timeout: Optional[float] = None,
     ) -> AsyncIterator[bytes]:
         ...

@@ -380,13 +412,18 @@ class BaseLivellmClient(ABC):
         mime_type: str,
         sample_rate: int,
         chunk_size: int = 20,
+        timeout: Optional[float] = None,
         **kwargs
     ) -> AsyncIterator[bytes]:
         ...


     @abstractmethod
-    async def handle_speak_stream(self, request: Union[SpeakRequest, AudioFallbackRequest]) -> AsyncIterator[bytes]:
+    async def handle_speak_stream(
+        self,
+        request: Union[SpeakRequest, AudioFallbackRequest],
+        timeout: Optional[float] = None
+    ) -> AsyncIterator[bytes]:
         ...

     async def speak_stream(
@@ -400,6 +437,7 @@ class BaseLivellmClient(ABC):
         mime_type: Optional[str] = None,
         sample_rate: Optional[int] = None,
         chunk_size: int = 20,
+        timeout: Optional[float] = None,
         **kwargs
     ) -> AsyncIterator[bytes]:
         """
@@ -433,6 +471,7 @@ class BaseLivellmClient(ABC):
             mime_type: The MIME type of the output audio
             sample_rate: The sample rate of the output audio
             chunk_size: Chunk size in milliseconds (default: 20ms)
+            timeout: Optional timeout in seconds (overrides default client timeout)
             gen_config: Optional generation configuration

         Returns:
@@ -444,7 +483,7 @@ class BaseLivellmClient(ABC):
                 raise TypeError(
                     f"First positional argument must be SpeakRequest or AudioFallbackRequest, got {type(request)}"
                 )
-            speak_stream = self.handle_speak_stream(request)
+            speak_stream = self.handle_speak_stream(request, timeout=timeout)
         else:
             # Otherwise, use keyword arguments
             if provider_uid is None or model is None or text is None or voice is None or mime_type is None or sample_rate is None:
@@ -463,7 +502,7 @@ class BaseLivellmClient(ABC):
                 chunk_size=chunk_size,
                 gen_config=kwargs or None
             )
-            speak_stream = self.handle_speak_stream(speak_request)
+            speak_stream = self.handle_speak_stream(speak_request, timeout=timeout)
         async for chunk in speak_stream:
             yield chunk

@@ -471,6 +510,8 @@ class BaseLivellmClient(ABC):
     async def transcribe(
         self,
         request: Union[TranscribeRequest, TranscribeFallbackRequest],
+        *,
+        timeout: Optional[float] = None,
     ) -> TranscribeResponse:
         ...

@@ -482,13 +523,18 @@ class BaseLivellmClient(ABC):
         file: File,
         model: str,
         language: Optional[str] = None,
+        timeout: Optional[float] = None,
         **kwargs
     ) -> TranscribeResponse:
         ...


     @abstractmethod
-    async def handle_transcribe(self, request: Union[TranscribeRequest, TranscribeFallbackRequest]) -> TranscribeResponse:
+    async def handle_transcribe(
+        self,
+        request: Union[TranscribeRequest, TranscribeFallbackRequest],
+        timeout: Optional[float] = None
+    ) -> TranscribeResponse:
         ...

     async def transcribe(
@@ -499,6 +545,7 @@ class BaseLivellmClient(ABC):
         file: Optional[File] = None,
         model: Optional[str] = None,
         language: Optional[str] = None,
+        timeout: Optional[float] = None,
         **kwargs
     ) -> TranscribeResponse:
         """
@@ -522,6 +569,7 @@ class BaseLivellmClient(ABC):
             file: The audio file as a tuple (filename, content, content_type)
             model: The model to use for transcription
             language: Optional language code
+            timeout: Optional timeout in seconds (overrides default client timeout)
             gen_config: Optional generation configuration

         Returns:
@@ -534,7 +582,7 @@ class BaseLivellmClient(ABC):
                     f"First positional argument must be TranscribeRequest or TranscribeFallbackRequest, got {type(request)}"
                 )
             # JSON-based request
-            return await self.handle_transcribe(request)
+            return await self.handle_transcribe(request, timeout=timeout)

         # Otherwise, use keyword arguments with multipart form-data request
         if provider_uid is None or file is None or model is None:
@@ -550,7 +598,7 @@ class BaseLivellmClient(ABC):
             language=language,
             gen_config=kwargs or None
         )
-        return await self.handle_transcribe(transcribe_request)
+        return await self.handle_transcribe(transcribe_request, timeout=timeout)


 class LivellmWsClient(BaseLivellmClient):
@@ -628,7 +676,11 @@ class LivellmWsClient(BaseLivellmClient):
         self.__listen_for_responses_task = None
         self.sessions.clear()

-    async def get_response(self, action: WsAction, payload: dict) -> dict:
+    def _get_effective_timeout(self, timeout: Optional[float]) -> Optional[float]:
+        """Get effective timeout: per-request timeout overrides default."""
+        return timeout if timeout is not None else self.timeout
+
+    async def get_response(self, action: WsAction, payload: dict, timeout: Optional[float] = None) -> dict:
         """Send a request and wait for response."""
         if self.websocket is None:
             await self.connect()
@@ -638,7 +690,17 @@ class LivellmWsClient(BaseLivellmClient):
         q = await self.get_or_update_session(session_id)
         await self.websocket.send(json.dumps(request.model_dump()))

-        response: WsResponse = await q.get()
+        effective_timeout = self._get_effective_timeout(timeout)
+
+        try:
+            if effective_timeout:
+                response: WsResponse = await asyncio.wait_for(q.get(), timeout=effective_timeout)
+            else:
+                response: WsResponse = await q.get()
+        except asyncio.TimeoutError:
+            self.sessions.pop(session_id, None)
+            raise TimeoutError(f"Request timed out after {effective_timeout} seconds")
+
         self.sessions.pop(session_id)
         if response.status == WsStatus.ERROR:
             raise Exception(f"WebSocket failed: {response.error}")
@@ -647,7 +709,7 @@ class LivellmWsClient(BaseLivellmClient):
         else:
             raise Exception(f"WebSocket failed with unknown status: {response}")

-    async def get_response_stream(self, action: WsAction, payload: dict) -> AsyncIterator[dict]:
+    async def get_response_stream(self, action: WsAction, payload: dict, timeout: Optional[float] = None) -> AsyncIterator[dict]:
         """Send a request and stream responses."""
         if self.websocket is None:
             await self.connect()
@@ -657,8 +719,17 @@ class LivellmWsClient(BaseLivellmClient):
         q = await self.get_or_update_session(session_id)
         await self.websocket.send(json.dumps(request.model_dump()))

+        effective_timeout = self._get_effective_timeout(timeout)
+
         while True:
-            response: WsResponse = await q.get()
+            try:
+                if effective_timeout:
+                    response: WsResponse = await asyncio.wait_for(q.get(), timeout=effective_timeout)
+                else:
+                    response: WsResponse = await q.get()
+            except asyncio.TimeoutError:
+                self.sessions.pop(session_id, None)
+                raise TimeoutError(f"Request timed out after {effective_timeout} seconds")

             if response.status == WsStatus.STREAMING:
                 yield response.data
@@ -674,37 +745,60 @@ class LivellmWsClient(BaseLivellmClient):

     # Implement abstract methods from BaseLivellmClient

-    async def handle_agent_run(self, request: Union[AgentRequest, AgentFallbackRequest]) -> AgentResponse:
+    async def handle_agent_run(
+        self,
+        request: Union[AgentRequest, AgentFallbackRequest],
+        timeout: Optional[float] = None
+    ) -> AgentResponse:
         """Handle agent run via WebSocket."""
         response = await self.get_response(
             WsAction.AGENT_RUN,
-            request.model_dump()
+            request.model_dump(),
+            timeout=timeout
         )
         return AgentResponse(**response)

-    async def handle_agent_run_stream(self, request: Union[AgentRequest, AgentFallbackRequest]) -> AsyncIterator[AgentResponse]:
+    async def handle_agent_run_stream(
+        self,
+        request: Union[AgentRequest, AgentFallbackRequest],
+        timeout: Optional[float] = None
+    ) -> AsyncIterator[AgentResponse]:
         """Handle streaming agent run via WebSocket."""
-        async for response in self.get_response_stream(WsAction.AGENT_RUN_STREAM, request.model_dump()):
+        async for response in self.get_response_stream(WsAction.AGENT_RUN_STREAM, request.model_dump(), timeout=timeout):
             yield AgentResponse(**response)

-    async def handle_speak(self, request: Union[SpeakRequest, AudioFallbackRequest]) -> bytes:
+    async def handle_speak(
+        self,
+        request: Union[SpeakRequest, AudioFallbackRequest],
+        timeout: Optional[float] = None
+    ) -> bytes:
         """Handle speak request via WebSocket."""
         response = await self.get_response(
             WsAction.AUDIO_SPEAK,
-            request.model_dump()
+            request.model_dump(),
+            timeout=timeout
         )
         return EncodedSpeakResponse(**response).audio

-    async def handle_speak_stream(self, request: Union[SpeakRequest, AudioFallbackRequest]) -> AsyncIterator[bytes]:
+    async def handle_speak_stream(
+        self,
+        request: Union[SpeakRequest, AudioFallbackRequest],
+        timeout: Optional[float] = None
+    ) -> AsyncIterator[bytes]:
         """Handle streaming speak request via WebSocket."""
-        async for response in self.get_response_stream(WsAction.AUDIO_SPEAK_STREAM, request.model_dump()):
+        async for response in self.get_response_stream(WsAction.AUDIO_SPEAK_STREAM, request.model_dump(), timeout=timeout):
             yield EncodedSpeakResponse(**response).audio

-    async def handle_transcribe(self, request: Union[TranscribeRequest, TranscribeFallbackRequest]) -> TranscribeResponse:
+    async def handle_transcribe(
+        self,
+        request: Union[TranscribeRequest, TranscribeFallbackRequest],
+        timeout: Optional[float] = None
+    ) -> TranscribeResponse:
         """Handle transcribe request via WebSocket."""
         response = await self.get_response(
             WsAction.AUDIO_TRANSCRIBE,
-            request.model_dump()
+            request.model_dump(),
+            timeout=timeout
         )
         return TranscribeResponse(**response)

@@ -747,8 +841,8 @@ class LivellmClient(BaseLivellmClient):
         self.base_url = f"{self._root_base_url}/livellm"
         self.timeout = timeout
         self.user_agent = user_agent or DEFAULT_USER_AGENT
-
-
+        # Create client without timeout - we'll pass timeout per-request
+        self.client = httpx.AsyncClient(base_url=self.base_url)
         self.settings = []
         self.headers = {
             "Content-Type": "application/json",
@@ -759,6 +853,10 @@ class LivellmClient(BaseLivellmClient):
         if configs:
             self.update_configs_post_init(configs)

+    def _get_effective_timeout(self, timeout: Optional[float]) -> Optional[float]:
+        """Get effective timeout: per-request timeout overrides default."""
+        return timeout if timeout is not None else self.timeout
+
     @property
     def realtime(self) -> LivellmWsClient:
         """
@@ -789,15 +887,17 @@ class LivellmClient(BaseLivellmClient):
         return SuccessResponse(success=True, message="Configs updated successfully")


-    async def delete(self, endpoint: str) -> dict:
+    async def delete(self, endpoint: str, timeout: Optional[float] = None) -> dict:
         """
         Delete a resource from the given endpoint and return the response.
         Args:
             endpoint: The endpoint to delete from.
+            timeout: Optional timeout override.
         Returns:
             The response from the endpoint.
         """
-        response = await self.client.delete(endpoint, headers=self.headers)
+        effective_timeout = self._get_effective_timeout(timeout)
+        response = await self.client.delete(endpoint, headers=self.headers, timeout=effective_timeout)
         response.raise_for_status()
         return response.json()

@@ -805,7 +905,8 @@ class LivellmClient(BaseLivellmClient):
         self,
         files: dict,
         data: dict,
-        endpoint: str
+        endpoint: str,
+        timeout: Optional[float] = None
     ) -> dict:
         """
         Post a multipart request to the given endpoint and return the response.
@@ -813,27 +914,32 @@ class LivellmClient(BaseLivellmClient):
             files: The files to send in the request.
             data: The data to send in the request.
             endpoint: The endpoint to post to.
+            timeout: Optional timeout override.
         Returns:
             The response from the endpoint.
         """
+        effective_timeout = self._get_effective_timeout(timeout)
         # Don't pass Content-Type header for multipart - httpx will set it automatically
-        response = await self.client.post(endpoint, files=files, data=data)
+        response = await self.client.post(endpoint, files=files, data=data, timeout=effective_timeout)
         response.raise_for_status()
         return response.json()


     async def get(
         self,
-        endpoint: str
+        endpoint: str,
+        timeout: Optional[float] = None
     ) -> dict:
         """
         Get a request from the given endpoint and return the response.
         Args:
             endpoint: The endpoint to get from.
+            timeout: Optional timeout override.
         Returns:
             The response from the endpoint.
         """
-        response = await self.client.get(endpoint, headers=self.headers)
+        effective_timeout = self._get_effective_timeout(timeout)
+        response = await self.client.get(endpoint, headers=self.headers, timeout=effective_timeout)
         response.raise_for_status()
         return response.json()

@@ -842,7 +948,8 @@ class LivellmClient(BaseLivellmClient):
         json_data: dict,
         endpoint: str,
         expect_stream: bool = False,
-        expect_json: bool = True
+        expect_json: bool = True,
+        timeout: Optional[float] = None
     ) -> Union[dict, bytes, AsyncIterator[Union[dict, bytes]]]:
         """
         Post a request to the given endpoint and return the response.
@@ -854,12 +961,14 @@ class LivellmClient(BaseLivellmClient):
             endpoint: The endpoint to post to.
             expect_stream: Whether to expect a stream response.
             expect_json: Whether to expect a JSON response.
+            timeout: Optional timeout override.
         Returns:
             The response from the endpoint.
         Raises:
             Exception: If the response is not 200 or 201.
         """
-        response = await self.client.post(endpoint, json=json_data, headers=self.headers)
+        effective_timeout = self._get_effective_timeout(timeout)
+        response = await self.client.post(endpoint, json=json_data, headers=self.headers, timeout=effective_timeout)
         if response.status_code not in [200, 201]:
             error_response = await response.aread()
             error_response = error_response.decode("utf-8")
@@ -882,26 +991,26 @@ class LivellmClient(BaseLivellmClient):
         else:
             return response.content

-    async def ping(self) -> SuccessResponse:
-        result = await self.get("ping")
+    async def ping(self, timeout: Optional[float] = None) -> SuccessResponse:
+        result = await self.get("ping", timeout=timeout)
         return SuccessResponse(**result)

-    async def update_config(self, config: Settings) -> SuccessResponse:
-        result = await self.post(config.model_dump(), "providers/config", expect_json=True)
+    async def update_config(self, config: Settings, timeout: Optional[float] = None) -> SuccessResponse:
+        result = await self.post(config.model_dump(), "providers/config", expect_json=True, timeout=timeout)
         self.settings.append(config)
         return SuccessResponse(**result)

-    async def update_configs(self, configs: List[Settings]) -> SuccessResponse:
+    async def update_configs(self, configs: List[Settings], timeout: Optional[float] = None) -> SuccessResponse:
         for config in configs:
-            await self.update_config(config)
+            await self.update_config(config, timeout=timeout)
         return SuccessResponse(success=True, message="Configs updated successfully")

-    async def get_configs(self) -> List[Settings]:
-        result = await self.get("providers/configs")
+    async def get_configs(self, timeout: Optional[float] = None) -> List[Settings]:
+        result = await self.get("providers/configs", timeout=timeout)
         return [Settings(**config) for config in result]

-    async def delete_config(self, config_uid: str) -> SuccessResponse:
-        result = await self.delete(f"providers/config/{config_uid}")
+    async def delete_config(self, config_uid: str, timeout: Optional[float] = None) -> SuccessResponse:
+        result = await self.delete(f"providers/config/{config_uid}", timeout=timeout)
         return SuccessResponse(**result)

     async def cleanup(self):
@@ -916,59 +1025,51 @@ class LivellmClient(BaseLivellmClient):
         # Also close any realtime WebSocket client if it was created
         if self._realtime is not None:
             await self._realtime.disconnect()
-
-    # def __del__(self):
-    #     """
-    #     Destructor to clean up resources when the client is garbage collected.
-    #     This will close the HTTP client and attempt to delete configs if cleanup wasn't called.
-    #     Note: It's recommended to use the async context manager or call cleanup() explicitly.
-    #     """
-    #     # Warn user if cleanup wasn't called
-    #     if self.settings:
-    #         warnings.warn(
-    #             "LivellmClient is being garbage collected without explicit cleanup. "
-    #             "Provider configs may not be deleted from the server. "
-    #             "Consider using 'async with' or calling 'await client.cleanup()' explicitly.",
-    #             ResourceWarning,
-    #             stacklevel=2
-    #         )
-
-    #     # Close the httpx client synchronously
-    #     # httpx.AsyncClient stores a sync Transport that needs cleanup
-    #     try:
-    #         with httpx.Client(base_url=self.base_url) as client:
-    #             for config in self.settings:
-    #                 config: Settings = config
-    #                 client.delete(f"providers/config/{config.uid}", headers=self.headers)
-    #     except Exception:
-    #         # Silently fail - we're in a destructor
-    #         pass

     # Implement abstract methods from BaseLivellmClient

-    async def handle_agent_run(self, request: Union[AgentRequest, AgentFallbackRequest]) -> AgentResponse:
+    async def handle_agent_run(
+        self,
+        request: Union[AgentRequest, AgentFallbackRequest],
+        timeout: Optional[float] = None
+    ) -> AgentResponse:
         """Handle agent run via HTTP."""
-        result = await self.post(request.model_dump(), "agent/run", expect_json=True)
+        result = await self.post(request.model_dump(), "agent/run", expect_json=True, timeout=timeout)
         return AgentResponse(**result)

-    async def handle_agent_run_stream(self, request: Union[AgentRequest, AgentFallbackRequest]) -> AsyncIterator[AgentResponse]:
+    async def handle_agent_run_stream(
+        self,
+        request: Union[AgentRequest, AgentFallbackRequest],
+        timeout: Optional[float] = None
+    ) -> AsyncIterator[AgentResponse]:
         """Handle streaming agent run via HTTP."""
-        stream = await self.post(request.model_dump(), "agent/run_stream", expect_stream=True, expect_json=True)
+        stream = await self.post(request.model_dump(), "agent/run_stream", expect_stream=True, expect_json=True, timeout=timeout)
         async for chunk in stream:
             yield AgentResponse(**chunk)

-    async def handle_speak(self, request: Union[SpeakRequest, AudioFallbackRequest]) -> bytes:
+    async def handle_speak(
+        self,
+        request: Union[SpeakRequest, AudioFallbackRequest],
+        timeout: Optional[float] = None
+    ) -> bytes:
         """Handle speak request via HTTP."""
-        return await self.post(request.model_dump(), "audio/speak", expect_json=False)
+        return await self.post(request.model_dump(), "audio/speak", expect_json=False, timeout=timeout)

-    async def handle_speak_stream(self, request: Union[SpeakRequest, AudioFallbackRequest]) -> AsyncIterator[bytes]:
+    async def handle_speak_stream(
+        self,
+        request: Union[SpeakRequest, AudioFallbackRequest],
+        timeout: Optional[float] = None
+    ) -> AsyncIterator[bytes]:
         """Handle streaming speak request via HTTP."""
-        speak_stream = await self.post(request.model_dump(), "audio/speak_stream", expect_stream=True, expect_json=False)
+        speak_stream = await self.post(request.model_dump(), "audio/speak_stream", expect_stream=True, expect_json=False, timeout=timeout)
         async for chunk in speak_stream:
             yield chunk

-    async def handle_transcribe(self, request: Union[TranscribeRequest, TranscribeFallbackRequest]) -> TranscribeResponse:
+    async def handle_transcribe(
+        self,
+        request: Union[TranscribeRequest, TranscribeFallbackRequest],
+        timeout: Optional[float] = None
+    ) -> TranscribeResponse:
         """Handle transcribe request via HTTP."""
-        result = await self.post(request.model_dump(), "audio/transcribe_json", expect_json=True)
+        result = await self.post(request.model_dump(), "audio/transcribe_json", expect_json=True, timeout=timeout)
         return TranscribeResponse(**result)
-
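The per-request plumbing above funnels through a single helper, `_get_effective_timeout`, defined identically on both clients. A standalone sketch of the same resolution rule (the class here is an illustrative stand-in, not part of the library):

```python
from typing import Optional

class TimeoutHolder:
    """Illustrative stand-in for how the Livellm clients resolve timeouts."""

    def __init__(self, timeout: Optional[float] = None):
        self.timeout = timeout  # default chosen at construction time

    def _get_effective_timeout(self, timeout: Optional[float]) -> Optional[float]:
        # A per-request value always wins; otherwise fall back to the client default.
        return timeout if timeout is not None else self.timeout

holder = TimeoutHolder(timeout=30.0)
assert holder._get_effective_timeout(None) == 30.0    # default applies
assert holder._get_effective_timeout(120.0) == 120.0  # per-request override wins
assert TimeoutHolder()._get_effective_timeout(None) is None  # no timeout configured at all
```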
{livellm-1.6.1 → livellm-1.7.1}/livellm/models/__init__.py

@@ -1,6 +1,6 @@
 from .common import BaseRequest, ProviderKind, Settings, SuccessResponse
 from .fallback import AgentFallbackRequest, AudioFallbackRequest, TranscribeFallbackRequest, FallbackStrategy
-from .agent.agent import AgentRequest, AgentResponse, AgentResponseUsage
+from .agent.agent import AgentRequest, AgentResponse, AgentResponseUsage, ContextOverflowStrategy
 from .agent.chat import Message, MessageRole, TextMessage, BinaryMessage, ToolCallMessage, ToolReturnMessage
 from .agent.tools import Tool, ToolInput, ToolKind, WebSearchInput, MCPStreamableServerInput
 from .agent.output_schema import OutputSchema, PropertyDef
@@ -24,6 +24,7 @@ __all__ = [
     "AgentRequest",
     "AgentResponse",
     "AgentResponseUsage",
+    "ContextOverflowStrategy",
     "Message",
     "MessageRole",
     "TextMessage",
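With the re-export above, the new enum is importable straight from `livellm.models`; a quick sanity check (member values taken from the `agent.py` hunks below):

```python
from livellm.models import ContextOverflowStrategy

# ContextOverflowStrategy subclasses str, so members compare equal to their wire values
assert ContextOverflowStrategy.TRUNCATE == "truncate"
assert ContextOverflowStrategy.RECYCLE == "recycle"
```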
{livellm-1.6.1 → livellm-1.7.1}/livellm/models/agent/__init__.py

@@ -1,4 +1,4 @@
-from .agent import AgentRequest, AgentResponse, AgentResponseUsage
+from .agent import AgentRequest, AgentResponse, AgentResponseUsage, ContextOverflowStrategy
 from .chat import Message, MessageRole, TextMessage, BinaryMessage, ToolCallMessage, ToolReturnMessage
 from .tools import Tool, ToolInput, ToolKind, WebSearchInput, MCPStreamableServerInput
 from .output_schema import OutputSchema, PropertyDef
@@ -8,6 +8,7 @@ __all__ = [
     "AgentRequest",
     "AgentResponse",
     "AgentResponseUsage",
+    "ContextOverflowStrategy",
     "Message",
     "MessageRole",
     "TextMessage",
{livellm-1.6.1 → livellm-1.7.1}/livellm/models/agent/agent.py

@@ -2,12 +2,19 @@

 from pydantic import BaseModel, Field
 from typing import Optional, List, Union, Any, Dict
+from enum import Enum
 from .chat import TextMessage, BinaryMessage, ToolCallMessage, ToolReturnMessage
 from .tools import WebSearchInput, MCPStreamableServerInput
 from .output_schema import OutputSchema, PropertyDef
 from ..common import BaseRequest


+class ContextOverflowStrategy(str, Enum):
+    """Strategy for handling context overflow when text exceeds context_limit."""
+    TRUNCATE = "truncate"  # Take beginning, middle, and end portions
+    RECYCLE = "recycle"  # Iteratively process chunks, merging results
+
+
 class AgentRequest(BaseRequest):
     model: str = Field(..., description="The model to use")
     messages: List[Union[TextMessage, BinaryMessage, ToolCallMessage, ToolReturnMessage]] = Field(..., description="The messages to use")
@@ -15,6 +22,8 @@ class AgentRequest(BaseRequest):
     gen_config: Optional[dict] = Field(default=None, description="The configuration for the generation")
     include_history: bool = Field(default=False, description="Whether to include full conversation history in the response")
     output_schema: Optional[Union[OutputSchema, Dict[str, Any]]] = Field(default=None, description="JSON schema for structured output. Can be an OutputSchema, a dict representing a JSON schema, or will be converted from a Pydantic BaseModel.")
+    context_limit: int = Field(default=0, description="Maximum context size in tokens. If <= 0, context overflow handling is disabled.")
+    context_overflow_strategy: ContextOverflowStrategy = Field(default=ContextOverflowStrategy.TRUNCATE, description="Strategy for handling context overflow: 'truncate' or 'recycle'")

 class AgentResponseUsage(BaseModel):
     input_tokens: int = Field(..., description="The number of input tokens used")
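To tie the model changes together, a minimal construction of an `AgentRequest` that exercises the two new fields; field names follow the README's `AgentRequest(...)` signature and the values are placeholders:

```python
from livellm.models import AgentRequest, TextMessage, ContextOverflowStrategy

request = AgentRequest(
    provider_uid="openai",  # placeholder provider uid
    model="gpt-4",
    messages=[TextMessage(role="user", content="Summarize this very long document ...")],
    context_limit=4000,  # enable overflow handling once the prompt exceeds ~4000 tokens
    context_overflow_strategy=ContextOverflowStrategy.TRUNCATE,  # the default strategy
)
assert request.context_limit == 4000
assert request.context_overflow_strategy is ContextOverflowStrategy.TRUNCATE
```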