livellm 1.5.5__tar.gz → 1.7.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {livellm-1.5.5 → livellm-1.7.1}/.gitignore +2 -1
- {livellm-1.5.5 → livellm-1.7.1}/PKG-INFO +299 -33
- {livellm-1.5.5 → livellm-1.7.1}/README.md +298 -28
- {livellm-1.5.5 → livellm-1.7.1}/livellm/livellm.py +258 -98
- {livellm-1.5.5 → livellm-1.7.1}/livellm/models/__init__.py +5 -1
- {livellm-1.5.5 → livellm-1.7.1}/livellm/models/agent/__init__.py +5 -1
- {livellm-1.5.5 → livellm-1.7.1}/livellm/models/agent/agent.py +15 -4
- livellm-1.7.1/livellm/models/agent/output_schema.py +120 -0
- {livellm-1.5.5 → livellm-1.7.1}/livellm/models/transcription.py +2 -0
- {livellm-1.5.5 → livellm-1.7.1}/livellm/transcripton.py +61 -19
- {livellm-1.5.5 → livellm-1.7.1}/pyproject.toml +3 -3
- {livellm-1.5.5 → livellm-1.7.1}/LICENSE +0 -0
- {livellm-1.5.5 → livellm-1.7.1}/livellm/__init__.py +0 -0
- {livellm-1.5.5 → livellm-1.7.1}/livellm/models/agent/chat.py +0 -0
- {livellm-1.5.5 → livellm-1.7.1}/livellm/models/agent/tools.py +0 -0
- {livellm-1.5.5 → livellm-1.7.1}/livellm/models/audio/__init__.py +0 -0
- {livellm-1.5.5 → livellm-1.7.1}/livellm/models/audio/speak.py +0 -0
- {livellm-1.5.5 → livellm-1.7.1}/livellm/models/audio/transcribe.py +0 -0
- {livellm-1.5.5 → livellm-1.7.1}/livellm/models/common.py +0 -0
- {livellm-1.5.5 → livellm-1.7.1}/livellm/models/fallback.py +0 -0
- {livellm-1.5.5 → livellm-1.7.1}/livellm/models/ws.py +0 -0
- {livellm-1.5.5 → livellm-1.7.1}/livellm/py.typed +0 -0
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: livellm
-Version: 1.5.5
+Version: 1.7.1
 Summary: Python client for the LiveLLM Server
 Project-URL: Homepage, https://github.com/qalby-tech/livellm-client-py
 Project-URL: Repository, https://github.com/qalby-tech/livellm-client-py
@@ -19,10 +19,6 @@ Requires-Dist: httpx>=0.27.0
 Requires-Dist: pydantic>=2.0.0
 Requires-Dist: sounddevice>=0.5.3
 Requires-Dist: websockets>=15.0.1
-Provides-Extra: testing
-Requires-Dist: pytest-asyncio>=0.21.0; extra == 'testing'
-Requires-Dist: pytest-cov>=4.1.0; extra == 'testing'
-Requires-Dist: pytest>=8.4.2; extra == 'testing'
 Description-Content-Type: text/markdown
 
 # LiveLLM Python Client
@@ -39,6 +35,9 @@ Python client library for the LiveLLM Server - a unified proxy for AI agent, aud
 - 🎯 **Multi-provider** - OpenAI, Google, Anthropic, Groq, ElevenLabs
 - 🔄 **Streaming** - Real-time streaming for agent and audio
 - 🛠️ **Flexible API** - Use request objects or keyword arguments
+- 📋 **Structured Output** - Get validated JSON responses with schema support (Pydantic, OutputSchema, or dict)
+- 📏 **Context Overflow Management** - Automatic handling of large texts with truncate/recycle strategies
+- ⏱️ **Per-Request Timeout** - Override default timeout for individual requests
 - 🎙️ **Audio services** - Text-to-speech and transcription
 - 🎤 **Real-Time Transcription** - WebSocket-based live audio transcription with bidirectional streaming
 - ⚡ **Fallback strategies** - Sequential and parallel handling
@@ -98,10 +97,10 @@ from livellm.models import Settings, ProviderKind
 # Basic
 client = LivellmClient(base_url="http://localhost:8000")
 
-# With timeout and pre-configured providers
+# With default timeout and pre-configured providers
 client = LivellmClient(
     base_url="http://localhost:8000",
-    timeout=30.0,
+    timeout=30.0,  # Default timeout for all requests
     configs=[
         Settings(
             uid="openai",
@@ -119,6 +118,50 @@ client = LivellmClient(
 )
 ```
 
+### Per-Request Timeout Override
+
+The timeout provided in `__init__` is the default, but you can override it for individual requests:
+
+```python
+# Client with 30s default timeout
+client = LivellmClient(base_url="http://localhost:8000", timeout=30.0)
+
+# Uses default 30s timeout
+response = await client.agent_run(
+    provider_uid="openai",
+    model="gpt-4",
+    messages=[TextMessage(role="user", content="Hello")]
+)
+
+# Override with 120s timeout for this specific request
+response = await client.agent_run(
+    provider_uid="openai",
+    model="gpt-4",
+    messages=[TextMessage(role="user", content="Write a long essay...")],
+    timeout=120.0  # Override for this request only
+)
+
+# Works with streaming too
+async for chunk in client.agent_run_stream(
+    provider_uid="openai",
+    model="gpt-4",
+    messages=[TextMessage(role="user", content="Tell me a story")],
+    timeout=300.0  # 5 minutes for streaming
+):
+    print(chunk.output, end="")
+
+# Works with all methods: speak(), speak_stream(), transcribe(), etc.
+audio = await client.speak(
+    provider_uid="openai",
+    model="tts-1",
+    text="Hello world",
+    voice="alloy",
+    mime_type=SpeakMimeType.MP3,
+    sample_rate=24000,
+    timeout=60.0
+)
+```
+
 ### Supported Providers
 
 `OPENAI` • `GOOGLE` • `ANTHROPIC` • `GROQ` • `ELEVENLABS`
@@ -302,6 +345,213 @@ if response.history:
 - Auditing and logging complete conversations
 - Building conversational UIs with full context visibility
 
+#### Agent with Structured Output
+
+Get structured JSON responses from the agent by providing an output schema. The agent will return a JSON string matching your schema in the `output` field.
+
+**Three ways to define a schema:**
+
+**1. Using Pydantic BaseModel (Recommended)**
+```python
+import json
+from pydantic import BaseModel
+from livellm.models import TextMessage
+
+class Person(BaseModel):
+    name: str
+    age: int
+    occupation: str
+
+response = await client.agent_run(
+    provider_uid="openai",
+    model="gpt-4",
+    messages=[TextMessage(role="user", content="Extract info: John is a 28-year-old engineer")],
+    output_schema=Person  # Pass the BaseModel class directly
+)
+
+# response.output is a JSON string: '{"name": "John", "age": 28, "occupation": "engineer"}'
+print(type(response.output))  # <class 'str'>
+
+# Parse the JSON string yourself if needed
+data = json.loads(response.output)
+print(f"Name: {data['name']}")
+print(f"Age: {data['age']}")
+print(f"Occupation: {data['occupation']}")
+
+# Or validate with your Pydantic model
+person = Person.model_validate_json(response.output)
+print(f"Name: {person.name}")
+```
+
+**2. Using OutputSchema**
+```python
+from livellm.models import OutputSchema, PropertyDef, TextMessage
+
+schema = OutputSchema(
+    title="Person",
+    description="A person's information",
+    properties={
+        "name": PropertyDef(type="string", description="The person's name"),
+        "age": PropertyDef(type="integer", minimum=0, maximum=150, description="Age in years"),
+        "email": PropertyDef(type="string", pattern="^[^@]+@[^@]+\\.[^@]+$", description="Email address"),
+    },
+    required=["name", "age", "email"]
+)
+
+response = await client.agent_run(
+    provider_uid="openai",
+    model="gpt-4",
+    messages=[TextMessage(role="user", content="Tell me about a person")],
+    output_schema=schema
+)
+```
+
+**3. Using a dictionary (JSON Schema)**
+```python
+schema_dict = {
+    "title": "Person",
+    "type": "object",
+    "properties": {
+        "name": {"type": "string", "description": "The person's name"},
+        "age": {"type": "integer", "minimum": 0, "maximum": 150},
+        "email": {"type": "string", "pattern": "^[^@]+@[^@]+\\.[^@]+$"}
+    },
+    "required": ["name", "age", "email"]
+}
+
+response = await client.agent_run(
+    provider_uid="openai",
+    model="gpt-4",
+    messages=[TextMessage(role="user", content="Extract person info")],
+    output_schema=schema_dict
+)
+```
+
+**Complex nested schemas:**
+```python
+from pydantic import BaseModel
+from typing import List, Optional
+
+class Address(BaseModel):
+    street: str
+    city: str
+    zip_code: str
+
+class Person(BaseModel):
+    name: str
+    age: int
+    addresses: List[Address]
+    phone: Optional[str] = None
+
+response = await client.agent_run(
+    provider_uid="openai",
+    model="gpt-4",
+    messages=[TextMessage(role="user", content="Extract person with addresses")],
+    output_schema=Person  # Nested models are automatically resolved
+)
+```
+
+**With streaming:**
+```python
+import json
+from typing import List
+from pydantic import BaseModel
+
+class Summary(BaseModel):
+    title: str
+    key_points: List[str]
+    word_count: int
+
+stream = client.agent_run_stream(
+    provider_uid="openai",
+    model="gpt-4",
+    messages=[TextMessage(role="user", content="Summarize this article")],
+    output_schema=Summary
+)
+
+# Collect chunks as they arrive; the stream can only be consumed once
+chunks = []
+async for chunk in stream:
+    print(chunk.output, end="", flush=True)
+    chunks.append(chunk.output)
+
+# After streaming completes, parse the full JSON output
+full_output = "".join(chunks)
+data = json.loads(full_output)
+```
+
+**Response fields:**
+- `output` - The JSON string response matching your schema
+
+**Use cases:**
+- Data extraction and parsing
+- API response formatting
+- Structured data generation
+- Type-safe responses
+- Integration with type-checked code
+
+#### Context Overflow Management
+
+Handle large texts that exceed model context windows with automatic truncation or iterative processing:
+
+```python
+from livellm.models import TextMessage, ContextOverflowStrategy, OutputSchema, PropertyDef
+
+# TRUNCATE strategy (default): Preserves beginning, middle, and end
+# Works with both streaming and non-streaming
+response = await client.agent_run(
+    provider_uid="openai",
+    model="gpt-4",
+    messages=[
+        TextMessage(role="system", content="Summarize the document."),
+        TextMessage(role="user", content=very_long_document)
+    ],
+    context_limit=4000,  # Max tokens
+    context_overflow_strategy=ContextOverflowStrategy.TRUNCATE
+)
+
+# RECYCLE strategy: Iteratively processes chunks and merges results
+# Useful for extraction tasks - processes entire document
+# Requires output_schema for JSON merging
+output_schema = OutputSchema(
+    title="ExtractedInfo",
+    properties={
+        "topics": PropertyDef(type="array", items={"type": "string"}),
+        "key_figures": PropertyDef(type="array", items={"type": "string"})
+    },
+    required=["topics", "key_figures"]
+)
+
+response = await client.agent_run(
+    provider_uid="openai",
+    model="gpt-4",
+    messages=[
+        TextMessage(role="system", content="Extract all topics and key figures."),
+        TextMessage(role="user", content=very_long_document)
+    ],
+    context_limit=3000,
+    context_overflow_strategy=ContextOverflowStrategy.RECYCLE,
+    output_schema=output_schema
+)
+
+# Parse the merged results
+import json
+result = json.loads(response.output)
+print(f"Topics: {result['topics']}")
+print(f"Key figures: {result['key_figures']}")
+```
+
+**Strategy comparison:**
+
+| Strategy | How it works | Best for | Streaming |
+|----------|--------------|----------|-----------|
+| `TRUNCATE` | Takes beginning, middle, end portions | Summarization, Q&A | ✅ Yes |
+| `RECYCLE` | Processes chunks iteratively, merges JSON | Full document extraction | ❌ No |
+
+**Parameters:**
+- `context_limit` (int, default: 0) - Maximum tokens. If ≤ 0, overflow handling is disabled
+- `context_overflow_strategy` (ContextOverflowStrategy, default: TRUNCATE) - Strategy to use
+
+**Notes:**
+- System prompts are always preserved (never truncated)
+- Token counting includes a 20% safety buffer
+- RECYCLE requires `output_schema` for JSON merging
+
 ### Audio Services
 
 #### Text-to-Speech
@@ -411,11 +661,17 @@ async def transcribe_live_direct():
     )
 
     # Stream audio and receive transcriptions
-
-
-
-
-
+    # Each iteration yields a list of responses (oldest to newest)
+    async for responses in client.start_session(init_request, audio_source()):
+        # Get the latest transcription (last element)
+        latest = responses[-1]
+        print(f"Latest transcription: {latest.transcription}")
+
+        # Process all accumulated transcriptions if needed
+        if len(responses) > 1:
+            print(f" (received {len(responses)} chunks)")
+            for resp in responses:
+                print(f" - {resp.transcription}")
 
 asyncio.run(transcribe_live_direct())
 ```
@@ -453,25 +709,25 @@ async def transcribe_and_chat():
         gen_config={},
     )
 
-    # Listen for transcriptions and, for each
-
-
+    # Listen for transcriptions and, for each batch, run an agent request
+    # Each iteration yields a list of responses - newest is last
+    async for responses in t_client.start_session(init_request, audio_source()):
+        # Use the latest transcription for the agent
+        latest = responses[-1]
+        print("User said:", latest.transcription)
 
         # You can call agent_run (or speak, etc.) while the transcription stream is active
+        # Even if this is slow, transcriptions accumulate and won't stall the loop
        agent_response = await realtime.agent_run(
             provider_uid="openai",
             model="gpt-4",
             messages=[
-                TextMessage(role="user", content=
+                TextMessage(role="user", content=latest.transcription),
             ],
             temperature=0.7,
         )
         print("Agent:", agent_response.output)
 
-        if resp.is_end:
-            print("Transcription session complete")
-            break
-
 asyncio.run(transcribe_and_chat())
 ```
 
@@ -568,25 +824,27 @@ response = await client.ping()
 
 ### Client Methods
 
+All methods accept an optional `timeout` parameter to override the default client timeout.
+
 **Configuration**
-- `ping()` - Health check
-- `update_config(config)` / `update_configs(configs)` - Add/update providers
-- `get_configs()` - List all configurations
-- `delete_config(uid)` - Remove provider
+- `ping(timeout?)` - Health check
+- `update_config(config, timeout?)` / `update_configs(configs, timeout?)` - Add/update providers
+- `get_configs(timeout?)` - List all configurations
+- `delete_config(uid, timeout?)` - Remove provider
 
 **Agent**
-- `agent_run(request | **kwargs)` - Run agent (blocking)
-- `agent_run_stream(request | **kwargs)` - Run agent (streaming)
+- `agent_run(request | **kwargs, timeout?)` - Run agent (blocking)
+- `agent_run_stream(request | **kwargs, timeout?)` - Run agent (streaming)
 
 **Audio**
-- `speak(request | **kwargs)` - Text-to-speech (blocking)
-- `speak_stream(request | **kwargs)` - Text-to-speech (streaming)
-- `transcribe(request | **kwargs)` - Speech-to-text
+- `speak(request | **kwargs, timeout?)` - Text-to-speech (blocking)
+- `speak_stream(request | **kwargs, timeout?)` - Text-to-speech (streaming)
+- `transcribe(request | **kwargs, timeout?)` - Speech-to-text
 
 **Real-Time Transcription (TranscriptionWsClient)**
 - `connect()` - Establish WebSocket connection
 - `disconnect()` - Close WebSocket connection
-- `start_session(init_request, audio_source)` - Start bidirectional streaming transcription
+- `start_session(init_request, audio_source)` - Start bidirectional streaming transcription; yields `list[TranscriptionWsResponse]` (accumulated responses, newest last)
 - `async with client:` - Auto connection management (recommended)
 
 **Cleanup**
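For reference, a minimal sketch of the per-call `timeout` override on the configuration methods above. It assumes `LivellmClient` is imported from the top-level `livellm` package; the 5-second value and the `"openai"` uid are illustrative, not library defaults:

```python
import asyncio

from livellm import LivellmClient  # assumed top-level import path

async def main() -> None:
    # Client-wide default timeout of 30s
    client = LivellmClient(base_url="http://localhost:8000", timeout=30.0)

    # Each call below overrides the 30s default with a 5s budget
    await client.ping(timeout=5.0)                     # quick health check
    configs = await client.get_configs(timeout=5.0)    # list provider configs
    print(configs)
    await client.delete_config("openai", timeout=5.0)  # remove one provider

asyncio.run(main())
```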
@@ -607,25 +865,33 @@ response = await client.ping()
 - `MessageRole` - `USER` | `MODEL` | `SYSTEM` | `TOOL_CALL` | `TOOL_RETURN` (or use strings)
 
 **Requests**
-- `AgentRequest(provider_uid, model, messages, tools?, gen_config?, include_history?)` - Set `include_history=True` to get full conversation
+- `AgentRequest(provider_uid, model, messages, tools?, gen_config?, include_history?, output_schema?, context_limit?, context_overflow_strategy?)` - Set `include_history=True` to get full conversation. Set `output_schema` for structured JSON output. Set `context_limit` and `context_overflow_strategy` for handling large texts.
 - `SpeakRequest(provider_uid, model, text, voice, mime_type, sample_rate, gen_config?)`
 - `TranscribeRequest(provider_uid, file, model, language?, gen_config?)`
 - `TranscriptionInitWsRequest(provider_uid, model, language?, input_sample_rate?, input_audio_format?, gen_config?)`
 - `TranscriptionAudioChunkWsRequest(audio)` - Audio chunk for streaming
 
+**Context Overflow**
+- `ContextOverflowStrategy` - `TRUNCATE` | `RECYCLE`
+
 **Tools**
 - `WebSearchInput(kind=ToolKind.WEB_SEARCH, search_context_size)`
 - `MCPStreamableServerInput(kind=ToolKind.MCP_STREAMABLE_SERVER, url, prefix?, timeout?)`
 
+**Structured Output**
+- `OutputSchema(title, description?, properties, required?, additionalProperties?)` - JSON Schema for structured output
+- `PropertyDef(type, description?, enum?, default?, minLength?, maxLength?, pattern?, minimum?, maximum?, items?, ...)` - Property definition with validation constraints
+- `OutputSchema.from_pydantic(model)` - Convert a Pydantic BaseModel class to OutputSchema
+
 **Fallback**
 - `AgentFallbackRequest(strategy, requests, timeout_per_request?)`
 - `AudioFallbackRequest(strategy, requests, timeout_per_request?)`
 - `FallbackStrategy` - `SEQUENTIAL` | `PARALLEL`
 
 **Responses**
-- `AgentResponse(output, usage{input_tokens, output_tokens}, history?)` - `history` included when `include_history=True`
+- `AgentResponse(output, usage{input_tokens, output_tokens}, history?)` - `history` included when `include_history=True`. `output` is a JSON string when `output_schema` is provided.
 - `TranscribeResponse(text, language)`
-- `TranscriptionWsResponse(transcription,
+- `TranscriptionWsResponse(transcription, received_at)` - Real-time transcription result; yielded as `list[TranscriptionWsResponse]` with newest last
 
 ## Error Handling
 
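A minimal sketch of the `OutputSchema.from_pydantic` helper and the fallback request models listed above, since neither has an example in this README. It assumes these classes are importable from `livellm.models` like the other models; the provider uids and the `claude-sonnet` model name are illustrative:

```python
from pydantic import BaseModel

# Assumed import path, matching the other models in the README
from livellm.models import (
    AgentFallbackRequest,
    AgentRequest,
    FallbackStrategy,
    OutputSchema,
    TextMessage,
)

class Person(BaseModel):
    name: str
    age: int

# Equivalent to passing the Person class directly as output_schema
schema = OutputSchema.from_pydantic(Person)

# Sequential fallback: try OpenAI first, then Anthropic, 30s per attempt
fallback = AgentFallbackRequest(
    strategy=FallbackStrategy.SEQUENTIAL,
    requests=[
        AgentRequest(
            provider_uid="openai",
            model="gpt-4",
            messages=[TextMessage(role="user", content="Extract: John, 28")],
            output_schema=schema,
        ),
        AgentRequest(
            provider_uid="anthropic",
            model="claude-sonnet",  # illustrative model name
            messages=[TextMessage(role="user", content="Extract: John, 28")],
            output_schema=schema,
        ),
    ],
    timeout_per_request=30.0,
)
```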