livellm 1.5.4__tar.gz → 1.6.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {livellm-1.5.4 → livellm-1.6.1}/.gitignore +2 -1
- {livellm-1.5.4 → livellm-1.6.1}/PKG-INFO +170 -22
- {livellm-1.5.4 → livellm-1.6.1}/README.md +169 -17
- {livellm-1.5.4 → livellm-1.6.1}/livellm/livellm.py +95 -36
- {livellm-1.5.4 → livellm-1.6.1}/livellm/models/__init__.py +3 -0
- {livellm-1.5.4 → livellm-1.6.1}/livellm/models/agent/__init__.py +3 -0
- {livellm-1.5.4 → livellm-1.6.1}/livellm/models/agent/agent.py +6 -4
- livellm-1.6.1/livellm/models/agent/output_schema.py +120 -0
- {livellm-1.5.4 → livellm-1.6.1}/livellm/models/transcription.py +2 -0
- {livellm-1.5.4 → livellm-1.6.1}/livellm/transcripton.py +61 -19
- {livellm-1.5.4 → livellm-1.6.1}/pyproject.toml +3 -3
- {livellm-1.5.4 → livellm-1.6.1}/LICENSE +0 -0
- {livellm-1.5.4 → livellm-1.6.1}/livellm/__init__.py +0 -0
- {livellm-1.5.4 → livellm-1.6.1}/livellm/models/agent/chat.py +0 -0
- {livellm-1.5.4 → livellm-1.6.1}/livellm/models/agent/tools.py +0 -0
- {livellm-1.5.4 → livellm-1.6.1}/livellm/models/audio/__init__.py +0 -0
- {livellm-1.5.4 → livellm-1.6.1}/livellm/models/audio/speak.py +0 -0
- {livellm-1.5.4 → livellm-1.6.1}/livellm/models/audio/transcribe.py +0 -0
- {livellm-1.5.4 → livellm-1.6.1}/livellm/models/common.py +0 -0
- {livellm-1.5.4 → livellm-1.6.1}/livellm/models/fallback.py +0 -0
- {livellm-1.5.4 → livellm-1.6.1}/livellm/models/ws.py +0 -0
- {livellm-1.5.4 → livellm-1.6.1}/livellm/py.typed +0 -0
--- livellm-1.5.4/PKG-INFO
+++ livellm-1.6.1/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: livellm
-Version: 1.5.4
+Version: 1.6.1
 Summary: Python client for the LiveLLM Server
 Project-URL: Homepage, https://github.com/qalby-tech/livellm-client-py
 Project-URL: Repository, https://github.com/qalby-tech/livellm-client-py
@@ -19,10 +19,6 @@ Requires-Dist: httpx>=0.27.0
 Requires-Dist: pydantic>=2.0.0
 Requires-Dist: sounddevice>=0.5.3
 Requires-Dist: websockets>=15.0.1
-Provides-Extra: testing
-Requires-Dist: pytest-asyncio>=0.21.0; extra == 'testing'
-Requires-Dist: pytest-cov>=4.1.0; extra == 'testing'
-Requires-Dist: pytest>=8.4.2; extra == 'testing'
 Description-Content-Type: text/markdown
 
 # LiveLLM Python Client
@@ -39,6 +35,7 @@ Python client library for the LiveLLM Server - a unified proxy for AI agent, aud
 - 🎯 **Multi-provider** - OpenAI, Google, Anthropic, Groq, ElevenLabs
 - 🔄 **Streaming** - Real-time streaming for agent and audio
 - 🛠️ **Flexible API** - Use request objects or keyword arguments
+- 📋 **Structured Output** - Get validated JSON responses with schema support (Pydantic, OutputSchema, or dict)
 - 🎙️ **Audio services** - Text-to-speech and transcription
 - 🎤 **Real-Time Transcription** - WebSocket-based live audio transcription with bidirectional streaming
 - ⚡ **Fallback strategies** - Sequential and parallel handling
@@ -302,6 +299,146 @@ if response.history:
 - Auditing and logging complete conversations
 - Building conversational UIs with full context visibility
 
+#### Agent with Structured Output
+
+Get structured JSON responses from the agent by providing an output schema. The agent will return a JSON string matching your schema in the `output` field.
+
+**Three ways to define a schema:**
+
+**1. Using Pydantic BaseModel (Recommended)**
+```python
+import json
+from pydantic import BaseModel
+from livellm.models import TextMessage
+
+class Person(BaseModel):
+    name: str
+    age: int
+    occupation: str
+
+response = await client.agent_run(
+    provider_uid="openai",
+    model="gpt-4",
+    messages=[TextMessage(role="user", content="Extract info: John is a 28-year-old engineer")],
+    output_schema=Person  # Pass the BaseModel class directly
+)
+
+# response.output is a JSON string: '{"name": "John", "age": 28, "occupation": "engineer"}'
+print(type(response.output))  # <class 'str'>
+
+# Parse the JSON string yourself if needed
+data = json.loads(response.output)
+print(f"Name: {data['name']}")
+print(f"Age: {data['age']}")
+print(f"Occupation: {data['occupation']}")
+
+# Or validate with your Pydantic model
+person = Person.model_validate_json(response.output)
+print(f"Name: {person.name}")
+```
+
+**2. Using OutputSchema**
+```python
+from livellm.models import OutputSchema, PropertyDef, TextMessage
+
+schema = OutputSchema(
+    title="Person",
+    description="A person's information",
+    properties={
+        "name": PropertyDef(type="string", description="The person's name"),
+        "age": PropertyDef(type="integer", minimum=0, maximum=150, description="Age in years"),
+        "email": PropertyDef(type="string", pattern="^[^@]+@[^@]+\\.[^@]+$", description="Email address"),
+    },
+    required=["name", "age", "email"]
+)
+
+response = await client.agent_run(
+    provider_uid="openai",
+    model="gpt-4",
+    messages=[TextMessage(role="user", content="Tell me about a person")],
+    output_schema=schema
+)
+```
+
+**3. Using a dictionary (JSON Schema)**
+```python
+schema_dict = {
+    "title": "Person",
+    "type": "object",
+    "properties": {
+        "name": {"type": "string", "description": "The person's name"},
+        "age": {"type": "integer", "minimum": 0, "maximum": 150},
+        "email": {"type": "string", "pattern": "^[^@]+@[^@]+\\.[^@]+$"}
+    },
+    "required": ["name", "age", "email"]
+}
+
+response = await client.agent_run(
+    provider_uid="openai",
+    model="gpt-4",
+    messages=[TextMessage(role="user", content="Extract person info")],
+    output_schema=schema_dict
+)
+```
+
+**Complex nested schemas:**
+```python
+from pydantic import BaseModel
+from typing import List, Optional
+
+class Address(BaseModel):
+    street: str
+    city: str
+    zip_code: str
+
+class Person(BaseModel):
+    name: str
+    age: int
+    addresses: List[Address]
+    phone: Optional[str] = None
+
+response = await client.agent_run(
+    provider_uid="openai",
+    model="gpt-4",
+    messages=[TextMessage(role="user", content="Extract person with addresses")],
+    output_schema=Person  # Nested models are automatically resolved
+)
+```
+
+**With streaming:**
+```python
+from pydantic import BaseModel
+
+class Summary(BaseModel):
+    title: str
+    key_points: List[str]
+    word_count: int
+
+stream = client.agent_run_stream(
+    provider_uid="openai",
+    model="gpt-4",
+    messages=[TextMessage(role="user", content="Summarize this article")],
+    output_schema=Summary
+)
+
+async for chunk in stream:
+    print(chunk.output, end="", flush=True)
+
+# After streaming completes, parse the full JSON output
+full_output = "".join([chunk.output async for chunk in stream])
+data = json.loads(full_output)
+```
+
+**Response fields:**
+- `output` - The JSON string response matching your schema
+
+**Use cases:**
+- Data extraction and parsing
+- API response formatting
+- Structured data generation
+- Type-safe responses
+- Integration with type-checked code
+
 ### Audio Services
 
 #### Text-to-Speech
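
One caveat on the streaming example added above: it iterates `stream` twice (once in the `async for` loop and again in the list comprehension), which a single async generator will not support. A minimal corrected sketch that collects chunks while printing; the same fix applies to the identical example in the README.md hunk further down:

```python
chunks = []
async for chunk in stream:
    print(chunk.output, end="", flush=True)
    chunks.append(chunk.output)

# Parse once the stream is exhausted
data = json.loads("".join(chunks))
```
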
@@ -411,11 +548,17 @@ async def transcribe_live_direct():
     )
 
     # Stream audio and receive transcriptions
-
-
-
-
-
+    # Each iteration yields a list of responses (oldest to newest)
+    async for responses in client.start_session(init_request, audio_source()):
+        # Get the latest transcription (last element)
+        latest = responses[-1]
+        print(f"Latest transcription: {latest.transcription}")
+
+        # Process all accumulated transcriptions if needed
+        if len(responses) > 1:
+            print(f"  (received {len(responses)} chunks)")
+            for resp in responses:
+                print(f"  - {resp.transcription}")
 
 asyncio.run(transcribe_live_direct())
 ```
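
The live-transcription examples above and below call an `audio_source()` generator that these hunks never define. A minimal sketch of what it could look like; the file name and chunk size are assumptions (raw 16-bit PCM, roughly 100 ms per chunk at 16 kHz mono):

```python
from livellm.models import TranscriptionAudioChunkWsRequest

async def audio_source():
    # speech.pcm is a hypothetical raw PCM capture
    with open("speech.pcm", "rb") as f:
        while chunk := f.read(3200):
            yield TranscriptionAudioChunkWsRequest(audio=chunk)
```
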
@@ -453,25 +596,25 @@ async def transcribe_and_chat():
         gen_config={},
     )
 
-    # Listen for transcriptions and, for each
-
-
+    # Listen for transcriptions and, for each batch, run an agent request
+    # Each iteration yields a list of responses - newest is last
+    async for responses in t_client.start_session(init_request, audio_source()):
+        # Use the latest transcription for the agent
+        latest = responses[-1]
+        print("User said:", latest.transcription)
 
         # You can call agent_run (or speak, etc.) while the transcription stream is active
+        # Even if this is slow, transcriptions accumulate and won't stall the loop
         agent_response = await realtime.agent_run(
             provider_uid="openai",
             model="gpt-4",
             messages=[
-                TextMessage(role="user", content=
+                TextMessage(role="user", content=latest.transcription),
             ],
             temperature=0.7,
         )
         print("Agent:", agent_response.output)
 
-        if resp.is_end:
-            print("Transcription session complete")
-            break
-
 asyncio.run(transcribe_and_chat())
 ```
@@ -586,7 +729,7 @@ response = await client.ping()
 **Real-Time Transcription (TranscriptionWsClient)**
 - `connect()` - Establish WebSocket connection
 - `disconnect()` - Close WebSocket connection
-- `start_session(init_request, audio_source)` - Start bidirectional streaming transcription
+- `start_session(init_request, audio_source)` - Start bidirectional streaming transcription; yields `list[TranscriptionWsResponse]` (accumulated responses, newest last)
 - `async with client:` - Auto connection management (recommended)
 
 **Cleanup**
@@ -607,7 +750,7 @@ response = await client.ping()
 - `MessageRole` - `USER` | `MODEL` | `SYSTEM` | `TOOL_CALL` | `TOOL_RETURN` (or use strings)
 
 **Requests**
-- `AgentRequest(provider_uid, model, messages, tools?, gen_config?, include_history?)` - Set `include_history=True` to get full conversation
+- `AgentRequest(provider_uid, model, messages, tools?, gen_config?, include_history?, output_schema?)` - Set `include_history=True` to get full conversation. Set `output_schema` for structured JSON output.
 - `SpeakRequest(provider_uid, model, text, voice, mime_type, sample_rate, gen_config?)`
 - `TranscribeRequest(provider_uid, file, model, language?, gen_config?)`
 - `TranscriptionInitWsRequest(provider_uid, model, language?, input_sample_rate?, input_audio_format?, gen_config?)`
@@ -617,15 +760,20 @@ response = await client.ping()
 - `WebSearchInput(kind=ToolKind.WEB_SEARCH, search_context_size)`
 - `MCPStreamableServerInput(kind=ToolKind.MCP_STREAMABLE_SERVER, url, prefix?, timeout?)`
 
+**Structured Output**
+- `OutputSchema(title, description?, properties, required?, additionalProperties?)` - JSON Schema for structured output
+- `PropertyDef(type, description?, enum?, default?, minLength?, maxLength?, pattern?, minimum?, maximum?, items?, ...)` - Property definition with validation constraints
+- `OutputSchema.from_pydantic(model)` - Convert a Pydantic BaseModel class to OutputSchema
+
 **Fallback**
 - `AgentFallbackRequest(strategy, requests, timeout_per_request?)`
 - `AudioFallbackRequest(strategy, requests, timeout_per_request?)`
 - `FallbackStrategy` - `SEQUENTIAL` | `PARALLEL`
 
 **Responses**
-- `AgentResponse(output, usage{input_tokens, output_tokens}, history?)` - `history` included when `include_history=True`
+- `AgentResponse(output, usage{input_tokens, output_tokens}, history?)` - `history` included when `include_history=True`. `output` is a JSON string when `output_schema` is provided.
 - `TranscribeResponse(text, language)`
-- `TranscriptionWsResponse(transcription,
+- `TranscriptionWsResponse(transcription, received_at)` - Real-time transcription result; yielded as `list[TranscriptionWsResponse]` with newest last
 
 ## Error Handling
 
--- livellm-1.5.4/README.md
+++ livellm-1.6.1/README.md
@@ -12,6 +12,7 @@ Python client library for the LiveLLM Server - a unified proxy for AI agent, aud
 - 🎯 **Multi-provider** - OpenAI, Google, Anthropic, Groq, ElevenLabs
 - 🔄 **Streaming** - Real-time streaming for agent and audio
 - 🛠️ **Flexible API** - Use request objects or keyword arguments
+- 📋 **Structured Output** - Get validated JSON responses with schema support (Pydantic, OutputSchema, or dict)
 - 🎙️ **Audio services** - Text-to-speech and transcription
 - 🎤 **Real-Time Transcription** - WebSocket-based live audio transcription with bidirectional streaming
 - ⚡ **Fallback strategies** - Sequential and parallel handling
@@ -275,6 +276,146 @@ if response.history:
 - Auditing and logging complete conversations
 - Building conversational UIs with full context visibility
 
+#### Agent with Structured Output
+
+Get structured JSON responses from the agent by providing an output schema. The agent will return a JSON string matching your schema in the `output` field.
+
+**Three ways to define a schema:**
+
+**1. Using Pydantic BaseModel (Recommended)**
+```python
+import json
+from pydantic import BaseModel
+from livellm.models import TextMessage
+
+class Person(BaseModel):
+    name: str
+    age: int
+    occupation: str
+
+response = await client.agent_run(
+    provider_uid="openai",
+    model="gpt-4",
+    messages=[TextMessage(role="user", content="Extract info: John is a 28-year-old engineer")],
+    output_schema=Person  # Pass the BaseModel class directly
+)
+
+# response.output is a JSON string: '{"name": "John", "age": 28, "occupation": "engineer"}'
+print(type(response.output))  # <class 'str'>
+
+# Parse the JSON string yourself if needed
+data = json.loads(response.output)
+print(f"Name: {data['name']}")
+print(f"Age: {data['age']}")
+print(f"Occupation: {data['occupation']}")
+
+# Or validate with your Pydantic model
+person = Person.model_validate_json(response.output)
+print(f"Name: {person.name}")
+```
+
+**2. Using OutputSchema**
+```python
+from livellm.models import OutputSchema, PropertyDef, TextMessage
+
+schema = OutputSchema(
+    title="Person",
+    description="A person's information",
+    properties={
+        "name": PropertyDef(type="string", description="The person's name"),
+        "age": PropertyDef(type="integer", minimum=0, maximum=150, description="Age in years"),
+        "email": PropertyDef(type="string", pattern="^[^@]+@[^@]+\\.[^@]+$", description="Email address"),
+    },
+    required=["name", "age", "email"]
+)
+
+response = await client.agent_run(
+    provider_uid="openai",
+    model="gpt-4",
+    messages=[TextMessage(role="user", content="Tell me about a person")],
+    output_schema=schema
+)
+```
+
+**3. Using a dictionary (JSON Schema)**
+```python
+schema_dict = {
+    "title": "Person",
+    "type": "object",
+    "properties": {
+        "name": {"type": "string", "description": "The person's name"},
+        "age": {"type": "integer", "minimum": 0, "maximum": 150},
+        "email": {"type": "string", "pattern": "^[^@]+@[^@]+\\.[^@]+$"}
+    },
+    "required": ["name", "age", "email"]
+}
+
+response = await client.agent_run(
+    provider_uid="openai",
+    model="gpt-4",
+    messages=[TextMessage(role="user", content="Extract person info")],
+    output_schema=schema_dict
+)
+```
+
+**Complex nested schemas:**
+```python
+from pydantic import BaseModel
+from typing import List, Optional
+
+class Address(BaseModel):
+    street: str
+    city: str
+    zip_code: str
+
+class Person(BaseModel):
+    name: str
+    age: int
+    addresses: List[Address]
+    phone: Optional[str] = None
+
+response = await client.agent_run(
+    provider_uid="openai",
+    model="gpt-4",
+    messages=[TextMessage(role="user", content="Extract person with addresses")],
+    output_schema=Person  # Nested models are automatically resolved
+)
+```
+
+**With streaming:**
+```python
+from pydantic import BaseModel
+
+class Summary(BaseModel):
+    title: str
+    key_points: List[str]
+    word_count: int
+
+stream = client.agent_run_stream(
+    provider_uid="openai",
+    model="gpt-4",
+    messages=[TextMessage(role="user", content="Summarize this article")],
+    output_schema=Summary
+)
+
+async for chunk in stream:
+    print(chunk.output, end="", flush=True)
+
+# After streaming completes, parse the full JSON output
+full_output = "".join([chunk.output async for chunk in stream])
+data = json.loads(full_output)
+```
+
+**Response fields:**
+- `output` - The JSON string response matching your schema
+
+**Use cases:**
+- Data extraction and parsing
+- API response formatting
+- Structured data generation
+- Type-safe responses
+- Integration with type-checked code
+
 ### Audio Services
 
 #### Text-to-Speech
@@ -384,11 +525,17 @@ async def transcribe_live_direct():
     )
 
     # Stream audio and receive transcriptions
-
-
-
-
-
+    # Each iteration yields a list of responses (oldest to newest)
+    async for responses in client.start_session(init_request, audio_source()):
+        # Get the latest transcription (last element)
+        latest = responses[-1]
+        print(f"Latest transcription: {latest.transcription}")
+
+        # Process all accumulated transcriptions if needed
+        if len(responses) > 1:
+            print(f"  (received {len(responses)} chunks)")
+            for resp in responses:
+                print(f"  - {resp.transcription}")
 
 asyncio.run(transcribe_live_direct())
 ```
@@ -426,25 +573,25 @@ async def transcribe_and_chat():
         gen_config={},
     )
 
-    # Listen for transcriptions and, for each
-
-
+    # Listen for transcriptions and, for each batch, run an agent request
+    # Each iteration yields a list of responses - newest is last
+    async for responses in t_client.start_session(init_request, audio_source()):
+        # Use the latest transcription for the agent
+        latest = responses[-1]
+        print("User said:", latest.transcription)
 
         # You can call agent_run (or speak, etc.) while the transcription stream is active
+        # Even if this is slow, transcriptions accumulate and won't stall the loop
        agent_response = await realtime.agent_run(
             provider_uid="openai",
             model="gpt-4",
             messages=[
-                TextMessage(role="user", content=
+                TextMessage(role="user", content=latest.transcription),
             ],
             temperature=0.7,
         )
         print("Agent:", agent_response.output)
 
-        if resp.is_end:
-            print("Transcription session complete")
-            break
-
 asyncio.run(transcribe_and_chat())
 ```
@@ -559,7 +706,7 @@ response = await client.ping()
 **Real-Time Transcription (TranscriptionWsClient)**
 - `connect()` - Establish WebSocket connection
 - `disconnect()` - Close WebSocket connection
-- `start_session(init_request, audio_source)` - Start bidirectional streaming transcription
+- `start_session(init_request, audio_source)` - Start bidirectional streaming transcription; yields `list[TranscriptionWsResponse]` (accumulated responses, newest last)
 - `async with client:` - Auto connection management (recommended)
 
 **Cleanup**
@@ -580,7 +727,7 @@ response = await client.ping()
 - `MessageRole` - `USER` | `MODEL` | `SYSTEM` | `TOOL_CALL` | `TOOL_RETURN` (or use strings)
 
 **Requests**
-- `AgentRequest(provider_uid, model, messages, tools?, gen_config?, include_history?)` - Set `include_history=True` to get full conversation
+- `AgentRequest(provider_uid, model, messages, tools?, gen_config?, include_history?, output_schema?)` - Set `include_history=True` to get full conversation. Set `output_schema` for structured JSON output.
 - `SpeakRequest(provider_uid, model, text, voice, mime_type, sample_rate, gen_config?)`
 - `TranscribeRequest(provider_uid, file, model, language?, gen_config?)`
 - `TranscriptionInitWsRequest(provider_uid, model, language?, input_sample_rate?, input_audio_format?, gen_config?)`
@@ -590,15 +737,20 @@ response = await client.ping()
 - `WebSearchInput(kind=ToolKind.WEB_SEARCH, search_context_size)`
 - `MCPStreamableServerInput(kind=ToolKind.MCP_STREAMABLE_SERVER, url, prefix?, timeout?)`
 
+**Structured Output**
+- `OutputSchema(title, description?, properties, required?, additionalProperties?)` - JSON Schema for structured output
+- `PropertyDef(type, description?, enum?, default?, minLength?, maxLength?, pattern?, minimum?, maximum?, items?, ...)` - Property definition with validation constraints
+- `OutputSchema.from_pydantic(model)` - Convert a Pydantic BaseModel class to OutputSchema
+
 **Fallback**
 - `AgentFallbackRequest(strategy, requests, timeout_per_request?)`
 - `AudioFallbackRequest(strategy, requests, timeout_per_request?)`
 - `FallbackStrategy` - `SEQUENTIAL` | `PARALLEL`
 
 **Responses**
-- `AgentResponse(output, usage{input_tokens, output_tokens}, history?)` - `history` included when `include_history=True`
+- `AgentResponse(output, usage{input_tokens, output_tokens}, history?)` - `history` included when `include_history=True`. `output` is a JSON string when `output_schema` is provided.
 - `TranscribeResponse(text, language)`
-- `TranscriptionWsResponse(transcription,
+- `TranscriptionWsResponse(transcription, received_at)` - Real-time transcription result; yielded as `list[TranscriptionWsResponse]` with newest last
 
 ## Error Handling
 
--- livellm-1.5.4/livellm/livellm.py
+++ livellm-1.6.1/livellm/livellm.py
@@ -3,9 +3,10 @@ import asyncio
 import httpx
 import json
 import warnings
-from typing import List, Optional, AsyncIterator, Union, overload, Dict
+from typing import List, Optional, AsyncIterator, Union, overload, Dict, Any, Type
 from .models.common import Settings, SuccessResponse
 from .models.agent.agent import AgentRequest, AgentResponse
+from .models.agent.output_schema import OutputSchema
 from .models.audio.speak import SpeakRequest, EncodedSpeakResponse
 from .models.audio.transcribe import TranscribeRequest, TranscribeResponse, File
 from .models.fallback import AgentFallbackRequest, AudioFallbackRequest, TranscribeFallbackRequest
@@ -15,10 +16,19 @@ from .transcripton import TranscriptionWsClient
 from uuid import uuid4
 import logging
 from abc import ABC, abstractmethod
+from importlib.metadata import version, PackageNotFoundError
+from pydantic import BaseModel
 
 
 logger = logging.getLogger(__name__)
 
+try:
+    __version__ = version("livellm")
+except PackageNotFoundError:
+    __version__ = "unknown"
+
+DEFAULT_USER_AGENT = f"livellm-python/{__version__}"
+
 class BaseLivellmClient(ABC):
 
     @overload
@@ -37,6 +47,7 @@ class BaseLivellmClient(ABC):
         messages: list,
         tools: Optional[list] = None,
         include_history: bool = False,
+        output_schema: Optional[Union[OutputSchema, Dict[str, Any], Type[BaseModel]]] = None,
         **kwargs
     ) -> AgentResponse:
         ...
@@ -55,6 +66,7 @@ class BaseLivellmClient(ABC):
         messages: Optional[list] = None,
         tools: Optional[list] = None,
         include_history: bool = False,
+        output_schema: Optional[Union[OutputSchema, Dict[str, Any], Type[BaseModel]]] = None,
         **kwargs
     ) -> AgentResponse:
         """
@@ -72,7 +84,8 @@ class BaseLivellmClient(ABC):
                 model="gpt-4",
                 messages=[TextMessage(...)],
                 tools=[],
-                include_history=False
+                include_history=False,
+                output_schema=MyPydanticModel  # or OutputSchema(...) or dict
             )
 
         Args:
@@ -83,9 +96,14 @@ class BaseLivellmClient(ABC):
             tools: Optional list of tools
             gen_config: Optional generation configuration
             include_history: Whether to include full conversation history in the response
+            output_schema: Optional schema for structured output. Can be:
+                - An OutputSchema instance
+                - A dict representing a JSON schema
+                - A Pydantic BaseModel class (will be converted to OutputSchema)
 
         Returns:
-            AgentResponse with the agent's output
+            AgentResponse with the agent's output. If output_schema was provided,
+            the output will be a JSON string matching the schema.
         """
         # Check if first argument is a request object
         if request is not None:
@@ -102,16 +120,39 @@ class BaseLivellmClient(ABC):
                 "Alternatively, pass an AgentRequest object as the first positional argument."
             )
 
+        # Convert output_schema if it's a Pydantic BaseModel class
+        resolved_schema = self._resolve_output_schema(output_schema)
+
         agent_request = AgentRequest(
             provider_uid=provider_uid,
             model=model,
             messages=messages,
             tools=tools or [],
             gen_config=kwargs or None,
-            include_history=include_history
+            include_history=include_history,
+            output_schema=resolved_schema
         )
         return await self.handle_agent_run(agent_request)
 
+    def _resolve_output_schema(
+        self,
+        output_schema: Optional[Union[OutputSchema, Dict[str, Any], Type[BaseModel]]]
+    ) -> Optional[Union[OutputSchema, Dict[str, Any]]]:
+        """
+        Resolve the output_schema parameter to an OutputSchema or dict.
+
+        If a Pydantic BaseModel class is provided, convert it to OutputSchema.
+        """
+        if output_schema is None:
+            return None
+
+        # Check if it's a class (not an instance) that's a subclass of BaseModel
+        if isinstance(output_schema, type) and issubclass(output_schema, BaseModel):
+            return OutputSchema.from_pydantic(output_schema)
+
+        # Already an OutputSchema or dict, return as-is
+        return output_schema
+
     @overload
     def agent_run_stream(
         self,
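
An illustrative check of the resolver's behavior under the three accepted forms; this is a sketch only (it pokes a private helper), assuming `client` is any constructed client and `Person` is the Pydantic model from the README examples:

```python
from pydantic import BaseModel
from livellm.models import OutputSchema

class Person(BaseModel):
    name: str
    age: int

# A BaseModel class is converted; OutputSchema instances and dicts pass through
assert isinstance(client._resolve_output_schema(Person), OutputSchema)
raw = {"title": "Person", "type": "object", "properties": {}}
assert client._resolve_output_schema(raw) is raw
assert client._resolve_output_schema(None) is None
```
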
@@ -128,6 +169,7 @@ class BaseLivellmClient(ABC):
         messages: list,
         tools: Optional[list] = None,
         include_history: bool = False,
+        output_schema: Optional[Union[OutputSchema, Dict[str, Any], Type[BaseModel]]] = None,
         **kwargs
     ) -> AsyncIterator[AgentResponse]:
         ...
@@ -146,6 +188,7 @@ class BaseLivellmClient(ABC):
         messages: Optional[list] = None,
         tools: Optional[list] = None,
         include_history: bool = False,
+        output_schema: Optional[Union[OutputSchema, Dict[str, Any], Type[BaseModel]]] = None,
         **kwargs
     ) -> AsyncIterator[AgentResponse]:
         """
@@ -165,7 +208,8 @@ class BaseLivellmClient(ABC):
                 model="gpt-4",
                 messages=[TextMessage(...)],
                 tools=[],
-                include_history=False
+                include_history=False,
+                output_schema=MyPydanticModel  # or OutputSchema(...) or dict
             ):
                 ...
 
@@ -177,9 +221,14 @@ class BaseLivellmClient(ABC):
             tools: Optional list of tools
             gen_config: Optional generation configuration
             include_history: Whether to include full conversation history in the response
+            output_schema: Optional schema for structured output. Can be:
+                - An OutputSchema instance
+                - A dict representing a JSON schema
+                - A Pydantic BaseModel class (will be converted to OutputSchema)
 
         Returns:
-            AsyncIterator of AgentResponse chunks
+            AsyncIterator of AgentResponse chunks. If output_schema was provided,
+            the output will be a JSON string matching the schema.
         """
         # Check if first argument is a request object
         if request is not None:
@@ -196,13 +245,17 @@ class BaseLivellmClient(ABC):
                 "Alternatively, pass an AgentRequest object as the first positional argument."
             )
 
+        # Convert output_schema if it's a Pydantic BaseModel class
+        resolved_schema = self._resolve_output_schema(output_schema)
+
         agent_request = AgentRequest(
             provider_uid=provider_uid,
             model=model,
             messages=messages,
             tools=tools or [],
             gen_config=kwargs or None,
-            include_history=include_history
+            include_history=include_history,
+            output_schema=resolved_schema
         )
         stream = self.handle_agent_run_stream(agent_request)
 
@@ -505,7 +558,8 @@ class LivellmWsClient(BaseLivellmClient):
 
     def __init__(
         self,
-        base_url: str,
+        base_url: str,
+        user_agent: Optional[str] = None,
         timeout: Optional[float] = None,
         max_size: Optional[int] = None,
         max_buffer_size: Optional[int] = None
@@ -523,6 +577,7 @@ class LivellmWsClient(BaseLivellmClient):
         self._ws_root_base_url = ws_url
         self.base_url = f"{ws_url}/livellm/ws"
         self.timeout = timeout
+        self.user_agent = user_agent or DEFAULT_USER_AGENT
         self.websocket = None
         self.sessions: Dict[str, asyncio.Queue] = {}
         self.max_buffer_size = max_buffer_size or 0  # None means unlimited buffer size
@@ -541,7 +596,8 @@ class LivellmWsClient(BaseLivellmClient):
             self.base_url,
             open_timeout=self.timeout,
             close_timeout=self.timeout,
-            max_size=self.max_size
+            max_size=self.max_size,
+            additional_headers={"User-Agent": self.user_agent}
         )
         self.__listen_for_responses_task = asyncio.create_task(self.listen_for_responses())
 
@@ -680,7 +736,8 @@ class LivellmClient(BaseLivellmClient):
 
     def __init__(
         self,
-        base_url: str,
+        base_url: str,
+        user_agent: Optional[str] = None,
         timeout: Optional[float] = None,
         configs: Optional[List[Settings]] = None
     ):
@@ -689,11 +746,13 @@ class LivellmClient(BaseLivellmClient):
         # HTTP API base URL for this client
         self.base_url = f"{self._root_base_url}/livellm"
         self.timeout = timeout
+        self.user_agent = user_agent or DEFAULT_USER_AGENT
         self.client = httpx.AsyncClient(base_url=self.base_url, timeout=self.timeout) \
             if self.timeout else httpx.AsyncClient(base_url=self.base_url)
         self.settings = []
         self.headers = {
             "Content-Type": "application/json",
+            "User-Agent": self.user_agent,
         }
         # Lazily-created realtime (WebSocket) client
         self._realtime = None
@@ -713,7 +772,7 @@ class LivellmClient(BaseLivellmClient):
         """
         if self._realtime is None:
             # Pass the same root base URL; LivellmWsClient will handle ws/wss conversion.
-            self._realtime = LivellmWsClient(self._root_base_url, timeout=self.timeout)
+            self._realtime = LivellmWsClient(self._root_base_url, user_agent=self.user_agent, timeout=self.timeout)
         return self._realtime
 
     def update_configs_post_init(self, configs: List[Settings]) -> SuccessResponse:
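
Taken together, these hunks thread one `user_agent` string through both transports. A short usage sketch; the URL and agent string are placeholders:

```python
client = LivellmClient("https://api.example.com", user_agent="my-app/2.0")
# HTTP requests now send "User-Agent: my-app/2.0" via self.headers, and the
# lazily created realtime WebSocket client reuses the same string, so both
# transports identify themselves consistently. Omitting user_agent falls back
# to DEFAULT_USER_AGENT, e.g. "livellm-python/1.6.1".
```
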
@@ -858,32 +917,32 @@ class LivellmClient(BaseLivellmClient):
         if self._realtime is not None:
             await self._realtime.disconnect()
 
-    def __del__(self):
-        """
-        Destructor to clean up resources when the client is garbage collected.
-        This will close the HTTP client and attempt to delete configs if cleanup wasn't called.
-        Note: It's recommended to use the async context manager or call cleanup() explicitly.
-        """
-        # Warn user if cleanup wasn't called
-        if self.settings:
-            warnings.warn(
-                "LivellmClient is being garbage collected without explicit cleanup. "
-                "Provider configs may not be deleted from the server. "
-                "Consider using 'async with' or calling 'await client.cleanup()' explicitly.",
-                ResourceWarning,
-                stacklevel=2
-            )
+    # def __del__(self):
+    #     """
+    #     Destructor to clean up resources when the client is garbage collected.
+    #     This will close the HTTP client and attempt to delete configs if cleanup wasn't called.
+    #     Note: It's recommended to use the async context manager or call cleanup() explicitly.
+    #     """
+    #     # Warn user if cleanup wasn't called
+    #     if self.settings:
+    #         warnings.warn(
+    #             "LivellmClient is being garbage collected without explicit cleanup. "
+    #             "Provider configs may not be deleted from the server. "
+    #             "Consider using 'async with' or calling 'await client.cleanup()' explicitly.",
+    #             ResourceWarning,
+    #             stacklevel=2
+    #         )
 
-        # Close the httpx client synchronously
-        # httpx.AsyncClient stores a sync Transport that needs cleanup
-        try:
-            with httpx.Client(base_url=self.base_url) as client:
-                for config in self.settings:
-                    config: Settings = config
-                    client.delete(f"providers/config/{config.uid}", headers=self.headers)
-        except Exception:
-            # Silently fail - we're in a destructor
-            pass
+    #     # Close the httpx client synchronously
+    #     # httpx.AsyncClient stores a sync Transport that needs cleanup
+    #     try:
+    #         with httpx.Client(base_url=self.base_url) as client:
+    #             for config in self.settings:
+    #                 config: Settings = config
+    #                 client.delete(f"providers/config/{config.uid}", headers=self.headers)
+    #     except Exception:
+    #         # Silently fail - we're in a destructor
+    #         pass
 
     # Implement abstract methods from BaseLivellmClient
 
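
With `__del__` commented out, nothing deletes provider configs implicitly anymore, so cleanup has to be explicit. A sketch of the two patterns the old warning itself recommended (the URL is a placeholder):

```python
# Preferred: the async context manager handles cleanup automatically
async with LivellmClient("https://api.example.com") as client:
    ...

# Or clean up explicitly
client = LivellmClient("https://api.example.com")
try:
    ...
finally:
    await client.cleanup()
```
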
--- livellm-1.5.4/livellm/models/__init__.py
+++ livellm-1.6.1/livellm/models/__init__.py
@@ -3,6 +3,7 @@ from .fallback import AgentFallbackRequest, AudioFallbackRequest, TranscribeFall
 from .agent.agent import AgentRequest, AgentResponse, AgentResponseUsage
 from .agent.chat import Message, MessageRole, TextMessage, BinaryMessage, ToolCallMessage, ToolReturnMessage
 from .agent.tools import Tool, ToolInput, ToolKind, WebSearchInput, MCPStreamableServerInput
+from .agent.output_schema import OutputSchema, PropertyDef
 from .audio.speak import SpeakMimeType, SpeakRequest, SpeakStreamResponse
 from .audio.transcribe import TranscribeRequest, TranscribeResponse, File
 from .transcription import TranscriptionInitWsRequest, TranscriptionAudioChunkWsRequest, TranscriptionWsResponse
@@ -34,6 +35,8 @@ __all__ = [
     "ToolKind",
     "WebSearchInput",
     "MCPStreamableServerInput",
+    "OutputSchema",
+    "PropertyDef",
     # Audio
     "SpeakMimeType",
     "SpeakRequest",
--- livellm-1.5.4/livellm/models/agent/__init__.py
+++ livellm-1.6.1/livellm/models/agent/__init__.py
@@ -1,6 +1,7 @@
 from .agent import AgentRequest, AgentResponse, AgentResponseUsage
 from .chat import Message, MessageRole, TextMessage, BinaryMessage, ToolCallMessage, ToolReturnMessage
 from .tools import Tool, ToolInput, ToolKind, WebSearchInput, MCPStreamableServerInput
+from .output_schema import OutputSchema, PropertyDef
 
 
 __all__ = [
@@ -18,4 +19,6 @@ __all__ = [
     "ToolKind",
     "WebSearchInput",
     "MCPStreamableServerInput",
+    "OutputSchema",
+    "PropertyDef",
 ]
--- livellm-1.5.4/livellm/models/agent/agent.py
+++ livellm-1.6.1/livellm/models/agent/agent.py
@@ -1,9 +1,10 @@
 # models for full run: AgentRequest, AgentResponse
 
-from pydantic import BaseModel, Field
-from typing import Optional, List, Union
+from pydantic import BaseModel, Field
+from typing import Optional, List, Union, Any, Dict
 from .chat import TextMessage, BinaryMessage, ToolCallMessage, ToolReturnMessage
 from .tools import WebSearchInput, MCPStreamableServerInput
+from .output_schema import OutputSchema, PropertyDef
 from ..common import BaseRequest
 
 
@@ -13,12 +14,13 @@ class AgentRequest(BaseRequest):
     tools: List[Union[WebSearchInput, MCPStreamableServerInput]] = Field(default_factory=list, description="The tools to use")
     gen_config: Optional[dict] = Field(default=None, description="The configuration for the generation")
     include_history: bool = Field(default=False, description="Whether to include full conversation history in the response")
+    output_schema: Optional[Union[OutputSchema, Dict[str, Any]]] = Field(default=None, description="JSON schema for structured output. Can be an OutputSchema, a dict representing a JSON schema, or will be converted from a Pydantic BaseModel.")
 
 class AgentResponseUsage(BaseModel):
     input_tokens: int = Field(..., description="The number of input tokens used")
     output_tokens: int = Field(..., description="The number of output tokens used")
 
 class AgentResponse(BaseModel):
-    output: str = Field(..., description="The output of the response")
+    output: str = Field(..., description="The output of the response (JSON string when using output_schema)")
     usage: AgentResponseUsage = Field(..., description="The usage of the response")
-    history: Optional[List[Union[TextMessage, BinaryMessage, ToolCallMessage, ToolReturnMessage]]] = Field(default=None, description="Full conversation history including tool calls and returns (only included when include_history=true)")
+    history: Optional[List[Union[TextMessage, BinaryMessage, ToolCallMessage, ToolReturnMessage]]] = Field(default=None, description="Full conversation history including tool calls and returns (only included when include_history=true)")
--- /dev/null
+++ livellm-1.6.1/livellm/models/agent/output_schema.py
@@ -0,0 +1,120 @@
+"""Output schema models for structured output support."""
+
+from pydantic import BaseModel, ConfigDict, Field
+from typing import Optional, List, Dict, Any, Union
+
+
+class PropertyDef(BaseModel):
+    """Definition of a property in the output schema."""
+    model_config = ConfigDict(extra="allow")
+
+    type: Union[str, List[str]] = Field(..., description="Property type: string, integer, number, boolean, array, object, null")
+    description: Optional[str] = Field(default=None, description="Description of the property")
+    enum: Optional[List[Any]] = Field(default=None, description="Allowed values for the property")
+    default: Optional[Any] = Field(default=None, description="Default value")
+    # String constraints
+    minLength: Optional[int] = Field(default=None, description="Minimum string length")
+    maxLength: Optional[int] = Field(default=None, description="Maximum string length")
+    pattern: Optional[str] = Field(default=None, description="Regex pattern for string validation")
+    # Number constraints
+    minimum: Optional[float] = Field(default=None, description="Minimum number value")
+    maximum: Optional[float] = Field(default=None, description="Maximum number value")
+    exclusiveMinimum: Optional[float] = Field(default=None, description="Exclusive minimum number value")
+    exclusiveMaximum: Optional[float] = Field(default=None, description="Exclusive maximum number value")
+    # Array constraints
+    items: Optional[Union["PropertyDef", Dict[str, Any]]] = Field(default=None, description="Schema for array items")
+    minItems: Optional[int] = Field(default=None, description="Minimum array length")
+    maxItems: Optional[int] = Field(default=None, description="Maximum array length")
+    uniqueItems: Optional[bool] = Field(default=None, description="Whether array items must be unique")
+    # Object constraints
+    properties: Optional[Dict[str, Union["PropertyDef", Dict[str, Any]]]] = Field(default=None, description="Nested object properties")
+    required: Optional[List[str]] = Field(default=None, description="Required properties for nested objects")
+    additionalProperties: Optional[Union[bool, "PropertyDef", Dict[str, Any]]] = Field(default=None, description="Schema for additional properties")
+
+
+class OutputSchema(BaseModel):
+    """
+    Schema definition for structured output.
+
+    This model represents a JSON Schema that the AI model must follow when generating responses.
+    When provided, the agent will return a JSON string matching the specified schema.
+
+    Example:
+        schema = OutputSchema(
+            title="Person",
+            description="A person's information",
+            properties={
+                "name": PropertyDef(type="string", description="The person's name"),
+                "age": PropertyDef(type="integer", minimum=0, maximum=150),
+            },
+            required=["name", "age"]
+        )
+    """
+    model_config = ConfigDict(extra="allow")
+
+    title: str = Field(..., description="Name of the schema, used as the output tool name")
+    description: Optional[str] = Field(default=None, description="Description to help the model understand what to output")
+    properties: Dict[str, Union[PropertyDef, Dict[str, Any]]] = Field(..., description="Dictionary of property definitions")
+    required: Optional[List[str]] = Field(default=None, description="List of required property names")
+    additionalProperties: Optional[Union[bool, PropertyDef, Dict[str, Any]]] = Field(default=None, description="Whether extra properties are allowed")
+
+    @classmethod
+    def from_pydantic(cls, model: type[BaseModel]) -> "OutputSchema":
+        """
+        Create an OutputSchema from a Pydantic BaseModel class.
+
+        Args:
+            model: A Pydantic BaseModel class to convert to OutputSchema.
+
+        Returns:
+            An OutputSchema instance representing the model's schema.
+
+        Example:
+            class Person(BaseModel):
+                name: str
+                age: int
+
+            schema = OutputSchema.from_pydantic(Person)
+        """
+        json_schema = model.model_json_schema()
+
+        # Extract the main properties
+        title = json_schema.get("title", model.__name__)
+        description = json_schema.get("description")
+        properties = json_schema.get("properties", {})
+        required = json_schema.get("required")
+
+        # Handle $defs for nested models (Pydantic generates these for complex models)
+        defs = json_schema.get("$defs", {})
+        if defs:
+            # Inline the definitions into properties
+            properties = cls._resolve_refs(properties, defs)
+
+        return cls(
+            title=title,
+            description=description,
+            properties=properties,
+            required=required,
+        )
+
+    @classmethod
+    def _resolve_refs(cls, obj: Any, defs: Dict[str, Any]) -> Any:
+        """Recursively resolve $ref references in the schema."""
+        if isinstance(obj, dict):
+            if "$ref" in obj:
+                ref_path = obj["$ref"]
+                # Extract the definition name from "#/$defs/ModelName"
+                if ref_path.startswith("#/$defs/"):
+                    def_name = ref_path[len("#/$defs/"):]
+                    if def_name in defs:
+                        # Return the resolved definition (also resolve any nested refs)
+                        return cls._resolve_refs(defs[def_name], defs)
+                return obj
+            else:
+                return {k: cls._resolve_refs(v, defs) for k, v in obj.items()}
+        elif isinstance(obj, list):
+            return [cls._resolve_refs(item, defs) for item in obj]
+        else:
+            return obj
+
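
A small sanity check of `from_pydantic` with a nested model, showing the `$defs` that Pydantic emits for nested models getting inlined:

```python
from typing import List
from pydantic import BaseModel
from livellm.models import OutputSchema

class Address(BaseModel):
    street: str
    city: str

class Person(BaseModel):
    name: str
    addresses: List[Address]

schema = OutputSchema.from_pydantic(Person)
print(schema.title)     # "Person"
print(schema.required)  # ["name", "addresses"]
assert "$ref" not in str(schema.model_dump())  # Address fields were inlined
```
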
--- livellm-1.5.4/livellm/models/transcription.py
+++ livellm-1.6.1/livellm/models/transcription.py
@@ -2,6 +2,7 @@ from pydantic import BaseModel, Field, field_validator
 from livellm.models.audio.speak import SpeakMimeType
 from typing import Optional
 import base64
+from datetime import datetime
 
 class TranscriptionInitWsRequest(BaseModel):
     provider_uid: str = Field(..., description="The provider uid")
@@ -33,3 +34,4 @@ class TranscriptionAudioChunkWsRequest(BaseModel):
 
 class TranscriptionWsResponse(BaseModel):
     transcription: str = Field(..., description="The transcription")
+    received_at: datetime = Field(default_factory=datetime.now, description="The datetime when the transcription was received")
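
Since `received_at` defaults to the arrival time on the client, batches are easy to order or age-filter. A sketch assuming `responses` is one batch yielded by `start_session`:

```python
from datetime import datetime

latest = max(responses, key=lambda r: r.received_at)
lag_seconds = (datetime.now() - latest.received_at).total_seconds()
print(f"{latest.transcription!r} received {lag_seconds:.2f}s ago")
```
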
--- livellm-1.5.4/livellm/transcripton.py
+++ livellm-1.6.1/livellm/transcripton.py
@@ -47,7 +47,7 @@ class TranscriptionWsClient:
         self,
         request: TranscriptionInitWsRequest,
         source: AsyncIterator[TranscriptionAudioChunkWsRequest]
-    ) -> AsyncIterator[TranscriptionWsResponse]:
+    ) -> AsyncIterator[list[TranscriptionWsResponse]]:
         """
         Start a transcription session.
 
@@ -56,7 +56,10 @@ class TranscriptionWsClient:
             source: An async iterator that yields audio chunks to transcribe.
 
         Returns:
-            An async iterator of transcription
+            An async iterator that yields lists of transcription responses.
+            Each list contains all responses that accumulated since the last yield,
+            ordered from oldest to newest (last element is the most recent).
+            This prevents slow processing from stalling the entire loop.
 
         Example:
             ```python
@@ -66,8 +69,14 @@ class TranscriptionWsClient:
                     yield TranscriptionAudioChunkWsRequest(audio=chunk)
 
             async with TranscriptionWsClient(url) as client:
-                async for
-
+                async for responses in client.start_session(init_request, audio_source()):
+                    # responses is a list, newest transcription is last
+                    latest = responses[-1]
+                    print(f"Latest: {latest.transcription}")
+
+                    # Process all transcriptions if needed
+                    for resp in responses:
+                        print(resp.transcription)
             ```
         """
         # Send initialization request as JSON
@@ -79,6 +88,10 @@ class TranscriptionWsClient:
         if not init_response.success:
             raise Exception(f"Failed to start transcription session: {init_response.error}")
 
+        # Queue to collect incoming transcription responses
+        response_queue: asyncio.Queue[TranscriptionWsResponse | None] = asyncio.Queue()
+        receiver_done = False
+
         # Start sending audio chunks in background
         async def send_chunks():
             try:
@@ -93,23 +106,52 @@ class TranscriptionWsClient:
                 await self.websocket.close()
                 raise e
 
+        # Receive transcription responses in background
+        async def receive_responses():
+            nonlocal receiver_done
+            try:
+                while True:
+                    try:
+                        response_data = await self.websocket.recv()
+                        transcription_response = TranscriptionWsResponse(**json.loads(response_data))
+                        await response_queue.put(transcription_response)
+                    except websockets.ConnectionClosed:
+                        break
+            finally:
+                receiver_done = True
+                await response_queue.put(None)  # Signal end of stream
+
         send_task = asyncio.create_task(send_chunks())
+        receive_task = asyncio.create_task(receive_responses())
 
-        # Receive transcription responses
         try:
-            while True:
-
-
-
-
-            except websockets.ConnectionClosed:
-                # Connection closed, stop receiving
+            while True:
+                # Wait for at least one response
+                first_response = await response_queue.get()
+                if first_response is None:
+                    # End of stream
                     break
+
+                # Collect all additional responses that have accumulated (non-blocking)
+                responses = [first_response]
+                while True:
+                    try:
+                        additional = response_queue.get_nowait()
+                        if additional is None:
+                            # End of stream, yield what we have and exit
+                            yield responses
+                            return
+                        responses.append(additional)
+                    except asyncio.QueueEmpty:
+                        break
+
+                yield responses
         finally:
-            # Cancel
-
-
-
-
-
-
+            # Cancel tasks if still running
+            for task in [send_task, receive_task]:
+                if not task.done():
+                    task.cancel()
+                    try:
+                        await task
+                    except asyncio.CancelledError:
+                        pass
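
The consumer loop above uses a wait-then-drain pattern: block until one item arrives, then take everything already queued without blocking, so one slow consumer iteration never stalls the producer. The same pattern in isolation, with a `None` sentinel marking end of stream:

```python
import asyncio

async def batches(queue: asyncio.Queue):
    """Yield lists of queued items; each list is everything available at once."""
    while True:
        first = await queue.get()  # block until at least one item arrives
        if first is None:          # sentinel: producer is done
            return
        batch = [first]
        while True:                # drain whatever else has accumulated
            try:
                item = queue.get_nowait()
                if item is None:
                    yield batch
                    return
                batch.append(item)
            except asyncio.QueueEmpty:
                break
        yield batch
```
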
--- livellm-1.5.4/pyproject.toml
+++ livellm-1.6.1/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "livellm"
-version = "1.5.4"
+version = "1.6.1"
 description = "Python client for the LiveLLM Server"
 readme = "README.md"
 requires-python = ">=3.10"
@@ -24,8 +24,8 @@ classifiers = [
     "Typing :: Typed",
 ]
 
-[project.optional-dependencies]
-testing = [
+[dependency-groups]
+dev = [
     "pytest>=8.4.2",
     "pytest-asyncio>=0.21.0",
     "pytest-cov>=4.1.0"