livellm 1.1.1__py3-none-any.whl → 1.3.0__py3-none-any.whl
- livellm/__init__.py +6 -2
- livellm/livellm.py +671 -69
- livellm/models/__init__.py +5 -0
- livellm/models/agent/agent.py +3 -4
- livellm/models/audio/speak.py +13 -0
- livellm/models/audio/transcribe.py +7 -8
- livellm/models/fallback.py +3 -3
- livellm/models/transcription.py +32 -0
- livellm/models/ws.py +28 -0
- livellm/transcripton.py +114 -0
- livellm-1.3.0.dist-info/METADATA +634 -0
- livellm-1.3.0.dist-info/RECORD +20 -0
- livellm-1.1.1.dist-info/METADATA +0 -625
- livellm-1.1.1.dist-info/RECORD +0 -17
- {livellm-1.1.1.dist-info → livellm-1.3.0.dist-info}/WHEEL +0 -0
- {livellm-1.1.1.dist-info → livellm-1.3.0.dist-info}/licenses/LICENSE +0 -0
livellm/models/__init__.py
CHANGED
```diff
@@ -5,6 +5,7 @@ from .agent.chat import Message, MessageRole, TextMessage, BinaryMessage
 from .agent.tools import Tool, ToolInput, ToolKind, WebSearchInput, MCPStreamableServerInput
 from .audio.speak import SpeakMimeType, SpeakRequest, SpeakStreamResponse
 from .audio.transcribe import TranscribeRequest, TranscribeResponse, File
+from .transcription import TranscriptionInitWsRequest, TranscriptionAudioChunkWsRequest, TranscriptionWsResponse
 
 
 __all__ = [
@@ -38,4 +39,8 @@ __all__ = [
     "TranscribeRequest",
     "TranscribeResponse",
     "File",
+    # Real-time Transcription
+    "TranscriptionInitWsRequest",
+    "TranscriptionAudioChunkWsRequest",
+    "TranscriptionWsResponse",
 ]
```
livellm/models/agent/agent.py
CHANGED
```diff
@@ -1,6 +1,6 @@
 # models for full run: AgentRequest, AgentResponse
 
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, field_validator
 from typing import Optional, List, Union
 from .chat import TextMessage, BinaryMessage
 from .tools import WebSearchInput, MCPStreamableServerInput
@@ -9,11 +9,10 @@ from ..common import BaseRequest
 
 class AgentRequest(BaseRequest):
     model: str = Field(..., description="The model to use")
-    messages: List[Union[TextMessage, BinaryMessage]]
-    tools: List[Union[WebSearchInput, MCPStreamableServerInput]]
+    messages: List[Union[TextMessage, BinaryMessage]] = Field(..., description="The messages to use")
+    tools: List[Union[WebSearchInput, MCPStreamableServerInput]] = Field(default_factory=list, description="The tools to use")
     gen_config: Optional[dict] = Field(default=None, description="The configuration for the generation")
 
-
 class AgentResponseUsage(BaseModel):
     input_tokens: int = Field(..., description="The number of input tokens used")
     output_tokens: int = Field(..., description="The number of output tokens used")
```
livellm/models/audio/speak.py
CHANGED
```diff
@@ -2,6 +2,7 @@ from pydantic import BaseModel, Field, field_validator
 from typing import Optional, TypeAlias, Tuple, AsyncIterator
 from enum import Enum
 from ..common import BaseRequest
+import base64
 
 SpeakStreamResponse: TypeAlias = Tuple[AsyncIterator[bytes], str, int]
 
@@ -21,3 +22,15 @@ class SpeakRequest(BaseRequest):
     sample_rate: int = Field(..., description="The target sample rate of the output audio")
     chunk_size: int = Field(default=20, description="Chunk size in milliseconds for streaming (default: 20ms)")
     gen_config: Optional[dict] = Field(default=None, description="The configuration for the generation")
+
+class EncodedSpeakResponse(BaseModel):
+    audio: bytes | str = Field(..., description="The audio data as a base64 encoded string")
+    content_type: SpeakMimeType = Field(..., description="The content type of the audio")
+    sample_rate: int = Field(..., description="The sample rate of the audio")
+
+    @field_validator('audio', mode='after')
+    @classmethod
+    def validate_audio(cls, v: bytes | str) -> bytes:
+        if isinstance(v, bytes):
+            return base64.b64decode(v)  # decode from base64 string to bytes
+        return v  # if bytes, assume it's already a base64 decoded bytes
```
livellm/models/audio/transcribe.py
CHANGED

```diff
@@ -30,17 +30,16 @@ class TranscribeRequest(BaseRequest):
 
         # If content is already bytes, return as-is
         if isinstance(content, bytes):
-            return (filename, content, content_type)
+            try:
+                encoded_content = base64.b64encode(content).decode("utf-8") # base64 encode the content
+                return (filename, encoded_content, content_type)
+            except Exception as e:
+                raise ValueError(f"Failed to encode base64 content: {str(e)}")
 
         # If content is a string, assume it's base64 encoded
         elif isinstance(content, str):
-            try:
-                decoded_content = base64.b64decode(content)
-                return (filename, decoded_content, content_type)
-            except Exception as e:
-                raise ValueError(f"Failed to decode base64 content: {str(e)}")
-        else:
-            raise ValueError(f"file content must be either bytes or base64 string, got {type(content)}")
+            # assume it's already base64 encoded
+            return (filename, content, content_type)
 
 
 class TranscribeResponse(BaseModel):
```
livellm/models/fallback.py
CHANGED
```diff
@@ -1,4 +1,4 @@
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, model_validator
 from typing import List
 from .common import BaseRequest
 from .audio.speak import SpeakRequest
@@ -6,7 +6,7 @@ from .audio.transcribe import TranscribeRequest
 from .agent.agent import AgentRequest
 from enum import Enum
 
-class FallbackStrategy(Enum):
+class FallbackStrategy(str, Enum):
     SEQUENTIAL = "sequential"
     PARALLEL = "parallel"
 
@@ -14,7 +14,7 @@ class FallbackRequest(BaseModel):
     requests: List[BaseRequest] = Field(..., description="List of requests to try as fallbacks")
     strategy: FallbackStrategy = Field(FallbackStrategy.SEQUENTIAL, description="The strategy to use for fallback")
     timeout_per_request: int = Field(default=360, description="The timeout to use for each request")
-
+
 class AgentFallbackRequest(FallbackRequest):
     requests: List[AgentRequest] = Field(..., description="List of agent requests to try as fallbacks")
 
```
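Adding the `str` mixin makes `FallbackStrategy` members plain strings, which matters once the strategy is embedded in JSON payloads:

```python
import json
from livellm.models.fallback import FallbackStrategy

# str-mixin enum members compare equal to their values...
assert FallbackStrategy.SEQUENTIAL == "sequential"
# ...and serialize without a custom encoder:
print(json.dumps({"strategy": FallbackStrategy.PARALLEL}))  # {"strategy": "parallel"}
```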
livellm/models/transcription.py
ADDED

```python
from pydantic import BaseModel, Field, field_validator
from livellm.models.audio.speak import SpeakMimeType
import base64

class TranscriptionInitWsRequest(BaseModel):
    provider_uid: str = Field(..., description="The provider uid")
    model: str = Field(..., description="The model")
    language: str = Field(default="auto", description="The language")
    input_sample_rate: int = Field(default=24000, description="The input sample rate")
    input_audio_format: SpeakMimeType = Field(default=SpeakMimeType.PCM, description="The input audio format (pcm, ulaw, alaw)")
    gen_config: dict = Field(default={}, description="The generation configuration")


class TranscriptionAudioChunkWsRequest(BaseModel):
    audio: str = Field(..., description="The audio (base64 encoded)")

    @field_validator('audio', mode='before')
    @classmethod
    def validate_audio(cls, v: str | bytes) -> str:
        """
        encode audio to base64 string if needed
        """
        if isinstance(v, bytes):
            return base64.b64encode(v).decode("utf-8")
        elif isinstance(v, str):
            return v  # already base64 encoded
        else:
            raise ValueError(f"Invalid audio type: {type(v)}")

class TranscriptionWsResponse(BaseModel):
    transcription: str = Field(..., description="The transcription")
    is_end: bool = Field(..., description="Whether the response is the end of the transcription")
```
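Because the `audio` validator runs in `before` mode, callers can hand raw bytes straight to the model and read back a wire-ready base64 string:

```python
import base64
from livellm.models.transcription import TranscriptionAudioChunkWsRequest

chunk = TranscriptionAudioChunkWsRequest(audio=b"\x00\x01\x02")  # raw bytes accepted
assert chunk.audio == base64.b64encode(b"\x00\x01\x02").decode("utf-8")
print(chunk.model_dump_json())  # JSON with the audio already base64-encoded
```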
livellm/models/ws.py
ADDED
```python
from pydantic import BaseModel, Field
from enum import Enum
from typing import Union, Optional

class WsAction(str, Enum):
    AGENT_RUN = "agent_run"
    AGENT_RUN_STREAM = "agent_run_stream"
    AUDIO_SPEAK = "audio_speak"
    AUDIO_SPEAK_STREAM = "audio_speak_stream"
    AUDIO_TRANSCRIBE = "audio_transcribe"
    TRANSCRIPTION_SESSION = "transcription_session"


class WsStatus(str, Enum):
    STREAMING = "streaming"
    SUCCESS = "success"
    ERROR = "error"

class WsRequest(BaseModel):
    action: WsAction = Field(..., description="The action to perform")
    payload: Union[dict, BaseModel] = Field(..., description="The payload for the action")


class WsResponse(BaseModel):
    status: WsStatus = Field(..., description="The status of the response")
    action: WsAction = Field(..., description="The action that was performed")
    data: Union[dict, BaseModel] = Field(..., description="The data for the response")
    error: Optional[str] = Field(default=None, description="The error message if the response is an error")
```
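`WsRequest` and `WsResponse` form the envelope for every websocket action. A sketch of wrapping a transcription init payload; the provider and model names are placeholders, and the payload is dumped to a plain dict to stay on the `dict` side of the `Union`:

```python
from livellm.models.ws import WsRequest, WsAction
from livellm.models.transcription import TranscriptionInitWsRequest

init = TranscriptionInitWsRequest(provider_uid="my-provider", model="my-stt-model")  # placeholder values
envelope = WsRequest(
    action=WsAction.TRANSCRIPTION_SESSION,
    payload=init.model_dump(mode="json"),  # JSON-safe dict (enums become their values)
)
print(envelope.model_dump_json())
```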
livellm/transcripton.py
ADDED
````python
from livellm.models.transcription import (
    TranscriptionInitWsRequest,
    TranscriptionAudioChunkWsRequest,
    TranscriptionWsResponse)
from livellm.models.ws import WsResponse, WsStatus
from typing import Optional, AsyncIterator
import websockets
import asyncio
import json


class TranscriptionWsClient:
    def __init__(self, base_url: str, timeout: Optional[float] = None):
        self.base_url = base_url.rstrip("/")
        self.url = f"{base_url}/livellm/ws/transcription"
        self.timeout = timeout
        self.websocket = None

    async def connect(self):
        """
        Connect to the transcription websocket server.
        """
        self.websocket = await websockets.connect(
            self.url,
            open_timeout=self.timeout,
            close_timeout=self.timeout
        )

    async def disconnect(self):
        """
        Disconnect from the transcription websocket server.
        """
        if self.websocket is not None:
            await self.websocket.close()
            self.websocket = None

    async def __aenter__(self):
        await self.connect()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        await self.disconnect()

    async def start_session(
        self,
        request: TranscriptionInitWsRequest,
        source: AsyncIterator[TranscriptionAudioChunkWsRequest]
    ) -> AsyncIterator[TranscriptionWsResponse]:
        """
        Start a transcription session.

        Args:
            request: The initialization request for the transcription session.
            source: An async iterator that yields audio chunks to transcribe.

        Returns:
            An async iterator of transcription session responses.

        Example:
            ```python
            async def audio_source():
                with open("audio.pcm", "rb") as f:
                    while chunk := f.read(4096):
                        yield TranscriptionAudioChunkWsRequest(audio=chunk)

            async with TranscriptionWsClient(url) as client:
                async for response in client.start_session(init_request, audio_source()):
                    print(response.transcription)
                    if response.is_end:
                        break
            ```
        """
        # Send initialization request
        await self.websocket.send(request.model_dump_json())

        # Wait for initialization response
        response_data = await self.websocket.recv()
        response = WsResponse(**json.loads(response_data))
        if response.status == WsStatus.ERROR:
            raise Exception(f"Failed to start transcription session: {response.error}")

        # Start sending audio chunks in background
        async def send_chunks():
            try:
                async for chunk in source:
                    await self.websocket.send(chunk.model_dump_json())
            except Exception as e:
                # If there's an error sending chunks, close the websocket
                print(f"Error sending chunks: {e}")
                await self.websocket.close()
                raise e

        send_task = asyncio.create_task(send_chunks())

        # Receive transcription responses
        try:
            while not send_task.done():
                response_data = await self.websocket.recv()
                transcription_response = TranscriptionWsResponse(**json.loads(response_data))
                yield transcription_response

                # Stop if we received the final transcription
                if transcription_response.is_end:
                    break
        except websockets.ConnectionClosed:
            pass
        finally:
            # Cancel the send task if still running
            if not send_task.done():
                send_task.cancel()
                try:
                    await send_task
                except asyncio.CancelledError:
                    pass
````