livellm 1.1.1__py3-none-any.whl → 1.3.0__py3-none-any.whl
- livellm/__init__.py +6 -2
- livellm/livellm.py +671 -69
- livellm/models/__init__.py +5 -0
- livellm/models/agent/agent.py +3 -4
- livellm/models/audio/speak.py +13 -0
- livellm/models/audio/transcribe.py +7 -8
- livellm/models/fallback.py +3 -3
- livellm/models/transcription.py +32 -0
- livellm/models/ws.py +28 -0
- livellm/transcripton.py +114 -0
- livellm-1.3.0.dist-info/METADATA +634 -0
- livellm-1.3.0.dist-info/RECORD +20 -0
- livellm-1.1.1.dist-info/METADATA +0 -625
- livellm-1.1.1.dist-info/RECORD +0 -17
- {livellm-1.1.1.dist-info → livellm-1.3.0.dist-info}/WHEEL +0 -0
- {livellm-1.1.1.dist-info → livellm-1.3.0.dist-info}/licenses/LICENSE +0 -0
livellm/models/__init__.py
CHANGED
```diff
@@ -5,6 +5,7 @@ from .agent.chat import Message, MessageRole, TextMessage, BinaryMessage
 from .agent.tools import Tool, ToolInput, ToolKind, WebSearchInput, MCPStreamableServerInput
 from .audio.speak import SpeakMimeType, SpeakRequest, SpeakStreamResponse
 from .audio.transcribe import TranscribeRequest, TranscribeResponse, File
+from .transcription import TranscriptionInitWsRequest, TranscriptionAudioChunkWsRequest, TranscriptionWsResponse
 
 
 __all__ = [
@@ -38,4 +39,8 @@ __all__ = [
     "TranscribeRequest",
     "TranscribeResponse",
     "File",
+    # Real-time Transcription
+    "TranscriptionInitWsRequest",
+    "TranscriptionAudioChunkWsRequest",
+    "TranscriptionWsResponse",
 ]
```
livellm/models/agent/agent.py
CHANGED
```diff
@@ -1,6 +1,6 @@
 # models for full run: AgentRequest, AgentResponse
 
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, field_validator
 from typing import Optional, List, Union
 from .chat import TextMessage, BinaryMessage
 from .tools import WebSearchInput, MCPStreamableServerInput
@@ -9,11 +9,10 @@ from ..common import BaseRequest
 
 class AgentRequest(BaseRequest):
     model: str = Field(..., description="The model to use")
-    messages: List[Union[TextMessage, BinaryMessage]]
-    tools: List[Union[WebSearchInput, MCPStreamableServerInput]]
+    messages: List[Union[TextMessage, BinaryMessage]] = Field(..., description="The messages to use")
+    tools: List[Union[WebSearchInput, MCPStreamableServerInput]] = Field(default_factory=list, description="The tools to use")
     gen_config: Optional[dict] = Field(default=None, description="The configuration for the generation")
 
-
 class AgentResponseUsage(BaseModel):
     input_tokens: int = Field(..., description="The number of input tokens used")
     output_tokens: int = Field(..., description="The number of output tokens used")
```
livellm/models/audio/speak.py
CHANGED
```diff
@@ -2,6 +2,7 @@ from pydantic import BaseModel, Field, field_validator
 from typing import Optional, TypeAlias, Tuple, AsyncIterator
 from enum import Enum
 from ..common import BaseRequest
+import base64
 
 SpeakStreamResponse: TypeAlias = Tuple[AsyncIterator[bytes], str, int]
 
@@ -21,3 +22,15 @@ class SpeakRequest(BaseRequest):
     sample_rate: int = Field(..., description="The target sample rate of the output audio")
     chunk_size: int = Field(default=20, description="Chunk size in milliseconds for streaming (default: 20ms)")
     gen_config: Optional[dict] = Field(default=None, description="The configuration for the generation")
+
+class EncodedSpeakResponse(BaseModel):
+    audio: bytes | str = Field(..., description="The audio data as a base64 encoded string")
+    content_type: SpeakMimeType = Field(..., description="The content type of the audio")
+    sample_rate: int = Field(..., description="The sample rate of the audio")
+
+    @field_validator('audio', mode='after')
+    @classmethod
+    def validate_audio(cls, v: bytes | str) -> bytes:
+        if isinstance(v, bytes):
+            return base64.b64decode(v)  # decode from base64 string to bytes
+        return v  # if bytes, assume it's already a base64 decoded bytes
```
livellm/models/audio/transcribe.py
CHANGED

```diff
@@ -30,17 +30,16 @@ class TranscribeRequest(BaseRequest):
 
         # If content is already bytes, return as-is
         if isinstance(content, bytes):
-            return (filename, content, content_type)
+            try:
+                encoded_content = base64.b64encode(content).decode("utf-8") # base64 encode the content
+                return (filename, encoded_content, content_type)
+            except Exception as e:
+                raise ValueError(f"Failed to encode base64 content: {str(e)}")
 
         # If content is a string, assume it's base64 encoded
         elif isinstance(content, str):
-            try:
-                decoded_content = base64.b64decode(content)
-                return (filename, decoded_content, content_type)
-            except Exception as e:
-                raise ValueError(f"Failed to decode base64 content: {str(e)}")
-        else:
-            raise ValueError(f"file content must be either bytes or base64 string, got {type(content)}")
+            # assume it's already base64 encoded
+            return (filename, content, content_type)
 
 
 class TranscribeResponse(BaseModel):
```
livellm/models/fallback.py
CHANGED
```diff
@@ -1,4 +1,4 @@
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, model_validator
 from typing import List
 from .common import BaseRequest
 from .audio.speak import SpeakRequest
@@ -6,7 +6,7 @@ from .audio.transcribe import TranscribeRequest
 from .agent.agent import AgentRequest
 from enum import Enum
 
-class FallbackStrategy(Enum):
+class FallbackStrategy(str, Enum):
     SEQUENTIAL = "sequential"
     PARALLEL = "parallel"
 
@@ -14,7 +14,7 @@ class FallbackRequest(BaseModel):
     requests: List[BaseRequest] = Field(..., description="List of requests to try as fallbacks")
     strategy: FallbackStrategy = Field(FallbackStrategy.SEQUENTIAL, description="The strategy to use for fallback")
     timeout_per_request: int = Field(default=360, description="The timeout to use for each request")
-
+
 class AgentFallbackRequest(FallbackRequest):
     requests: List[AgentRequest] = Field(..., description="List of agent requests to try as fallbacks")
 
```
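Adding the `str` mixin makes `FallbackStrategy` members plain strings, which matters once the strategy is embedded in JSON payloads:

```python
import json
from livellm.models.fallback import FallbackStrategy

# str-mixin enum members compare equal to their values...
assert FallbackStrategy.SEQUENTIAL == "sequential"
# ...and serialize without a custom encoder:
print(json.dumps({"strategy": FallbackStrategy.PARALLEL}))  # {"strategy": "parallel"}
```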
livellm/models/transcription.py
ADDED

```python
from pydantic import BaseModel, Field, field_validator
from livellm.models.audio.speak import SpeakMimeType
import base64

class TranscriptionInitWsRequest(BaseModel):
    provider_uid: str = Field(..., description="The provider uid")
    model: str = Field(..., description="The model")
    language: str = Field(default="auto", description="The language")
    input_sample_rate: int = Field(default=24000, description="The input sample rate")
    input_audio_format: SpeakMimeType = Field(default=SpeakMimeType.PCM, description="The input audio format (pcm, ulaw, alaw)")
    gen_config: dict = Field(default={}, description="The generation configuration")


class TranscriptionAudioChunkWsRequest(BaseModel):
    audio: str = Field(..., description="The audio (base64 encoded)")

    @field_validator('audio', mode='before')
    @classmethod
    def validate_audio(cls, v: str | bytes) -> str:
        """
        encode audio to base64 string if needed
        """
        if isinstance(v, bytes):
            return base64.b64encode(v).decode("utf-8")
        elif isinstance(v, str):
            return v  # already base64 encoded
        else:
            raise ValueError(f"Invalid audio type: {type(v)}")

class TranscriptionWsResponse(BaseModel):
    transcription: str = Field(..., description="The transcription")
    is_end: bool = Field(..., description="Whether the response is the end of the transcription")
```
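Because the `audio` validator runs in `before` mode, callers can hand raw bytes straight to the model and read back a wire-ready base64 string:

```python
import base64
from livellm.models.transcription import TranscriptionAudioChunkWsRequest

chunk = TranscriptionAudioChunkWsRequest(audio=b"\x00\x01\x02")  # raw bytes accepted
assert chunk.audio == base64.b64encode(b"\x00\x01\x02").decode("utf-8")
print(chunk.model_dump_json())  # JSON with the audio already base64-encoded
```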
livellm/models/ws.py
ADDED
```python
from pydantic import BaseModel, Field
from enum import Enum
from typing import Union, Optional

class WsAction(str, Enum):
    AGENT_RUN = "agent_run"
    AGENT_RUN_STREAM = "agent_run_stream"
    AUDIO_SPEAK = "audio_speak"
    AUDIO_SPEAK_STREAM = "audio_speak_stream"
    AUDIO_TRANSCRIBE = "audio_transcribe"
    TRANSCRIPTION_SESSION = "transcription_session"


class WsStatus(str, Enum):
    STREAMING = "streaming"
    SUCCESS = "success"
    ERROR = "error"

class WsRequest(BaseModel):
    action: WsAction = Field(..., description="The action to perform")
    payload: Union[dict, BaseModel] = Field(..., description="The payload for the action")


class WsResponse(BaseModel):
    status: WsStatus = Field(..., description="The status of the response")
    action: WsAction = Field(..., description="The action that was performed")
    data: Union[dict, BaseModel] = Field(..., description="The data for the response")
    error: Optional[str] = Field(default=None, description="The error message if the response is an error")
```
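`WsRequest` and `WsResponse` form the envelope for every websocket action. A sketch of wrapping a transcription init payload; the provider and model names are placeholders, and the payload is dumped to a plain dict to stay on the `dict` side of the `Union`:

```python
from livellm.models.ws import WsRequest, WsAction
from livellm.models.transcription import TranscriptionInitWsRequest

init = TranscriptionInitWsRequest(provider_uid="my-provider", model="my-stt-model")  # placeholder values
envelope = WsRequest(
    action=WsAction.TRANSCRIPTION_SESSION,
    payload=init.model_dump(mode="json"),  # JSON-safe dict (enums become their values)
)
print(envelope.model_dump_json())
```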
livellm/transcripton.py
ADDED
````python
from livellm.models.transcription import (
    TranscriptionInitWsRequest,
    TranscriptionAudioChunkWsRequest,
    TranscriptionWsResponse)
from livellm.models.ws import WsResponse, WsStatus
from typing import Optional, AsyncIterator
import websockets
import asyncio
import json


class TranscriptionWsClient:
    def __init__(self, base_url: str, timeout: Optional[float] = None):
        self.base_url = base_url.rstrip("/")
        self.url = f"{base_url}/livellm/ws/transcription"
        self.timeout = timeout
        self.websocket = None

    async def connect(self):
        """
        Connect to the transcription websocket server.
        """
        self.websocket = await websockets.connect(
            self.url,
            open_timeout=self.timeout,
            close_timeout=self.timeout
        )

    async def disconnect(self):
        """
        Disconnect from the transcription websocket server.
        """
        if self.websocket is not None:
            await self.websocket.close()
            self.websocket = None

    async def __aenter__(self):
        await self.connect()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        await self.disconnect()

    async def start_session(
        self,
        request: TranscriptionInitWsRequest,
        source: AsyncIterator[TranscriptionAudioChunkWsRequest]
    ) -> AsyncIterator[TranscriptionWsResponse]:
        """
        Start a transcription session.

        Args:
            request: The initialization request for the transcription session.
            source: An async iterator that yields audio chunks to transcribe.

        Returns:
            An async iterator of transcription session responses.

        Example:
            ```python
            async def audio_source():
                with open("audio.pcm", "rb") as f:
                    while chunk := f.read(4096):
                        yield TranscriptionAudioChunkWsRequest(audio=chunk)

            async with TranscriptionWsClient(url) as client:
                async for response in client.start_session(init_request, audio_source()):
                    print(response.transcription)
                    if response.is_end:
                        break
            ```
        """
        # Send initialization request
        await self.websocket.send(request.model_dump_json())

        # Wait for initialization response
        response_data = await self.websocket.recv()
        response = WsResponse(**json.loads(response_data))
        if response.status == WsStatus.ERROR:
            raise Exception(f"Failed to start transcription session: {response.error}")

        # Start sending audio chunks in background
        async def send_chunks():
            try:
                async for chunk in source:
                    await self.websocket.send(chunk.model_dump_json())
            except Exception as e:
                # If there's an error sending chunks, close the websocket
                print(f"Error sending chunks: {e}")
                await self.websocket.close()
                raise e

        send_task = asyncio.create_task(send_chunks())

        # Receive transcription responses
        try:
            while not send_task.done():
                response_data = await self.websocket.recv()
                transcription_response = TranscriptionWsResponse(**json.loads(response_data))
                yield transcription_response

                # Stop if we received the final transcription
                if transcription_response.is_end:
                    break
        except websockets.ConnectionClosed:
            pass
        finally:
            # Cancel the send task if still running
            if not send_task.done():
                send_task.cancel()
                try:
                    await send_task
                except asyncio.CancelledError:
                    pass
````