PyPI - livellm - Versions diffs - 1.3.6__tar.gz → 1.4.5__tar.gz - Mend

livellm 1.3.6.tar.gz → 1.4.5.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21) hide show

{livellm-1.3.6 → livellm-1.4.5}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: livellm
-Version: 1.3.6
+Version: 1.4.5
 Summary: Python client for the LiveLLM Server
 Project-URL: Homepage, https://github.com/qalby-tech/livellm-client-py
 Project-URL: Repository, https://github.com/qalby-tech/livellm-client-py
@@ -17,6 +17,7 @@ Classifier: Typing :: Typed
 Requires-Python: >=3.10
 Requires-Dist: httpx>=0.27.0
 Requires-Dist: pydantic>=2.0.0
+Requires-Dist: sounddevice>=0.5.3
 Requires-Dist: websockets>=15.0.1
 Provides-Extra: testing
 Requires-Dist: pytest-asyncio>=0.21.0; extra == 'testing'

{livellm-1.3.6 → livellm-1.4.5}/livellm/livellm.py RENAMED Viewed

@@ -3,7 +3,7 @@ import asyncio
 import httpx
 import json
 import warnings
-from typing import List, Optional, AsyncIterator, Union, overload
+from typing import List, Optional, AsyncIterator, Union, overload, Dict
 from .models.common import Settings, SuccessResponse
 from .models.agent.agent import AgentRequest, AgentResponse
 from .models.audio.speak import SpeakRequest, EncodedSpeakResponse
@@ -12,9 +12,12 @@ from .models.fallback import AgentFallbackRequest, AudioFallbackRequest, Transcr
 import websockets
 from .models.ws import WsRequest, WsResponse, WsStatus, WsAction
 from .transcripton import TranscriptionWsClient
+from uuid import uuid4
+import logging
 from abc import ABC, abstractmethod
+logger = logging.getLogger(__name__)
 class BaseLivellmClient(ABC):
@@ -494,7 +497,8 @@ class LivellmWsClient(BaseLivellmClient):
         self,
         base_url: str,
         timeout: Optional[float] = None,
-        max_size: Optional[int] = None
+        max_size: Optional[int] = None,
+        max_buffer_size: Optional[int] = None
     ):
         # Convert HTTP(S) URL to WS(S) URL
         base_url = base_url.rstrip("/")
@@ -510,9 +514,13 @@ class LivellmWsClient(BaseLivellmClient):
         self.base_url = f"{ws_url}/livellm/ws"
         self.timeout = timeout
         self.websocket = None
+        self.sessions: Dict[str, asyncio.Queue] = {}
+        self.max_buffer_size = max_buffer_size or 0 # None means unlimited buffer size
         # Lazily-created clients
         self._transcription = None
         self.max_size = max_size or 1024 * 1024 * 10 # 10MB is default max size
+        self.__listen_for_responses_task = None
     async def connect(self):
         """Establish WebSocket connection."""
@@ -525,50 +533,78 @@ class LivellmWsClient(BaseLivellmClient):
             close_timeout=self.timeout,
             max_size=self.max_size
         )
+        self.__listen_for_responses_task = asyncio.create_task(self.listen_for_responses())
         return self.websocket
+    async def listen_for_responses(self):
+        while True:
+            response_data = await self.websocket.recv()
+            response = WsResponse(**json.loads(response_data))
+            try:
+                self.sessions[response.session_id].put_nowait(response)
+            except asyncio.QueueFull:
+                self.sessions[response.session_id].get_nowait()
+                logger.warning(f"Session {response.session_id} buffer is full, dropping oldest message")
+    async def get_or_update_session(self, session_id: str):
+        if session_id not in self.sessions:
+            self.sessions[session_id] = asyncio.Queue(maxsize=self.max_buffer_size)
+        return self.sessions[session_id]
     async def disconnect(self):
         """Close WebSocket connection."""
         if self.websocket is not None:
             await self.websocket.close()
             self.websocket = None
+        if self.__listen_for_responses_task is not None:
+            self.__listen_for_responses_task.cancel()
+            self.__listen_for_responses_task = None
+        self.sessions.clear()
-    async def get_response(self, action: WsAction, payload: dict) -> WsResponse:
+    async def get_response(self, action: WsAction, payload: dict) -> dict:
         """Send a request and wait for response."""
         if self.websocket is None:
             await self.connect()
-        request = WsRequest(action=action, payload=payload)
+        session_id = uuid4().hex
+        request = WsRequest(session_id=session_id, action=action, payload=payload)
+        q = await self.get_or_update_session(session_id)
         await self.websocket.send(json.dumps(request.model_dump()))
-        response_data = await self.websocket.recv()
-        response = WsResponse(**json.loads(response_data))
+        response: WsResponse = await q.get()
+        self.sessions.pop(session_id)
         if response.status == WsStatus.ERROR:
-            raise Exception(f"WebSocket request failed: {response.error}")
-        return response
+            raise Exception(f"WebSocket failed: {response.error}")
+        elif response.status == WsStatus.SUCCESS:
+            return response.data
+        else:
+            raise Exception(f"WebSocket failed with unknown status: {response}")
-    async def get_response_stream(self, action: WsAction, payload: dict) -> AsyncIterator[WsResponse]:
+    async def get_response_stream(self, action: WsAction, payload: dict) -> AsyncIterator[dict]:
         """Send a request and stream responses."""
         if self.websocket is None:
             await self.connect()
-        request = WsRequest(action=action, payload=payload)
+        session_id = uuid4().hex
+        request = WsRequest(session_id=session_id, action=action, payload=payload)
+        q = await self.get_or_update_session(session_id)
         await self.websocket.send(json.dumps(request.model_dump()))
         while True:
-            response_data = await self.websocket.recv()
-            response = WsResponse(**json.loads(response_data))
+            response: WsResponse = await q.get()
-            if response.status == WsStatus.ERROR:
-                raise Exception(f"WebSocket stream failed: {response.error}")
-            if response.status == WsStatus.SUCCESS:
+            if response.status == WsStatus.STREAMING:
+                yield response.data
+            elif response.status == WsStatus.SUCCESS:
+                self.sessions.pop(session_id)
                 break
-            yield response
+            elif response.status == WsStatus.ERROR:
+                self.sessions.pop(session_id)
+                raise Exception(f"WebSocket failed: {response.error}")
+            else:
+                self.sessions.pop(session_id)
+                raise Exception(f"WebSocket failed with unknown status: {response}")
     # Implement abstract methods from BaseLivellmClient
@@ -578,25 +614,25 @@ class LivellmWsClient(BaseLivellmClient):
             WsAction.AGENT_RUN,
             request.model_dump()
         )
-        return AgentResponse(**response.data)
+        return AgentResponse(**response)
     async def handle_agent_run_stream(self, request: Union[AgentRequest, AgentFallbackRequest]) -> AsyncIterator[AgentResponse]:
         """Handle streaming agent run via WebSocket."""
         async for response in self.get_response_stream(WsAction.AGENT_RUN_STREAM, request.model_dump()):
-            yield AgentResponse(**response.data)
+            yield AgentResponse(**response)
-    async def handle_speak(self, request: Union[SpeakRequest, AudioFallbackRequest]) -> EncodedSpeakResponse:
+    async def handle_speak(self, request: Union[SpeakRequest, AudioFallbackRequest]) -> bytes:
         """Handle speak request via WebSocket."""
         response = await self.get_response(
             WsAction.AUDIO_SPEAK,
             request.model_dump()
         )
-        return EncodedSpeakResponse(**response.data)
+        return EncodedSpeakResponse(**response).audio
-    async def handle_speak_stream(self, request: Union[SpeakRequest, AudioFallbackRequest]) -> AsyncIterator[EncodedSpeakResponse]:
+    async def handle_speak_stream(self, request: Union[SpeakRequest, AudioFallbackRequest]) -> AsyncIterator[bytes]:
         """Handle streaming speak request via WebSocket."""
         async for response in self.get_response_stream(WsAction.AUDIO_SPEAK_STREAM, request.model_dump()):
-            yield EncodedSpeakResponse(**response.data)
+            yield EncodedSpeakResponse(**response).audio
     async def handle_transcribe(self, request: Union[TranscribeRequest, TranscribeFallbackRequest]) -> TranscribeResponse:
         """Handle transcribe request via WebSocket."""
@@ -604,10 +640,9 @@ class LivellmWsClient(BaseLivellmClient):
             WsAction.AUDIO_TRANSCRIBE,
             request.model_dump()
         )
-        return TranscribeResponse(**response.data)
+        return TranscribeResponse(**response)
     # Context manager support
     async def __aenter__(self):
         await self.connect()
         return self

{livellm-1.3.6 → livellm-1.4.5}/livellm/models/common.py RENAMED Viewed

@@ -14,6 +14,7 @@ class ProviderKind(Enum):
     """Unified provider types for both agent and audio services"""
     # Agent providers
     OPENAI = "openai"
+    OPENAI_CHAT = "openai_chat"
     GOOGLE = "google"
     ANTHROPIC = "anthropic"
     GROQ = "groq"

{livellm-1.3.6 → livellm-1.4.5}/livellm/models/transcription.py RENAMED Viewed

@@ -1,5 +1,6 @@
 from pydantic import BaseModel, Field, field_validator
 from livellm.models.audio.speak import SpeakMimeType
+from typing import Optional
 import base64
 class TranscriptionInitWsRequest(BaseModel):
@@ -10,6 +11,9 @@ class TranscriptionInitWsRequest(BaseModel):
     input_audio_format: SpeakMimeType = Field(default=SpeakMimeType.PCM, description="The input audio format (pcm, ulaw, alaw)")
     gen_config: dict = Field(default={}, description="The generation configuration")
+class TranscriptionInitWsResponse(BaseModel):
+    success: bool = Field(..., description="Whether the initialization was successful")
+    error: Optional[str] = Field(default=None, description="The error message if the initialization was not successful")
 class TranscriptionAudioChunkWsRequest(BaseModel):
     audio: str = Field(..., description="The audio (base64 encoded)")
@@ -29,4 +33,3 @@ class TranscriptionAudioChunkWsRequest(BaseModel):
 class TranscriptionWsResponse(BaseModel):
     transcription: str = Field(..., description="The transcription")
-    is_end: bool = Field(..., description="Whether the response is the end of the transcription")

{livellm-1.3.6 → livellm-1.4.5}/livellm/models/ws.py RENAMED Viewed

@@ -8,7 +8,6 @@ class WsAction(str, Enum):
     AUDIO_SPEAK = "audio_speak"
     AUDIO_SPEAK_STREAM = "audio_speak_stream"
     AUDIO_TRANSCRIBE = "audio_transcribe"
-    TRANSCRIPTION_SESSION = "transcription_session"
 class WsStatus(str, Enum):
@@ -17,11 +16,13 @@ class WsStatus(str, Enum):
     ERROR = "error"
 class WsRequest(BaseModel):
+    session_id: str = Field(..., description="The session ID")
     action: WsAction = Field(..., description="The action to perform")
     payload: Union[dict, BaseModel] = Field(..., description="The payload for the action")
 class WsResponse(BaseModel):
+    session_id: str = Field(..., description="The session ID")
     status: WsStatus = Field(..., description="The status of the response")
     action: WsAction = Field(..., description="The action that was performed")
     data: Union[dict, BaseModel] = Field(..., description="The data for the response")

{livellm-1.3.6 → livellm-1.4.5}/livellm/transcripton.py RENAMED Viewed

@@ -1,8 +1,8 @@
 from livellm.models.transcription import (
-    TranscriptionInitWsRequest,
+    TranscriptionInitWsRequest,
+    TranscriptionInitWsResponse,
     TranscriptionAudioChunkWsRequest,
     TranscriptionWsResponse)
-from livellm.models.ws import WsResponse, WsStatus
 from typing import Optional, AsyncIterator
 import websockets
 import asyncio
@@ -68,24 +68,25 @@ class TranscriptionWsClient:
             async with TranscriptionWsClient(url) as client:
                 async for response in client.start_session(init_request, audio_source()):
                     print(response.transcription)
-                    if response.is_end:
-                        break
             ```
         """
-        # Send initialization request
+        # Send initialization request as JSON
         await self.websocket.send(request.model_dump_json())
         # Wait for initialization response
         response_data = await self.websocket.recv()
-        response = WsResponse(**json.loads(response_data))
-        if response.status == WsStatus.ERROR:
-            raise Exception(f"Failed to start transcription session: {response.error}")
+        init_response = TranscriptionInitWsResponse(**json.loads(response_data))
+        if not init_response.success:
+            raise Exception(f"Failed to start transcription session: {init_response.error}")
         # Start sending audio chunks in background
         async def send_chunks():
             try:
                 async for chunk in source:
                     await self.websocket.send(chunk.model_dump_json())
+            except websockets.ConnectionClosed:
+                # Connection closed, stop sending
+                pass
             except Exception as e:
                 # If there's an error sending chunks, close the websocket
                 print(f"Error sending chunks: {e}")
@@ -96,16 +97,14 @@ class TranscriptionWsClient:
         # Receive transcription responses
         try:
-            while not send_task.done():
-                response_data = await self.websocket.recv()
-                transcription_response = TranscriptionWsResponse(**json.loads(response_data))
-                yield transcription_response
-                # Stop if we received the final transcription
-                if transcription_response.is_end:
+            while True:
+                try:
+                    response_data = await self.websocket.recv()
+                    transcription_response = TranscriptionWsResponse(**json.loads(response_data))
+                    yield transcription_response
+                except websockets.ConnectionClosed:
+                    # Connection closed, stop receiving
                     break
-        except websockets.ConnectionClosed:
-            pass
         finally:
             # Cancel the send task if still running
             if not send_task.done():

{livellm-1.3.6 → livellm-1.4.5}/pyproject.toml RENAMED Viewed

@@ -1,12 +1,13 @@
 [project]
 name = "livellm"
-version = "1.3.6"
+version = "1.4.5"
 description = "Python client for the LiveLLM Server"
 readme = "README.md"
 requires-python = ">=3.10"
 dependencies = [
     "httpx>=0.27.0",
     "pydantic>=2.0.0",
+    "sounddevice>=0.5.3",
     "websockets>=15.0.1",
 ]
 authors = [