livellm 1.3.5__tar.gz → 1.4.0__tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: livellm
3
- Version: 1.3.5
3
+ Version: 1.4.0
4
4
  Summary: Python client for the LiveLLM Server
5
5
  Project-URL: Homepage, https://github.com/qalby-tech/livellm-client-py
6
6
  Project-URL: Repository, https://github.com/qalby-tech/livellm-client-py
@@ -17,6 +17,7 @@ Classifier: Typing :: Typed
17
17
  Requires-Python: >=3.10
18
18
  Requires-Dist: httpx>=0.27.0
19
19
  Requires-Dist: pydantic>=2.0.0
20
+ Requires-Dist: sounddevice>=0.5.3
20
21
  Requires-Dist: websockets>=15.0.1
21
22
  Provides-Extra: testing
22
23
  Requires-Dist: pytest-asyncio>=0.21.0; extra == 'testing'
@@ -3,7 +3,7 @@ import asyncio
3
3
  import httpx
4
4
  import json
5
5
  import warnings
6
- from typing import List, Optional, AsyncIterator, Union, overload
6
+ from typing import List, Optional, AsyncIterator, Union, overload, Dict
7
7
  from .models.common import Settings, SuccessResponse
8
8
  from .models.agent.agent import AgentRequest, AgentResponse
9
9
  from .models.audio.speak import SpeakRequest, EncodedSpeakResponse
@@ -12,9 +12,12 @@ from .models.fallback import AgentFallbackRequest, AudioFallbackRequest, Transcr
12
12
  import websockets
13
13
  from .models.ws import WsRequest, WsResponse, WsStatus, WsAction
14
14
  from .transcripton import TranscriptionWsClient
15
+ from uuid import uuid4
16
+ import logging
15
17
  from abc import ABC, abstractmethod
16
18
 
17
19
 
20
+ logger = logging.getLogger(__name__)
18
21
 
19
22
  class BaseLivellmClient(ABC):
20
23
 
@@ -494,7 +497,8 @@ class LivellmWsClient(BaseLivellmClient):
494
497
  self,
495
498
  base_url: str,
496
499
  timeout: Optional[float] = None,
497
- max_size: Optional[int] = None
500
+ max_size: Optional[int] = None,
501
+ max_buffer_size: Optional[int] = None
498
502
  ):
499
503
  # Convert HTTP(S) URL to WS(S) URL
500
504
  base_url = base_url.rstrip("/")
@@ -510,9 +514,13 @@ class LivellmWsClient(BaseLivellmClient):
510
514
  self.base_url = f"{ws_url}/livellm/ws"
511
515
  self.timeout = timeout
512
516
  self.websocket = None
517
+ self.sessions: Dict[str, asyncio.Queue] = {}
518
+ self.max_buffer_size = max_buffer_size or 0 # None means unlimited buffer size
513
519
  # Lazily-created clients
514
520
  self._transcription = None
515
521
  self.max_size = max_size or 1024 * 1024 * 10 # 10MB is default max size
522
+
523
+ self.__listen_for_responses_task = None
516
524
 
517
525
  async def connect(self):
518
526
  """Establish WebSocket connection."""
@@ -525,50 +533,78 @@ class LivellmWsClient(BaseLivellmClient):
525
533
  close_timeout=self.timeout,
526
534
  max_size=self.max_size
527
535
  )
536
+ self.__listen_for_responses_task = asyncio.create_task(self.listen_for_responses())
528
537
 
529
538
  return self.websocket
530
-
539
+
540
+ async def listen_for_responses(self):
541
+ while True:
542
+ response_data = await self.websocket.recv()
543
+ response = WsResponse(**json.loads(response_data))
544
+ try:
545
+ self.sessions[response.session_id].put_nowait(response)
546
+ except asyncio.QueueFull:
547
+ self.sessions[response.session_id].get_nowait()
548
+ logger.warning(f"Session {response.session_id} buffer is full, dropping oldest message")
549
+
550
+ async def get_or_update_session(self, session_id: str):
551
+ if session_id not in self.sessions:
552
+ self.sessions[session_id] = asyncio.Queue(maxsize=self.max_buffer_size)
553
+ return self.sessions[session_id]
554
+
531
555
  async def disconnect(self):
532
556
  """Close WebSocket connection."""
533
557
  if self.websocket is not None:
534
558
  await self.websocket.close()
535
559
  self.websocket = None
560
+ if self.__listen_for_responses_task is not None:
561
+ self.__listen_for_responses_task.cancel()
562
+ self.__listen_for_responses_task = None
563
+ self.sessions.clear()
536
564
 
537
- async def get_response(self, action: WsAction, payload: dict) -> WsResponse:
565
+ async def get_response(self, action: WsAction, payload: dict) -> dict:
538
566
  """Send a request and wait for response."""
539
567
  if self.websocket is None:
540
568
  await self.connect()
541
569
 
542
- request = WsRequest(action=action, payload=payload)
570
+ session_id = uuid4().hex
571
+ request = WsRequest(session_id=session_id, action=action, payload=payload)
572
+ q = await self.get_or_update_session(session_id)
543
573
  await self.websocket.send(json.dumps(request.model_dump()))
544
574
 
545
- response_data = await self.websocket.recv()
546
- response = WsResponse(**json.loads(response_data))
547
-
575
+ response: WsResponse = await q.get()
576
+ self.sessions.pop(session_id)
548
577
  if response.status == WsStatus.ERROR:
549
- raise Exception(f"WebSocket request failed: {response.error}")
550
-
551
- return response
578
+ raise Exception(f"WebSocket failed: {response.error}")
579
+ elif response.status == WsStatus.SUCCESS:
580
+ return response.data
581
+ else:
582
+ raise Exception(f"WebSocket failed with unknown status: {response}")
552
583
 
553
- async def get_response_stream(self, action: WsAction, payload: dict) -> AsyncIterator[WsResponse]:
584
+ async def get_response_stream(self, action: WsAction, payload: dict) -> AsyncIterator[dict]:
554
585
  """Send a request and stream responses."""
555
586
  if self.websocket is None:
556
587
  await self.connect()
557
588
 
558
- request = WsRequest(action=action, payload=payload)
589
+ session_id = uuid4().hex
590
+ request = WsRequest(session_id=session_id, action=action, payload=payload)
591
+ q = await self.get_or_update_session(session_id)
559
592
  await self.websocket.send(json.dumps(request.model_dump()))
560
593
 
561
594
  while True:
562
- response_data = await self.websocket.recv()
563
- response = WsResponse(**json.loads(response_data))
564
-
565
- if response.status == WsStatus.ERROR:
566
- raise Exception(f"WebSocket stream failed: {response.error}")
567
-
568
- yield response
595
+ response: WsResponse = await q.get()
569
596
 
570
- if response.status == WsStatus.SUCCESS:
597
+ if response.status == WsStatus.STREAMING:
598
+ yield response.data
599
+ elif response.status == WsStatus.SUCCESS:
600
+ self.sessions.pop(session_id)
571
601
  break
602
+ elif response.status == WsStatus.ERROR:
603
+ self.sessions.pop(session_id)
604
+ raise Exception(f"WebSocket failed: {response.error}")
605
+ else:
606
+ self.sessions.pop(session_id)
607
+ raise Exception(f"WebSocket failed with unknown status: {response}")
572
608
 
573
609
  # Implement abstract methods from BaseLivellmClient
574
610
 
@@ -578,25 +614,25 @@ class LivellmWsClient(BaseLivellmClient):
578
614
  WsAction.AGENT_RUN,
579
615
  request.model_dump()
580
616
  )
581
- return AgentResponse(**response.data)
617
+ return AgentResponse(**response)
582
618
 
583
619
  async def handle_agent_run_stream(self, request: Union[AgentRequest, AgentFallbackRequest]) -> AsyncIterator[AgentResponse]:
584
620
  """Handle streaming agent run via WebSocket."""
585
621
  async for response in self.get_response_stream(WsAction.AGENT_RUN_STREAM, request.model_dump()):
586
- yield AgentResponse(**response.data)
622
+ yield AgentResponse(**response)
587
623
 
588
- async def handle_speak(self, request: Union[SpeakRequest, AudioFallbackRequest]) -> EncodedSpeakResponse:
624
+ async def handle_speak(self, request: Union[SpeakRequest, AudioFallbackRequest]) -> bytes:
589
625
  """Handle speak request via WebSocket."""
590
626
  response = await self.get_response(
591
627
  WsAction.AUDIO_SPEAK,
592
628
  request.model_dump()
593
629
  )
594
- return EncodedSpeakResponse(**response.data)
630
+ return EncodedSpeakResponse(**response).audio
595
631
 
596
- async def handle_speak_stream(self, request: Union[SpeakRequest, AudioFallbackRequest]) -> AsyncIterator[EncodedSpeakResponse]:
632
+ async def handle_speak_stream(self, request: Union[SpeakRequest, AudioFallbackRequest]) -> AsyncIterator[bytes]:
597
633
  """Handle streaming speak request via WebSocket."""
598
634
  async for response in self.get_response_stream(WsAction.AUDIO_SPEAK_STREAM, request.model_dump()):
599
- yield EncodedSpeakResponse(**response.data)
635
+ yield EncodedSpeakResponse(**response).audio
600
636
 
601
637
  async def handle_transcribe(self, request: Union[TranscribeRequest, TranscribeFallbackRequest]) -> TranscribeResponse:
602
638
  """Handle transcribe request via WebSocket."""
@@ -604,10 +640,9 @@ class LivellmWsClient(BaseLivellmClient):
604
640
  WsAction.AUDIO_TRANSCRIBE,
605
641
  request.model_dump()
606
642
  )
607
- return TranscribeResponse(**response.data)
643
+ return TranscribeResponse(**response)
608
644
 
609
645
  # Context manager support
610
-
611
646
  async def __aenter__(self):
612
647
  await self.connect()
613
648
  return self
@@ -1,5 +1,6 @@
1
1
  from pydantic import BaseModel, Field, field_validator
2
2
  from livellm.models.audio.speak import SpeakMimeType
3
+ from typing import Optional
3
4
  import base64
4
5
 
5
6
  class TranscriptionInitWsRequest(BaseModel):
@@ -10,6 +11,9 @@ class TranscriptionInitWsRequest(BaseModel):
10
11
  input_audio_format: SpeakMimeType = Field(default=SpeakMimeType.PCM, description="The input audio format (pcm, ulaw, alaw)")
11
12
  gen_config: dict = Field(default={}, description="The generation configuration")
12
13
 
14
+ class TranscriptionInitWsResponse(BaseModel):
15
+ success: bool = Field(..., description="Whether the initialization was successful")
16
+ error: Optional[str] = Field(default=None, description="The error message if the initialization was not successful")
13
17
 
14
18
  class TranscriptionAudioChunkWsRequest(BaseModel):
15
19
  audio: str = Field(..., description="The audio (base64 encoded)")
@@ -29,4 +33,3 @@ class TranscriptionAudioChunkWsRequest(BaseModel):
29
33
 
30
34
  class TranscriptionWsResponse(BaseModel):
31
35
  transcription: str = Field(..., description="The transcription")
32
- is_end: bool = Field(..., description="Whether the response is the end of the transcription")
@@ -8,7 +8,6 @@ class WsAction(str, Enum):
8
8
  AUDIO_SPEAK = "audio_speak"
9
9
  AUDIO_SPEAK_STREAM = "audio_speak_stream"
10
10
  AUDIO_TRANSCRIBE = "audio_transcribe"
11
- TRANSCRIPTION_SESSION = "transcription_session"
12
11
 
13
12
 
14
13
  class WsStatus(str, Enum):
@@ -17,11 +16,13 @@ class WsStatus(str, Enum):
17
16
  ERROR = "error"
18
17
 
19
18
  class WsRequest(BaseModel):
19
+ session_id: str = Field(..., description="The session ID")
20
20
  action: WsAction = Field(..., description="The action to perform")
21
21
  payload: Union[dict, BaseModel] = Field(..., description="The payload for the action")
22
22
 
23
23
 
24
24
  class WsResponse(BaseModel):
25
+ session_id: str = Field(..., description="The session ID")
25
26
  status: WsStatus = Field(..., description="The status of the response")
26
27
  action: WsAction = Field(..., description="The action that was performed")
27
28
  data: Union[dict, BaseModel] = Field(..., description="The data for the response")
@@ -1,8 +1,8 @@
1
1
  from livellm.models.transcription import (
2
- TranscriptionInitWsRequest,
2
+ TranscriptionInitWsRequest,
3
+ TranscriptionInitWsResponse,
3
4
  TranscriptionAudioChunkWsRequest,
4
5
  TranscriptionWsResponse)
5
- from livellm.models.ws import WsResponse, WsStatus
6
6
  from typing import Optional, AsyncIterator
7
7
  import websockets
8
8
  import asyncio
@@ -68,24 +68,25 @@ class TranscriptionWsClient:
68
68
  async with TranscriptionWsClient(url) as client:
69
69
  async for response in client.start_session(init_request, audio_source()):
70
70
  print(response.transcription)
71
- if response.is_end:
72
- break
73
71
  ```
74
72
  """
75
- # Send initialization request
73
+ # Send initialization request as JSON
76
74
  await self.websocket.send(request.model_dump_json())
77
75
 
78
76
  # Wait for initialization response
79
77
  response_data = await self.websocket.recv()
80
- response = WsResponse(**json.loads(response_data))
81
- if response.status == WsStatus.ERROR:
82
- raise Exception(f"Failed to start transcription session: {response.error}")
78
+ init_response = TranscriptionInitWsResponse(**json.loads(response_data))
79
+ if not init_response.success:
80
+ raise Exception(f"Failed to start transcription session: {init_response.error}")
83
81
 
84
82
  # Start sending audio chunks in background
85
83
  async def send_chunks():
86
84
  try:
87
85
  async for chunk in source:
88
86
  await self.websocket.send(chunk.model_dump_json())
87
+ except websockets.ConnectionClosed:
88
+ # Connection closed, stop sending
89
+ pass
89
90
  except Exception as e:
90
91
  # If there's an error sending chunks, close the websocket
91
92
  print(f"Error sending chunks: {e}")
@@ -96,16 +97,14 @@ class TranscriptionWsClient:
96
97
 
97
98
  # Receive transcription responses
98
99
  try:
99
- while not send_task.done():
100
- response_data = await self.websocket.recv()
101
- transcription_response = TranscriptionWsResponse(**json.loads(response_data))
102
- yield transcription_response
103
-
104
- # Stop if we received the final transcription
105
- if transcription_response.is_end:
100
+ while True:
101
+ try:
102
+ response_data = await self.websocket.recv()
103
+ transcription_response = TranscriptionWsResponse(**json.loads(response_data))
104
+ yield transcription_response
105
+ except websockets.ConnectionClosed:
106
+ # Connection closed, stop receiving
106
107
  break
107
- except websockets.ConnectionClosed:
108
- pass
109
108
  finally:
110
109
  # Cancel the send task if still running
111
110
  if not send_task.done():
@@ -1,12 +1,13 @@
1
1
  [project]
2
2
  name = "livellm"
3
- version = "1.3.5"
3
+ version = "1.4.0"
4
4
  description = "Python client for the LiveLLM Server"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.10"
7
7
  dependencies = [
8
8
  "httpx>=0.27.0",
9
9
  "pydantic>=2.0.0",
10
+ "sounddevice>=0.5.3",
10
11
  "websockets>=15.0.1",
11
12
  ]
12
13
  authors = [
File without changes
File without changes
File without changes
File without changes
File without changes