dv-pipecat-ai 0.0.82.dev884__py3-none-any.whl → 0.0.85.dev5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,10 +2,11 @@
 
 import asyncio
 import json
+import random
 import time
 import uuid
 from dataclasses import dataclass
-from typing import Any, AsyncGenerator, Dict, Optional
+from typing import Any, AsyncGenerator, Dict, List, Optional
 from urllib.parse import urlencode
 
 import httpx
@@ -13,6 +14,8 @@ from loguru import logger
 from pydantic import BaseModel, Field
 
 from pipecat.frames.frames import (
+    EndFrame,
+    CancelFrame,
     Frame,
     LLMFullResponseEndFrame,
     LLMFullResponseStartFrame,
@@ -53,12 +56,13 @@ class VistaarLLMService(LLMService):
             source_lang: Source language code (e.g., 'mr' for Marathi, 'hi' for Hindi).
             target_lang: Target language code for responses.
             session_id: Session ID for maintaining conversation context.
-            extra: Additional model-specific parameters.
+            extra: Additional model-specific parameters
         """
 
         source_lang: Optional[str] = Field(default="mr")
         target_lang: Optional[str] = Field(default="mr")
         session_id: Optional[str] = Field(default=None)
+        pre_query_response_phrases: Optional[List[str]] = Field(default_factory=list)
         extra: Optional[Dict[str, Any]] = Field(default_factory=dict)
 
     def __init__(
@@ -68,7 +72,6 @@ class VistaarLLMService(LLMService):
         params: Optional[InputParams] = None,
         timeout: float = 30.0,
         interim_timeout: float = 5.0,
-        interim_message: str = "एक क्षण थांबा, मी बघतो. ",
         **kwargs,
     ):
         """Initialize Vistaar LLM service.
@@ -77,8 +80,7 @@
             base_url: The base URL for Vistaar API. Defaults to "https://vistaar.kenpath.ai/api".
             params: Input parameters for model configuration and behavior.
             timeout: Request timeout in seconds. Defaults to 30.0 seconds.
-            interim_timeout: Time in seconds before sending interim message. Defaults to 3.0 seconds.
-            interim_message: Message to send if API takes longer than interim_timeout. Defaults to "एक क्षण थांबा, मी बघतो. ".
+            interim_timeout: Time in seconds before sending interim message. Defaults to 5.0 seconds.
             **kwargs: Additional arguments passed to the parent LLMService.
         """
         super().__init__(**kwargs)
@@ -89,10 +91,10 @@
         self._source_lang = params.source_lang
         self._target_lang = params.target_lang
         self._session_id = params.session_id or str(uuid.uuid4())
+        self._pre_query_response_phrases = params.pre_query_response_phrases or []
         self._extra = params.extra if isinstance(params.extra, dict) else {}
         self._timeout = timeout
         self._interim_timeout = interim_timeout
-        self._interim_message = interim_message
 
         # Create an async HTTP client
         self._client = httpx.AsyncClient(timeout=httpx.Timeout(self._timeout), verify=False)
@@ -103,6 +105,8 @@
         self._partial_response = []  # Track what was actually sent before interruption
         self._interim_sent = False  # Track if interim message was sent
         self._interim_task = None  # Track interim message task
+        self._interim_completion_event = asyncio.Event()  # Track interim message completion
+        self._interim_in_progress = False  # Track if interim message is being spoken
 
         logger.info(
             f"Vistaar LLM initialized - Base URL: {self._base_url}, Session ID: {self._session_id}, Source Lang: {self._source_lang}, Target Lang: {self._target_lang}, Timeout: {self._timeout}s"
@@ -161,6 +165,10 @@
         # Set interruption flag
         self._is_interrupted = True
 
+        # Reset interim state on interruption
+        self._interim_in_progress = False
+        self._interim_completion_event.set()  # Unblock any waiting LLM responses
+
         # Cancel interim message task if active
         await self._cancel_interim_message_task(
             "Cancelled interim message task - handling interruption"
@@ -193,11 +201,28 @@
             if not self._is_interrupted and not self._interim_sent:
                 logger.info(f"Sending interim message after {self._interim_timeout}s timeout")
                 self._interim_sent = True
-                await self.push_frame(LLMTextFrame(text=self._interim_message))
+                self._interim_in_progress = True
+
+                # Use random selection from pre_query_response_phrases if available, otherwise fallback to default
+                if self._pre_query_response_phrases:
+                    message = random.choice(self._pre_query_response_phrases)
+                else:
+                    message = "एक क्षण थांबा, मी बघतो. "
+
+                await self.push_frame(LLMTextFrame(text=message))
+
+                # Wait for estimated TTS duration before marking as complete
+                estimated_tts_duration = max(2.0, len(message) * 0.08)  # ~80ms per character
+                logger.info(f"Waiting {estimated_tts_duration:.2f}s for interim TTS completion")
+                await asyncio.sleep(estimated_tts_duration)
         except asyncio.CancelledError:
             logger.debug("Interim message task cancelled")
         except Exception as e:
             logger.error(f"Error sending interim message: {e}")
+        finally:
+            # Signal that interim message handling is complete
+            self._interim_completion_event.set()
+            self._interim_in_progress = False
 
     async def _stream_response(self, query: str) -> AsyncGenerator[str, None]:
         """Stream response from Vistaar API using Server-Sent Events.
@@ -231,6 +256,8 @@
         self._is_interrupted = False
         self._partial_response = []
         self._interim_sent = False
+        self._interim_in_progress = False
+        self._interim_completion_event.clear()  # Reset the event for new request
 
         try:
             # Use httpx to handle SSE streaming
@@ -291,6 +318,7 @@
 
         # Start response
         await self.push_frame(LLMFullResponseStartFrame())
+        await self.push_frame(LLMFullResponseStartFrame(), FrameDirection.UPSTREAM)
         await self.start_processing_metrics()
         await self.start_ttfb_metrics()
 
@@ -307,6 +335,15 @@
                 if first_chunk:
                     await self.stop_ttfb_metrics()
                     first_chunk = False
+
+                    # Wait for interim message to complete if it was sent and is in progress
+                    if self._interim_sent:
+                        logger.debug(
+                            "Waiting for interim message completion before sending LLM response"
+                        )
+                        await self._interim_completion_event.wait()
+                        logger.debug("Interim message completed, proceeding with LLM response")
+
                     # Cancel interim message task since we got first response
                     await self._cancel_interim_message_task(
                         "Cancelled interim message task - got first response"
@@ -334,6 +371,7 @@
         )
         await self.stop_processing_metrics()
         await self.push_frame(LLMFullResponseEndFrame())
+        await self.push_frame(LLMFullResponseEndFrame(), FrameDirection.UPSTREAM)
 
     async def process_frame(self, frame: Frame, direction: FrameDirection):
         """Process frames for LLM completion requests.
@@ -1,95 +0,0 @@
-import base64
-import json
-from typing import Optional
-
-from pydantic import BaseModel
-
-from pipecat.audio.utils import create_default_resampler, pcm_to_ulaw, ulaw_to_pcm
-from pipecat.frames.frames import (
-    AudioRawFrame,
-    Frame,
-    InputAudioRawFrame,
-    InputDTMFFrame,
-    KeypadEntry,
-    StartFrame,
-    StartInterruptionFrame,
-    TransportMessageFrame,
-    TransportMessageUrgentFrame,
-)
-from pipecat.serializers.base_serializer import FrameSerializer, FrameSerializerType
-
-
-class GenesysFrameSerializer(FrameSerializer):
-    class InputParams(BaseModel):
-        genesys_sample_rate: int = 8000  # Default Genesys rate (8kHz)
-        sample_rate: Optional[int] = None  # Pipeline input rate
-
-    def __init__(self, session_id: str, params: InputParams = InputParams()):
-        self._session_id = session_id
-        self._params = params
-        self._genesys_sample_rate = self._params.genesys_sample_rate
-        self._sample_rate = 0  # Pipeline input rate
-        self._resampler = create_default_resampler()
-        self._seq = 1  # Sequence number for outgoing messages
-
-    @property
-    def type(self) -> FrameSerializerType:
-        return FrameSerializerType.TEXT
-
-    async def setup(self, frame: StartFrame):
-        self._sample_rate = self._params.sample_rate or frame.audio_in_sample_rate
-
-    async def serialize(self, frame: Frame) -> str | bytes | None:
-        if isinstance(frame, StartInterruptionFrame):
-            answer = {
-                "version": "2",
-                "type": "clearAudio",  # Or appropriate event for interruption
-                "seq": self._seq,
-                "id": self._session_id,
-            }
-            self._seq += 1
-            return json.dumps(answer)
-        elif isinstance(frame, AudioRawFrame):
-            data = frame.audio
-            # Convert PCM to 8kHz μ-law for Genesys
-            serialized_data = await pcm_to_ulaw(
-                data, frame.sample_rate, self._genesys_sample_rate, self._resampler
-            )
-            payload = base64.b64encode(serialized_data).decode("utf-8")
-            answer = {
-                "version": "2",
-                "type": "audio",
-                "seq": self._seq,
-                "id": self._session_id,
-                "media": {
-                    "payload": payload,
-                    "format": "PCMU",
-                    "rate": self._genesys_sample_rate,
-                },
-            }
-            self._seq += 1
-            return json.dumps(answer)
-        elif isinstance(frame, (TransportMessageFrame, TransportMessageUrgentFrame)):
-            return json.dumps(frame.message)
-
-    async def deserialize(self, data: str | bytes) -> Frame | None:
-        message = json.loads(data)
-        if message.get("type") == "audio":
-            payload_base64 = message["media"]["payload"]
-            payload = base64.b64decode(payload_base64)
-            # Convert Genesys 8kHz μ-law to PCM at pipeline input rate
-            deserialized_data = await ulaw_to_pcm(
-                payload, self._genesys_sample_rate, self._sample_rate, self._resampler
-            )
-            audio_frame = InputAudioRawFrame(
-                audio=deserialized_data, num_channels=1, sample_rate=self._sample_rate
-            )
-            return audio_frame
-        elif message.get("type") == "dtmf":
-            digit = message.get("dtmf", {}).get("digit")
-            try:
-                return InputDTMFFrame(KeypadEntry(digit))
-            except ValueError:
-                return None
-        else:
-            return None
@@ -1,45 +0,0 @@
-import asyncio
-import os
-
-from pipecat.frames.frames import TTSAudioRawFrame
-from pipecat.services.google.tts import GoogleTTSService
-
-
-async def test_chirp_tts():
-    # Get credentials from environment variable
-    credentials_path = (
-        "/Users/kalicharanvemuru/Documents/Code/pipecat/examples/ringg-chatbot/creds.json"
-    )
-
-    if not credentials_path or not os.path.exists(credentials_path):
-        raise ValueError(
-            "Please set GOOGLE_APPLICATION_CREDENTIALS environment variable to your service account key file"
-        )
-
-    # Initialize the TTS service with Chirp voice
-    tts = GoogleTTSService(
-        credentials_path=credentials_path,
-        voice_id="en-US-Chirp3-HD-Charon",  # Using Chirp3 HD Charon voice
-        sample_rate=24000,
-    )
-
-    # Test text
-    test_text = "Hello, this is a test of the Google TTS service with Chirp voice."
-
-    print(f"Testing TTS with text: {test_text}")
-
-    # Generate speech
-    try:
-        async for frame in tts.run_tts(test_text):
-            if isinstance(frame, TTSAudioRawFrame):
-                print(f"Received audio chunk of size: {len(frame.audio)} bytes")
-            else:
-                print(f"Received frame: {frame.__class__.__name__}")
-
-        print("TTS generation completed successfully!")
-    except Exception as e:
-        print(f"Error during TTS generation: {str(e)}")
-
-
-if __name__ == "__main__":
-    asyncio.run(test_chirp_tts())
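The deleted script above hard-codes a developer-local credentials path even though its own error message asks for `GOOGLE_APPLICATION_CREDENTIALS`. Below is a portable sketch of the same smoke test, assuming only the `GoogleTTSService` API already used in that file; reading the key path from the environment variable is the only change.

```python
import asyncio
import os

from pipecat.frames.frames import TTSAudioRawFrame
from pipecat.services.google.tts import GoogleTTSService


async def main():
    # Read the service account key path from the environment instead of
    # hard-coding a local path (assumes the same GoogleTTSService API as above).
    credentials_path = os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
    if not credentials_path or not os.path.exists(credentials_path):
        raise ValueError("Set GOOGLE_APPLICATION_CREDENTIALS to a service account key file")

    tts = GoogleTTSService(
        credentials_path=credentials_path,
        voice_id="en-US-Chirp3-HD-Charon",
        sample_rate=24000,
    )

    async for frame in tts.run_tts("Hello, this is a test of the Google TTS service with Chirp voice."):
        if isinstance(frame, TTSAudioRawFrame):
            print(f"Received audio chunk of size: {len(frame.audio)} bytes")


if __name__ == "__main__":
    asyncio.run(main())
```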